From 69a33c80c1976d6be364028d88e0917fa0566770 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 19 Feb 2026 16:11:47 +0100 Subject: [PATCH] [Fix] Address edge cases in _deserialize_df - Use pd.isna() instead of truthiness check for gzip null guard, fixing incorrect behavior when null is np.nan (which is truthy in Python) - Add bounds check to while loop to prevent IndexError when all non-null values are empty strings converted to None by regex replace --- pyerrors/input/pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 0230c4fb..54bf14f4 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -171,12 +171,12 @@ def _deserialize_df(df, auto_gamma=False): for column in df.select_dtypes(include=string_like_dtypes): if isinstance(df[column].iloc[0], bytes): if df[column].iloc[0].startswith(b"\x1f\x8b\x08\x00"): - df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if x else '') + df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if not pd.isna(x) else '') if df[column].notna().any(): df[column] = df[column].replace({r'^$': None}, regex=True) i = 0 - while pd.isna(df[column].iloc[i]): + while i < len(df[column]) and pd.isna(df[column].iloc[i]): i += 1 if isinstance(df[column].iloc[i], str): if '"program":' in df[column].iloc[i][:20]: