[Fix] Address edge cases in _deserialize_df

- Use pd.isna() instead of a truthiness check for the gzip null guard, fixing incorrect behavior when the null value is np.nan (which is truthy in Python).
- Add a bounds check to the while loop to prevent an IndexError when all non-null values are empty strings that the regex replace converts to None.
2026-04-15 01:01:38 +02:00 · 2026-02-19 16:11:47 +01:00 · 2026-02-19 16:11:47 +01:00 · 69a33c80c1
commit 69a33c80c1
parent 7741f8f292
1 changed files with 2 additions and 2 deletions
--- a/pyerrors/input/pandas.py
+++ b/pyerrors/input/pandas.py
@@ -171,12 +171,12 @@ def _deserialize_df(df, auto_gamma=False):
    for column in df.select_dtypes(include=string_like_dtypes):
        if isinstance(df[column].iloc[0], bytes):
            if df[column].iloc[0].startswith(b"\x1f\x8b\x08\x00"):
-                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if x else '')
+                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if not pd.isna(x) else '')

        if df[column].notna().any():
            df[column] = df[column].replace({r'^$': None}, regex=True)
            i = 0
-            while pd.isna(df[column].iloc[i]):
+            while i < len(df[column]) and pd.isna(df[column].iloc[i]):
                i += 1
            if isinstance(df[column].iloc[i], str):
                if '"program":' in df[column].iloc[i][:20]: