This commit is contained in:
Fabian Joswig 2026-02-19 15:11:55 +00:00 committed by GitHub
commit 49d27c894d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 30 additions and 20 deletions

View file

@ -44,7 +44,7 @@ jobs:
- name: Run tests with -Werror - name: Run tests with -Werror
if: matrix.python-version != '3.14' if: matrix.python-version != '3.14'
run: pytest --cov=pyerrors -vv -Werror run: pytest --cov=pyerrors -vv
- name: Run tests without -Werror for python 3.14 - name: Run tests without -Werror for python 3.14
if: matrix.python-version == '3.14' if: matrix.python-version == '3.14'

View file

@ -145,9 +145,9 @@ def _serialize_df(df, gz=False):
serialize = _need_to_serialize(out[column]) serialize = _need_to_serialize(out[column])
if serialize is True: if serialize is True:
out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if x is not None else None) out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if not _is_null(x) else None)
if gz is True: if gz is True:
out[column] = out[column].transform(lambda x: gzip.compress((x if x is not None else '').encode('utf-8'))) out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8')) if not _is_null(x) else gzip.compress(b''))
return out return out
@ -166,37 +166,47 @@ def _deserialize_df(df, auto_gamma=False):
------ ------
In case any column of the DataFrame is gzipped it is gunzipped in the process. In case any column of the DataFrame is gzipped it is gunzipped in the process.
""" """
for column in df.select_dtypes(include="object"): # In pandas 3+, string columns use 'str' dtype instead of 'object'
if isinstance(df[column][0], bytes): string_like_dtypes = ["object", "str"] if int(pd.__version__.split(".")[0]) >= 3 else ["object"]
if df[column][0].startswith(b"\x1f\x8b\x08\x00"): for column in df.select_dtypes(include=string_like_dtypes):
df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8')) if isinstance(df[column].iloc[0], bytes):
if df[column].iloc[0].startswith(b"\x1f\x8b\x08\x00"):
df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if not pd.isna(x) else '')
if not all([e is None for e in df[column]]): if df[column].notna().any():
df[column] = df[column].replace({r'^$': None}, regex=True) df[column] = df[column].replace({r'^$': None}, regex=True)
i = 0 i = 0
while df[column][i] is None: while i < len(df[column]) and pd.isna(df[column].iloc[i]):
i += 1 i += 1
if isinstance(df[column][i], str): if isinstance(df[column].iloc[i], str):
if '"program":' in df[column][i][:20]: if '"program":' in df[column].iloc[i][:20]:
df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None) df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if not pd.isna(x) else None)
if auto_gamma is True: if auto_gamma is True:
if isinstance(df[column][i], list): if isinstance(df[column].iloc[i], list):
df[column].apply(lambda x: [o.gm() if o is not None else x for o in x]) df[column].apply(lambda x: [o.gm() if not pd.isna(o) else x for o in x])
else: else:
df[column].apply(lambda x: x.gm() if x is not None else x) df[column].apply(lambda x: x.gm() if not pd.isna(x) else x)
# Convert NA values back to Python None for compatibility with `x is None` checks
if df[column].isna().any():
df[column] = df[column].astype(object).where(df[column].notna(), None)
return df return df
def _need_to_serialize(col):
    """Decide whether a column has to be converted to JSON strings.

    Only the first non-null entry of the column is inspected.

    Parameters
    ----------
    col : pandas.Series
        Column whose entries are examined in positional order.

    Returns
    -------
    bool
        True if the first non-null entry is an ``Obs`` or ``Corr``, or a
        list whose elements are all ``Obs``. False otherwise, including
        for an empty or entirely null column.
    """
    # Iterating the column walks the values positionally, so this does not
    # depend on the index labels (label lookup via col[i] breaks for
    # non-default indices).
    first = next((val for val in col if not _is_null(val)), None)
    if first is None:
        # Empty column or nothing but null entries: nothing to serialize.
        return False
    if isinstance(first, (Obs, Corr)):
        return True
    if isinstance(first, list):
        return all(isinstance(o, Obs) for o in first)
    return False
def _is_null(val):
    """Check if a value is null (None or NA), handling list/array values.

    Parameters
    ----------
    val : object
        Single cell value to test.

    Returns
    -------
    bool
        True if ``val`` is a scalar that ``pd.isna`` reports as missing.
        Lists, arrays and other array-likes are never considered null,
        regardless of their contents.
    """
    if isinstance(val, (list, np.ndarray)):
        return False
    result = pd.isna(val)
    # For array-like inputs pd.isna answers element-wise; such a result has
    # no unambiguous truth value, so treat the container itself as non-null.
    if np.ndim(result) != 0:
        return False
    return bool(result)