mirror of
https://github.com/fjosw/pyerrors.git
synced 2025-03-15 14:50:25 +01:00
feat: pandas DataFrame serialization and deserialization can now also
deal with gzipped json columns. Tests added.
This commit is contained in:
parent
a133030114
commit
cc9f47c686
2 changed files with 23 additions and 3 deletions
|
@ -22,7 +22,7 @@ def dump_df(df, fname, gz=True):
|
|||
If True, the output is a gzipped csv file. If False, the output is a csv file.
|
||||
"""
|
||||
|
||||
out = serialize_df(df)
|
||||
out = serialize_df(df, gz=False)
|
||||
|
||||
if not fname.endswith('.csv'):
|
||||
fname += '.csv'
|
||||
|
@ -62,21 +62,25 @@ def load_df(fname, auto_gamma=False, gz=True):
|
|||
warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
|
||||
re_import = pd.read_csv(fname)
|
||||
|
||||
return deserialize_df(re_import, auto_gamma)
|
||||
return deserialize_df(re_import, auto_gamma=auto_gamma)
|
||||
|
||||
|
||||
def serialize_df(df):
|
||||
def serialize_df(df, gz=False):
|
||||
"""Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame to be serilized.
|
||||
gz: bool
|
||||
gzip the json string represenation. Default False.
|
||||
"""
|
||||
out = df.copy()
|
||||
for column in out:
|
||||
if isinstance(out[column][0], (Obs, Corr)):
|
||||
out[column] = out[column].transform(lambda x: create_json_string(x, indent=0))
|
||||
if gz is True:
|
||||
out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8')))
|
||||
return out
|
||||
|
||||
|
||||
|
@ -90,8 +94,15 @@ def deserialize_df(df, auto_gamma=False):
|
|||
auto_gamma : bool
|
||||
If True applies the gamma_method to all imported Obs objects with the default parameters for
|
||||
the error analysis. Default False.
|
||||
|
||||
Notes:
|
||||
------
|
||||
In case any column of the DataFrame is gzipped it is gunzipped in the process.
|
||||
"""
|
||||
for column in df.select_dtypes(include="object"):
|
||||
if isinstance(df[column][0], bytes):
|
||||
if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
|
||||
df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
|
||||
if isinstance(df[column][0], str):
|
||||
if df[column][0][:20] == '{"program":"pyerrors':
|
||||
df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False))
|
||||
|
|
|
@ -42,3 +42,12 @@ def test_pe_export_default_import(tmp_path):
|
|||
pe.input.pandas.dump_df(df, (tmp_path / 'pe_df').as_posix(), gz=False)
|
||||
re_df = pd.read_csv((tmp_path / 'pe_df.csv').as_posix())
|
||||
assert np.all(df == re_df)
|
||||
|
||||
|
||||
def test_gz_serialization():
|
||||
my_obs = pe.pseudo_Obs(0.1, 0.01, "pandas DataFrame ensemble only for test purposes.")
|
||||
my_df = pd.DataFrame([{"Label": 1, "Obs": my_obs}])
|
||||
for gz in [False, True]:
|
||||
ser = pe.input.pandas.serialize_df(my_df, gz=gz)
|
||||
deser = pe.input.pandas.deserialize_df(ser)
|
||||
np.all(my_df == deser)
|
||||
|
|
Loading…
Add table
Reference in a new issue