mirror of
https://github.com/fjosw/pyerrors.git
synced 2025-03-15 14:50:25 +01:00
taking care of cols with only None values (#184)
This commit is contained in:
parent
bbe74b438c
commit
e97cc519a9
2 changed files with 33 additions and 13 deletions
|
@ -171,26 +171,30 @@ def _deserialize_df(df, auto_gamma=False):
|
||||||
if isinstance(df[column][0], bytes):
|
if isinstance(df[column][0], bytes):
|
||||||
if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
|
if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
|
||||||
df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
|
df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
|
||||||
df = df.replace({r'^$': None}, regex=True)
|
|
||||||
i = 0
|
if not all([e is None for e in df[column]]):
|
||||||
while df[column][i] is None:
|
df[column] = df[column].replace({r'^$': None}, regex=True)
|
||||||
i += 1
|
i = 0
|
||||||
if isinstance(df[column][i], str):
|
while df[column][i] is None:
|
||||||
if '"program":' in df[column][i][:20]:
|
i += 1
|
||||||
df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
|
if isinstance(df[column][i], str):
|
||||||
if auto_gamma is True:
|
if '"program":' in df[column][i][:20]:
|
||||||
if isinstance(df[column][i], list):
|
df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
|
||||||
df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
|
if auto_gamma is True:
|
||||||
else:
|
if isinstance(df[column][i], list):
|
||||||
df[column].apply(lambda x: x.gm() if x is not None else x)
|
df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
|
||||||
|
else:
|
||||||
|
df[column].apply(lambda x: x.gm() if x is not None else x)
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
def _need_to_serialize(col):
|
def _need_to_serialize(col):
|
||||||
serialize = False
|
serialize = False
|
||||||
i = 0
|
i = 0
|
||||||
while col[i] is None:
|
while i < len(col) and col[i] is None:
|
||||||
i += 1
|
i += 1
|
||||||
|
if i == len(col):
|
||||||
|
return serialize
|
||||||
if isinstance(col[i], (Obs, Corr)):
|
if isinstance(col[i], (Obs, Corr)):
|
||||||
serialize = True
|
serialize = True
|
||||||
elif isinstance(col[i], list):
|
elif isinstance(col[i], list):
|
||||||
|
|
|
@ -173,6 +173,22 @@ def test_null_second_line_df_sql_export_import(tmp_path):
|
||||||
assert np.all(reconstructed_df.loc[2:] == my_df.loc[2:])
|
assert np.all(reconstructed_df.loc[2:] == my_df.loc[2:])
|
||||||
|
|
||||||
|
|
||||||
|
def test_null_col_df_gzsql_export_import(tmp_path):
|
||||||
|
my_dict = {"int": 1,
|
||||||
|
"float": -0.01,
|
||||||
|
"Noneval": None,
|
||||||
|
"Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
|
||||||
|
"Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
|
||||||
|
my_df = pd.DataFrame([my_dict] * 4)
|
||||||
|
pe.input.pandas.to_sql(my_df, 'test', (tmp_path / 'test.db').as_posix(), gz=True)
|
||||||
|
reconstructed_df = pe.input.pandas.read_sql('SELECT * FROM test', (tmp_path / 'test.db').as_posix(), auto_gamma=True)
|
||||||
|
assert np.all(reconstructed_df["int"] == my_df["int"])
|
||||||
|
assert np.all(reconstructed_df["float"] == my_df["float"])
|
||||||
|
assert np.all([e is None for e in reconstructed_df["Noneval"]])
|
||||||
|
assert np.all(reconstructed_df["Obs1"] == my_df["Obs1"])
|
||||||
|
assert np.all(reconstructed_df["Obs2"] == my_df["Obs2"])
|
||||||
|
|
||||||
|
|
||||||
def test_df_Corr(tmp_path):
|
def test_df_Corr(tmp_path):
|
||||||
|
|
||||||
my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")])
|
my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")])
|
||||||
|
|
Loading…
Add table
Reference in a new issue