taking care of cols with only None values (#184)

This commit is contained in:
Justus Kuhlmann 2023-05-22 13:37:46 +02:00 committed by GitHub
parent bbe74b438c
commit e97cc519a9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 13 deletions

View file

@ -171,26 +171,30 @@ def _deserialize_df(df, auto_gamma=False):
if isinstance(df[column][0], bytes): if isinstance(df[column][0], bytes):
if df[column][0].startswith(b"\x1f\x8b\x08\x00"): if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8')) df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
df = df.replace({r'^$': None}, regex=True)
i = 0 if not all([e is None for e in df[column]]):
while df[column][i] is None: df[column] = df[column].replace({r'^$': None}, regex=True)
i += 1 i = 0
if isinstance(df[column][i], str): while df[column][i] is None:
if '"program":' in df[column][i][:20]: i += 1
df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None) if isinstance(df[column][i], str):
if auto_gamma is True: if '"program":' in df[column][i][:20]:
if isinstance(df[column][i], list): df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
df[column].apply(lambda x: [o.gm() if o is not None else x for o in x]) if auto_gamma is True:
else: if isinstance(df[column][i], list):
df[column].apply(lambda x: x.gm() if x is not None else x) df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
else:
df[column].apply(lambda x: x.gm() if x is not None else x)
return df return df
def _need_to_serialize(col): def _need_to_serialize(col):
serialize = False serialize = False
i = 0 i = 0
while col[i] is None: while i < len(col) and col[i] is None:
i += 1 i += 1
if i == len(col):
return serialize
if isinstance(col[i], (Obs, Corr)): if isinstance(col[i], (Obs, Corr)):
serialize = True serialize = True
elif isinstance(col[i], list): elif isinstance(col[i], list):

View file

@ -173,6 +173,22 @@ def test_null_second_line_df_sql_export_import(tmp_path):
assert np.all(reconstructed_df.loc[2:] == my_df.loc[2:]) assert np.all(reconstructed_df.loc[2:] == my_df.loc[2:])
def test_null_col_df_gzsql_export_import(tmp_path):
    """Round-trip a frame with an all-None column through a ``gz=True`` SQL export.

    Four identical rows are written with ``to_sql(..., gz=True)``, read back
    with ``read_sql(..., auto_gamma=True)``, and every column of the result is
    compared against the frame that was written.
    """
    db_path = (tmp_path / 'test.db').as_posix()
    row = {"int": 1,
           "float": -0.01,
           "Noneval": None,
           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
    my_df = pd.DataFrame([row] * 4)

    pe.input.pandas.to_sql(my_df, 'test', db_path, gz=True)
    reconstructed_df = pe.input.pandas.read_sql('SELECT * FROM test', db_path, auto_gamma=True)

    for name in ("int", "float"):
        assert np.all(reconstructed_df[name] == my_df[name])
    # The column holding nothing but None must come back as None in every row.
    assert np.all([entry is None for entry in reconstructed_df["Noneval"]])
    for name in ("Obs1", "Obs2"):
        assert np.all(reconstructed_df[name] == my_df[name])
def test_df_Corr(tmp_path): def test_df_Corr(tmp_path):
my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")]) my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")])