mirror of
https://github.com/fjosw/pyerrors.git
synced 2025-05-14 11:33:42 +02:00
Slightly better Typechecking when exporting to SQL (#174)
* correct type clause * add tests, changes in create_json_string * create json-string now gives back None * revert changes * fix panda sql export * add SQL test * fixed None type export for csv and sql.gz * move None parsing to json io * alter regex * revert changes * only replace None with empty str when necessary * fixed deserialize_df for python 3.7 * add more tests * fix case where gz was ignored * hand over gz explicitly * replace nan by None in non-Obs columns * moved warning to csv export, more tests * only values able to be nan are put in np.isnan() * added python float for warning
This commit is contained in:
parent
b75aa741a9
commit
a5b6f69160
3 changed files with 198 additions and 24 deletions
|
@ -2,12 +2,14 @@ import numpy as np
|
|||
import pandas as pd
|
||||
import pyerrors as pe
|
||||
import pytest
|
||||
import warnings
|
||||
|
||||
|
||||
def test_df_export_import(tmp_path):
|
||||
my_dict = {"int": 1,
|
||||
"float": -0.01,
|
||||
"Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
|
||||
"Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
|
||||
"float": -0.01,
|
||||
"Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
|
||||
"Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
|
||||
for gz in [True, False]:
|
||||
my_df = pd.DataFrame([my_dict] * 10)
|
||||
|
||||
|
@ -18,13 +20,166 @@ def test_df_export_import(tmp_path):
|
|||
pe.input.pandas.load_df((tmp_path / 'df_output.csv').as_posix(), gz=gz)
|
||||
|
||||
|
||||
def test_null_first_line_df_export_import(tmp_path):
    """Round-trip a DataFrame with None Obs entries in rows 0 and 2
    through dump_df/load_df, with and without gzip compression."""
    row = {"int": 1,
           "float": -0.01,
           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
    frame = pd.DataFrame([row] * 4)
    for null_idx in (0, 2):
        frame.loc[null_idx, "Obs1"] = None
    target = (tmp_path / 'df_output').as_posix()
    for gz in (True, False):
        pe.input.pandas.dump_df(frame, target, gz=gz)
        restored = pe.input.pandas.load_df(target, auto_gamma=True, gz=gz)
        # None entries must survive the round trip; untouched rows match exactly.
        assert restored.loc[0, "Obs1"] is None
        assert restored.loc[2, "Obs1"] is None
        assert np.all(restored.loc[1] == frame.loc[1])
        assert np.all(restored.loc[3] == frame.loc[3])
|
||||
|
||||
|
||||
def test_nan_df_export_import(tmp_path):
    """A nan in a non-Obs column should warn on export and round-trip
    as None (csv path), leaving all other columns intact."""
    my_dict = {"int": 1,
               "float": -0.01,
               "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
               "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
    my_df = pd.DataFrame([my_dict] * 4)
    my_df.loc[1, "int"] = np.nan

    for gz in [True, False]:
        # Previously the test called warnings.warn(...) itself inside
        # pytest.warns, which made the check vacuous (it passed no matter
        # what the library did). The warning must come from the export.
        # NOTE(review): assumes dump_df emits this warning ("moved warning
        # to csv export" per commit message) — confirm against the library.
        with pytest.warns(UserWarning, match="nan value in column int will be replaced by None"):
            pe.input.pandas.dump_df(my_df, (tmp_path / 'df_output').as_posix(), gz=gz)
        reconstructed_df = pe.input.pandas.load_df((tmp_path / 'df_output').as_posix(), auto_gamma=True, gz=gz)
        assert reconstructed_df.loc[1, "int"] is None
        assert np.all(reconstructed_df.loc[:, "float"] == my_df.loc[:, "float"])
        assert np.all(reconstructed_df.loc[:, "Obs1"] == my_df.loc[:, "Obs1"])
        assert np.all(reconstructed_df.loc[:, "Obs2"] == my_df.loc[:, "Obs2"])
|
||||
|
||||
|
||||
def test_null_second_line_df_export_import(tmp_path):
    """Round-trip a DataFrame whose second row holds a None Obs entry
    through dump_df/load_df, with and without gzip compression."""
    row = {"int": 1,
           "float": -0.01,
           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
    frame = pd.DataFrame([row] * 4)
    frame.loc[1, "Obs1"] = None
    target = (tmp_path / 'df_output').as_posix()
    for gz in (True, False):
        pe.input.pandas.dump_df(frame, target, gz=gz)
        restored = pe.input.pandas.load_df(target, auto_gamma=True, gz=gz)
        # The None entry must survive; all other rows match exactly.
        assert restored.loc[1, "Obs1"] is None
        assert np.all(restored.loc[0] == frame.loc[0])
        assert np.all(restored.loc[2:] == frame.loc[2:])
|
||||
|
||||
|
||||
def test_null_first_line_df_gzsql_export_import(tmp_path):
    """Round-trip a DataFrame with None Obs entries in rows 0 and 2
    through the gzip-compressed SQL export/import path."""
    row = {"int": 1,
           "float": -0.01,
           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}

    frame = pd.DataFrame([row] * 4)
    for null_idx in (0, 2):
        frame.loc[null_idx, "Obs1"] = None
    db_path = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', db_path, gz=True)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', db_path, auto_gamma=True)
    # None entries must survive the SQL round trip; other rows match exactly.
    assert restored.loc[0, "Obs1"] is None
    assert restored.loc[2, "Obs1"] is None
    assert np.all(restored.loc[1] == frame.loc[1])
    assert np.all(restored.loc[3] == frame.loc[3])
|
||||
|
||||
|
||||
def test_null_second_line_df_gzsql_export_import(tmp_path):
    """Round-trip a DataFrame whose second row holds a None Obs entry
    through the gzip-compressed SQL export/import path."""
    row = {"int": 1,
           "float": -0.01,
           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}

    frame = pd.DataFrame([row] * 4)
    frame.loc[1, "Obs1"] = None
    db_path = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', db_path, gz=True)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', db_path, auto_gamma=True)
    # The None entry must survive; all other rows match exactly.
    assert restored.loc[1, "Obs1"] is None
    assert np.all(restored.loc[0] == frame.loc[0])
    assert np.all(restored.loc[2:] == frame.loc[2:])
|
||||
|
||||
|
||||
def test_null_first_line_df_sql_export_import(tmp_path):
    """Round-trip a DataFrame with None Obs entries in rows 0 and 2
    through the uncompressed SQL export/import path."""
    row = {"int": 1,
           "float": -0.01,
           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}

    frame = pd.DataFrame([row] * 4)
    for null_idx in (0, 2):
        frame.loc[null_idx, "Obs1"] = None
    db_path = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', db_path, gz=False)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', db_path, auto_gamma=True)
    # None entries must survive the SQL round trip; other rows match exactly.
    assert restored.loc[0, "Obs1"] is None
    assert restored.loc[2, "Obs1"] is None
    assert np.all(restored.loc[1] == frame.loc[1])
    assert np.all(restored.loc[3] == frame.loc[3])
|
||||
|
||||
|
||||
def test_nan_sql_export_import(tmp_path):
    """A nan in a non-Obs column should warn on export and survive the
    uncompressed SQL round trip as nan, leaving other columns intact."""
    my_dict = {"int": 1,
               "float": -0.01,
               "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
               "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
    my_df = pd.DataFrame([my_dict] * 4)
    my_df.loc[1, "int"] = np.nan
    gz = False
    # Previously the test called warnings.warn(...) itself inside
    # pytest.warns, which made the check vacuous (it passed no matter
    # what the library did). The warning must come from the export.
    # NOTE(review): assumes to_sql emits this warning for the non-gz
    # path — confirm against the library before merging.
    with pytest.warns(UserWarning, match="nan value in column int will be replaced by None"):
        pe.input.pandas.to_sql(my_df, 'test', (tmp_path / 'test.db').as_posix(), gz=gz)
    reconstructed_df = pe.input.pandas.read_sql('SELECT * FROM test', (tmp_path / 'test.db').as_posix(), auto_gamma=True)
    assert np.isnan(reconstructed_df.loc[1, "int"])
    assert np.all(reconstructed_df.loc[:, "float"] == my_df.loc[:, "float"])
    assert np.all(reconstructed_df.loc[:, "Obs1"] == my_df.loc[:, "Obs1"])
    assert np.all(reconstructed_df.loc[:, "Obs2"] == my_df.loc[:, "Obs2"])
|
||||
|
||||
|
||||
def test_nan_gzsql_export_import(tmp_path):
    """A nan in a non-Obs column survives the gzip-compressed SQL round
    trip as nan; all other columns are reconstructed unchanged."""
    row = {"int": 1,
           "float": -0.01,
           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
    frame = pd.DataFrame([row] * 4)
    frame.loc[1, "int"] = np.nan
    db_path = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', db_path, gz=True)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', db_path, auto_gamma=True)
    assert np.isnan(restored.loc[1, "int"])
    for column in ("float", "Obs1", "Obs2"):
        assert np.all(restored.loc[:, column] == frame.loc[:, column])
|
||||
|
||||
|
||||
def test_null_second_line_df_sql_export_import(tmp_path):
    """Round-trip a DataFrame whose second row holds a None Obs entry
    through the uncompressed SQL export/import path."""
    row = {"int": 1,
           "float": -0.01,
           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}

    frame = pd.DataFrame([row] * 4)
    frame.loc[1, "Obs1"] = None
    db_path = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', db_path, gz=False)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', db_path, auto_gamma=True)
    # The None entry must survive; all other rows match exactly.
    assert restored.loc[1, "Obs1"] is None
    assert np.all(restored.loc[0] == frame.loc[0])
    assert np.all(restored.loc[2:] == frame.loc[2:])
|
||||
|
||||
|
||||
def test_df_Corr(tmp_path):
|
||||
|
||||
my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")])
|
||||
|
||||
my_dict = {"int": 1,
|
||||
"float": -0.01,
|
||||
"Corr": my_corr}
|
||||
"float": -0.01,
|
||||
"Corr": my_corr}
|
||||
my_df = pd.DataFrame([my_dict] * 5)
|
||||
|
||||
pe.input.pandas.dump_df(my_df, (tmp_path / 'df_output').as_posix())
|
||||
|
@ -76,8 +231,8 @@ def test_sql_if_exists_fail(tmp_path):
|
|||
|
||||
def test_Obs_list_sql(tmp_path):
|
||||
my_dict = {"int": 1,
|
||||
"Obs1": pe.pseudo_Obs(17, 11, "test_sql_if_exists_failnsemble"),
|
||||
"Obs_list": [[pe.pseudo_Obs(0.0, 0.1, "test_ensemble2"), pe.pseudo_Obs(3.2, 1.1, "test_ensemble2")]]}
|
||||
"Obs1": pe.pseudo_Obs(17, 11, "test_sql_if_exists_failnsemble"),
|
||||
"Obs_list": [[pe.pseudo_Obs(0.0, 0.1, "test_ensemble2"), pe.pseudo_Obs(3.2, 1.1, "test_ensemble2")]]}
|
||||
pe_df = pd.DataFrame(my_dict)
|
||||
my_db = (tmp_path / "test_db.sqlite").as_posix()
|
||||
pe.input.pandas.to_sql(pe_df, "My_table", my_db)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue