mirror of
				https://github.com/fjosw/pyerrors.git
				synced 2025-11-04 09:35:45 +01:00 
			
		
		
		
	taking care of cols with only None values (#184)
This commit is contained in:
		
					parent
					
						
							
								bbe74b438c
							
						
					
				
			
			
				commit
				
					
						e97cc519a9
					
				
			
		
					 2 changed files with 33 additions and 13 deletions
				
			
		| 
						 | 
					@ -171,26 +171,30 @@ def _deserialize_df(df, auto_gamma=False):
 | 
				
			||||||
        if isinstance(df[column][0], bytes):
 | 
					        if isinstance(df[column][0], bytes):
 | 
				
			||||||
            if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
 | 
					            if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
 | 
				
			||||||
                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
 | 
					                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
 | 
				
			||||||
        df = df.replace({r'^$': None}, regex=True)
 | 
					
 | 
				
			||||||
        i = 0
 | 
					        if not all([e is None for e in df[column]]):
 | 
				
			||||||
        while df[column][i] is None:
 | 
					            df[column] = df[column].replace({r'^$': None}, regex=True)
 | 
				
			||||||
            i += 1
 | 
					            i = 0
 | 
				
			||||||
        if isinstance(df[column][i], str):
 | 
					            while df[column][i] is None:
 | 
				
			||||||
            if '"program":' in df[column][i][:20]:
 | 
					                i += 1
 | 
				
			||||||
                df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
 | 
					            if isinstance(df[column][i], str):
 | 
				
			||||||
                if auto_gamma is True:
 | 
					                if '"program":' in df[column][i][:20]:
 | 
				
			||||||
                    if isinstance(df[column][i], list):
 | 
					                    df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
 | 
				
			||||||
                        df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
 | 
					                    if auto_gamma is True:
 | 
				
			||||||
                    else:
 | 
					                        if isinstance(df[column][i], list):
 | 
				
			||||||
                        df[column].apply(lambda x: x.gm() if x is not None else x)
 | 
					                            df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
 | 
				
			||||||
 | 
					                        else:
 | 
				
			||||||
 | 
					                            df[column].apply(lambda x: x.gm() if x is not None else x)
 | 
				
			||||||
    return df
 | 
					    return df
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _need_to_serialize(col):
 | 
					def _need_to_serialize(col):
 | 
				
			||||||
    serialize = False
 | 
					    serialize = False
 | 
				
			||||||
    i = 0
 | 
					    i = 0
 | 
				
			||||||
    while col[i] is None:
 | 
					    while i < len(col) and col[i] is None:
 | 
				
			||||||
        i += 1
 | 
					        i += 1
 | 
				
			||||||
 | 
					    if i == len(col):
 | 
				
			||||||
 | 
					        return serialize
 | 
				
			||||||
    if isinstance(col[i], (Obs, Corr)):
 | 
					    if isinstance(col[i], (Obs, Corr)):
 | 
				
			||||||
        serialize = True
 | 
					        serialize = True
 | 
				
			||||||
    elif isinstance(col[i], list):
 | 
					    elif isinstance(col[i], list):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -173,6 +173,22 @@ def test_null_second_line_df_sql_export_import(tmp_path):
 | 
				
			||||||
    assert np.all(reconstructed_df.loc[2:] == my_df.loc[2:])
 | 
					    assert np.all(reconstructed_df.loc[2:] == my_df.loc[2:])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_null_col_df_gzsql_export_import(tmp_path):
 | 
				
			||||||
 | 
					    my_dict = {"int": 1,
 | 
				
			||||||
 | 
					               "float": -0.01,
 | 
				
			||||||
 | 
					               "Noneval": None,
 | 
				
			||||||
 | 
					               "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
 | 
				
			||||||
 | 
					               "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
 | 
				
			||||||
 | 
					    my_df = pd.DataFrame([my_dict] * 4)
 | 
				
			||||||
 | 
					    pe.input.pandas.to_sql(my_df, 'test', (tmp_path / 'test.db').as_posix(), gz=True)
 | 
				
			||||||
 | 
					    reconstructed_df = pe.input.pandas.read_sql('SELECT * FROM test', (tmp_path / 'test.db').as_posix(), auto_gamma=True)
 | 
				
			||||||
 | 
					    assert np.all(reconstructed_df["int"] == my_df["int"])
 | 
				
			||||||
 | 
					    assert np.all(reconstructed_df["float"] == my_df["float"])
 | 
				
			||||||
 | 
					    assert np.all([e is None for e in reconstructed_df["Noneval"]])
 | 
				
			||||||
 | 
					    assert np.all(reconstructed_df["Obs1"] == my_df["Obs1"])
 | 
				
			||||||
 | 
					    assert np.all(reconstructed_df["Obs2"] == my_df["Obs2"])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_df_Corr(tmp_path):
 | 
					def test_df_Corr(tmp_path):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")])
 | 
					    my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")])
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue