pyerrors.input.pandas
import warnings
import gzip
import sqlite3
import pandas as pd
from ..obs import Obs
from ..correlators import Corr
from .json import create_json_string, import_json_string
import numpy as np


def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Forwarded to `pandas.DataFrame.to_sql`.

    Returns
    -------
    None
    """
    se_df = _serialize_df(df, gz=gz)
    con = sqlite3.connect(db)
    try:
        se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
    finally:
        # Close the connection even if the write fails (e.g. table already exists).
        con.close()


def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Forwarded to `pandas.read_sql`.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the sqlite database.
    """
    con = sqlite3.connect(db)
    try:
        extract_df = pd.read_sql(sql, con, **kwargs)
    finally:
        # Close the connection even if the query fails.
        con.close()
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)


def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. Missing '.csv' / '.gz' suffixes are appended.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.

    Returns
    -------
    None
    """
    for column in df:
        serialize = _need_to_serialize(df[column])
        if not serialize:
            if all(isinstance(entry, (int, np.integer, float, np.floating)) for entry in df[column]):
                if any([np.isnan(entry) for entry in df[column]]):
                    # str() keeps the warning working for non-string column labels.
                    warnings.warn("nan value in column " + str(column) + " will be replaced by None", UserWarning)

    out = _serialize_df(df, gz=False)

    if gz is True:
        # A name that already ends in '.gz' is used as is, mirroring load_df, so that
        # 'name.csv.gz' is not turned into 'name.csv.gz.csv.gz'.
        if not fname.endswith('.gz'):
            if not fname.endswith('.csv'):
                fname += '.csv'
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        if not fname.endswith('.csv'):
            fname += '.csv'
        out.to_csv(fname, index=False)


def load_df(fname, auto_gamma=False, gz=True):
    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes an uncompressed csv file.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the csv file.
    """
    if not fname.endswith('.csv') and not fname.endswith('.gz'):
        fname += '.csv'

    if gz is True:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname) as f:
            re_import = pd.read_csv(f, keep_default_na=False)
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        re_import = pd.read_csv(fname, keep_default_na=False)

    return _deserialize_df(re_import, auto_gamma=auto_gamma)


def _serialize_df(df, gz=False):
    """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be serialized.
    gz: bool
        gzip the json string representation. Default False.

    Returns
    -------
    out : pandas.DataFrame
        Copy of `df` in which the serializable columns are replaced by (gzipped) json strings.
    """
    out = df.copy()
    for column in out:
        serialize = _need_to_serialize(out[column])

        if serialize is True:
            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if x is not None else None)
            if gz is True:
                # None entries are stored as the compressed empty string.
                out[column] = out[column].transform(lambda x: gzip.compress((x if x is not None else '').encode('utf-8')))
    return out


def _deserialize_df(df, auto_gamma=False):
    """Deserializes all pyerrors json strings into Obs or Corr objects according to the pyerrors json specification.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be deserialized. The object valued columns are modified in place.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.

    Returns
    -------
    df : pandas.DataFrame
        The DataFrame that was passed in, with the json columns replaced by the imported objects.

    Notes
    -----
    In case any column of the DataFrame is gzipped it is gunzipped in the process.
    """
    # Nothing to inspect in a frame without rows (e.g. an empty query result).
    if len(df.index) == 0:
        return df

    for column in df.select_dtypes(include="object"):
        # Positional access: the index is not guaranteed to contain the label 0.
        first = df[column].iloc[0]
        if isinstance(first, bytes):
            # gzip magic number followed by the deflate method byte and empty flags.
            if first.startswith(b"\x1f\x8b\x08\x00"):
                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if x is not None else None)

        if not all([e is None for e in df[column]]):
            # Empty strings stand for missing entries.
            df[column] = df[column].replace({r'^$': None}, regex=True)
            n_rows = len(df[column])
            i = 0
            while i < n_rows and df[column].iloc[i] is None:
                i += 1
            if i == n_rows:
                # The column only contained empty strings.
                continue
            if isinstance(df[column].iloc[i], str):
                if '"program":' in df[column].iloc[i][:20]:
                    df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
                    if auto_gamma is True:
                        for entry in df[column]:
                            if entry is None:
                                continue
                            if isinstance(entry, list):
                                for o in entry:
                                    if o is not None:
                                        o.gm()
                            else:
                                entry.gm()
    return df


def _need_to_serialize(col):
    """Decide whether a column has to be serialized to json strings.

    The decision is based on the first entry that is not None: the column is serialized
    if that entry is an Obs, a Corr or a list containing only Obs.

    Parameters
    ----------
    col : pandas.Series
        Column to be inspected.

    Returns
    -------
    serialize : bool
        True if the column needs to be serialized, False otherwise (also if all entries are None).
    """
    # Iterate over the values so that the result does not depend on the index labels.
    for entry in col:
        if entry is None:
            continue
        if isinstance(entry, (Obs, Corr)):
            return True
        if isinstance(entry, list):
            return all(isinstance(o, Obs) for o in entry)
        return False
    return False
def
to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Forwarded to `pandas.DataFrame.to_sql`.

    Returns
    -------
    None
    """
    se_df = _serialize_df(df, gz=gz)
    con = sqlite3.connect(db)
    try:
        se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
    finally:
        # Close the connection even if the write fails (e.g. table already exists).
        con.close()
Write DataFrame including Obs or Corr valued columns to sqlite database.
Parameters
- df (pandas.DataFrame): Dataframe to be written to the database.
- table_name (str): Name of the table in the database.
- db (str): Path to the sqlite database.
- if_exists (str): How to behave if the table already exists. Options: 'fail', 'replace', 'append'.
- gz (bool): If True the json strings are gzipped.
Returns
- None
def
read_sql(sql, db, auto_gamma=False, **kwargs):
def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Forwarded to `pandas.read_sql`.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the sqlite database.
    """
    con = sqlite3.connect(db)
    try:
        extract_df = pd.read_sql(sql, con, **kwargs)
    finally:
        # Close the connection even if the query fails.
        con.close()
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)
Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.
Parameters
- sql (str): SQL query to be executed.
- db (str): Path to the sqlite database.
- auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
Returns
- data (pandas.DataFrame): Dataframe with the content of the sqlite database.
def
dump_df(df, fname, gz=True):
def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. Missing '.csv' / '.gz' suffixes are appended.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.

    Returns
    -------
    None
    """
    for column in df:
        serialize = _need_to_serialize(df[column])
        if not serialize:
            if all(isinstance(entry, (int, np.integer, float, np.floating)) for entry in df[column]):
                if any([np.isnan(entry) for entry in df[column]]):
                    # str() keeps the warning working for non-string column labels.
                    warnings.warn("nan value in column " + str(column) + " will be replaced by None", UserWarning)

    out = _serialize_df(df, gz=False)

    if gz is True:
        # A name that already ends in '.gz' is used as is, mirroring load_df, so that
        # 'name.csv.gz' is not turned into 'name.csv.gz.csv.gz'.
        if not fname.endswith('.gz'):
            if not fname.endswith('.csv'):
                fname += '.csv'
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        if not fname.endswith('.csv'):
            fname += '.csv'
        out.to_csv(fname, index=False)
Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.
Before making use of pandas to_csv functionality Obs objects are serialized via the standardized json format of pyerrors.
Parameters
- df (pandas.DataFrame): Dataframe to be dumped to a file.
- fname (str): Filename of the output file.
- gz (bool): If True, the output is a gzipped csv file. If False, the output is a csv file.
Returns
- None
def
load_df(fname, auto_gamma=False, gz=True):
def load_df(fname, auto_gamma=False, gz=True):
    """Read a csv.(gz) file written with json-serialized Obs columns back into a pandas DataFrame.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.csv' is appended if the name ends in neither '.csv' nor '.gz'.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes an uncompressed csv file.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the csv file.
    """
    if not fname.endswith(('.csv', '.gz')):
        fname = fname + '.csv'

    if gz is not True:
        # Uncompressed path: only warn about a suspicious suffix, then read the file directly.
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        frame = pd.read_csv(fname, keep_default_na=False)
        return _deserialize_df(frame, auto_gamma=auto_gamma)

    if not fname.endswith('.gz'):
        fname = fname + '.gz'
    with gzip.open(fname) as handle:
        frame = pd.read_csv(handle, keep_default_na=False)
    return _deserialize_df(frame, auto_gamma=auto_gamma)
Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.
Parameters
- fname (str): Filename of the input file.
- auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
- gz (bool): If True, assumes that the data is gzipped. If False, assumes an uncompressed csv file.
Returns
- data (pandas.DataFrame): Dataframe with the content of the csv file.