pyerrors.input.pandas
import warnings
import gzip
import sqlite3
from contextlib import closing
import pandas as pd
from ..obs import Obs
from ..correlators import Corr
from .json import create_json_string, import_json_string
import numpy as np


def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Forwarded unchanged to `pandas.DataFrame.to_sql`.

    Returns
    -------
    None
    """
    se_df = _serialize_df(df, gz=gz)
    with closing(sqlite3.connect(db)) as con:
        se_df.to_sql(table_name, con=con, if_exists=if_exists, index=False, **kwargs)


def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Forwarded unchanged to `pandas.read_sql`.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the sqlite database.
    """
    with closing(sqlite3.connect(db)) as con:
        extract_df = pd.read_sql(sql, con=con, **kwargs)
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)


def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. '.csv' (and '.gz' if `gz` is True) is appended if missing.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.

    Returns
    -------
    None
    """
    for column in df:
        serialize = _need_to_serialize(df[column])
        if not serialize:
            if all(isinstance(entry, (int, np.integer, float, np.floating)) for entry in df[column]):
                if any(np.isnan(entry) for entry in df[column]):
                    # str() keeps the warning from raising TypeError for non-string column labels.
                    warnings.warn("nan value in column " + str(column) + " will be replaced by None", UserWarning)

    out = _serialize_df(df, gz=False)

    if not fname.endswith('.csv'):
        fname += '.csv'

    if gz is True:
        if not fname.endswith('.gz'):
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        out.to_csv(fname, index=False)


def load_df(fname, auto_gamma=False, gz=True):
    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.csv' is appended if the name ends in neither '.csv' nor '.gz'.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that data is gzipped ('.gz' is appended to the name if missing).
        If False, assumes a plain csv file.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the csv file.
    """
    if not fname.endswith('.csv') and not fname.endswith('.gz'):
        fname += '.csv'

    if gz is True:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname) as f:
            re_import = pd.read_csv(f, keep_default_na=False)
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        re_import = pd.read_csv(fname, keep_default_na=False)

    return _deserialize_df(re_import, auto_gamma=auto_gamma)


def _serialize_df(df, gz=False):
    """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification.

    Works on a copy; the input DataFrame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be serialized.
    gz: bool
        gzip the json string representation. Default False.

    Returns
    -------
    out : pandas.DataFrame
        Copy of `df` in which every column flagged by `_need_to_serialize` holds json strings
        (or gzip compressed bytes if `gz` is True).
    """
    out = df.copy()
    for column in out:
        serialize = _need_to_serialize(out[column])

        if serialize is True:
            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if x is not None else None)
            if gz is True:
                # None entries are stored as the compressed empty string.
                out[column] = out[column].transform(lambda x: gzip.compress((x if x is not None else '').encode('utf-8')))
    return out


def _deserialize_df(df, auto_gamma=False):
    """Deserializes all pyerrors json strings into Obs or Corr objects according to the pyerrors json specification.

    The DataFrame is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be deserialized.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.

    Returns
    -------
    df : pandas.DataFrame
        The input DataFrame with the json columns replaced by the imported objects.

    Notes:
    ------
    In case any column of the DataFrame is gzipped it is gunzipped in the process.
    """
    for column in df.select_dtypes(include="object"):
        # An empty column (e.g. a query without result rows) has nothing to convert.
        if len(df[column]) == 0:
            continue

        # Positional access: the index need not start at label 0.
        head = df[column].iloc[0]
        if isinstance(head, bytes) and head.startswith(b"\x1f\x8b\x08\x00"):
            df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if x is not None else None)

        if all(e is None for e in df[column]):
            continue

        # Empty strings stand for missing entries.
        df[column] = df[column].replace({r'^$': None}, regex=True)

        # The first non-missing entry decides whether the column holds pyerrors json.
        # It is None if the column consisted of empty strings only.
        first = next((e for e in df[column] if e is not None), None)
        if not isinstance(first, str) or '"program":' not in first[:20]:
            continue

        df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
        if auto_gamma is True:
            for entry in df[column]:
                # Missing rows are skipped, also in list valued columns.
                if entry is None:
                    continue
                if isinstance(entry, list):
                    for o in entry:
                        if o is not None:
                            o.gm()
                else:
                    entry.gm()
    return df


def _need_to_serialize(col):
    """Decide from the first non-None entry whether a column has to be serialized.

    Parameters
    ----------
    col : pandas.Series
        Column to be inspected.

    Returns
    -------
    serialize : bool
        True if the first entry that is not None is an Obs, a Corr or a list
        containing only Obs. False otherwise, in particular if there is no such entry.
    """
    # Iterating over the values is independent of the index labels of the column.
    first = next((entry for entry in col if entry is not None), None)
    if first is None:
        return False
    if isinstance(first, (Obs, Corr)):
        return True
    if isinstance(first, list):
        return all(isinstance(o, Obs) for o in first)
    return False
def
to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Store a DataFrame with Obs or Corr valued columns in a sqlite database.

    The DataFrame is first passed through `_serialize_df` and the result is
    written with `pandas.DataFrame.to_sql` (without the index).

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Forwarded unchanged to `pandas.DataFrame.to_sql`.

    Returns
    -------
    None
    """
    serialized = _serialize_df(df, gz=gz)
    connection = sqlite3.connect(db)
    # closing() guarantees that the connection is closed, also if the write fails.
    with closing(connection):
        serialized.to_sql(table_name, con=connection, if_exists=if_exists, index=False, **kwargs)
Write DataFrame including Obs or Corr valued columns to sqlite database.
Parameters
- df (pandas.DataFrame): Dataframe to be written to the database.
- table_name (str): Name of the table in the database.
- db (str): Path to the sqlite database.
- if_exists (str): How to behave if table already exists. Options 'fail', 'replace', 'append'.
- gz (bool): If True the json strings are gzipped.
Returns
- None
def
read_sql(sql, db, auto_gamma=False, **kwargs):
def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Run an SQL query on a sqlite database and return the result as DataFrame.

    The raw query result is passed through `_deserialize_df` before it is returned.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Forwarded unchanged to `pandas.read_sql`.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the sqlite database.
    """
    connection = sqlite3.connect(db)
    # closing() guarantees that the connection is closed, also if the query fails.
    with closing(connection):
        raw_frame = pd.read_sql(sql, con=connection, **kwargs)
    return _deserialize_df(raw_frame, auto_gamma=auto_gamma)
Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.
Parameters
- sql (str): SQL query to be executed.
- db (str): Path to the sqlite database.
- auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
Returns
- data (pandas.DataFrame): Dataframe with the content of the sqlite database.
def
dump_df(df, fname, gz=True):
def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. '.csv' (and '.gz' if `gz` is True) is appended if missing.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.

    Returns
    -------
    None

    Warns
    -----
    UserWarning
        If a purely numeric column that is not serialized contains nan.
    """
    for column in df:
        serialize = _need_to_serialize(df[column])
        if not serialize:
            if all(isinstance(entry, (int, np.integer, float, np.floating)) for entry in df[column]):
                if any(np.isnan(entry) for entry in df[column]):
                    # str() keeps the warning from raising TypeError for non-string column labels.
                    warnings.warn("nan value in column " + str(column) + " will be replaced by None", UserWarning)

    # The json strings stay uncompressed; compression is applied to the whole file below.
    out = _serialize_df(df, gz=False)

    # NOTE(review): a name that already ends in '.csv.gz' still gets '.csv' (and '.gz') appended.
    if not fname.endswith('.csv'):
        fname += '.csv'

    if gz is True:
        if not fname.endswith('.gz'):
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        out.to_csv(fname, index=False)
Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.
Before making use of pandas to_csv functionality Obs objects are serialized via the standardized json format of pyerrors.
Parameters
- df (pandas.DataFrame): Dataframe to be dumped to a file.
- fname (str): Filename of the output file.
- gz (bool): If True, the output is a gzipped csv file. If False, the output is a csv file.
Returns
- None
def
load_df(fname, auto_gamma=False, gz=True):
def load_df(fname, auto_gamma=False, gz=True):
    """Read a DataFrame from a csv.(gz) file in which Obs objects are stored as json strings.

    The csv content is read with `keep_default_na=False` and then passed through
    `_deserialize_df`.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.csv' is appended if the name ends in neither '.csv' nor '.gz'.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, the file is opened with gzip ('.gz' is appended to the name if missing).
        If False, the file is read as a plain csv file.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the csv file.
    """
    if not fname.endswith(('.csv', '.gz')):
        fname += '.csv'

    if gz is not True:
        # Plain csv requested: only warn if the name looks like a gzipped file.
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        frame = pd.read_csv(fname, keep_default_na=False)
        return _deserialize_df(frame, auto_gamma=auto_gamma)

    if not fname.endswith('.gz'):
        fname += '.gz'
    with gzip.open(fname) as handle:
        frame = pd.read_csv(handle, keep_default_na=False)
    return _deserialize_df(frame, auto_gamma=auto_gamma)
Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.
Parameters
- fname (str): Filename of the input file.
- auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
- gz (bool): If True, assumes that data is gzipped. If False, assumes a plain (uncompressed) csv file.
Returns
- data (pandas.DataFrame): Dataframe with the content of the csv file.