pyerrors.input.pandas

  1import warnings
  2import gzip
  3import sqlite3
  4import pandas as pd
  5from ..obs import Obs
  6from ..correlators import Corr
  7from .json import create_json_string, import_json_string
  8import numpy as np
  9
 10
 11def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
 12    """Write DataFrame including Obs or Corr valued columns to sqlite database.
 13
 14    Parameters
 15    ----------
 16    df : pandas.DataFrame
 17        Dataframe to be written to the database.
 18    table_name : str
 19        Name of the table in the database.
 20    db : str
 21        Path to the sqlite database.
 22    if exists : str
 23        How to behave if table already exists. Options 'fail', 'replace', 'append'.
 24    gz : bool
 25        If True the json strings are gzipped.
 26
 27    Returns
 28    -------
 29    None
 30    """
 31    se_df = _serialize_df(df, gz=gz)
 32    con = sqlite3.connect(db)
 33    se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
 34    con.close()
 35
 36
 37def read_sql(sql, db, auto_gamma=False, **kwargs):
 38    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.
 39
 40    Parameters
 41    ----------
 42    sql : str
 43        SQL query to be executed.
 44    db : str
 45        Path to the sqlite database.
 46    auto_gamma : bool
 47        If True applies the gamma_method to all imported Obs objects with the default parameters for
 48        the error analysis. Default False.
 49
 50    Returns
 51    -------
 52    data : pandas.DataFrame
 53        Dataframe with the content of the sqlite database.
 54    """
 55    con = sqlite3.connect(db)
 56    extract_df = pd.read_sql(sql, con, **kwargs)
 57    con.close()
 58    return _deserialize_df(extract_df, auto_gamma=auto_gamma)
 59
 60
 61def dump_df(df, fname, gz=True):
 62    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.
 63
 64    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
 65    json format of pyerrors.
 66
 67    Parameters
 68    ----------
 69    df : pandas.DataFrame
 70        Dataframe to be dumped to a file.
 71    fname : str
 72        Filename of the output file.
 73    gz : bool
 74        If True, the output is a gzipped csv file. If False, the output is a csv file.
 75
 76    Returns
 77    -------
 78    None
 79    """
 80    for column in df:
 81        serialize = _need_to_serialize(df[column])
 82        if not serialize:
 83            if all(isinstance(entry, (int, np.integer, float, np.floating)) for entry in df[column]):
 84                if any([np.isnan(entry) for entry in df[column]]):
 85                    warnings.warn("nan value in column " + column + " will be replaced by None", UserWarning)
 86
 87    out = _serialize_df(df, gz=False)
 88
 89    if not fname.endswith('.csv'):
 90        fname += '.csv'
 91
 92    if gz is True:
 93        if not fname.endswith('.gz'):
 94            fname += '.gz'
 95        out.to_csv(fname, index=False, compression='gzip')
 96    else:
 97        out.to_csv(fname, index=False)
 98
 99
def load_df(fname, auto_gamma=False, gz=True):
    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

    Parameters
    ----------
    fname : str
        Filename of the input file. The suffix '.csv' is appended if the name ends in neither
        '.csv' nor '.gz'; '.gz' is appended in addition if gz is True and it is missing.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that the csv file is gzipped. If False, assumes an uncompressed csv file.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the csv file.
    """
    if not fname.endswith(('.csv', '.gz')):
        fname += '.csv'

    if gz is not True:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        frame = pd.read_csv(fname, keep_default_na=False)
        return _deserialize_df(frame, auto_gamma=auto_gamma)

    if not fname.endswith('.gz'):
        fname += '.gz'
    with gzip.open(fname) as handle:
        frame = pd.read_csv(handle, keep_default_na=False)
    return _deserialize_df(frame, auto_gamma=auto_gamma)
132
133
def _serialize_df(df, gz=False):
    """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification.

    The input DataFrame is not modified; a copy with the serialized columns is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be serialized.
    gz : bool
        gzip the json string representation. Default False.

    Returns
    -------
    out : pandas.DataFrame
        Copy of df in which the Obs or Corr valued columns are replaced by json strings
        (or gzipped json strings if gz is True).
    """
    def _to_json(entry):
        # None entries are passed through untouched.
        return None if entry is None else create_json_string(entry, indent=0)

    def _to_gzip(text):
        # None entries are stored as the compressed empty string.
        payload = '' if text is None else text
        return gzip.compress(payload.encode('utf-8'))

    result = df.copy()
    for name in result:
        if _need_to_serialize(result[name]) is not True:
            continue
        result[name] = result[name].transform(_to_json)
        if gz is True:
            result[name] = result[name].transform(_to_gzip)
    return result
153
154
155def _deserialize_df(df, auto_gamma=False):
156    """Deserializes all pyerrors json strings into Obs or Corr objects according to the pyerrors json specification.
157
158    Parameters
159    ----------
160    df : pandas.DataFrame
161        DataFrame to be deserilized.
162    auto_gamma : bool
163        If True applies the gamma_method to all imported Obs objects with the default parameters for
164        the error analysis. Default False.
165
166    Notes:
167    ------
168    In case any column of the DataFrame is gzipped it is gunzipped in the process.
169    """
170    for column in df.select_dtypes(include="object"):
171        if isinstance(df[column][0], bytes):
172            if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
173                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
174
175        if not all([e is None for e in df[column]]):
176            df[column] = df[column].replace({r'^$': None}, regex=True)
177            i = 0
178            while df[column][i] is None:
179                i += 1
180            if isinstance(df[column][i], str):
181                if '"program":' in df[column][i][:20]:
182                    df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
183                    if auto_gamma is True:
184                        if isinstance(df[column][i], list):
185                            df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
186                        else:
187                            df[column].apply(lambda x: x.gm() if x is not None else x)
188    return df
189
190
191def _need_to_serialize(col):
192    serialize = False
193    i = 0
194    while i < len(col) and col[i] is None:
195        i += 1
196    if i == len(col):
197        return serialize
198    if isinstance(col[i], (Obs, Corr)):
199        serialize = True
200    elif isinstance(col[i], list):
201        if all(isinstance(o, Obs) for o in col[i]):
202            serialize = True
203    return serialize
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Additional keyword arguments passed on to pandas.DataFrame.to_sql.

    Returns
    -------
    None
    """
    se_df = _serialize_df(df, gz=gz)
    con = sqlite3.connect(db)
    try:
        se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
    finally:
        # Close the connection even if the write fails (e.g. the table already
        # exists and if_exists='fail'), so the database handle is not leaked.
        con.close()

Write DataFrame including Obs or Corr valued columns to sqlite database.

Parameters
  • df (pandas.DataFrame): Dataframe to be written to the database.
  • table_name (str): Name of the table in the database.
  • db (str): Path to the sqlite database.
  • if_exists (str): How to behave if table already exists. Options 'fail', 'replace', 'append'.
  • gz (bool): If True the json strings are gzipped.
Returns
  • None
def read_sql(sql, db, auto_gamma=False, **kwargs):
def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Additional keyword arguments passed on to pandas.read_sql.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the sqlite database.
    """
    con = sqlite3.connect(db)
    try:
        extract_df = pd.read_sql(sql, con, **kwargs)
    finally:
        # Close the connection even if the query fails, so the database
        # handle is not leaked.
        con.close()
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)

Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

Parameters
  • sql (str): SQL query to be executed.
  • db (str): Path to the sqlite database.
  • auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
Returns
  • data (pandas.DataFrame): Dataframe with the content of the sqlite database.
def dump_df(df, fname, gz=True):
def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. The suffix '.csv' (and '.gz' if gz is True) is appended
        if it is missing; a name that already ends in '.csv.gz' is used unchanged when gz is True.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.

    Returns
    -------
    None
    """
    for column in df:
        if _need_to_serialize(df[column]):
            continue
        entries = df[column]
        if all(isinstance(entry, (int, np.integer, float, np.floating)) for entry in entries):
            if any(np.isnan(entry) for entry in entries):
                # str() keeps the warning working for non-string column labels.
                warnings.warn("nan value in column " + str(column) + " will be replaced by None", UserWarning)

    # The csv file as a whole is compressed below, so the individual json
    # strings are left uncompressed here.
    out = _serialize_df(df, gz=False)

    if gz is True:
        # A name that already carries the complete '.csv.gz' suffix is kept,
        # so that load_df can be called with the same name.
        if not fname.endswith('.csv.gz'):
            if not fname.endswith('.csv'):
                fname += '.csv'
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        if not fname.endswith('.csv'):
            fname += '.csv'
        out.to_csv(fname, index=False)

Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

Before making use of pandas to_csv functionality Obs objects are serialized via the standardized json format of pyerrors.

Parameters
  • df (pandas.DataFrame): Dataframe to be dumped to a file.
  • fname (str): Filename of the output file.
  • gz (bool): If True, the output is a gzipped csv file. If False, the output is a csv file.
Returns
  • None
def load_df(fname, auto_gamma=False, gz=True):
def load_df(fname, auto_gamma=False, gz=True):
    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

    Parameters
    ----------
    fname : str
        Filename of the input file. The suffix '.csv' is appended if the name ends in neither
        '.csv' nor '.gz'; '.gz' is appended in addition if gz is True and it is missing.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that the csv file is gzipped. If False, assumes an uncompressed csv file.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the csv file.
    """
    if not fname.endswith(('.csv', '.gz')):
        fname += '.csv'

    if gz is not True:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        frame = pd.read_csv(fname, keep_default_na=False)
        return _deserialize_df(frame, auto_gamma=auto_gamma)

    if not fname.endswith('.gz'):
        fname += '.gz'
    with gzip.open(fname) as handle:
        frame = pd.read_csv(handle, keep_default_na=False)
    return _deserialize_df(frame, auto_gamma=auto_gamma)

Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

Parameters
  • fname (str): Filename of the input file.
  • auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
  • gz (bool): If True, assumes that the csv file is gzipped. If False, assumes an uncompressed csv file.
Returns
  • data (pandas.DataFrame): Dataframe with the content of the csv file.