Mirror of https://github.com/fjosw/pyerrors.git, synced 2025-03-15 14:50:25 +01:00
Merge pull request #113 from fjosw/feature/pandas_df_support

Pandas DataFrame support

Commit f908b6328d: 4 changed files with 107 additions and 1 deletion
1  pyerrors/input/__init__.py

@@ -10,4 +10,5 @@ from . import hadrons
 from . import json
 from . import misc
 from . import openQCD
+from . import pandas
 from . import sfcf
75  pyerrors/input/pandas.py  Normal file
@@ -0,0 +1,75 @@
import warnings
import gzip
import pandas as pd
from ..obs import Obs
from ..correlators import Corr
from .json import create_json_string, import_json_string


def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.
    """

    out = df.copy()
    for column in out:
        if isinstance(out[column][0], (Obs, Corr)):
            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0))

    if not fname.endswith('.csv'):
        fname += '.csv'

    if gz is True:
        if not fname.endswith('.gz'):
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        out.to_csv(fname, index=False)

def load_df(fname, auto_gamma=False, gz=True):
    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes a plain csv file.
    """

    if not fname.endswith('.csv') and not fname.endswith('.gz'):
        fname += '.csv'

    if gz is True:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname) as f:
            re_import = pd.read_csv(f)
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        re_import = pd.read_csv(fname)

    for column in re_import.select_dtypes(include="object"):
        if isinstance(re_import[column][0], str):
            if re_import[column][0][:20] == '{"program":"pyerrors':
                re_import[column] = re_import[column].transform(lambda x: import_json_string(x, verbose=False))
                if auto_gamma is True:
                    re_import[column].apply(lambda x: x.gamma_method())

    return re_import
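
As a usage sketch of the new interface (not part of the diff): a DataFrame with an Obs valued column is dumped to a gzipped csv and read back, with auto_gamma=True running the default error analysis on import. The column names, observable values, ensemble labels and output path below are made up for illustration.

import pandas as pd
import pyerrors as pe

# Hypothetical data: one Obs valued column next to an ordinary numeric column.
df = pd.DataFrame({"beta": [3.4, 3.55],
                   "plaq": [pe.pseudo_Obs(0.5348, 0.0005, "ens_A"),
                            pe.pseudo_Obs(0.5416, 0.0004, "ens_B")]})

# Obs entries are serialized to pyerrors json strings before pandas writes the csv;
# with gz=True (the default) this produces plaquette_data.csv.gz.
pe.input.pandas.dump_df(df, "plaquette_data")

# On import the json strings are turned back into Obs objects and, because
# auto_gamma=True, gamma_method() is applied to each of them with default parameters.
df_reloaded = pe.input.pandas.load_df("plaquette_data", auto_gamma=True)
print(df_reloaded["plaq"][0])
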
2  setup.py
@@ -25,7 +25,7 @@ setup(name='pyerrors',
      license="MIT",
      packages=find_packages(),
      python_requires='>=3.6.0',
-     install_requires=['numpy>=1.16', 'autograd>=1.4', 'numdifftools', 'matplotlib>=3.3', 'scipy>=1', 'iminuit>=2', 'h5py>=3', 'lxml>=4', 'python-rapidjson>=1'],
+     install_requires=['numpy>=1.16', 'autograd>=1.4', 'numdifftools', 'matplotlib>=3.3', 'scipy>=1', 'iminuit>=2', 'h5py>=3', 'lxml>=4', 'python-rapidjson>=1', 'pandas>=1.1'],
      classifiers=[
          'Development Status :: 5 - Production/Stable',
          'Intended Audience :: Science/Research',
30  tests/pandas_test.py  Normal file
@@ -0,0 +1,30 @@
import numpy as np
import pandas as pd
import pyerrors as pe


def test_df_export_import(tmp_path):
    my_dict = {"int": 1,
               "float": -0.01,
               "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
               "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
    for gz in [True, False]:
        my_df = pd.DataFrame([my_dict] * 10)

        pe.input.pandas.dump_df(my_df, (tmp_path / 'df_output').as_posix(), gz=gz)
        reconstructed_df = pe.input.pandas.load_df((tmp_path / 'df_output').as_posix(), auto_gamma=True, gz=gz)
        assert np.all(my_df == reconstructed_df)

        pe.input.pandas.load_df((tmp_path / 'df_output.csv').as_posix(), gz=gz)


def test_df_Corr(tmp_path):

    my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")])

    my_dict = {"int": 1,
               "float": -0.01,
               "Corr": my_corr}
    my_df = pd.DataFrame([my_dict] * 5)

    pe.input.pandas.dump_df(my_df, (tmp_path / 'df_output').as_posix())
    reconstructed_df = pe.input.pandas.load_df((tmp_path / 'df_output').as_posix(), auto_gamma=True)