Use rapidjson instead of json for I/O

This commit is contained in:
Simon Kuberski 2022-05-09 12:51:58 +02:00
parent 77f0fbf699
commit 78ab077eb8

View file

@ -1,4 +1,4 @@
import json import rapidjson as json
import gzip import gzip
import getpass import getpass
import socket import socket
@ -6,7 +6,6 @@ import datetime
import platform import platform
import warnings import warnings
import re import re
import gc
import numpy as np import numpy as np
from ..obs import Obs from ..obs import Obs
from ..covobs import Covobs from ..covobs import Covobs
@ -32,47 +31,6 @@ def create_json_string(ol, description='', indent=1):
saves disk space. saves disk space.
""" """
def _default(self, obj):
return str(obj)
my_encoder = json.JSONEncoder
_default.default = json.JSONEncoder().default
my_encoder.default = _default
class Deltalist:
__slots__ = ['cnfg', 'deltas']
def __init__(self, li):
self.cnfg = li[0]
self.deltas = li[1:]
def __repr__(self):
s = '[%d' % (self.cnfg)
for d in self.deltas:
s += ', %1.15e' % (d)
s += ']'
return s
def __str__(self):
return self.__repr__()
class Floatlist:
__slots__ = ['li']
def __init__(self, li):
self.li = list(li)
def __repr__(self):
s = '['
for i in range(len(self.li)):
if i > 0:
s += ', '
s += '%1.15e' % (self.li[i])
s += ']'
return s
def __str__(self):
return self.__repr__()
def _gen_data_d_from_list(ol): def _gen_data_d_from_list(ol):
dl = [] dl = []
for name in ol[0].mc_names: for name in ol[0].mc_names:
@ -89,7 +47,6 @@ def create_json_string(ol, description='', indent=1):
rd['deltas'].append([ol[0].idl[r_name][i]]) rd['deltas'].append([ol[0].idl[r_name][i]])
for o in ol: for o in ol:
rd['deltas'][-1].append(o.deltas[r_name][i]) rd['deltas'][-1].append(o.deltas[r_name][i])
rd['deltas'][-1] = Deltalist(rd['deltas'][-1])
ed['replica'].append(rd) ed['replica'].append(rd)
dl.append(ed) dl.append(ed)
return dl return dl
@ -100,14 +57,13 @@ def create_json_string(ol, description='', indent=1):
ed = {} ed = {}
ed['id'] = name ed['id'] = name
ed['layout'] = str(ol[0].covobs[name].cov.shape).lstrip('(').rstrip(')').rstrip(',') ed['layout'] = str(ol[0].covobs[name].cov.shape).lstrip('(').rstrip(')').rstrip(',')
ed['cov'] = Floatlist(np.ravel(ol[0].covobs[name].cov)) ed['cov'] = list(np.ravel(ol[0].covobs[name].cov))
ncov = ol[0].covobs[name].cov.shape[0] ncov = ol[0].covobs[name].cov.shape[0]
ed['grad'] = [] ed['grad'] = []
for i in range(ncov): for i in range(ncov):
ed['grad'].append([]) ed['grad'].append([])
for o in ol: for o in ol:
ed['grad'][-1].append(o.covobs[name].grad[i][0]) ed['grad'][-1].append(o.covobs[name].grad[i][0])
ed['grad'][-1] = Floatlist(ed['grad'][-1])
dl.append(ed) dl.append(ed)
return dl return dl
@ -214,6 +170,7 @@ def create_json_string(ol, description='', indent=1):
if description: if description:
d['description'] = description d['description'] = description
d['obsdata'] = [] d['obsdata'] = []
for io in ol: for io in ol:
if isinstance(io, Obs): if isinstance(io, Obs):
@ -227,31 +184,10 @@ def create_json_string(ol, description='', indent=1):
else: else:
raise Exception("Unkown datatype.") raise Exception("Unkown datatype.")
jsonstring = '' if indent:
for chunk in my_encoder(indent=indent, ensure_ascii=False).iterencode(d): return json.dumps(d, indent=indent, ensure_ascii=False, write_mode=json.WM_SINGLE_LINE_ARRAY)
jsonstring += chunk else:
return json.dumps(d, indent=indent, ensure_ascii=False, write_mode=json.WM_COMPACT)
del d
gc.collect()
def remove_quotationmarks_split(split):
"""Workaround for un-quoting of delta lists, adds 5% of work
but is safe, compared to a simple replace that could destroy the structure
"""
deltas = False
for i in range(len(split)):
if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]:
deltas = True
if deltas:
split[i] = split[i].replace('"[', '[').replace(']"', ']')
if split[i][-1] == ']':
deltas = False
return '\n'.join(split)
jsonstring = jsonstring.split('\n')
jsonstring = remove_quotationmarks_split(jsonstring)
jsonstring = jsonstring.replace('nan', 'NaN')
return jsonstring
def dump_to_json(ol, fname, description='', indent=1, gz=True): def dump_to_json(ol, fname, description='', indent=1, gz=True):