Use rapidjson instead of json for I/O

2025-07-12 06:39:26 +02:00 · 2022-05-09 12:51:58 +02:00 · 2022-05-09 12:51:58 +02:00 · 78ab077eb8
commit 78ab077eb8
parent 77f0fbf699
1 changed files with 7 additions and 71 deletions
--- a/pyerrors/input/json.py
+++ b/pyerrors/input/json.py
@ -1,4 +1,4 @@
-import json
+import rapidjson as json
 import gzip
 import getpass
 import socket
@ -6,7 +6,6 @@ import datetime
 import platform
 import warnings
 import re
-import gc
 import numpy as np
 from ..obs import Obs
 from ..covobs import Covobs
@ -32,47 +31,6 @@ def create_json_string(ol, description='', indent=1):
        saves disk space.
    """

-    def _default(self, obj):
-        return str(obj)
-    my_encoder = json.JSONEncoder
-    _default.default = json.JSONEncoder().default
-    my_encoder.default = _default
-
-    class Deltalist:
-        __slots__ = ['cnfg', 'deltas']
-
-        def __init__(self, li):
-            self.cnfg = li[0]
-            self.deltas = li[1:]
-
-        def __repr__(self):
-            s = '[%d' % (self.cnfg)
-            for d in self.deltas:
-                s += ', %1.15e' % (d)
-            s += ']'
-            return s
-
-        def __str__(self):
-            return self.__repr__()
-
-    class Floatlist:
-        __slots__ = ['li']
-
-        def __init__(self, li):
-            self.li = list(li)
-
-        def __repr__(self):
-            s = '['
-            for i in range(len(self.li)):
-                if i > 0:
-                    s += ', '
-                s += '%1.15e' % (self.li[i])
-            s += ']'
-            return s
-
-        def __str__(self):
-            return self.__repr__()
-
    def _gen_data_d_from_list(ol):
        dl = []
        for name in ol[0].mc_names:
@ -89,7 +47,6 @@ def create_json_string(ol, description='', indent=1):
                    rd['deltas'].append([ol[0].idl[r_name][i]])
                    for o in ol:
                        rd['deltas'][-1].append(o.deltas[r_name][i])
-                    rd['deltas'][-1] = Deltalist(rd['deltas'][-1])
                ed['replica'].append(rd)
            dl.append(ed)
        return dl
@ -100,14 +57,13 @@ def create_json_string(ol, description='', indent=1):
            ed = {}
            ed['id'] = name
            ed['layout'] = str(ol[0].covobs[name].cov.shape).lstrip('(').rstrip(')').rstrip(',')
-            ed['cov'] = Floatlist(np.ravel(ol[0].covobs[name].cov))
+            ed['cov'] = list(np.ravel(ol[0].covobs[name].cov))
            ncov = ol[0].covobs[name].cov.shape[0]
            ed['grad'] = []
            for i in range(ncov):
                ed['grad'].append([])
                for o in ol:
                    ed['grad'][-1].append(o.covobs[name].grad[i][0])
-                ed['grad'][-1] = Floatlist(ed['grad'][-1])
            dl.append(ed)
        return dl

@ -214,6 +170,7 @@ def create_json_string(ol, description='', indent=1):

    if description:
        d['description'] = description
+
    d['obsdata'] = []
    for io in ol:
        if isinstance(io, Obs):
@ -227,31 +184,10 @@ def create_json_string(ol, description='', indent=1):
        else:
            raise Exception("Unkown datatype.")

-    jsonstring = ''
-    for chunk in my_encoder(indent=indent, ensure_ascii=False).iterencode(d):
-        jsonstring += chunk
-
-    del d
-    gc.collect()
-
-    def remove_quotationmarks_split(split):
-        """Workaround for un-quoting of delta lists, adds 5% of work
-           but is save, compared to a simple replace that could destroy the structure
-        """
-        deltas = False
-        for i in range(len(split)):
-            if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]:
-                deltas = True
-            if deltas:
-                split[i] = split[i].replace('"[', '[').replace(']"', ']')
-                if split[i][-1] == ']':
-                    deltas = False
-        return '\n'.join(split)
-
-    jsonstring = jsonstring.split('\n')
-    jsonstring = remove_quotationmarks_split(jsonstring)
-    jsonstring = jsonstring.replace('nan', 'NaN')
-    return jsonstring
+    if indent:
+        return json.dumps(d, indent=indent, ensure_ascii=False, write_mode=json.WM_SINGLE_LINE_ARRAY)
+    else:
+        return json.dumps(d, indent=indent, ensure_ascii=False, write_mode=json.WM_COMPACT)


 def dump_to_json(ol, fname, description='', indent=1, gz=True):