From bd20057f5219a0fc75b19bb3d92545c8c8e285ab Mon Sep 17 00:00:00 2001
From: Simon Kuberski <simon.kuberski@uni-muenster.de>
Date: Wed, 16 Feb 2022 11:13:11 +0100
Subject: [PATCH 1/2] Cut RAM requirements for reading JSON in half

---
 pyerrors/input/json.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/pyerrors/input/json.py b/pyerrors/input/json.py
index b53d4e3d..c7f8cf7f 100644
--- a/pyerrors/input/json.py
+++ b/pyerrors/input/json.py
@@ -6,6 +6,7 @@ import datetime
 import platform
 import warnings
 import re
+import gc
 import numpy as np
 from ..obs import Obs
 from ..covobs import Covobs
@@ -38,6 +39,8 @@ def create_json_string(ol, description='', indent=1):
     my_encoder.default = _default
 
     class Deltalist:
+        __slots__ = ['cnfg', 'deltas']
+
         def __init__(self, li):
             self.cnfg = li[0]
             self.deltas = li[1:]
@@ -53,6 +56,8 @@ def create_json_string(ol, description='', indent=1):
             return self.__repr__()
 
     class Floatlist:
+        __slots__ = ['li']
+
         def __init__(self, li):
             self.li = list(li)
 
@@ -222,14 +227,18 @@ def create_json_string(ol, description='', indent=1):
         else:
             raise Exception("Unkown datatype.")
 
-    jsonstring = json.dumps(d, indent=indent, cls=my_encoder, ensure_ascii=False)
+    jsonstring = ''
+    for chunk in my_encoder(indent=indent, ensure_ascii=False).iterencode(d):
+        jsonstring += chunk
 
-    def remove_quotationmarks(s):
+    del d
+    gc.collect()
+
+    def remove_quotationmarks_split(split):
         """Workaround for un-quoting of delta lists, adds 5% of work
            but is save, compared to a simple replace that could destroy the structure
         """
         deltas = False
-        split = s.split('\n')
         for i in range(len(split)):
             if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]:
                 deltas = True
@@ -239,7 +248,8 @@ def create_json_string(ol, description='', indent=1):
                     deltas = False
         return '\n'.join(split)
 
-    jsonstring = remove_quotationmarks(jsonstring)
+    jsonstring = jsonstring.split('\n')
+    jsonstring = remove_quotationmarks_split(jsonstring)
     jsonstring = jsonstring.replace('nan', 'NaN')
     return jsonstring
 

From 005f54e0d127e1c017e17cb763b5ef81f68ff479 Mon Sep 17 00:00:00 2001
From: Simon Kuberski <simon.kuberski@uni-muenster.de>
Date: Wed, 16 Feb 2022 11:26:06 +0100
Subject: [PATCH 2/2] Reduce RAM requirements when reading a JSON file

---
 pyerrors/input/json.py | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/pyerrors/input/json.py b/pyerrors/input/json.py
index c7f8cf7f..a4fab75e 100644
--- a/pyerrors/input/json.py
+++ b/pyerrors/input/json.py
@@ -291,8 +291,9 @@ def dump_to_json(ol, fname, description='', indent=1, gz=True):
     fp.close()
 
 
-def import_json_string(json_string, verbose=True, full_output=False):
-    """Reconstruct a list of Obs or structures containing Obs from a json string.
+def _parse_json_dict(json_dict, verbose=True, full_output=False):
+    """Reconstruct a list of Obs or structures containing Obs from a dict that
+    was built out of a json string.
 
     The following structures are supported: Obs, list, numpy.ndarray, Corr
     If the list contains only one element, it is unpacked from the list.
@@ -446,8 +447,6 @@ def import_json_string(json_string, verbose=True, full_output=False):
         my_corr.prange = temp_prange
         return my_corr
 
-    json_dict = json.loads(json_string)
-
     prog = json_dict.get('program', '')
     version = json_dict.get('version', '')
     who = json_dict.get('who', '')
@@ -495,6 +494,26 @@ def import_json_string(json_string, verbose=True, full_output=False):
         return ol
 
 
+def import_json_string(json_string, verbose=True, full_output=False):
+    """Reconstruct a list of Obs or structures containing Obs from a json string.
+
+    The following structures are supported: Obs, list, numpy.ndarray, Corr
+    If the list contains only one element, it is unpacked from the list.
+
+    Parameters
+    ----------
+    json_string : str
+        json string containing the data.
+    verbose : bool
+        Print additional information that was written to the file.
+    full_output : bool
+        If True, a dict containing auxiliary information and the data is returned.
+        If False, only the data is returned.
+    """
+
+    return _parse_json_dict(json.loads(json_string), verbose, full_output)
+
+
 def load_json(fname, verbose=True, gz=True, full_output=False):
     """Import a list of Obs or structures containing Obs from a .json(.gz) file.
 
@@ -519,14 +538,14 @@ def load_json(fname, verbose=True, gz=True, full_output=False):
         if not fname.endswith('.gz'):
             fname += '.gz'
         with gzip.open(fname, 'r') as fin:
-            d = fin.read().decode('utf-8')
+            d = json.load(fin)
     else:
         if fname.endswith('.gz'):
             warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
         with open(fname, 'r', encoding='utf-8') as fin:
-            d = fin.read()
+            d = json.loads(fin.read())
 
-    return import_json_string(d, verbose, full_output)
+    return _parse_json_dict(d, verbose, full_output)
 
 
 def _ol_from_dict(ind, reps='DICTOBS'):