Impr/refactor find files (#151)

* wrote small test for ms5_xsf read method

* small bug in ms5_xsf found

* first version of refactoring with test

* built _find_files into openQCD module

* postfix can now be used as before

* altered test data to be smaller

* read_rwms throws better Exception now

* typo corrected

* better tests for postfixes, also added similar solution for ext variable
This commit is contained in:
Justus Kuhlmann 2023-02-08 15:52:29 +01:00 committed by GitHub
parent bd66075f91
commit cef3c3e793
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 139 additions and 60 deletions

View file

@ -12,6 +12,46 @@ from ..obs import CObs
from ..correlators import Corr from ..correlators import Corr
def _find_files(path, prefix, postfix, ext, known_files=None):
    """Collect the data files in a directory that match prefix, postfix and extension.

    Only the top level of `path` is inspected. File names are matched against
    the pattern ``<prefix>*.<postfix>.<ext>`` (``<prefix>*.<ext>`` if `postfix`
    is empty). If more than one file matches, the result is sorted by the
    first integer that appears in the file name after the prefix.

    Parameters
    ----------
    path : str
        Directory that is searched for files.
    prefix : str
        Beginning of the file names.
    postfix : str
        Part of the file name in front of the extension. Leading and
        trailing dots are optional, an empty string disables the postfix.
    ext : str
        File extension, with or without a leading dot.
    known_files : list[str] or str, optional
        File names that are expected to be present in `path`. If given, no
        pattern matching is done and these names are returned once their
        presence has been verified. A single name may be passed as a string.

    Returns
    -------
    files : list[str]
        The verified `known_files` or the matching file names.

    Raises
    ------
    FileNotFoundError
        If an entry of `known_files` is not present in `path` or if no files
        are found at the top level of `path`.
    Exception
        If no file name matches the pattern.
    """
    if known_files is None:
        known_files = []
    elif isinstance(known_files, str):
        # A bare string would otherwise be iterated character by character.
        known_files = [known_files]

    if postfix:
        if not postfix.endswith("."):
            postfix += "."
        if not postfix.startswith("."):
            postfix = "." + postfix
    else:
        # Without a postfix the extension still has to be separated by a dot,
        # otherwise e.g. 'dat' would also match names ending in '.xdat'.
        postfix = "."

    # startswith instead of ext[0] so that an empty string does not raise.
    if ext.startswith("."):
        ext = ext[1:]

    pattern = prefix + "*" + postfix + ext

    # Only the first tuple yielded by os.walk (the top-level directory) is used.
    found = []
    for (_dirpath, _dirnames, filenames) in os.walk(path + "/"):
        found.extend(filenames)
        break

    if known_files:
        present = set(found)
        for kf in known_files:
            if kf not in present:
                raise FileNotFoundError("Given file " + kf + " does not exist!")
        return known_files

    if not found:
        raise FileNotFoundError(f"Error, directory '{path}' not found")

    files = [f for f in found if fnmatch.fnmatch(f, pattern)]
    if not files:
        raise Exception("No files found after pattern filter!")

    # A single file needs no ordering and may not carry a replica number at
    # all, in which case the numeric sort key below would raise an IndexError.
    if len(files) > 1:
        files.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    return files
def read_rwms(path, prefix, version='2.0', names=None, **kwargs): def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
"""Read rwms format from given folder structure. Returns a list of length nrw """Read rwms format from given folder structure. Returns a list of length nrw
@ -56,21 +96,14 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
postfix = kwargs.get('postfix') postfix = kwargs.get('postfix')
else: else:
postfix = '' postfix = ''
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(filenames)
break
if not ls:
raise Exception(f"Error, directory '{path}' not found")
if 'files' in kwargs: if 'files' in kwargs:
ls = kwargs.get('files') known_files = kwargs.get('files')
else: else:
for exc in ls: known_files = []
if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
ls = list(set(ls) - set([exc])) ls = _find_files(path, prefix, postfix, 'dat', known_files=known_files)
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls) replica = len(ls)
if 'r_start' in kwargs: if 'r_start' in kwargs:
@ -154,7 +187,7 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
nsrc.append(struct.unpack('i', t)[0]) nsrc.append(struct.unpack('i', t)[0])
if version == '2.0': if version == '2.0':
if not struct.unpack('i', fp.read(4))[0] == 0: if not struct.unpack('i', fp.read(4))[0] == 0:
print('something is wrong!') raise Exception("You are using the input for openQCD version 2.0, this is not correct.")
configlist.append([]) configlist.append([])
while True: while True:
@ -297,22 +330,13 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
Extracted t0 Extracted t0
""" """
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(filenames)
break
if not ls:
raise Exception('Error, directory not found')
if 'files' in kwargs: if 'files' in kwargs:
ls = kwargs.get('files') known_files = kwargs.get('files')
else: else:
for exc in ls: known_files = []
if not fnmatch.fnmatch(exc, prefix + '*.ms.dat'):
ls = list(set(ls) - set([exc])) ls = _find_files(path, prefix, 'ms', 'dat', known_files=known_files)
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls) replica = len(ls)
if 'r_start' in kwargs: if 'r_start' in kwargs:
@ -721,31 +745,23 @@ def _read_flow_obs(path, prefix, c, dtr_cnfg=1, version="openQCD", obspos=0, sum
supposed_L = kwargs.get("L") supposed_L = kwargs.get("L")
else: else:
supposed_L = None supposed_L = None
postfix = ".gfms.dat" postfix = "gfms"
else: else:
if "L" not in kwargs: if "L" not in kwargs:
raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.") raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.")
else: else:
L = kwargs.get("L") L = kwargs.get("L")
postfix = ".ms.dat" postfix = "ms"
if "postfix" in kwargs: if "postfix" in kwargs:
postfix = kwargs.get("postfix") postfix = kwargs.get("postfix")
if "files" in kwargs: if "files" in kwargs:
files = kwargs.get("files") known_files = kwargs.get("files")
postfix = ''
else: else:
found = [] known_files = []
files = []
for (dirpath, dirnames, filenames) in os.walk(path + "/"):
found.extend(filenames)
break
for f in found:
if fnmatch.fnmatch(f, prefix + "*" + postfix):
files.append(f)
files = sorted(files) files = _find_files(path, prefix, postfix, "dat", known_files=known_files)
if 'r_start' in kwargs: if 'r_start' in kwargs:
r_start = kwargs.get('r_start') r_start = kwargs.get('r_start')
@ -1058,24 +1074,30 @@ def read_ms5_xsf(path, prefix, qc, corr, sep="r", **kwargs):
If there is an error unpacking binary data. If there is an error unpacking binary data.
""" """
found = [] # found = []
files = [] files = []
names = [] names = []
# test if the input is correct
if qc not in ['dd', 'ud', 'du', 'uu']:
raise Exception("Unknown quark conbination!")
if corr not in ["gS", "gP", "gA", "gV", "gVt", "lA", "lV", "lVt", "lT", "lTt", "g1", "l1"]:
raise Exception("Unknown correlator!")
if "files" in kwargs:
known_files = kwargs.get("files")
else:
known_files = []
files = _find_files(path, prefix, "ms5_xsf_" + qc, "dat", known_files=known_files)
if "names" in kwargs: if "names" in kwargs:
names = kwargs.get("names") names = kwargs.get("names")
else:
for (dirpath, dirnames, filenames) in os.walk(path + "/"): for f in files:
found.extend(filenames)
break
for f in found:
if fnmatch.fnmatch(f, prefix + "*.ms5_xsf_" + qc + ".dat"):
files.append(f)
if "names" not in kwargs:
if not sep == "": if not sep == "":
se = f.split(".")[0] se = f.split(".")[0]
for s in f.split(".")[1:-1]: for s in f.split(".")[1:-2]:
se += "." + s se += "." + s
names.append(se.split(sep)[0] + "|r" + se.split(sep)[1]) names.append(se.split(sep)[0] + "|r" + se.split(sep)[1])
else: else:

View file

@ -7,7 +7,7 @@ import pytest
def test_rwms(): def test_rwms():
path = './tests//data/openqcd_test/' path = './tests//data/openqcd_test/'
prefix = 'sfqcd' prefix = 'sfqcd'
postfix = '.rwms' postfix = 'rwms'
# sfqcd-1.6: Trajectories instead of confignumbers are printed to file. # sfqcd-1.6: Trajectories instead of confignumbers are printed to file.
rwfo = pe.input.openQCD.read_rwms(path, prefix, version='1.6', postfix=postfix) rwfo = pe.input.openQCD.read_rwms(path, prefix, version='1.6', postfix=postfix)
@ -108,3 +108,60 @@ def test_gf_coupling():
pe.input.openQCD.read_gf_coupling(path, prefix, c=0.35) pe.input.openQCD.read_gf_coupling(path, prefix, c=0.35)
with pytest.raises(Exception): with pytest.raises(Exception):
pe.input.openQCD.read_gf_coupling(path, prefix, c=0.3, Zeuthen_flow=False) pe.input.openQCD.read_gf_coupling(path, prefix, c=0.3, Zeuthen_flow=False)
def test_read_ms5_xsf():
    """Read the ms5_xsf test data and check replica names, shapes and one value.

    Afterwards an unknown quark combination and an unknown correlator name
    are passed, both of which are expected to raise.
    """
    data_dir = './tests//data/openqcd_test/'
    file_prefix = "ms5_xsf_T24L16"
    correlator = "gA"
    quark_comb = 'dd'
    read = pe.input.openQCD.read_ms5_xsf

    result = read(data_dir, file_prefix, quark_comb, correlator)

    replica_names = ['ms5_xsf_T24L16|r1', 'ms5_xsf_T24L16|r2', 'ms5_xsf_T24L16|r3']
    assert result.real[12].names == replica_names
    for rep_name in replica_names:
        assert (result.real[12].shape[rep_name] == 10)
    assert (result.real[12].value == -3.0000000000001923)

    # quark combination that is not supported
    with pytest.raises(Exception):
        read(data_dir, file_prefix, "rq", correlator)

    # correlator name that is not supported
    with pytest.raises(Exception):
        read(data_dir, file_prefix, quark_comb, "gX")
def test_find_files():
    """Check `_find_files` for all dot variants of postfix/ext and its error paths."""
    data_dir = './tests//data/openqcd_test/'
    file_prefix = "ms5_xsf_T24L16"
    quark_comb = 'dd'
    find = pe.input.openQCD._find_files

    # Optional leading/trailing dots on postfix and ext must not matter.
    variants = [
        ("ms5_xsf_" + quark_comb, "dat"),
        (".ms5_xsf_" + quark_comb, "dat"),
        ("ms5_xsf_" + quark_comb + ".", "dat"),
        (".ms5_xsf_" + quark_comb + ".", "dat"),
        (".ms5_xsf_" + quark_comb + ".", ".dat"),
    ]
    for post, extension in variants:
        matches = find(data_dir, file_prefix, post, extension)
        assert (len(matches) == 3)

    # known_files entry that is not present in the directory
    with pytest.raises(FileNotFoundError):
        find(data_dir, file_prefix, "ms5_xsf_" + quark_comb, "dat", known_files="egg")

    # directory that does not exist
    wrong_dir = './tests//data/openqc_test/'
    with pytest.raises(FileNotFoundError):
        find(wrong_dir, file_prefix, "ms5_xsf_" + quark_comb, "dat")

    # prefix that matches none of the files
    wrong_prefix = "tune62"
    with pytest.raises(Exception):
        find(data_dir, wrong_prefix, "ms5_xsf_" + quark_comb, "dat")