diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py index 5e1c8d49..8caede78 100644 --- a/pyerrors/input/openQCD.py +++ b/pyerrors/input/openQCD.py @@ -39,13 +39,15 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): if not ls: raise Exception('Error, directory not found') - - # Exclude files with different names - for exc in ls: - if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'): - ls = list(set(ls) - set([exc])) - if len(ls) > 1: - ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) + if 'files' in kwargs: + ls = kwargs.get('files') + else: + # Exclude files with different names + for exc in ls: + if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'): + ls = list(set(ls) - set([exc])) + if len(ls) > 1: + ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) replica = len(ls) if 'r_start' in kwargs: @@ -64,7 +66,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): else: r_stop = [None] * replica - print('Read reweighting factors from', prefix[:-1], ',', replica, 'replica', end='') + print('Read reweighting factors from', prefix[:-1], ',', + replica, 'replica', end='') # Adjust replica names to new bookmarking system if names is None: @@ -94,7 +97,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): for k in range(nrw): deltas.append([]) else: - if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')): # little weird if-clause due to the /2 operation needed. + # little weird if-clause due to the /2 operation needed. + if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')): raise Exception('Error: different number of reweighting factors for replicum', rep) for k in range(nrw): @@ -106,7 +110,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): for i in range(nrw): t = fp.read(4) nfct.append(struct.unpack('i', t)[0]) - # print('nfct: ', nfct) # Hasenbusch factor, 1 for rat reweighting + # print('nfct: ', nfct) # Hasenbusch factor, + # 1 for rat reweighting else: for i in range(nrw): nfct.append(1) @@ -135,8 +140,11 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): for j in range(tmpd['n'][0]): tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw[j]))) if print_err: - print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw[j]))), np.std(np.exp(-np.asarray(tmp_rw[j])))) - print('Sources:', np.exp(-np.asarray(tmp_rw[j]))) + print(config_no, i, j, + np.mean(np.exp(-np.asarray(tmp_rw[j]))), + np.std(np.exp(-np.asarray(tmp_rw[j])))) + print('Sources:', + np.exp(-np.asarray(tmp_rw[j]))) print('Partial factor:', tmp_nfct) elif version == '1.6' or version == '1.4': tmp_nfct = 1.0 @@ -146,7 +154,9 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): tmp_rw = struct.unpack('d' * nsrc[i], t) tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw))) if print_err: - print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw))), np.std(np.exp(-np.asarray(tmp_rw)))) + print(config_no, i, j, + np.mean(np.exp(-np.asarray(tmp_rw))), + np.std(np.exp(-np.asarray(tmp_rw)))) print('Sources:', np.exp(-np.asarray(tmp_rw))) print('Partial factor:', tmp_nfct) tmp_array[i].append(tmp_nfct) @@ -165,11 +175,14 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): return result -def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwargs): +def extract_t0(path, prefix, dtr_read, xmin, + spatial_extent, fit_range=5, **kwargs): """Extract t0 from given .ms.dat files. Returns t0 as Obs. - It is assumed that all boundary effects have sufficiently decayed at x0=xmin. - The data around the zero crossing of t^2 - 0.3 is fitted with a linear function + It is assumed that all boundary effects have + sufficiently decayed at x0=xmin. + The data around the zero crossing of t^2 - 0.3 + is fitted with a linear function from which the exact root is extracted. Only works with openQCD v 1.2. @@ -180,14 +193,17 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar prefix : str Ensemble prefix dtr_read : int - Determines how many trajectories should be skipped when reading the ms.dat files. + Determines how many trajectories should be skipped + when reading the ms.dat files. Corresponds to dtr_cnfg / dtr_ms in the openQCD input file. xmin : int - First timeslice where the boundary effects have sufficiently decayed. + First timeslice where the boundary + effects have sufficiently decayed. spatial_extent : int spatial extent of the lattice, required for normalization. fit_range : int - Number of data points left and right of the zero crossing to be included in the linear fit. (Default: 5) + Number of data points left and right of the zero + crossing to be included in the linear fit. (Default: 5) r_start : list list which contains the first config to be read for each replicum. r_stop: list @@ -273,7 +289,9 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar Ysum.append([]) for i, item in enumerate(Ysl): - Ysum[-1].append([np.mean(item[current + xmin:current + tmax - xmin]) for current in range(0, len(item), tmax)]) + Ysum[-1].append([np.mean(item[current + xmin: + current + tmax - xmin]) + for current in range(0, len(item), tmax)]) t2E_dict = {} for n in range(nn + 1): @@ -286,10 +304,13 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar new_obs = Obs(samples, [(w.split('.'))[0] for w in ls]) t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3 - zero_crossing = np.argmax(np.array([o.value for o in t2E_dict.values()]) > 0.0) + zero_crossing = np.argmax(np.array( + [o.value for o in t2E_dict.values()]) > 0.0) - x = list(t2E_dict.keys())[zero_crossing - fit_range: zero_crossing + fit_range] - y = list(t2E_dict.values())[zero_crossing - fit_range: zero_crossing + fit_range] + x = list(t2E_dict.keys())[zero_crossing - fit_range: + zero_crossing + fit_range] + y = list(t2E_dict.values())[zero_crossing - fit_range: + zero_crossing + fit_range] [o.gamma_method() for o in y] fit_result = fit_lin(x, y) @@ -343,3 +364,243 @@ def _read_array_openQCD2(fp): arr = _parse_array_openQCD2(d, n, size, tmp, quadrupel=True) return {'d': d, 'n': n, 'size': size, 'arr': arr} + + +def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs): + """Read qtop format from given folder structure. + + Parameters + ---------- + path: + path of the measurement files + prefix: + prefix of the measurement files, e.g. _id0_r0.ms.dat + c: double + Smearing radius in units of the lattice extent, c = sqrt(8 t0) / L + dtr_cnfg: int + (optional) parameter that specifies the number of trajectories + between two configs. + if it is not set, the distance between two measurements + in the file is assumed to be + the distance between two configurations. + steps: int + (optional) (maybe only necessary for openQCD2.0) + nt step size, guessed if not given + version: str + version string of the openQCD (sfqcd) version used to create + the ensemble + L: int + spatial length of the lattice in L/a. + HAS to be set if version != sfqcd, since openQCD does not provide + this in the header + r_start: list + offset of the first ensemble, making it easier to match + later on with other Obs + r_stop: list + last configurations that need to be read (per replicum) + files: list + specify the exact files that need to be read + from path, pratical if e.g. only one replicum is needed + names: list + Alternative labeling for replicas/ensembles. + Has to have the appropriate length + """ + # one could read L from the header in case of sfQCD + # c = 0.35 + known_versions = ["1.0", "1.2", "1.4", "1.6", "2.0", "sfqcd"] + + if version not in known_versions: + raise Exception("Unknown openQCD version.") + if "steps" in kwargs: + steps = kwargs.get("steps") + if version == "sfqcd": + if "L" in kwargs: + supposed_L = kwargs.get("L") + else: + if "L" not in kwargs: + raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.") + else: + L = kwargs.get("L") + r_start = 1 + if "r_start" in kwargs: + r_start = kwargs.get("r_start") + if "r_stop" in kwargs: + r_stop = kwargs.get("r_stop") + # if one wants to read specific files with this method... + if "files" in kwargs: + files = kwargs.get("files") + else: + # find files in path + found = [] + files = [] + for (dirpath, dirnames, filenames) in os.walk(path + "/"): + # print(filenames) + found.extend(filenames) + break + for f in found: + if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat"): + files.append(f) + print(files) + # now that we found our files, we dechiffer them... + rep_names = [] + + deltas = [] + idl = [] + for rep, file in enumerate(files): + with open(path + "/" + file, "rb") as fp: + # header + t = fp.read(12) + header = struct.unpack(' 1: - ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) - replica = len(ls) - print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica') - if 'names' in kwargs: - new_names = kwargs.get('names') - if len(new_names) != replica: - raise Exception('Names does not have the required length', replica) + if "replica" in kwargs: + ls = reps else: - # Adjust replica names to new bookmarking system - new_names = [] - for entry in ls: - idx = entry.index('r') - new_names.append(entry[:idx] + '|' + entry[idx:]) - - print(replica, 'replica') - for i, item in enumerate(ls): - print(item) - sub_ls = [] - for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): - sub_ls.extend(dirnames) - break - for exc in sub_ls: - if fnmatch.fnmatch(exc, 'cfg*'): - sub_ls = list(set(sub_ls) - set(exc)) - sub_ls.sort(key=lambda x: int(x[3:])) - no_cfg = len(sub_ls) - print(no_cfg, 'configurations') - - if i == 0: - with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: - for k, line in enumerate(fp): - if read == 1 and not line.strip() and k > start + 1: - break - if read == 1 and k >= start: - T += 1 - if '[correlator]' in line: - read = 1 - start = k + 7 + b2b - T -= b2b - - deltas = [] - for j in range(T): - deltas.append([]) - - sublength = len(sub_ls) - for j in range(T): - deltas[j].append(np.zeros(sublength)) - - for cnfg, subitem in enumerate(sub_ls): - with open(path + '/' + item + '/' + subitem + '/' + name) as fp: - for k, line in enumerate(fp): - if(k >= start and k < start + T): - floats = list(map(float, line.split())) - deltas[k - start][i][cnfg] = floats[1 + im - single] - - result = [] - for t in range(T): - result.append(Obs(deltas[t], new_names)) - - return result - - -def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs): - """Read sfcf c format from given folder structure. - - Parameters - ---------- - quarks -- Label of the quarks used in the sfcf input file - noffset -- Offset of the source (only relevant when wavefunctions are used) - wf -- ID of wave function - wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions) - im -- if True, read imaginary instead of real part of the correlation function. - b2b -- if True, read a time-dependent boundary-to-boundary correlation function - names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length - ens_name : str - replaces the name of the ensemble - """ - - if kwargs.get('im'): - im = 1 - part = 'imaginary' - else: - im = 0 - part = 'real' - - if kwargs.get('b2b'): - b2b = 1 - else: - b2b = 0 - - T = 0 - ls = [] - for (dirpath, dirnames, filenames) in os.walk(path): - ls.extend(dirnames) - break - if not ls: - raise Exception('Error, directory not found') - # Exclude folders with different names - for exc in ls: - if not fnmatch.fnmatch(exc, prefix + '*'): - ls = list(set(ls) - set([exc])) - if len(ls) > 1: - ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc. - replica = len(ls) - if 'names' in kwargs: - new_names = kwargs.get('names') - if len(new_names) != replica: - raise Exception('Names does not have the required length', replica) - else: - # Adjust replica names to new bookmarking system - new_names = [] - for entry in ls: - idx = entry.index('r') - if 'ens_name' in kwargs: - new_names.append(kwargs.get('ens_name') + '|' + entry[idx:]) + for (dirpath, dirnames, filenames) in os.walk(path): + if not appended: + ls.extend(dirnames) else: - new_names.append(entry[:idx] + '|' + entry[idx:]) - - print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') - for i, item in enumerate(ls): - sub_ls = [] - for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): - sub_ls.extend(filenames) + ls.extend(filenames) break - for exc in sub_ls: + if not ls: + raise Exception('Error, directory not found') + # Exclude folders with different names + for exc in ls: if not fnmatch.fnmatch(exc, prefix + '*'): - sub_ls = list(set(sub_ls) - set([exc])) - sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + ls = list(set(ls) - set([exc])) + if len(ls) > 1: + # New version, to cope with ids, etc. + ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) - first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1]) + if not appended: + replica = len(ls) + else: + replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls])) + print('Read', part, 'part of', name, 'from', prefix[:-1], + ',', replica, 'replica') + if 'names' in kwargs: + new_names = kwargs.get('names') + if len(new_names) != len(set(new_names)): + raise Exception("names are not unique!") + if len(new_names) != replica: + raise Exception('Names does not have the required length', replica) + else: + # Adjust replica names to new bookmarking system - last_cfg = len(sub_ls) + first_cfg - 1 + new_names = [] + if not appended: + for entry in ls: + try: + idx = entry.index('r') + except Exception: + raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") - for cfg in range(1, len(sub_ls)): - if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg: - last_cfg = cfg + first_cfg - 1 - break - - no_cfg = last_cfg - first_cfg + 1 - print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n') - - if i == 0: - pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) - if b2b: - pattern += '\nwf_2 ' + str(wf2) - - with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: - content = file.read() - match = re.search(pattern, content) - if match: - start_read = content.count('\n', 0, match.start()) + 5 + b2b - end_match = re.search(r'\n\s*\n', content[match.start():]) - T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b - assert T > 0 - print(T, 'entries, starting to read in line', start_read) + if 'ens_name' in kwargs: + new_names.append(kwargs.get('ens_name') + '|' + entry[idx:]) else: - raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') + new_names.append(entry[:idx] + '|' + entry[idx:]) + else: - deltas = [] - for j in range(T): - deltas.append([]) + for exc in ls: + if not fnmatch.fnmatch(exc, prefix + '*.' + name): + ls = list(set(ls) - set([exc])) + ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + for entry in ls: + myentry = entry[:-len(name) - 1] + # print(myentry) + try: + idx = myentry.index('r') + except Exception: + raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") - sublength = no_cfg - for j in range(T): - deltas[j].append(np.zeros(sublength)) + if 'ens_name' in kwargs: + new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:]) + else: + new_names.append(myentry[:idx] + '|' + myentry[idx:]) + # print(new_names) + idl = [] + if not appended: + for i, item in enumerate(ls): + sub_ls = [] + if "files" in kwargs: + sub_ls = kwargs.get("files") + sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + else: + for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): + if compact: + sub_ls.extend(filenames) + else: + sub_ls.extend(dirnames) + break - for cfg in range(no_cfg): - with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: - for k, line in enumerate(fp): - if k == start_read - 5 - b2b: - if line.strip() != 'name ' + name: - raise Exception('Wrong format', sub_ls[cfg]) - if(k >= start_read and k < start_read + T): - floats = list(map(float, line.split())) - deltas[k - start_read][i][cfg] = floats[-2:][im] + # print(sub_ls) + for exc in sub_ls: + if compact: + if not fnmatch.fnmatch(exc, prefix + '*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: + int(re.findall(r'\d+', x)[-1])) + else: + if not fnmatch.fnmatch(exc, 'cfg*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: int(x[3:])) + # print(sub_ls) + rep_idl = [] + no_cfg = len(sub_ls) + for cfg in sub_ls: + try: + if compact: + rep_idl.append(int(cfg.split("n")[-1])) + else: + rep_idl.append(int(cfg[3:])) + except Exception: + raise Exception("Couldn't parse idl from directroy, problem with file " + cfg) + rep_idl.sort() + # maybe there is a better way to print the idls + print(item, ':', no_cfg, ' configurations') + idl.append(rep_idl) + # here we have found all the files we need to look into. + if i == 0: + # here, we want to find the place within the file, + # where the correlator we need is stored. + if compact: + # to do so, the pattern needed is put together + # from the input values + pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) + if b2b: + pattern += '\nwf_2 ' + str(wf2) + # and the file is parsed through to find the pattern + with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: + content = file.read() + match = re.search(pattern, content) + if match: + # the start and end point of the correlator + # in quaetion is extracted for later use in + # the other files + start_read = content.count('\n', 0, match.start()) + 5 + b2b + end_match = re.search(r'\n\s*\n', content[match.start():]) + T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b + assert T > 0 + print(T, 'entries, starting to read in line', start_read) + else: + raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') + else: + # this part does the same as above, + # but for non-compactified versions of the files + with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: + for k, line in enumerate(fp): + if version == "0.0": + # check if this is really the right file + # by matching pattern similar to above + pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf) + # if b2b, a second wf is needed + if b2b: + pattern += ", wf_2 " + str(wf2) + qs = quarks.split(" ") + pattern += " : " + qs[0] + " - " + qs[1] + # print(pattern) + if read == 1 and not line.strip() and k > start + 1: + break + if read == 1 and k >= start: + T += 1 + if version == "0.0": + if pattern in line: + # print(line) + read = 1 + start = k + 1 + else: + if '[correlator]' in line: + read = 1 + start = k + 7 + b2b + T -= b2b + print(str(T) + " entries found.") + # we found where the correlator + # that is to be read is in the files + # after preparing the datastructure + # the correlators get parsed into... + deltas = [] + for j in range(T): + deltas.append([]) + + for t in range(T): + deltas[t].append(np.zeros(no_cfg)) + # ...the actual parsing can start. + # we iterate through all measurement files in the path given... + if compact: + for cfg in range(no_cfg): + with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: + lines = fp.readlines() + # check, if the correlator is in fact + # printed completely + if(start_read + T > len(lines)): + raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?") + # and start to read the correlator. + # the range here is chosen like this, + # since this allows for implementing + # a security check for every read correlator later... + for k in range(start_read - 6, start_read + T): + if k == start_read - 5 - b2b: + if lines[k].strip() != 'name ' + name: + raise Exception('Wrong format', + sub_ls[cfg]) + if(k >= start_read and k < start_read + T): + floats = list(map(float, lines[k].split())) + deltas[k - start_read][i][cfg] = floats[-2:][im] + else: + for cnfg, subitem in enumerate(sub_ls): + with open(path + '/' + item + '/' + subitem + '/' + name) as fp: + # since the non-compatified files + # are typically not so long, + # we can iterate over the whole file. + # here one can also implement the chekc from above. + for k, line in enumerate(fp): + if(k >= start and k < start + T): + floats = list(map(float, line.split())) + if version == "0.0": + deltas[k - start][i][cnfg] = floats[im] + else: + deltas[k - start][i][cnfg] = floats[1 + im - single] + + else: + if "files" in kwargs: + ls = kwargs.get("files") + else: + for exc in ls: + if not fnmatch.fnmatch(exc, prefix + '*.' + name): + ls = list(set(ls) - set([exc])) + ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + # print(ls) + pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) + if b2b: + pattern += '\nwf_2 ' + str(wf2) + for rep, file in enumerate(ls): + rep_idl = [] + with open(path + '/' + file, 'r') as fp: + content = fp.readlines() + data_starts = [] + for linenumber, line in enumerate(content): + if "[run]" in line: + data_starts.append(linenumber) + if len(set([data_starts[i] - data_starts[i - 1] for i in + range(1, len(data_starts))])) > 1: + raise Exception("Irregularities in file structure found, not all runs have the same output length") + # first chunk of data + chunk = content[:data_starts[1]] + for linenumber, line in enumerate(chunk): + if line.startswith("gauge_name"): + gauge_line = linenumber + elif line.startswith("[correlator]"): + corr_line = linenumber + found_pat = "" + for li in chunk[corr_line + 1:corr_line + 6 + b2b]: + found_pat += li + if re.search(pattern, found_pat): + start_read = corr_line + 7 + b2b + T = len(chunk) - 1 - start_read + if rep == 0: + deltas = [] + for t in range(T): + deltas.append([]) + for t in range(T): + deltas[t].append(np.zeros(len(data_starts))) + # all other chunks should follow the same structure + for cnfg in range(len(data_starts)): + start = data_starts[cnfg] + stop = start + data_starts[1] + chunk = content[start:stop] + # meta_data = {} + try: + rep_idl.append(int(chunk[gauge_line].split("n")[-1])) + except Exception: + raise Exception("Couldn't parse idl from directroy, problem with chunk around line " + gauge_line) + + found_pat = "" + for li in chunk[corr_line + 1:corr_line + 6 + b2b]: + found_pat += li + if re.search(pattern, found_pat): + for t, line in enumerate(chunk[start_read:start_read + T]): + floats = list(map(float, line.split())) + deltas[t][rep][cnfg] = floats[-2:][im] + idl.append(rep_idl) + + if "check_configs" in kwargs: + print("Checking for missing configs...") + che = kwargs.get("check_configs") + if not (len(che) == len(idl)): + raise Exception("check_configs has to be the same length as replica!") + for r in range(len(idl)): + print("checking " + new_names[r]) + utils.check_idl(idl[r], che[r]) + print("Done") result = [] for t in range(T): - result.append(Obs(deltas[t], new_names)) - return result - - -def read_qtop(path, prefix, **kwargs): - """Read qtop format from given folder structure. - - Parameters - ---------- - target -- specifies the topological sector to be reweighted to (default 0) - full -- if true read the charge instead of the reweighting factor. - """ - - if 'target' in kwargs: - target = kwargs.get('target') - else: - target = 0 - - if kwargs.get('full'): - full = 1 - else: - full = 0 - - ls = [] - for (dirpath, dirnames, filenames) in os.walk(path): - ls.extend(filenames) - break - - if not ls: - raise Exception('Error, directory not found') - - # Exclude files with different names - for exc in ls: - if not fnmatch.fnmatch(exc, prefix + '*'): - ls = list(set(ls) - set([exc])) - if len(ls) > 1: - ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc. - replica = len(ls) - print('Read Q_top from', prefix[:-1], ',', replica, 'replica') - - deltas = [] - - for rep in range(replica): - tmp = [] - with open(path + '/' + ls[rep]) as fp: - for k, line in enumerate(fp): - floats = list(map(float, line.split())) - if full == 1: - tmp.append(floats[1]) - else: - if int(floats[1]) == target: - tmp.append(1.0) - else: - tmp.append(0.0) - - deltas.append(np.array(tmp)) - - rep_names = [] - for entry in ls: - truncated_entry = entry.split('.')[0] - idx = truncated_entry.index('r') - rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:]) - - result = Obs(deltas, rep_names) - + result.append(Obs(deltas[t], new_names, idl=idl)) return result diff --git a/pyerrors/input/utils.py b/pyerrors/input/utils.py new file mode 100644 index 00000000..66b60f68 --- /dev/null +++ b/pyerrors/input/utils.py @@ -0,0 +1,15 @@ +"""Utilities for the input""" + + +def check_idl(idl, che): + missing = [] + for c in che: + if c not in idl: + missing.append(c) + # print missing such that it can directly be parsed to slurm terminal + if not (len(missing) == 0): + print(len(missing), "configs missing") + miss_str = str(missing[0]) + for i in missing[1:]: + miss_str += "," + str(i) + print(miss_str) diff --git a/tests/input_test.ipynb b/tests/input_test.ipynb new file mode 100644 index 00000000..f241304a --- /dev/null +++ b/tests/input_test.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This file is used for testing some of the input methods." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os,sys,inspect\n", + "current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n", + "parent_dir = os.path.dirname(current_dir)\n", + "sys.path.insert(0, parent_dir) \n", + "\n", + "import pyerrors as pe\n", + "import pyerrors.input.openQCD as qcdin\n", + "import pyerrors.input.sfcf as sfin\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will have a look at the input method for the topological charge $Q_{top}$, which is measured by the program ms from the openQCD package. For now, this part still in the making and depends on an actual file. Later, this should be changed to a more efficient way of making a proper input file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['T29L20k0.13719r2.ms.dat', 'T29L20k0.13719r3.ms.dat', 'T29L20k0.13719r1.ms.dat', 'T29L20k0.13719r4.ms.dat']\n", + "dn: 10\n", + "nn: 60\n", + "tmax: 30\n", + "eps: 0.02\n", + "max_t: 12.0\n", + "t_aim: 6.125\n", + "index_aim: 31\n", + "T29L20k0.13719r2\n", + "dn: 10\n", + "nn: 60\n", + "tmax: 30\n", + "eps: 0.02\n", + "max_t: 12.0\n", + "t_aim: 6.125\n", + "index_aim: 31\n", + "T29L20k0.13719r3\n", + "dn: 10\n", + "nn: 60\n", + "tmax: 30\n", + "eps: 0.02\n", + "max_t: 12.0\n", + "t_aim: 6.125\n", + "index_aim: 31\n", + "T29L20k0.13719r1\n", + "dn: 10\n", + "nn: 60\n", + "tmax: 30\n", + "eps: 0.02\n", + "max_t: 12.0\n", + "t_aim: 6.125\n", + "index_aim: 31\n", + "T29L20k0.13719r4\n" + ] + } + ], + "source": [ + "r_qtop = qcdin.read_qtop(\"../../test_data\", prefix = \"T29L20k0.13719\",full = True, r_stop = [500,440,447,410])#, files = [\"T29L20k0.13719r1.ms.dat\"], )" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'T29L20k0.13719|r1': 500, 'T29L20k0.13719|r2': 440, 'T29L20k0.13719|r3': 447, 'T29L20k0.13719|r4': 410}\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -2 -2 -2 -2 -3 -3 -3 -3 -2 -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + ] + } + ], + "source": [ + "print(r_qtop.shape)\n", + "#print(r_qtop.deltas['T29L20k0.13719|r1'])\n", + "for i in r_qtop.deltas['T29L20k0.13719|r2']:\n", + " print(round(r_qtop.value + i), end =\" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" + }, + "kernelspec": { + "display_name": "Python 3.9.7 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}