diff --git a/docs/pyerrors/obs.html b/docs/pyerrors/obs.html index 74afdeb6..3fdf2686 100644 --- a/docs/pyerrors/obs.html +++ b/docs/pyerrors/obs.html @@ -1296,603 +1296,572 @@ 1097 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1098 1099 -1100def _collapse_deltas_for_merge(deltas, idx, shape, new_idx): -1101 """Collapse deltas defined on idx to the list of configs that is defined by new_idx. -1102 If idx and new_idx are of type range, the smallest -1103 common divisor of the step sizes is used as new step size. -1104 -1105 Parameters -1106 ---------- -1107 deltas : list -1108 List of fluctuations -1109 idx : list -1110 List or range of configs on which the deltas are defined. -1111 Has to be a subset of new_idx and has to be sorted in ascending order. -1112 shape : list -1113 Number of configs in idx. -1114 new_idx : list -1115 List of configs that defines the new range, has to be sorted in ascending order. -1116 """ -1117 -1118 if type(idx) is range and type(new_idx) is range: -1119 if idx == new_idx: -1120 return deltas -1121 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) -1122 for i in range(shape): -1123 if idx[i] in new_idx: -1124 ret[idx[i] - new_idx[0]] = deltas[i] -1125 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) -1126 +1100def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): +1101 """Filter out all configurations with vanishing fluctuation such that they do not +1102 contribute to the error estimate anymore. Returns the new deltas and +1103 idx according to the filtering. +1104 A fluctuation is considered to be vanishing, if it is smaller than eps times +1105 the mean of the absolute values of all deltas in one list. +1106 +1107 Parameters +1108 ---------- +1109 deltas : list +1110 List of fluctuations +1111 idx : list +1112 List or ranges of configs on which the deltas are defined. +1113 eps : float +1114 Prefactor that enters the filter criterion. 
+1115 """ +1116 new_deltas = [] +1117 new_idx = [] +1118 maxd = np.mean(np.fabs(deltas)) +1119 for i in range(len(deltas)): +1120 if abs(deltas[i]) > eps * maxd: +1121 new_deltas.append(deltas[i]) +1122 new_idx.append(idx[i]) +1123 if new_idx: +1124 return np.array(new_deltas), new_idx +1125 else: +1126 return deltas, idx 1127 -1128def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): -1129 """Filter out all configurations with vanishing fluctuation such that they do not -1130 contribute to the error estimate anymore. Returns the new deltas and -1131 idx according to the filtering. -1132 A fluctuation is considered to be vanishing, if it is smaller than eps times -1133 the mean of the absolute values of all deltas in one list. -1134 -1135 Parameters -1136 ---------- -1137 deltas : list -1138 List of fluctuations -1139 idx : list -1140 List or ranges of configs on which the deltas are defined. -1141 eps : float -1142 Prefactor that enters the filter criterion. -1143 """ -1144 new_deltas = [] -1145 new_idx = [] -1146 maxd = np.mean(np.fabs(deltas)) -1147 for i in range(len(deltas)): -1148 if abs(deltas[i]) > eps * maxd: -1149 new_deltas.append(deltas[i]) -1150 new_idx.append(idx[i]) -1151 if new_idx: -1152 return np.array(new_deltas), new_idx -1153 else: -1154 return deltas, idx -1155 -1156 -1157def derived_observable(func, data, array_mode=False, **kwargs): -1158 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. -1159 -1160 Parameters -1161 ---------- -1162 func : object -1163 arbitrary function of the form func(data, **kwargs). For the -1164 automatic differentiation to work, all numpy functions have to have -1165 the autograd wrapper (use 'import autograd.numpy as anp'). -1166 data : list -1167 list of Obs, e.g. [obs1, obs2, obs3]. -1168 num_grad : bool -1169 if True, numerical derivatives are used instead of autograd -1170 (default False). 
To control the numerical differentiation the -1171 kwargs of numdifftools.step_generators.MaxStepGenerator -1172 can be used. -1173 man_grad : list -1174 manually supply a list or an array which contains the jacobian -1175 of func. Use cautiously, supplying the wrong derivative will -1176 not be intercepted. -1177 -1178 Notes -1179 ----- -1180 For simple mathematical operations it can be practical to use anonymous -1181 functions. For the ratio of two observables one can e.g. use -1182 -1183 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) -1184 """ -1185 -1186 data = np.asarray(data) -1187 raveled_data = data.ravel() +1128 +1129def derived_observable(func, data, array_mode=False, **kwargs): +1130 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. +1131 +1132 Parameters +1133 ---------- +1134 func : object +1135 arbitrary function of the form func(data, **kwargs). For the +1136 automatic differentiation to work, all numpy functions have to have +1137 the autograd wrapper (use 'import autograd.numpy as anp'). +1138 data : list +1139 list of Obs, e.g. [obs1, obs2, obs3]. +1140 num_grad : bool +1141 if True, numerical derivatives are used instead of autograd +1142 (default False). To control the numerical differentiation the +1143 kwargs of numdifftools.step_generators.MaxStepGenerator +1144 can be used. +1145 man_grad : list +1146 manually supply a list or an array which contains the jacobian +1147 of func. Use cautiously, supplying the wrong derivative will +1148 not be intercepted. +1149 +1150 Notes +1151 ----- +1152 For simple mathematical operations it can be practical to use anonymous +1153 functions. For the ratio of two observables one can e.g. 
use +1154 +1155 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) +1156 """ +1157 +1158 data = np.asarray(data) +1159 raveled_data = data.ravel() +1160 +1161 # Workaround for matrix operations containing non Obs data +1162 if not all(isinstance(x, Obs) for x in raveled_data): +1163 for i in range(len(raveled_data)): +1164 if isinstance(raveled_data[i], (int, float)): +1165 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") +1166 +1167 allcov = {} +1168 for o in raveled_data: +1169 for name in o.cov_names: +1170 if name in allcov: +1171 if not np.allclose(allcov[name], o.covobs[name].cov): +1172 raise Exception('Inconsistent covariance matrices for %s!' % (name)) +1173 else: +1174 allcov[name] = o.covobs[name].cov +1175 +1176 n_obs = len(raveled_data) +1177 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) +1178 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) +1179 new_sample_names = sorted(set(new_names) - set(new_cov_names)) +1180 +1181 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} +1182 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 +1183 +1184 if data.ndim == 1: +1185 values = np.array([o.value for o in data]) +1186 else: +1187 values = np.vectorize(lambda x: x.value)(data) 1188 -1189 # Workaround for matrix operations containing non Obs data -1190 if not all(isinstance(x, Obs) for x in raveled_data): -1191 for i in range(len(raveled_data)): -1192 if isinstance(raveled_data[i], (int, float)): -1193 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") -1194 -1195 allcov = {} -1196 for o in raveled_data: -1197 for name in o.cov_names: -1198 if name in allcov: -1199 if not np.allclose(allcov[name], o.covobs[name].cov): -1200 raise Exception('Inconsistent covariance matrices for %s!' 
% (name)) -1201 else: -1202 allcov[name] = o.covobs[name].cov -1203 -1204 n_obs = len(raveled_data) -1205 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) -1206 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) -1207 new_sample_names = sorted(set(new_names) - set(new_cov_names)) -1208 -1209 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} -1210 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 -1211 -1212 if data.ndim == 1: -1213 values = np.array([o.value for o in data]) -1214 else: -1215 values = np.vectorize(lambda x: x.value)(data) -1216 -1217 new_values = func(values, **kwargs) -1218 -1219 multi = int(isinstance(new_values, np.ndarray)) -1220 -1221 new_r_values = {} -1222 new_idl_d = {} -1223 for name in new_sample_names: -1224 idl = [] -1225 tmp_values = np.zeros(n_obs) -1226 for i, item in enumerate(raveled_data): -1227 tmp_values[i] = item.r_values.get(name, item.value) -1228 tmp_idl = item.idl.get(name) -1229 if tmp_idl is not None: -1230 idl.append(tmp_idl) -1231 if multi > 0: -1232 tmp_values = np.array(tmp_values).reshape(data.shape) -1233 new_r_values[name] = func(tmp_values, **kwargs) -1234 new_idl_d[name] = _merge_idx(idl) -1235 if not is_merged[name]: -1236 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) -1237 -1238 if 'man_grad' in kwargs: -1239 deriv = np.asarray(kwargs.get('man_grad')) -1240 if new_values.shape + data.shape != deriv.shape: -1241 raise Exception('Manual derivative does not have correct shape.') -1242 elif kwargs.get('num_grad') is True: -1243 if multi > 0: -1244 raise Exception('Multi mode currently not supported for numerical derivative') -1245 options = { -1246 'base_step': 0.1, -1247 'step_ratio': 2.5} -1248 for key in options.keys(): -1249 kwarg = kwargs.get(key) -1250 if kwarg is not None: -1251 options[key] 
= kwarg -1252 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) -1253 if tmp_df.size == 1: -1254 deriv = np.array([tmp_df.real]) -1255 else: -1256 deriv = tmp_df.real -1257 else: -1258 deriv = jacobian(func)(values, **kwargs) -1259 -1260 final_result = np.zeros(new_values.shape, dtype=object) -1261 -1262 if array_mode is True: -1263 -1264 class _Zero_grad(): -1265 def __init__(self, N): -1266 self.grad = np.zeros((N, 1)) -1267 -1268 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) -1269 d_extracted = {} -1270 g_extracted = {} -1271 for name in new_sample_names: -1272 d_extracted[name] = [] -1273 ens_length = len(new_idl_d[name]) -1274 for i_dat, dat in enumerate(data): -1275 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) -1276 for name in new_cov_names: -1277 g_extracted[name] = [] -1278 zero_grad = _Zero_grad(new_covobs_lengths[name]) -1279 for i_dat, dat in enumerate(data): -1280 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) -1281 -1282 for i_val, new_val in np.ndenumerate(new_values): -1283 new_deltas = {} -1284 new_grad = {} -1285 if array_mode is True: -1286 for name in new_sample_names: -1287 ens_length = d_extracted[name][0].shape[-1] -1288 new_deltas[name] = np.zeros(ens_length) -1289 for i_dat, dat in enumerate(d_extracted[name]): -1290 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) -1291 for name in new_cov_names: -1292 new_grad[name] = 0 -1293 for i_dat, dat in enumerate(g_extracted[name]): -1294 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) -1295 else: -1296 for 
j_obs, obs in np.ndenumerate(data): -1297 for name in obs.names: -1298 if name in obs.cov_names: -1299 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad -1300 else: -1301 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) +1189 new_values = func(values, **kwargs) +1190 +1191 multi = int(isinstance(new_values, np.ndarray)) +1192 +1193 new_r_values = {} +1194 new_idl_d = {} +1195 for name in new_sample_names: +1196 idl = [] +1197 tmp_values = np.zeros(n_obs) +1198 for i, item in enumerate(raveled_data): +1199 tmp_values[i] = item.r_values.get(name, item.value) +1200 tmp_idl = item.idl.get(name) +1201 if tmp_idl is not None: +1202 idl.append(tmp_idl) +1203 if multi > 0: +1204 tmp_values = np.array(tmp_values).reshape(data.shape) +1205 new_r_values[name] = func(tmp_values, **kwargs) +1206 new_idl_d[name] = _merge_idx(idl) +1207 if not is_merged[name]: +1208 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) +1209 +1210 if 'man_grad' in kwargs: +1211 deriv = np.asarray(kwargs.get('man_grad')) +1212 if new_values.shape + data.shape != deriv.shape: +1213 raise Exception('Manual derivative does not have correct shape.') +1214 elif kwargs.get('num_grad') is True: +1215 if multi > 0: +1216 raise Exception('Multi mode currently not supported for numerical derivative') +1217 options = { +1218 'base_step': 0.1, +1219 'step_ratio': 2.5} +1220 for key in options.keys(): +1221 kwarg = kwargs.get(key) +1222 if kwarg is not None: +1223 options[key] = kwarg +1224 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) +1225 if tmp_df.size == 1: +1226 deriv = np.array([tmp_df.real]) +1227 else: +1228 deriv = tmp_df.real +1229 else: +1230 deriv = jacobian(func)(values, **kwargs) +1231 +1232 final_result = np.zeros(new_values.shape, dtype=object) +1233 +1234 if 
array_mode is True: +1235 +1236 class _Zero_grad(): +1237 def __init__(self, N): +1238 self.grad = np.zeros((N, 1)) +1239 +1240 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) +1241 d_extracted = {} +1242 g_extracted = {} +1243 for name in new_sample_names: +1244 d_extracted[name] = [] +1245 ens_length = len(new_idl_d[name]) +1246 for i_dat, dat in enumerate(data): +1247 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) +1248 for name in new_cov_names: +1249 g_extracted[name] = [] +1250 zero_grad = _Zero_grad(new_covobs_lengths[name]) +1251 for i_dat, dat in enumerate(data): +1252 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) +1253 +1254 for i_val, new_val in np.ndenumerate(new_values): +1255 new_deltas = {} +1256 new_grad = {} +1257 if array_mode is True: +1258 for name in new_sample_names: +1259 ens_length = d_extracted[name][0].shape[-1] +1260 new_deltas[name] = np.zeros(ens_length) +1261 for i_dat, dat in enumerate(d_extracted[name]): +1262 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) +1263 for name in new_cov_names: +1264 new_grad[name] = 0 +1265 for i_dat, dat in enumerate(g_extracted[name]): +1266 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) +1267 else: +1268 for j_obs, obs in np.ndenumerate(data): +1269 for name in obs.names: +1270 if name in obs.cov_names: +1271 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad +1272 else: +1273 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 
+1274 +1275 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} +1276 +1277 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): +1278 raise Exception('The same name has been used for deltas and covobs!') +1279 new_samples = [] +1280 new_means = [] +1281 new_idl = [] +1282 new_names_obs = [] +1283 for name in new_names: +1284 if name not in new_covobs: +1285 if is_merged[name]: +1286 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) +1287 else: +1288 filtered_deltas = new_deltas[name] +1289 filtered_idl_d = new_idl_d[name] +1290 +1291 new_samples.append(filtered_deltas) +1292 new_idl.append(filtered_idl_d) +1293 new_means.append(new_r_values[name][i_val]) +1294 new_names_obs.append(name) +1295 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) +1296 for name in new_covobs: +1297 final_result[i_val].names.append(name) +1298 final_result[i_val]._covobs = new_covobs +1299 final_result[i_val]._value = new_val +1300 final_result[i_val].is_merged = is_merged +1301 final_result[i_val].reweighted = reweighted 1302 -1303 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} -1304 -1305 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): -1306 raise Exception('The same name has been used for deltas and covobs!') -1307 new_samples = [] -1308 new_means = [] -1309 new_idl = [] -1310 new_names_obs = [] -1311 for name in new_names: -1312 if name not in new_covobs: -1313 if is_merged[name]: -1314 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) -1315 else: -1316 filtered_deltas = new_deltas[name] -1317 filtered_idl_d = new_idl_d[name] -1318 -1319 new_samples.append(filtered_deltas) -1320 new_idl.append(filtered_idl_d) -1321 new_means.append(new_r_values[name][i_val]) -1322 new_names_obs.append(name) -1323 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, 
idl=new_idl) -1324 for name in new_covobs: -1325 final_result[i_val].names.append(name) -1326 final_result[i_val]._covobs = new_covobs -1327 final_result[i_val]._value = new_val -1328 final_result[i_val].is_merged = is_merged -1329 final_result[i_val].reweighted = reweighted -1330 -1331 if multi == 0: -1332 final_result = final_result.item() -1333 -1334 return final_result -1335 -1336 -1337def _reduce_deltas(deltas, idx_old, idx_new): -1338 """Extract deltas defined on idx_old on all configs of idx_new. -1339 -1340 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they -1341 are ordered in an ascending order. +1303 if multi == 0: +1304 final_result = final_result.item() +1305 +1306 return final_result +1307 +1308 +1309def _reduce_deltas(deltas, idx_old, idx_new): +1310 """Extract deltas defined on idx_old on all configs of idx_new. +1311 +1312 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they +1313 are ordered in an ascending order. +1314 +1315 Parameters +1316 ---------- +1317 deltas : list +1318 List of fluctuations +1319 idx_old : list +1320 List or range of configs on which the deltas are defined +1321 idx_new : list +1322 List of configs for which we want to extract the deltas. +1323 Has to be a subset of idx_old. 
+1324 """ +1325 if not len(deltas) == len(idx_old): +1326 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) +1327 if type(idx_old) is range and type(idx_new) is range: +1328 if idx_old == idx_new: +1329 return deltas +1330 # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical +1331 try: +1332 g = groupby([idx_old, idx_new]) +1333 if next(g, True) and not next(g, False): +1334 return deltas +1335 except Exception: +1336 pass +1337 indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] +1338 if len(indices) < len(idx_new): +1339 raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old') +1340 return np.array(deltas)[indices] +1341 1342 -1343 Parameters -1344 ---------- -1345 deltas : list -1346 List of fluctuations -1347 idx_old : list -1348 List or range of configs on which the deltas are defined -1349 idx_new : list -1350 List of configs for which we want to extract the deltas. -1351 Has to be a subset of idx_old. -1352 """ -1353 if not len(deltas) == len(idx_old): -1354 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) -1355 if type(idx_old) is range and type(idx_new) is range: -1356 if idx_old == idx_new: -1357 return deltas -1358 shape = len(idx_new) -1359 ret = np.zeros(shape) -1360 oldpos = 0 -1361 for i in range(shape): -1362 pos = -1 -1363 for j in range(oldpos, len(idx_old)): -1364 if idx_old[j] == idx_new[i]: -1365 pos = j -1366 break -1367 if pos < 0: -1368 raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i])) -1369 ret[i] = deltas[pos] -1370 oldpos = pos -1371 return np.array(ret) -1372 +1343def reweight(weight, obs, **kwargs): +1344 """Reweight a list of observables. +1345 +1346 Parameters +1347 ---------- +1348 weight : Obs +1349 Reweighting factor. 
An Observable that has to be defined on a superset of the +1350 configurations in obs[i].idl for all i. +1351 obs : list +1352 list of Obs, e.g. [obs1, obs2, obs3]. +1353 all_configs : bool +1354 if True, the reweighted observables are normalized by the average of +1355 the reweighting factor on all configurations in weight.idl and not +1356 on the configurations in obs[i].idl. Default False. +1357 """ +1358 result = [] +1359 for i in range(len(obs)): +1360 if len(obs[i].cov_names): +1361 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') +1362 if not set(obs[i].names).issubset(weight.names): +1363 raise Exception('Error: Ensembles do not fit') +1364 for name in obs[i].names: +1365 if not set(obs[i].idl[name]).issubset(weight.idl[name]): +1366 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) +1367 new_samples = [] +1368 w_deltas = {} +1369 for name in sorted(obs[i].names): +1370 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) +1371 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) +1372 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1373 -1374def reweight(weight, obs, **kwargs): -1375 """Reweight a list of observables. -1376 -1377 Parameters -1378 ---------- -1379 weight : Obs -1380 Reweighting factor. An Observable that has to be defined on a superset of the -1381 configurations in obs[i].idl for all i. -1382 obs : list -1383 list of Obs, e.g. [obs1, obs2, obs3]. -1384 all_configs : bool -1385 if True, the reweighted observables are normalized by the average of -1386 the reweighting factor on all configurations in weight.idl and not -1387 on the configurations in obs[i].idl. Default False. 
-1388 """ -1389 result = [] -1390 for i in range(len(obs)): -1391 if len(obs[i].cov_names): -1392 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') -1393 if not set(obs[i].names).issubset(weight.names): -1394 raise Exception('Error: Ensembles do not fit') -1395 for name in obs[i].names: -1396 if not set(obs[i].idl[name]).issubset(weight.idl[name]): -1397 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) -1398 new_samples = [] -1399 w_deltas = {} -1400 for name in sorted(obs[i].names): -1401 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) -1402 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) -1403 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) -1404 -1405 if kwargs.get('all_configs'): -1406 new_weight = weight -1407 else: -1408 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) -1409 -1410 result.append(tmp_obs / new_weight) -1411 result[-1].reweighted = True -1412 result[-1].is_merged = obs[i].is_merged -1413 -1414 return result -1415 -1416 -1417def correlate(obs_a, obs_b): -1418 """Correlate two observables. -1419 -1420 Parameters -1421 ---------- -1422 obs_a : Obs -1423 First observable -1424 obs_b : Obs -1425 Second observable -1426 -1427 Notes -1428 ----- -1429 Keep in mind to only correlate primary observables which have not been reweighted -1430 yet. The reweighting has to be applied after correlating the observables. -1431 Currently only works if ensembles are identical (this is not strictly necessary). 
-1432 """ -1433 -1434 if sorted(obs_a.names) != sorted(obs_b.names): -1435 raise Exception('Ensembles do not fit') -1436 if len(obs_a.cov_names) or len(obs_b.cov_names): -1437 raise Exception('Error: Not possible to correlate Obs that contain covobs!') -1438 for name in obs_a.names: -1439 if obs_a.shape[name] != obs_b.shape[name]: -1440 raise Exception('Shapes of ensemble', name, 'do not fit') -1441 if obs_a.idl[name] != obs_b.idl[name]: -1442 raise Exception('idl of ensemble', name, 'do not fit') -1443 -1444 if obs_a.reweighted is True: -1445 warnings.warn("The first observable is already reweighted.", RuntimeWarning) -1446 if obs_b.reweighted is True: -1447 warnings.warn("The second observable is already reweighted.", RuntimeWarning) +1374 if kwargs.get('all_configs'): +1375 new_weight = weight +1376 else: +1377 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) +1378 +1379 result.append(tmp_obs / new_weight) +1380 result[-1].reweighted = True +1381 result[-1].is_merged = obs[i].is_merged +1382 +1383 return result +1384 +1385 +1386def correlate(obs_a, obs_b): +1387 """Correlate two observables. +1388 +1389 Parameters +1390 ---------- +1391 obs_a : Obs +1392 First observable +1393 obs_b : Obs +1394 Second observable +1395 +1396 Notes +1397 ----- +1398 Keep in mind to only correlate primary observables which have not been reweighted +1399 yet. The reweighting has to be applied after correlating the observables. +1400 Currently only works if ensembles are identical (this is not strictly necessary). 
+1401 """ +1402 +1403 if sorted(obs_a.names) != sorted(obs_b.names): +1404 raise Exception('Ensembles do not fit') +1405 if len(obs_a.cov_names) or len(obs_b.cov_names): +1406 raise Exception('Error: Not possible to correlate Obs that contain covobs!') +1407 for name in obs_a.names: +1408 if obs_a.shape[name] != obs_b.shape[name]: +1409 raise Exception('Shapes of ensemble', name, 'do not fit') +1410 if obs_a.idl[name] != obs_b.idl[name]: +1411 raise Exception('idl of ensemble', name, 'do not fit') +1412 +1413 if obs_a.reweighted is True: +1414 warnings.warn("The first observable is already reweighted.", RuntimeWarning) +1415 if obs_b.reweighted is True: +1416 warnings.warn("The second observable is already reweighted.", RuntimeWarning) +1417 +1418 new_samples = [] +1419 new_idl = [] +1420 for name in sorted(obs_a.names): +1421 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) +1422 new_idl.append(obs_a.idl[name]) +1423 +1424 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) +1425 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} +1426 o.reweighted = obs_a.reweighted or obs_b.reweighted +1427 return o +1428 +1429 +1430def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): +1431 r'''Calculates the error covariance matrix of a set of observables. +1432 +1433 The gamma method has to be applied first to all observables. +1434 +1435 Parameters +1436 ---------- +1437 obs : list or numpy.ndarray +1438 List or one dimensional array of Obs +1439 visualize : bool +1440 If True plots the corresponding normalized correlation matrix (default False). +1441 correlation : bool +1442 If True the correlation matrix instead of the error covariance matrix is returned (default False). 
+1443 smooth : None or int +1444 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue +1445 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the +1446 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely +1447 small ones. 1448 -1449 new_samples = [] -1450 new_idl = [] -1451 for name in sorted(obs_a.names): -1452 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) -1453 new_idl.append(obs_a.idl[name]) -1454 -1455 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) -1456 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} -1457 o.reweighted = obs_a.reweighted or obs_b.reweighted -1458 return o -1459 +1449 Notes +1450 ----- +1451 The error covariance is defined such that it agrees with the squared standard error for two identical observables +1452 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ +1453 in the absence of autocorrelation. +1454 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite +1455 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 
+1456 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. +1457 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ +1458 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). +1459 ''' 1460 -1461def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): -1462 r'''Calculates the error covariance matrix of a set of observables. -1463 -1464 The gamma method has to be applied first to all observables. -1465 -1466 Parameters -1467 ---------- -1468 obs : list or numpy.ndarray -1469 List or one dimensional array of Obs -1470 visualize : bool -1471 If True plots the corresponding normalized correlation matrix (default False). -1472 correlation : bool -1473 If True the correlation matrix instead of the error covariance matrix is returned (default False). -1474 smooth : None or int -1475 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue -1476 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the -1477 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely -1478 small ones. -1479 -1480 Notes -1481 ----- -1482 The error covariance is defined such that it agrees with the squared standard error for two identical observables -1483 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ -1484 in the absence of autocorrelation. 
-1485 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite -1486 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. -1487 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. -1488 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ -1489 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). -1490 ''' -1491 -1492 length = len(obs) +1461 length = len(obs) +1462 +1463 max_samples = np.max([o.N for o in obs]) +1464 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: +1465 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). 
This will result in a rank deficient matrix.", RuntimeWarning) +1466 +1467 cov = np.zeros((length, length)) +1468 for i in range(length): +1469 for j in range(i, length): +1470 cov[i, j] = _covariance_element(obs[i], obs[j]) +1471 cov = cov + cov.T - np.diag(np.diag(cov)) +1472 +1473 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) +1474 +1475 if isinstance(smooth, int): +1476 corr = _smooth_eigenvalues(corr, smooth) +1477 +1478 if visualize: +1479 plt.matshow(corr, vmin=-1, vmax=1) +1480 plt.set_cmap('RdBu') +1481 plt.colorbar() +1482 plt.draw() +1483 +1484 if correlation is True: +1485 return corr +1486 +1487 errors = [o.dvalue for o in obs] +1488 cov = np.diag(errors) @ corr @ np.diag(errors) +1489 +1490 eigenvalues = np.linalg.eigh(cov)[0] +1491 if not np.all(eigenvalues >= 0): +1492 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1493 -1494 max_samples = np.max([o.N for o in obs]) -1495 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: -1496 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). 
This will result in a rank deficient matrix.", RuntimeWarning) -1497 -1498 cov = np.zeros((length, length)) -1499 for i in range(length): -1500 for j in range(i, length): -1501 cov[i, j] = _covariance_element(obs[i], obs[j]) -1502 cov = cov + cov.T - np.diag(np.diag(cov)) -1503 -1504 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) -1505 -1506 if isinstance(smooth, int): -1507 corr = _smooth_eigenvalues(corr, smooth) -1508 -1509 if visualize: -1510 plt.matshow(corr, vmin=-1, vmax=1) -1511 plt.set_cmap('RdBu') -1512 plt.colorbar() -1513 plt.draw() -1514 -1515 if correlation is True: -1516 return corr -1517 -1518 errors = [o.dvalue for o in obs] -1519 cov = np.diag(errors) @ corr @ np.diag(errors) -1520 -1521 eigenvalues = np.linalg.eigh(cov)[0] -1522 if not np.all(eigenvalues >= 0): -1523 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) +1494 return cov +1495 +1496 +1497def _smooth_eigenvalues(corr, E): +1498 """Eigenvalue smoothing as described in hep-lat/9412087 +1499 +1500 corr : np.ndarray +1501 correlation matrix +1502 E : integer +1503 Number of eigenvalues to be left substantially unchanged +1504 """ +1505 if not (2 < E < corr.shape[0] - 1): +1506 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") +1507 vals, vec = np.linalg.eigh(corr) +1508 lambda_min = np.mean(vals[:-E]) +1509 vals[vals < lambda_min] = lambda_min +1510 vals /= np.mean(vals) +1511 return vec @ np.diag(vals) @ vec.T +1512 +1513 +1514def _covariance_element(obs1, obs2): +1515 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" +1516 +1517 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): +1518 deltas1 = _reduce_deltas(deltas1, idx1, new_idx) +1519 deltas2 = _reduce_deltas(deltas2, idx2, new_idx) +1520 return np.sum(deltas1 * deltas2) +1521 +1522 if 
set(obs1.names).isdisjoint(set(obs2.names)): +1523 return 0.0 1524 -1525 return cov -1526 +1525 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): +1526 raise Exception('The gamma method has to be applied to both Obs first.') 1527 -1528def _smooth_eigenvalues(corr, E): -1529 """Eigenvalue smoothing as described in hep-lat/9412087 -1530 -1531 corr : np.ndarray -1532 correlation matrix -1533 E : integer -1534 Number of eigenvalues to be left substantially unchanged -1535 """ -1536 if not (2 < E < corr.shape[0] - 1): -1537 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") -1538 vals, vec = np.linalg.eigh(corr) -1539 lambda_min = np.mean(vals[:-E]) -1540 vals[vals < lambda_min] = lambda_min -1541 vals /= np.mean(vals) -1542 return vec @ np.diag(vals) @ vec.T -1543 -1544 -1545def _covariance_element(obs1, obs2): -1546 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" -1547 -1548 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): -1549 deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) -1550 deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) -1551 return np.sum(deltas1 * deltas2) +1528 dvalue = 0.0 +1529 +1530 for e_name in obs1.mc_names: +1531 +1532 if e_name not in obs2.mc_names: +1533 continue +1534 +1535 idl_d = {} +1536 for r_name in obs1.e_content[e_name]: +1537 if r_name not in obs2.e_content[e_name]: +1538 continue +1539 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) +1540 +1541 gamma = 0.0 +1542 +1543 for r_name in obs1.e_content[e_name]: +1544 if r_name not in obs2.e_content[e_name]: +1545 continue +1546 if len(idl_d[r_name]) == 0: +1547 continue +1548 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) +1549 +1550 if gamma == 0.0: +1551 continue 1552 -1553 if set(obs1.names).isdisjoint(set(obs2.names)): -1554 return 
0.0 -1555 -1556 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): -1557 raise Exception('The gamma method has to be applied to both Obs first.') -1558 -1559 dvalue = 0.0 -1560 -1561 for e_name in obs1.mc_names: -1562 -1563 if e_name not in obs2.mc_names: -1564 continue +1553 gamma_div = 0.0 +1554 for r_name in obs1.e_content[e_name]: +1555 if r_name not in obs2.e_content[e_name]: +1556 continue +1557 if len(idl_d[r_name]) == 0: +1558 continue +1559 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) +1560 gamma /= gamma_div +1561 +1562 dvalue += gamma +1563 +1564 for e_name in obs1.cov_names: 1565 -1566 idl_d = {} -1567 for r_name in obs1.e_content[e_name]: -1568 if r_name not in obs2.e_content[e_name]: -1569 continue -1570 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) -1571 -1572 gamma = 0.0 +1566 if e_name not in obs2.cov_names: +1567 continue +1568 +1569 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) +1570 +1571 return dvalue +1572 1573 -1574 for r_name in obs1.e_content[e_name]: -1575 if r_name not in obs2.e_content[e_name]: -1576 continue -1577 if len(idl_d[r_name]) == 0: -1578 continue -1579 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) -1580 -1581 if gamma == 0.0: -1582 continue -1583 -1584 gamma_div = 0.0 -1585 for r_name in obs1.e_content[e_name]: -1586 if r_name not in obs2.e_content[e_name]: -1587 continue -1588 if len(idl_d[r_name]) == 0: -1589 continue -1590 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) -1591 
gamma /= gamma_div +1574def import_jackknife(jacks, name, idl=None): +1575 """Imports jackknife samples and returns an Obs +1576 +1577 Parameters +1578 ---------- +1579 jacks : numpy.ndarray +1580 numpy array containing the mean value as zeroth entry and +1581 the N jackknife samples as first to Nth entry. +1582 name : str +1583 name of the ensemble the samples are defined on. +1584 """ +1585 length = len(jacks) - 1 +1586 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) +1587 samples = jacks[1:] @ prj +1588 mean = np.mean(samples) +1589 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) +1590 new_obs._value = jacks[0] +1591 return new_obs 1592 -1593 dvalue += gamma -1594 -1595 for e_name in obs1.cov_names: +1593 +1594def merge_obs(list_of_obs): +1595 """Combine all observables in list_of_obs into one new observable 1596 -1597 if e_name not in obs2.cov_names: -1598 continue -1599 -1600 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) +1597 Parameters +1598 ---------- +1599 list_of_obs : list +1600 list of the Obs object to be combined 1601 -1602 return dvalue -1603 -1604 -1605def import_jackknife(jacks, name, idl=None): -1606 """Imports jackknife samples and returns an Obs -1607 -1608 Parameters -1609 ---------- -1610 jacks : numpy.ndarray -1611 numpy array containing the mean value as zeroth entry and -1612 the N jackknife samples as first to Nth entry. -1613 name : str -1614 name of the ensemble the samples are defined on. 
-1615 """ -1616 length = len(jacks) - 1 -1617 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) -1618 samples = jacks[1:] @ prj -1619 mean = np.mean(samples) -1620 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) -1621 new_obs._value = jacks[0] -1622 return new_obs +1602 Notes +1603 ----- +1604 It is not possible to combine obs which are based on the same replicum +1605 """ +1606 replist = [item for obs in list_of_obs for item in obs.names] +1607 if (len(replist) == len(set(replist))) is False: +1608 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) +1609 if any([len(o.cov_names) for o in list_of_obs]): +1610 raise Exception('Not possible to merge data that contains covobs!') +1611 new_dict = {} +1612 idl_dict = {} +1613 for o in list_of_obs: +1614 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) +1615 for key in set(o.deltas) | set(o.r_values)}) +1616 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) +1617 +1618 names = sorted(new_dict.keys()) +1619 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) +1620 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} +1621 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) +1622 return o 1623 1624 -1625def merge_obs(list_of_obs): -1626 """Combine all observables in list_of_obs into one new observable +1625def cov_Obs(means, cov, name, grad=None): +1626 """Create an Obs based on mean(s) and a covariance matrix 1627 1628 Parameters 1629 ---------- -1630 list_of_obs : list -1631 list of the Obs object to be combined -1632 -1633 Notes -1634 ----- -1635 It is not possible to combine obs which are based on the same replicum -1636 """ -1637 replist = [item for obs in list_of_obs for item in obs.names] -1638 if (len(replist) == len(set(replist))) is False: -1639 raise Exception('list_of_obs contains duplicate replica: %s' % 
(str(replist))) -1640 if any([len(o.cov_names) for o in list_of_obs]): -1641 raise Exception('Not possible to merge data that contains covobs!') -1642 new_dict = {} -1643 idl_dict = {} -1644 for o in list_of_obs: -1645 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) -1646 for key in set(o.deltas) | set(o.r_values)}) -1647 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) -1648 -1649 names = sorted(new_dict.keys()) -1650 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) -1651 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} -1652 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) -1653 return o +1630 mean : list of floats or float +1631 N mean value(s) of the new Obs +1632 cov : list or array +1633 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance +1634 name : str +1635 identifier for the covariance matrix +1636 grad : list or array +1637 Gradient of the Covobs wrt. the means belonging to cov. +1638 """ +1639 +1640 def covobs_to_obs(co): +1641 """Make an Obs out of a Covobs +1642 +1643 Parameters +1644 ---------- +1645 co : Covobs +1646 Covobs to be embedded into the Obs +1647 """ +1648 o = Obs([], [], means=[]) +1649 o._value = co.value +1650 o.names.append(co.name) +1651 o._covobs[co.name] = co +1652 o._dvalue = np.sqrt(co.errsq()) +1653 return o 1654 -1655 -1656def cov_Obs(means, cov, name, grad=None): -1657 """Create an Obs based on mean(s) and a covariance matrix +1655 ol = [] +1656 if isinstance(means, (float, int)): +1657 means = [means] 1658 -1659 Parameters -1660 ---------- -1661 mean : list of floats or float -1662 N mean value(s) of the new Obs -1663 cov : list or array -1664 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance -1665 name : str -1666 identifier for the covariance matrix -1667 grad : list or array -1668 Gradient of the Covobs wrt. the means belonging to cov. 
-1669 """ -1670 -1671 def covobs_to_obs(co): -1672 """Make an Obs out of a Covobs -1673 -1674 Parameters -1675 ---------- -1676 co : Covobs -1677 Covobs to be embedded into the Obs -1678 """ -1679 o = Obs([], [], means=[]) -1680 o._value = co.value -1681 o.names.append(co.name) -1682 o._covobs[co.name] = co -1683 o._dvalue = np.sqrt(co.errsq()) -1684 return o -1685 -1686 ol = [] -1687 if isinstance(means, (float, int)): -1688 means = [means] -1689 -1690 for i in range(len(means)): -1691 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) -1692 if ol[0].covobs[name].N != len(means): -1693 raise Exception('You have to provide %d mean values!' % (ol[0].N)) -1694 if len(ol) == 1: -1695 return ol[0] -1696 return ol +1659 for i in range(len(means)): +1660 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) +1661 if ol[0].covobs[name].N != len(means): +1662 raise Exception('You have to provide %d mean values!' % (ol[0].N)) +1663 if len(ol) == 1: +1664 return ol[0] +1665 return ol @@ -4226,184 +4195,184 @@ should agree with samples from a full jackknife analysis up to O(1/N). -
1158def derived_observable(func, data, array_mode=False, **kwargs): -1159 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. -1160 -1161 Parameters -1162 ---------- -1163 func : object -1164 arbitrary function of the form func(data, **kwargs). For the -1165 automatic differentiation to work, all numpy functions have to have -1166 the autograd wrapper (use 'import autograd.numpy as anp'). -1167 data : list -1168 list of Obs, e.g. [obs1, obs2, obs3]. -1169 num_grad : bool -1170 if True, numerical derivatives are used instead of autograd -1171 (default False). To control the numerical differentiation the -1172 kwargs of numdifftools.step_generators.MaxStepGenerator -1173 can be used. -1174 man_grad : list -1175 manually supply a list or an array which contains the jacobian -1176 of func. Use cautiously, supplying the wrong derivative will -1177 not be intercepted. -1178 -1179 Notes -1180 ----- -1181 For simple mathematical operations it can be practical to use anonymous -1182 functions. For the ratio of two observables one can e.g. use -1183 -1184 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) -1185 """ -1186 -1187 data = np.asarray(data) -1188 raveled_data = data.ravel() +@@ -4450,47 +4419,47 @@ functions. For the ratio of two observables one can e.g. use1130def derived_observable(func, data, array_mode=False, **kwargs): +1131 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. +1132 +1133 Parameters +1134 ---------- +1135 func : object +1136 arbitrary function of the form func(data, **kwargs). For the +1137 automatic differentiation to work, all numpy functions have to have +1138 the autograd wrapper (use 'import autograd.numpy as anp'). +1139 data : list +1140 list of Obs, e.g. [obs1, obs2, obs3]. +1141 num_grad : bool +1142 if True, numerical derivatives are used instead of autograd +1143 (default False). 
To control the numerical differentiation the +1144 kwargs of numdifftools.step_generators.MaxStepGenerator +1145 can be used. +1146 man_grad : list +1147 manually supply a list or an array which contains the jacobian +1148 of func. Use cautiously, supplying the wrong derivative will +1149 not be intercepted. +1150 +1151 Notes +1152 ----- +1153 For simple mathematical operations it can be practical to use anonymous +1154 functions. For the ratio of two observables one can e.g. use +1155 +1156 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) +1157 """ +1158 +1159 data = np.asarray(data) +1160 raveled_data = data.ravel() +1161 +1162 # Workaround for matrix operations containing non Obs data +1163 if not all(isinstance(x, Obs) for x in raveled_data): +1164 for i in range(len(raveled_data)): +1165 if isinstance(raveled_data[i], (int, float)): +1166 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") +1167 +1168 allcov = {} +1169 for o in raveled_data: +1170 for name in o.cov_names: +1171 if name in allcov: +1172 if not np.allclose(allcov[name], o.covobs[name].cov): +1173 raise Exception('Inconsistent covariance matrices for %s!' 
% (name)) +1174 else: +1175 allcov[name] = o.covobs[name].cov +1176 +1177 n_obs = len(raveled_data) +1178 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) +1179 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) +1180 new_sample_names = sorted(set(new_names) - set(new_cov_names)) +1181 +1182 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} +1183 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 +1184 +1185 if data.ndim == 1: +1186 values = np.array([o.value for o in data]) +1187 else: +1188 values = np.vectorize(lambda x: x.value)(data) 1189 -1190 # Workaround for matrix operations containing non Obs data -1191 if not all(isinstance(x, Obs) for x in raveled_data): -1192 for i in range(len(raveled_data)): -1193 if isinstance(raveled_data[i], (int, float)): -1194 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") -1195 -1196 allcov = {} -1197 for o in raveled_data: -1198 for name in o.cov_names: -1199 if name in allcov: -1200 if not np.allclose(allcov[name], o.covobs[name].cov): -1201 raise Exception('Inconsistent covariance matrices for %s!' 
% (name)) -1202 else: -1203 allcov[name] = o.covobs[name].cov -1204 -1205 n_obs = len(raveled_data) -1206 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) -1207 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) -1208 new_sample_names = sorted(set(new_names) - set(new_cov_names)) -1209 -1210 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} -1211 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 -1212 -1213 if data.ndim == 1: -1214 values = np.array([o.value for o in data]) -1215 else: -1216 values = np.vectorize(lambda x: x.value)(data) -1217 -1218 new_values = func(values, **kwargs) -1219 -1220 multi = int(isinstance(new_values, np.ndarray)) -1221 -1222 new_r_values = {} -1223 new_idl_d = {} -1224 for name in new_sample_names: -1225 idl = [] -1226 tmp_values = np.zeros(n_obs) -1227 for i, item in enumerate(raveled_data): -1228 tmp_values[i] = item.r_values.get(name, item.value) -1229 tmp_idl = item.idl.get(name) -1230 if tmp_idl is not None: -1231 idl.append(tmp_idl) -1232 if multi > 0: -1233 tmp_values = np.array(tmp_values).reshape(data.shape) -1234 new_r_values[name] = func(tmp_values, **kwargs) -1235 new_idl_d[name] = _merge_idx(idl) -1236 if not is_merged[name]: -1237 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) -1238 -1239 if 'man_grad' in kwargs: -1240 deriv = np.asarray(kwargs.get('man_grad')) -1241 if new_values.shape + data.shape != deriv.shape: -1242 raise Exception('Manual derivative does not have correct shape.') -1243 elif kwargs.get('num_grad') is True: -1244 if multi > 0: -1245 raise Exception('Multi mode currently not supported for numerical derivative') -1246 options = { -1247 'base_step': 0.1, -1248 'step_ratio': 2.5} -1249 for key in options.keys(): -1250 kwarg = kwargs.get(key) -1251 if kwarg is not None: -1252 options[key] 
= kwarg -1253 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) -1254 if tmp_df.size == 1: -1255 deriv = np.array([tmp_df.real]) -1256 else: -1257 deriv = tmp_df.real -1258 else: -1259 deriv = jacobian(func)(values, **kwargs) -1260 -1261 final_result = np.zeros(new_values.shape, dtype=object) -1262 -1263 if array_mode is True: -1264 -1265 class _Zero_grad(): -1266 def __init__(self, N): -1267 self.grad = np.zeros((N, 1)) -1268 -1269 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) -1270 d_extracted = {} -1271 g_extracted = {} -1272 for name in new_sample_names: -1273 d_extracted[name] = [] -1274 ens_length = len(new_idl_d[name]) -1275 for i_dat, dat in enumerate(data): -1276 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) -1277 for name in new_cov_names: -1278 g_extracted[name] = [] -1279 zero_grad = _Zero_grad(new_covobs_lengths[name]) -1280 for i_dat, dat in enumerate(data): -1281 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) -1282 -1283 for i_val, new_val in np.ndenumerate(new_values): -1284 new_deltas = {} -1285 new_grad = {} -1286 if array_mode is True: -1287 for name in new_sample_names: -1288 ens_length = d_extracted[name][0].shape[-1] -1289 new_deltas[name] = np.zeros(ens_length) -1290 for i_dat, dat in enumerate(d_extracted[name]): -1291 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) -1292 for name in new_cov_names: -1293 new_grad[name] = 0 -1294 for i_dat, dat in enumerate(g_extracted[name]): -1295 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) -1296 else: -1297 for 
j_obs, obs in np.ndenumerate(data): -1298 for name in obs.names: -1299 if name in obs.cov_names: -1300 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad -1301 else: -1302 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) +1190 new_values = func(values, **kwargs) +1191 +1192 multi = int(isinstance(new_values, np.ndarray)) +1193 +1194 new_r_values = {} +1195 new_idl_d = {} +1196 for name in new_sample_names: +1197 idl = [] +1198 tmp_values = np.zeros(n_obs) +1199 for i, item in enumerate(raveled_data): +1200 tmp_values[i] = item.r_values.get(name, item.value) +1201 tmp_idl = item.idl.get(name) +1202 if tmp_idl is not None: +1203 idl.append(tmp_idl) +1204 if multi > 0: +1205 tmp_values = np.array(tmp_values).reshape(data.shape) +1206 new_r_values[name] = func(tmp_values, **kwargs) +1207 new_idl_d[name] = _merge_idx(idl) +1208 if not is_merged[name]: +1209 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) +1210 +1211 if 'man_grad' in kwargs: +1212 deriv = np.asarray(kwargs.get('man_grad')) +1213 if new_values.shape + data.shape != deriv.shape: +1214 raise Exception('Manual derivative does not have correct shape.') +1215 elif kwargs.get('num_grad') is True: +1216 if multi > 0: +1217 raise Exception('Multi mode currently not supported for numerical derivative') +1218 options = { +1219 'base_step': 0.1, +1220 'step_ratio': 2.5} +1221 for key in options.keys(): +1222 kwarg = kwargs.get(key) +1223 if kwarg is not None: +1224 options[key] = kwarg +1225 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) +1226 if tmp_df.size == 1: +1227 deriv = np.array([tmp_df.real]) +1228 else: +1229 deriv = tmp_df.real +1230 else: +1231 deriv = jacobian(func)(values, **kwargs) +1232 +1233 final_result = np.zeros(new_values.shape, dtype=object) +1234 +1235 if 
array_mode is True: +1236 +1237 class _Zero_grad(): +1238 def __init__(self, N): +1239 self.grad = np.zeros((N, 1)) +1240 +1241 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) +1242 d_extracted = {} +1243 g_extracted = {} +1244 for name in new_sample_names: +1245 d_extracted[name] = [] +1246 ens_length = len(new_idl_d[name]) +1247 for i_dat, dat in enumerate(data): +1248 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) +1249 for name in new_cov_names: +1250 g_extracted[name] = [] +1251 zero_grad = _Zero_grad(new_covobs_lengths[name]) +1252 for i_dat, dat in enumerate(data): +1253 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) +1254 +1255 for i_val, new_val in np.ndenumerate(new_values): +1256 new_deltas = {} +1257 new_grad = {} +1258 if array_mode is True: +1259 for name in new_sample_names: +1260 ens_length = d_extracted[name][0].shape[-1] +1261 new_deltas[name] = np.zeros(ens_length) +1262 for i_dat, dat in enumerate(d_extracted[name]): +1263 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) +1264 for name in new_cov_names: +1265 new_grad[name] = 0 +1266 for i_dat, dat in enumerate(g_extracted[name]): +1267 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) +1268 else: +1269 for j_obs, obs in np.ndenumerate(data): +1270 for name in obs.names: +1271 if name in obs.cov_names: +1272 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad +1273 else: +1274 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 
+1275 +1276 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} +1277 +1278 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): +1279 raise Exception('The same name has been used for deltas and covobs!') +1280 new_samples = [] +1281 new_means = [] +1282 new_idl = [] +1283 new_names_obs = [] +1284 for name in new_names: +1285 if name not in new_covobs: +1286 if is_merged[name]: +1287 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) +1288 else: +1289 filtered_deltas = new_deltas[name] +1290 filtered_idl_d = new_idl_d[name] +1291 +1292 new_samples.append(filtered_deltas) +1293 new_idl.append(filtered_idl_d) +1294 new_means.append(new_r_values[name][i_val]) +1295 new_names_obs.append(name) +1296 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) +1297 for name in new_covobs: +1298 final_result[i_val].names.append(name) +1299 final_result[i_val]._covobs = new_covobs +1300 final_result[i_val]._value = new_val +1301 final_result[i_val].is_merged = is_merged +1302 final_result[i_val].reweighted = reweighted 1303 -1304 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} -1305 -1306 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): -1307 raise Exception('The same name has been used for deltas and covobs!') -1308 new_samples = [] -1309 new_means = [] -1310 new_idl = [] -1311 new_names_obs = [] -1312 for name in new_names: -1313 if name not in new_covobs: -1314 if is_merged[name]: -1315 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) -1316 else: -1317 filtered_deltas = new_deltas[name] -1318 filtered_idl_d = new_idl_d[name] -1319 -1320 new_samples.append(filtered_deltas) -1321 new_idl.append(filtered_idl_d) -1322 new_means.append(new_r_values[name][i_val]) -1323 new_names_obs.append(name) -1324 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, 
idl=new_idl) -1325 for name in new_covobs: -1326 final_result[i_val].names.append(name) -1327 final_result[i_val]._covobs = new_covobs -1328 final_result[i_val]._value = new_val -1329 final_result[i_val].is_merged = is_merged -1330 final_result[i_val].reweighted = reweighted -1331 -1332 if multi == 0: -1333 final_result = final_result.item() -1334 -1335 return final_result +1304 if multi == 0: +1305 final_result = final_result.item() +1306 +1307 return final_result
1375def reweight(weight, obs, **kwargs): -1376 """Reweight a list of observables. -1377 -1378 Parameters -1379 ---------- -1380 weight : Obs -1381 Reweighting factor. An Observable that has to be defined on a superset of the -1382 configurations in obs[i].idl for all i. -1383 obs : list -1384 list of Obs, e.g. [obs1, obs2, obs3]. -1385 all_configs : bool -1386 if True, the reweighted observables are normalized by the average of -1387 the reweighting factor on all configurations in weight.idl and not -1388 on the configurations in obs[i].idl. Default False. -1389 """ -1390 result = [] -1391 for i in range(len(obs)): -1392 if len(obs[i].cov_names): -1393 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') -1394 if not set(obs[i].names).issubset(weight.names): -1395 raise Exception('Error: Ensembles do not fit') -1396 for name in obs[i].names: -1397 if not set(obs[i].idl[name]).issubset(weight.idl[name]): -1398 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) -1399 new_samples = [] -1400 w_deltas = {} -1401 for name in sorted(obs[i].names): -1402 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) -1403 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) -1404 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) -1405 -1406 if kwargs.get('all_configs'): -1407 new_weight = weight -1408 else: -1409 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) -1410 -1411 result.append(tmp_obs / new_weight) -1412 result[-1].reweighted = True -1413 result[-1].is_merged = obs[i].is_merged -1414 -1415 return result +@@ -4524,48 +4493,48 @@ on the configurations in obs[i].idl. 
Default False.1344def reweight(weight, obs, **kwargs): +1345 """Reweight a list of observables. +1346 +1347 Parameters +1348 ---------- +1349 weight : Obs +1350 Reweighting factor. An Observable that has to be defined on a superset of the +1351 configurations in obs[i].idl for all i. +1352 obs : list +1353 list of Obs, e.g. [obs1, obs2, obs3]. +1354 all_configs : bool +1355 if True, the reweighted observables are normalized by the average of +1356 the reweighting factor on all configurations in weight.idl and not +1357 on the configurations in obs[i].idl. Default False. +1358 """ +1359 result = [] +1360 for i in range(len(obs)): +1361 if len(obs[i].cov_names): +1362 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') +1363 if not set(obs[i].names).issubset(weight.names): +1364 raise Exception('Error: Ensembles do not fit') +1365 for name in obs[i].names: +1366 if not set(obs[i].idl[name]).issubset(weight.idl[name]): +1367 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) +1368 new_samples = [] +1369 w_deltas = {} +1370 for name in sorted(obs[i].names): +1371 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) +1372 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) +1373 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) +1374 +1375 if kwargs.get('all_configs'): +1376 new_weight = weight +1377 else: +1378 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) +1379 +1380 result.append(tmp_obs / new_weight) +1381 result[-1].reweighted = True +1382 result[-1].is_merged = obs[i].is_merged +1383 +1384 return result
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception('Ensembles do not fit')
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    # Per-ensemble product of the full samples (fluctuations + replica means).
    ordered_names = sorted(obs_a.names)
    new_samples = [(obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])
                   for name in ordered_names]
    new_idl = [obs_a.idl[name] for name in ordered_names]

    o = Obs(new_samples, ordered_names, idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    # A covariance matrix estimated from fewer samples than its dimension is
    # necessarily rank deficient (does not apply when covobs contribute).
    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Fill the upper triangle element-wise and mirror it to get the full symmetric matrix.
    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    # Normalize to a correlation matrix via the inverse square roots of the diagonal.
    inv_sd = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_sd @ cov @ inv_sd

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    # Rescale the correlation matrix by the full (autocorrelation-aware) errors.
    err_diag = np.diag([o.dvalue for o in obs])
    cov = err_diag @ corr @ err_diag

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list or range, optional
        Configuration ids on which the N samples are defined; passed through
        to the Obs constructor. Default None.
    """
    length = len(jacks) - 1
    # Inverse of the jackknife resampling: each sample is recovered from the
    # jackknife means via x_s = sum_t J_t - (N - 1) * J_s, expressed here as a
    # matrix product with prj = (ones - (N - 1) * identity).
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    # Keep the exact mean provided in jacks[0] instead of the reconstructed one.
    new_obs._value = jacks[0]
    return new_obs
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [rep for ob in list_of_obs for rep in ob.names]
    if (len(replist) == len(set(replist))) is False:
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise Exception('Not possible to merge data that contains covobs!')

    # Collect the full samples (fluctuations + replica means) and the idl per replicum.
    new_dict = {}
    idl_dict = {}
    for ob in list_of_obs:
        for key in set(ob.deltas) | set(ob.r_values):
            new_dict[key] = ob.deltas.get(key, 0) + ob.r_values.get(key, 0)
        for key in set(ob.deltas):
            idl_dict[key] = ob.idl.get(key, 0)

    names = sorted(new_dict.keys())
    merged = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    merged.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in merged.names}
    merged.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return merged
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    mean : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def _embed(co):
        """Wrap a single Covobs into a freshly built Obs carrying no samples.

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        wrapped = Obs([], [], means=[])
        wrapped._value = co.value
        wrapped.names.append(co.name)
        wrapped._covobs[co.name] = co
        wrapped._dvalue = np.sqrt(co.errsq())
        return wrapped

    # Accept a scalar mean by promoting it to a one-element list.
    if isinstance(means, (float, int)):
        means = [means]

    ol = [_embed(Covobs(mean, cov, name, pos=pos, grad=grad)) for pos, mean in enumerate(means)]
    if ol[0].covobs[name].N != len(means):
        raise Exception('You have to provide %d mean values!' % (ol[0].N))
    if len(ol) == 1:
        return ol[0]
    return ol