diff --git a/examples/07_data_management.ipynb b/examples/07_data_management.ipynb
index d317456a..329d87a9 100644
--- a/examples/07_data_management.ipynb
+++ b/examples/07_data_management.ipynb
@@ -18,6 +18,13 @@
    "import pyerrors as pe"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "For the data management example, we reuse the data from the correlator example."
+  ]
+ },
  {
   "cell_type": "code",
   "execution_count": 2,
@@ -52,6 +59,13 @@
    "    return a[1] * anp.exp(-a[0] * x)"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "In this example we perform uncorrelated fits of a single exponential function to the correlator and vary the range of the fit. The fit results can be conveniently stored in a pandas DataFrame together with the corresponding metadata."
+  ]
+ },
  {
   "cell_type": "code",
   "execution_count": 4,
@@ -60,7 +74,7 @@
   "source": [
    "rows = []\n",
    "for t_start in range(12, 17):\n",
-   "    for t_stop in range(30, 35):\n",
+   "    for t_stop in range(30, 32):\n",
    "        fr = my_correlator.fit(func_exp, [t_start, t_stop], silent=True)\n",
    "        fr.gamma_method()\n",
    "        row = {\"t_start\": t_start,\n",
@@ -124,30 +138,6 @@ ... @@ -275,68 +193,29 @@
       [HTML rendering of the my_df DataFrame omitted; the same table follows as text/plain]
      "text/plain": [
-      "    t_start  t_stop  datapoints  chisquare_by_dof        mass\n",
-      "0        12      30          19          0.057872  0.2218(12)\n",
-      "1        12      31          20          0.063951  0.2221(11)\n",
-      "2        12      32          21          0.064960  0.2223(11)\n",
-      "3        12      33          22          0.066495  0.2224(10)\n",
-      "4        12      34          23          0.066606  0.2225(10)\n",
-      "5        13      30          18          0.051577  0.2215(12)\n",
-      "6        13      31          19          0.060901  0.2219(11)\n",
-      "7        13      32          20          0.063551  0.2221(12)\n",
-      "8        13      33          21          0.066406  0.2223(12)\n",
-      "9        13      34          22          0.067237  0.2224(12)\n",
-      "10       14      30          17          0.052349  0.2213(13)\n",
-      "11       14      31          18          0.063640  0.2218(13)\n",
-      "12       14      32          19          0.066883  0.2220(14)\n",
-      "13       14      33          20          0.070019  0.2223(15)\n",
-      "14       14      34          21          0.070775  0.2224(15)\n",
-      "15       15      30          16          0.056088  0.2213(16)\n",
-      "16       15      31          17          0.067552  0.2218(17)\n",
-      "17       15      32          18          0.070170  0.2221(18)\n",
-      "18       15      33          19          0.072516  0.2224(18)\n",
-      "19       15      34          20          0.072509  0.2225(18)\n",
-      "20       16      30          15          0.059969  0.2214(21)\n",
-      "21       16      31          16          0.070874  0.2220(20)\n",
-      "22       16      32          17          0.072437  0.2223(21)\n",
-      "23       16      33          18          0.073684  0.2225(21)\n",
-      "24       16      34          19          0.072767  0.2227(20)"
+      "   t_start  t_stop  datapoints  chisquare_by_dof        mass\n",
+      "0       12      30          19          0.057872  0.2218(12)\n",
+      "1       12      31          20          0.063951  0.2221(11)\n",
+      "2       13      30          18          0.051577  0.2215(12)\n",
+      "3       13      31          19          0.060901  0.2219(11)\n",
+      "4       14      30          17          0.052349  0.2213(13)\n",
+      "5       14      31          18          0.063640  0.2218(13)\n",
+      "6       15      30          16          0.056088  0.2213(16)\n",
+      "7       15      31          17          0.067552  0.2218(17)\n",
+      "8       16      30          15          0.059969  0.2214(21)\n",
+      "9       16      31          16          0.070874  0.2220(20)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
@@ -348,30 +227,45 @@
    "my_df"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "The content of this pandas DataFrame can be inserted into a relational database, making use of the `JSON` serialization of `pyerrors` objects. In this example we use an SQLite database."
+  ]
+ },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
-   "pe.input.pandas.to_sql(my_df, \"mass_table\", \"my_db.sqlite\")"
+   "pe.input.pandas.to_sql(my_df, \"mass_table\", \"my_db.sqlite\", if_exists='fail')"
+  ]
+ },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "At a later stage of the analysis, the content of the database can be reconstructed into a DataFrame via SQL queries.\n",
+   "In this example we extract `t_start`, `t_stop` and the fitted mass for all fits that start at times larger than 14."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 7,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "new_df = pe.input.pandas.read_sql(f\"SELECT t_start, t_stop, mass FROM mass_table WHERE t_start > 14\",\n",
+   "                                  \"my_db.sqlite\",\n",
+   "                                  auto_gamma=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
-  "outputs": [],
-  "source": [
-   "new_df = df = pe.input.pandas.read_sql(f\"SELECT t_start, t_stop, mass FROM mass_table WHERE t_start > 13\"\n",
-   "                                       ,\"my_db.sqlite\"\n",
-   "                                       ,auto_gamma=True)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 9,
-  "metadata": {},
   "outputs": [
    {
     "data": {
@@ -402,118 +296,41 @@
       [HTML rendering of the new_df DataFrame omitted; the same table follows as text/plain]
      "text/plain": [
-      "    t_start  t_stop        mass\n",
-      "0        14      30  0.2213(13)\n",
-      "1        14      31  0.2218(13)\n",
-      "2        14      32  0.2220(14)\n",
-      "3        14      33  0.2223(15)\n",
-      "4        14      34  0.2224(15)\n",
-      "5        15      30  0.2213(16)\n",
-      "6        15      31  0.2218(17)\n",
-      "7        15      32  0.2221(18)\n",
-      "8        15      33  0.2224(18)\n",
-      "9        15      34  0.2225(18)\n",
-      "10       16      30  0.2214(21)\n",
-      "11       16      31  0.2220(20)\n",
-      "12       16      32  0.2223(21)\n",
-      "13       16      33  0.2225(21)\n",
-      "14       16      34  0.2227(20)"
+      "   t_start  t_stop        mass\n",
+      "0       15      30  0.2213(16)\n",
+      "1       15      31  0.2218(17)\n",
+      "2       16      30  0.2214(21)\n",
+      "3       16      31  0.2220(20)"
      ]
     },
-    "execution_count": 9,
+    "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
@@ -522,6 +339,13 @@
    "new_df"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "The storage of intermediate analysis results in relational databases allows for a convenient and scalable way of splitting up a detailed analysis into multiple independent steps."
+  ]
+ },
  {
   "cell_type": "code",
   "execution_count": null,
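As a supplementary sketch (not part of the diff above): once `pe.input.pandas.to_sql` has written the database, the file can also be inspected with nothing but the Python standard library. The file name `my_db.sqlite` and table name `mass_table` are taken from the notebook; the exact column layout written by pyerrors is not specified here and should be checked against the actual file.

```python
# Minimal sketch: inspect the SQLite file written by pe.input.pandas.to_sql in
# the notebook above. Assumes my_db.sqlite exists in the working directory and
# contains the table mass_table (both names are taken from the notebook).
import sqlite3

with sqlite3.connect("my_db.sqlite") as conn:
    # Which tables were created
    tables = [row[0] for row in conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table'")]
    print("tables:", tables)

    # Column layout of mass_table as pandas/pyerrors wrote it
    for column in conn.execute("PRAGMA table_info(mass_table)"):
        print(column)

    # Same filter as the notebook's read_sql query, without deserializing
    # the pyerrors observables stored in the mass column
    n_fits = conn.execute(
        "SELECT COUNT(*) FROM mass_table WHERE t_start > 14").fetchone()[0]
    print("fits with t_start > 14:", n_fits)
```

For the full round trip, `pe.input.pandas.read_sql` with `auto_gamma=True`, as used in the notebook, reconstructs the `mass` column as `pyerrors` observables.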