docs: data management example refined.

This commit is contained in:
Fabian Joswig 2022-09-29 17:11:24 +01:00
parent 9a7f5679c9
commit 6a57264868
No known key found for this signature in database

View file

@ -18,6 +18,13 @@
"import pyerrors as pe"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For the data management example we reuse the data from the correlator example."
]
},
{
"cell_type": "code",
"execution_count": 2,
@ -52,6 +59,13 @@
" return a[1] * anp.exp(-a[0] * x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this example we perform uncorrelated fits of a single exponential function to the correlator and vary the range of the fit. The fit results can be conveniently stored in a pandas DataFrame together with the corresponding metadata."
]
},
{
"cell_type": "code",
"execution_count": 4,
@ -60,7 +74,7 @@
"source": [
"rows = []\n",
"for t_start in range(12, 17):\n",
" for t_stop in range(30, 35):\n",
" for t_stop in range(30, 32):\n",
" fr = my_correlator.fit(func_exp, [t_start, t_stop], silent=True)\n",
" fr.gamma_method()\n",
" row = {\"t_start\": t_start,\n",
@ -124,30 +138,6 @@
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>32</td>\n",
" <td>21</td>\n",
" <td>0.064960</td>\n",
" <td>0.2223(11)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>12</td>\n",
" <td>33</td>\n",
" <td>22</td>\n",
" <td>0.066495</td>\n",
" <td>0.2224(10)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>34</td>\n",
" <td>23</td>\n",
" <td>0.066606</td>\n",
" <td>0.2225(10)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>13</td>\n",
" <td>30</td>\n",
" <td>18</td>\n",
@ -155,7 +145,7 @@
" <td>0.2215(12)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>31</td>\n",
" <td>19</td>\n",
@ -163,31 +153,7 @@
" <td>0.2219(11)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>32</td>\n",
" <td>20</td>\n",
" <td>0.063551</td>\n",
" <td>0.2221(12)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>13</td>\n",
" <td>33</td>\n",
" <td>21</td>\n",
" <td>0.066406</td>\n",
" <td>0.2223(12)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>13</td>\n",
" <td>34</td>\n",
" <td>22</td>\n",
" <td>0.067237</td>\n",
" <td>0.2224(12)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>30</td>\n",
" <td>17</td>\n",
@ -195,7 +161,7 @@
" <td>0.2213(13)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <th>5</th>\n",
" <td>14</td>\n",
" <td>31</td>\n",
" <td>18</td>\n",
@ -203,31 +169,7 @@
" <td>0.2218(13)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>14</td>\n",
" <td>32</td>\n",
" <td>19</td>\n",
" <td>0.066883</td>\n",
" <td>0.2220(14)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>14</td>\n",
" <td>33</td>\n",
" <td>20</td>\n",
" <td>0.070019</td>\n",
" <td>0.2223(15)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>14</td>\n",
" <td>34</td>\n",
" <td>21</td>\n",
" <td>0.070775</td>\n",
" <td>0.2224(15)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <th>6</th>\n",
" <td>15</td>\n",
" <td>30</td>\n",
" <td>16</td>\n",
@ -235,7 +177,7 @@
" <td>0.2213(16)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <th>7</th>\n",
" <td>15</td>\n",
" <td>31</td>\n",
" <td>17</td>\n",
@ -243,31 +185,7 @@
" <td>0.2218(17)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>15</td>\n",
" <td>32</td>\n",
" <td>18</td>\n",
" <td>0.070170</td>\n",
" <td>0.2221(18)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>15</td>\n",
" <td>33</td>\n",
" <td>19</td>\n",
" <td>0.072516</td>\n",
" <td>0.2224(18)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>15</td>\n",
" <td>34</td>\n",
" <td>20</td>\n",
" <td>0.072509</td>\n",
" <td>0.2225(18)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <th>8</th>\n",
" <td>16</td>\n",
" <td>30</td>\n",
" <td>15</td>\n",
@ -275,68 +193,29 @@
" <td>0.2214(21)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <th>9</th>\n",
" <td>16</td>\n",
" <td>31</td>\n",
" <td>16</td>\n",
" <td>0.070874</td>\n",
" <td>0.2220(20)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>16</td>\n",
" <td>32</td>\n",
" <td>17</td>\n",
" <td>0.072437</td>\n",
" <td>0.2223(21)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>16</td>\n",
" <td>33</td>\n",
" <td>18</td>\n",
" <td>0.073684</td>\n",
" <td>0.2225(21)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>16</td>\n",
" <td>34</td>\n",
" <td>19</td>\n",
" <td>0.072767</td>\n",
" <td>0.2227(20)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" t_start t_stop datapoints chisquare_by_dof mass\n",
"0 12 30 19 0.057872 0.2218(12)\n",
"1 12 31 20 0.063951 0.2221(11)\n",
"2 12 32 21 0.064960 0.2223(11)\n",
"3 12 33 22 0.066495 0.2224(10)\n",
"4 12 34 23 0.066606 0.2225(10)\n",
"5 13 30 18 0.051577 0.2215(12)\n",
"6 13 31 19 0.060901 0.2219(11)\n",
"7 13 32 20 0.063551 0.2221(12)\n",
"8 13 33 21 0.066406 0.2223(12)\n",
"9 13 34 22 0.067237 0.2224(12)\n",
"10 14 30 17 0.052349 0.2213(13)\n",
"11 14 31 18 0.063640 0.2218(13)\n",
"12 14 32 19 0.066883 0.2220(14)\n",
"13 14 33 20 0.070019 0.2223(15)\n",
"14 14 34 21 0.070775 0.2224(15)\n",
"15 15 30 16 0.056088 0.2213(16)\n",
"16 15 31 17 0.067552 0.2218(17)\n",
"17 15 32 18 0.070170 0.2221(18)\n",
"18 15 33 19 0.072516 0.2224(18)\n",
"19 15 34 20 0.072509 0.2225(18)\n",
"20 16 30 15 0.059969 0.2214(21)\n",
"21 16 31 16 0.070874 0.2220(20)\n",
"22 16 32 17 0.072437 0.2223(21)\n",
"23 16 33 18 0.073684 0.2225(21)\n",
"24 16 34 19 0.072767 0.2227(20)"
" t_start t_stop datapoints chisquare_by_dof mass\n",
"0 12 30 19 0.057872 0.2218(12)\n",
"1 12 31 20 0.063951 0.2221(11)\n",
"2 13 30 18 0.051577 0.2215(12)\n",
"3 13 31 19 0.060901 0.2219(11)\n",
"4 14 30 17 0.052349 0.2213(13)\n",
"5 14 31 18 0.063640 0.2218(13)\n",
"6 15 30 16 0.056088 0.2213(16)\n",
"7 15 31 17 0.067552 0.2218(17)\n",
"8 16 30 15 0.059969 0.2214(21)\n",
"9 16 31 16 0.070874 0.2220(20)"
]
},
"execution_count": 5,
@ -348,30 +227,45 @@
"my_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The content of this pandas DataFrame can be inserted into a relational database, making use of the `JSON` serialization of `pyerrors` objects. In this example we use an SQLite database."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"pe.input.pandas.to_sql(my_df, \"mass_table\", \"my_db.sqlite\")"
"pe.input.pandas.to_sql(my_df, \"mass_table\", \"my_db.sqlite\", if_exists='fail')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"At a later stage of the analysis the content of the database can be reconstructed into a DataFrame via SQL queries.\n",
"In this example we extract `t_start`, `t_stop` and the fitted mass for all fits which start at times larger than 14."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"new_df = pe.input.pandas.read_sql(f\"SELECT t_start, t_stop, mass FROM mass_table WHERE t_start > 14\",\n",
" \"my_db.sqlite\",\n",
" auto_gamma=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"new_df = df = pe.input.pandas.read_sql(f\"SELECT t_start, t_stop, mass FROM mass_table WHERE t_start > 13\"\n",
" ,\"my_db.sqlite\"\n",
" ,auto_gamma=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
@ -402,118 +296,41 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>14</td>\n",
" <td>30</td>\n",
" <td>0.2213(13)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>14</td>\n",
" <td>31</td>\n",
" <td>0.2218(13)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>14</td>\n",
" <td>32</td>\n",
" <td>0.2220(14)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14</td>\n",
" <td>33</td>\n",
" <td>0.2223(15)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>34</td>\n",
" <td>0.2224(15)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>15</td>\n",
" <td>30</td>\n",
" <td>0.2213(16)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <th>1</th>\n",
" <td>15</td>\n",
" <td>31</td>\n",
" <td>0.2218(17)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>15</td>\n",
" <td>32</td>\n",
" <td>0.2221(18)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15</td>\n",
" <td>33</td>\n",
" <td>0.2224(18)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>15</td>\n",
" <td>34</td>\n",
" <td>0.2225(18)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <th>2</th>\n",
" <td>16</td>\n",
" <td>30</td>\n",
" <td>0.2214(21)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <th>3</th>\n",
" <td>16</td>\n",
" <td>31</td>\n",
" <td>0.2220(20)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>16</td>\n",
" <td>32</td>\n",
" <td>0.2223(21)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>16</td>\n",
" <td>33</td>\n",
" <td>0.2225(21)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>16</td>\n",
" <td>34</td>\n",
" <td>0.2227(20)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" t_start t_stop mass\n",
"0 14 30 0.2213(13)\n",
"1 14 31 0.2218(13)\n",
"2 14 32 0.2220(14)\n",
"3 14 33 0.2223(15)\n",
"4 14 34 0.2224(15)\n",
"5 15 30 0.2213(16)\n",
"6 15 31 0.2218(17)\n",
"7 15 32 0.2221(18)\n",
"8 15 33 0.2224(18)\n",
"9 15 34 0.2225(18)\n",
"10 16 30 0.2214(21)\n",
"11 16 31 0.2220(20)\n",
"12 16 32 0.2223(21)\n",
"13 16 33 0.2225(21)\n",
"14 16 34 0.2227(20)"
" t_start t_stop mass\n",
"0 15 30 0.2213(16)\n",
"1 15 31 0.2218(17)\n",
"2 16 30 0.2214(21)\n",
"3 16 31 0.2220(20)"
]
},
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -522,6 +339,13 @@
"new_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Storing intermediate analysis results in a relational database provides a convenient and scalable way of splitting up a detailed analysis into multiple independent steps."
]
},
{
"cell_type": "code",
"execution_count": null,