pyerrors/examples/07_data_management.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data management"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import pyerrors as pe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For the data management example we reuse the data from the correlator example."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data has been written using pyerrors 2.0.0.\n",
      "Format version 0.1\n",
      "Written by fjosw on 2022-01-06 11:11:19 +0100 on host XPS139305, Linux-5.11.0-44-generic-x86_64-with-glibc2.29\n",
      "\n",
      "Description:  Test data for the correlator example\n"
     ]
    }
   ],
   "source": [
    "correlator_data = pe.input.json.load_json(\"./data/correlator_test\")\n",
    "my_correlator = pe.Corr(correlator_data)\n",
    "my_correlator.gamma_method()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import autograd.numpy as anp\n",
    "def func_exp(a, x):\n",
    "    return a[1] * anp.exp(-a[0] * x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this example we perform uncorrelated fits of a single exponential function to the correlator and vary the range of the fit. The fit result can be conveniently stored in a pandas DataFrame together with the corresponding metadata."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "rows = []\n",
    "for t_start in range(12, 17):\n",
    "    for t_stop in range(30, 32):\n",
    "        fr = my_correlator.fit(func_exp, [t_start, t_stop], silent=True)\n",
    "        fr.gamma_method()\n",
    "        row = {\"t_start\": t_start,\n",
    "               \"t_stop\": t_stop,\n",
    "               \"datapoints\": t_stop - t_start + 1,\n",
    "               \"chisquare_by_dof\": fr.chisquare_by_dof,\n",
    "               \"mass\": fr[0]}\n",
    "        rows.append(row)\n",
    "my_df = pd.DataFrame(rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>t_start</th>\n",
       "      <th>t_stop</th>\n",
       "      <th>datapoints</th>\n",
       "      <th>chisquare_by_dof</th>\n",
       "      <th>mass</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12</td>\n",
       "      <td>30</td>\n",
       "      <td>19</td>\n",
       "      <td>0.057872</td>\n",
       "      <td>0.2218(12)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12</td>\n",
       "      <td>31</td>\n",
       "      <td>20</td>\n",
       "      <td>0.063951</td>\n",
       "      <td>0.2221(11)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>13</td>\n",
       "      <td>30</td>\n",
       "      <td>18</td>\n",
       "      <td>0.051577</td>\n",
       "      <td>0.2215(12)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>13</td>\n",
       "      <td>31</td>\n",
       "      <td>19</td>\n",
       "      <td>0.060901</td>\n",
       "      <td>0.2219(11)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>14</td>\n",
       "      <td>30</td>\n",
       "      <td>17</td>\n",
       "      <td>0.052349</td>\n",
       "      <td>0.2213(13)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>14</td>\n",
       "      <td>31</td>\n",
       "      <td>18</td>\n",
       "      <td>0.063640</td>\n",
       "      <td>0.2218(13)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>15</td>\n",
       "      <td>30</td>\n",
       "      <td>16</td>\n",
       "      <td>0.056088</td>\n",
       "      <td>0.2213(16)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>15</td>\n",
       "      <td>31</td>\n",
       "      <td>17</td>\n",
       "      <td>0.067552</td>\n",
       "      <td>0.2218(17)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>16</td>\n",
       "      <td>30</td>\n",
       "      <td>15</td>\n",
       "      <td>0.059969</td>\n",
       "      <td>0.2214(21)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>16</td>\n",
       "      <td>31</td>\n",
       "      <td>16</td>\n",
       "      <td>0.070874</td>\n",
       "      <td>0.2220(20)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   t_start  t_stop  datapoints  chisquare_by_dof        mass\n",
       "0       12      30          19          0.057872  0.2218(12)\n",
       "1       12      31          20          0.063951  0.2221(11)\n",
       "2       13      30          18          0.051577  0.2215(12)\n",
       "3       13      31          19          0.060901  0.2219(11)\n",
       "4       14      30          17          0.052349  0.2213(13)\n",
       "5       14      31          18          0.063640  0.2218(13)\n",
       "6       15      30          16          0.056088  0.2213(16)\n",
       "7       15      31          17          0.067552  0.2218(17)\n",
       "8       16      30          15          0.059969  0.2214(21)\n",
       "9       16      31          16          0.070874  0.2220(20)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The content of this pandas DataFrame can be inserted into a relational database, making use of the `JSON` serialization of `pyerrors` objects. In this example we use an SQLite database."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "pe.input.pandas.to_sql(my_df, \"mass_table\", \"my_db.sqlite\", if_exists='fail')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "At a later stage of the analysis the content of the database can be reconstructed into a DataFrame via SQL queries.\n",
    "In this example we extract `t_start`, `t_stop` and the fitted mass for all fits which start at times larger than 14."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_df = pe.input.pandas.read_sql(f\"SELECT t_start, t_stop, mass FROM mass_table WHERE t_start > 14\",\n",
    "                                  \"my_db.sqlite\",\n",
    "                                  auto_gamma=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>t_start</th>\n",
       "      <th>t_stop</th>\n",
       "      <th>mass</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15</td>\n",
       "      <td>30</td>\n",
       "      <td>0.2213(16)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15</td>\n",
       "      <td>31</td>\n",
       "      <td>0.2218(17)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16</td>\n",
       "      <td>30</td>\n",
       "      <td>0.2214(21)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>16</td>\n",
       "      <td>31</td>\n",
       "      <td>0.2220(20)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   t_start  t_stop        mass\n",
       "0       15      30  0.2213(16)\n",
       "1       15      31  0.2218(17)\n",
       "2       16      30  0.2214(21)\n",
       "3       16      31  0.2220(20)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The storage of intermediate analysis results in relational databases allows for a convenient and scalable way of splitting up a detailed analysis in multiple independent steps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}