{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# `spudtr` epochs dataframe format\n",
    "\n",
    "`spudtr` epochs are `pandas.DataFrame` objects.\n",
    "\n",
    "There are three key elements:\n",
    "\n",
    " 1. `epoch_id` an index-like integer column, where each value designates a unique epoch\n",
    " 2. `time` an index-like column of integer timestamps, the same in each epoch\n",
    " 3.  the rest of the data columns\n",
    " \n",
    "There must be at least one epoch.\n",
    "\n",
    "There must be at least one timepoint.\n",
    "\n",
    "All the epochs must be timestamped exactly the same way.\n",
    "\n",
 NOTE: timest">
    "> NOTE: timestamps may be positive or negative integers; the units are unspecified, e.g., milliseconds, months, nanoseconds, hours."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "from spudtr import get_demo_df, P3_1500_FEATHER\n",
    "from spudtr import epf\n",
    "import spudtr.fake_epochs_data as fake_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Example: simulated categorical and continuous data**\n",
    "\n",
    "The `epoch_id` column is \"epoch_id\", there are four epochs: 0, 1, 2, 3.\n",
    "\n",
    "The `time` column is \"days\", there are 31 days in each epoch, 0, 1, 2, ..., 31.\n",
    "\n",
    "The rest of the columns are the data recorded in each epoch at each time stamp."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>epoch_id</th>\n",
       "      <th>days</th>\n",
       "      <th>categorical</th>\n",
       "      <th>continuous</th>\n",
       "      <th>channel0</th>\n",
       "      <th>channel1</th>\n",
       "      <th>channel2</th>\n",
       "      <th>channel3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>cat0</td>\n",
       "      <td>0.771321</td>\n",
       "      <td>-13.170787</td>\n",
       "      <td>-30.197057</td>\n",
       "      <td>19.609869</td>\n",
       "      <td>43.177612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>cat0</td>\n",
       "      <td>0.020752</td>\n",
       "      <td>4.233125</td>\n",
       "      <td>-7.726009</td>\n",
       "      <td>-65.298259</td>\n",
       "      <td>41.464399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>cat0</td>\n",
       "      <td>0.633648</td>\n",
       "      <td>8.191480</td>\n",
       "      <td>21.915223</td>\n",
       "      <td>18.568468</td>\n",
       "      <td>27.639613</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>cat0</td>\n",
       "      <td>0.748804</td>\n",
       "      <td>-48.557122</td>\n",
       "      <td>-50.952045</td>\n",
       "      <td>14.317029</td>\n",
       "      <td>-17.186617</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>cat0</td>\n",
       "      <td>0.498507</td>\n",
       "      <td>-17.193401</td>\n",
       "      <td>50.222266</td>\n",
       "      <td>0.782896</td>\n",
       "      <td>38.251473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>3</td>\n",
       "      <td>27</td>\n",
       "      <td>cat1</td>\n",
       "      <td>0.744603</td>\n",
       "      <td>33.167254</td>\n",
       "      <td>-7.658414</td>\n",
       "      <td>14.630878</td>\n",
       "      <td>14.329468</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>3</td>\n",
       "      <td>28</td>\n",
       "      <td>cat1</td>\n",
       "      <td>0.469785</td>\n",
       "      <td>-60.531560</td>\n",
       "      <td>0.774228</td>\n",
       "      <td>1.689442</td>\n",
       "      <td>0.882024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125</th>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>cat1</td>\n",
       "      <td>0.598256</td>\n",
       "      <td>16.216221</td>\n",
       "      <td>66.028993</td>\n",
       "      <td>16.373534</td>\n",
       "      <td>4.854384</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>3</td>\n",
       "      <td>30</td>\n",
       "      <td>cat1</td>\n",
       "      <td>0.147620</td>\n",
       "      <td>-43.268966</td>\n",
       "      <td>26.531028</td>\n",
       "      <td>-20.493672</td>\n",
       "      <td>-12.327708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>3</td>\n",
       "      <td>31</td>\n",
       "      <td>cat1</td>\n",
       "      <td>0.184035</td>\n",
       "      <td>-48.265511</td>\n",
       "      <td>-41.604676</td>\n",
       "      <td>-19.770519</td>\n",
       "      <td>27.925069</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>128 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     epoch_id  days categorical  continuous   channel0   channel1   channel2  \\\n",
       "0           0     0        cat0    0.771321 -13.170787 -30.197057  19.609869   \n",
       "1           0     1        cat0    0.020752   4.233125  -7.726009 -65.298259   \n",
       "2           0     2        cat0    0.633648   8.191480  21.915223  18.568468   \n",
       "3           0     3        cat0    0.748804 -48.557122 -50.952045  14.317029   \n",
       "4           0     4        cat0    0.498507 -17.193401  50.222266   0.782896   \n",
       "..        ...   ...         ...         ...        ...        ...        ...   \n",
       "123         3    27        cat1    0.744603  33.167254  -7.658414  14.630878   \n",
       "124         3    28        cat1    0.469785 -60.531560   0.774228   1.689442   \n",
       "125         3    29        cat1    0.598256  16.216221  66.028993  16.373534   \n",
       "126         3    30        cat1    0.147620 -43.268966  26.531028 -20.493672   \n",
       "127         3    31        cat1    0.184035 -48.265511 -41.604676 -19.770519   \n",
       "\n",
       "      channel3  \n",
       "0    43.177612  \n",
       "1    41.464399  \n",
       "2    27.639613  \n",
       "3   -17.186617  \n",
       "4    38.251473  \n",
       "..         ...  \n",
       "123  14.329468  \n",
       "124   0.882024  \n",
       "125   4.854384  \n",
       "126 -12.327708  \n",
       "127  27.925069  \n",
       "\n",
       "[128 rows x 8 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "n_epochs_per_category = 2\n",
    "sim_epochs_df, channels = fake_data._generate(\n",
    "    n_epochs=n_epochs_per_category,\n",
    "    n_samples=32,\n",
    "    n_categories=2,\n",
    "    n_channels=4,\n",
    "    time=\"days\",\n",
    "    epoch_id=\"epoch_id\",\n",
    "    seed=10,\n",
    ")\n",
    "display(sim_epochs_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Example: EEG data**\n",
    "\n",
    "The epoch index column is `epoch_id`, there are 600 epochs numbered: 0, 1, 2, ..., 600.  There are 600 not 601 epochs here because `epoch_id` 392 was excluded: the relevant event marked a pause in the recording not a stimulus. The epoch ids must be unique but they can be gappy and out of order.\n",
    "\n",
    "The time column is `time_ms`, there are 375 digital samples in each epoch at 4 ms intervals, -748, -744, ..., 744, 748\n",
    "\n",
    "The rest of the columns are the data recorded in each epoch at each time stamp."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/turbach/miniconda3/envs/mckonda_spudtr_dev/lib/python3.6/site-packages/pyarrow/pandas_compat.py:752: FutureWarning: .labels was deprecated in version 0.24.0. Use .codes instead.\n",
      "  labels, = index.labels\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "600"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>epoch_id</th>\n",
       "      <th>time_ms</th>\n",
       "      <th>sub_id</th>\n",
       "      <th>eeg_artifact</th>\n",
       "      <th>dblock_path</th>\n",
       "      <th>log_evcodes</th>\n",
       "      <th>log_ccodes</th>\n",
       "      <th>dblock_srate</th>\n",
       "      <th>ccode</th>\n",
       "      <th>instrument</th>\n",
       "      <th>...</th>\n",
       "      <th>RMOc</th>\n",
       "      <th>LLTe</th>\n",
       "      <th>RLTe</th>\n",
       "      <th>LLOc</th>\n",
       "      <th>RLOc</th>\n",
       "      <th>MiOc</th>\n",
       "      <th>A2</th>\n",
       "      <th>HEOG</th>\n",
       "      <th>rle</th>\n",
       "      <th>rhz</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>-748</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>1</td>\n",
       "      <td>eeg</td>\n",
       "      <td>...</td>\n",
       "      <td>-25.093750</td>\n",
       "      <td>-0.753906</td>\n",
       "      <td>1.480469</td>\n",
       "      <td>-13.414062</td>\n",
       "      <td>-18.937500</td>\n",
       "      <td>-17.734375</td>\n",
       "      <td>5.660156</td>\n",
       "      <td>98.875000</td>\n",
       "      <td>-39.500000</td>\n",
       "      <td>38.375000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>-744</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>1</td>\n",
       "      <td>eeg</td>\n",
       "      <td>...</td>\n",
       "      <td>-24.593750</td>\n",
       "      <td>0.502441</td>\n",
       "      <td>-2.466797</td>\n",
       "      <td>-17.640625</td>\n",
       "      <td>-17.468750</td>\n",
       "      <td>-15.304688</td>\n",
       "      <td>1.968750</td>\n",
       "      <td>104.750000</td>\n",
       "      <td>-38.031250</td>\n",
       "      <td>41.281250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>-740</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>1</td>\n",
       "      <td>eeg</td>\n",
       "      <td>...</td>\n",
       "      <td>-16.484375</td>\n",
       "      <td>-1.507812</td>\n",
       "      <td>3.947266</td>\n",
       "      <td>-15.648438</td>\n",
       "      <td>-10.085938</td>\n",
       "      <td>-11.171875</td>\n",
       "      <td>8.367188</td>\n",
       "      <td>102.062500</td>\n",
       "      <td>-33.656250</td>\n",
       "      <td>43.718750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>-736</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>1</td>\n",
       "      <td>eeg</td>\n",
       "      <td>...</td>\n",
       "      <td>-11.804688</td>\n",
       "      <td>-15.070312</td>\n",
       "      <td>9.867188</td>\n",
       "      <td>-14.906250</td>\n",
       "      <td>-7.378906</td>\n",
       "      <td>-8.742188</td>\n",
       "      <td>9.351562</td>\n",
       "      <td>100.562500</td>\n",
       "      <td>-42.906250</td>\n",
       "      <td>37.406250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>-732</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>1</td>\n",
       "      <td>eeg</td>\n",
       "      <td>...</td>\n",
       "      <td>-6.394531</td>\n",
       "      <td>-4.019531</td>\n",
       "      <td>9.125000</td>\n",
       "      <td>-10.679688</td>\n",
       "      <td>-6.886719</td>\n",
       "      <td>-8.015625</td>\n",
       "      <td>8.125000</td>\n",
       "      <td>98.375000</td>\n",
       "      <td>-43.875000</td>\n",
       "      <td>37.906250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224995</th>\n",
       "      <td>600</td>\n",
       "      <td>732</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>0</td>\n",
       "      <td>cal</td>\n",
       "      <td>...</td>\n",
       "      <td>-4.671875</td>\n",
       "      <td>-3.517578</td>\n",
       "      <td>-4.441406</td>\n",
       "      <td>-4.718750</td>\n",
       "      <td>-4.671875</td>\n",
       "      <td>-3.400391</td>\n",
       "      <td>-4.429688</td>\n",
       "      <td>-4.406250</td>\n",
       "      <td>-3.900391</td>\n",
       "      <td>-4.371094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224996</th>\n",
       "      <td>600</td>\n",
       "      <td>736</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>0</td>\n",
       "      <td>cal</td>\n",
       "      <td>...</td>\n",
       "      <td>-4.179688</td>\n",
       "      <td>-4.019531</td>\n",
       "      <td>-4.195312</td>\n",
       "      <td>-4.222656</td>\n",
       "      <td>-4.425781</td>\n",
       "      <td>-3.644531</td>\n",
       "      <td>-4.429688</td>\n",
       "      <td>-4.160156</td>\n",
       "      <td>-3.412109</td>\n",
       "      <td>-4.371094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224997</th>\n",
       "      <td>600</td>\n",
       "      <td>740</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>0</td>\n",
       "      <td>cal</td>\n",
       "      <td>...</td>\n",
       "      <td>-4.425781</td>\n",
       "      <td>-3.767578</td>\n",
       "      <td>-4.441406</td>\n",
       "      <td>-3.974609</td>\n",
       "      <td>-4.425781</td>\n",
       "      <td>-3.400391</td>\n",
       "      <td>-4.429688</td>\n",
       "      <td>-4.160156</td>\n",
       "      <td>-3.900391</td>\n",
       "      <td>-4.859375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224998</th>\n",
       "      <td>600</td>\n",
       "      <td>744</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>0</td>\n",
       "      <td>cal</td>\n",
       "      <td>...</td>\n",
       "      <td>-4.425781</td>\n",
       "      <td>-4.269531</td>\n",
       "      <td>-4.195312</td>\n",
       "      <td>-4.222656</td>\n",
       "      <td>-4.425781</td>\n",
       "      <td>-3.886719</td>\n",
       "      <td>-4.429688</td>\n",
       "      <td>-4.406250</td>\n",
       "      <td>-3.900391</td>\n",
       "      <td>-4.371094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224999</th>\n",
       "      <td>600</td>\n",
       "      <td>748</td>\n",
       "      <td>sub000</td>\n",
       "      <td>0</td>\n",
       "      <td>sub000/dblock_4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>250.0</td>\n",
       "      <td>0</td>\n",
       "      <td>cal</td>\n",
       "      <td>...</td>\n",
       "      <td>-4.179688</td>\n",
       "      <td>-4.019531</td>\n",
       "      <td>-3.947266</td>\n",
       "      <td>-4.222656</td>\n",
       "      <td>-4.179688</td>\n",
       "      <td>-3.400391</td>\n",
       "      <td>-4.183594</td>\n",
       "      <td>-4.406250</td>\n",
       "      <td>-3.412109</td>\n",
       "      <td>-4.371094</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>225000 rows × 47 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        epoch_id  time_ms  sub_id  eeg_artifact      dblock_path  log_evcodes  \\\n",
       "0              0     -748  sub000             0  sub000/dblock_0            0   \n",
       "1              0     -744  sub000             0  sub000/dblock_0            0   \n",
       "2              0     -740  sub000             0  sub000/dblock_0            0   \n",
       "3              0     -736  sub000             0  sub000/dblock_0            0   \n",
       "4              0     -732  sub000             0  sub000/dblock_0            0   \n",
       "...          ...      ...     ...           ...              ...          ...   \n",
       "224995       600      732  sub000             0  sub000/dblock_4            0   \n",
       "224996       600      736  sub000             0  sub000/dblock_4            0   \n",
       "224997       600      740  sub000             0  sub000/dblock_4            0   \n",
       "224998       600      744  sub000             0  sub000/dblock_4            0   \n",
       "224999       600      748  sub000             0  sub000/dblock_4            0   \n",
       "\n",
       "        log_ccodes  dblock_srate  ccode instrument  ...       RMOc       LLTe  \\\n",
       "0                0         250.0      1        eeg  ... -25.093750  -0.753906   \n",
       "1                0         250.0      1        eeg  ... -24.593750   0.502441   \n",
       "2                0         250.0      1        eeg  ... -16.484375  -1.507812   \n",
       "3                0         250.0      1        eeg  ... -11.804688 -15.070312   \n",
       "4                0         250.0      1        eeg  ...  -6.394531  -4.019531   \n",
       "...            ...           ...    ...        ...  ...        ...        ...   \n",
       "224995           0         250.0      0        cal  ...  -4.671875  -3.517578   \n",
       "224996           0         250.0      0        cal  ...  -4.179688  -4.019531   \n",
       "224997           0         250.0      0        cal  ...  -4.425781  -3.767578   \n",
       "224998           0         250.0      0        cal  ...  -4.425781  -4.269531   \n",
       "224999           0         250.0      0        cal  ...  -4.179688  -4.019531   \n",
       "\n",
       "            RLTe       LLOc       RLOc       MiOc        A2        HEOG  \\\n",
       "0       1.480469 -13.414062 -18.937500 -17.734375  5.660156   98.875000   \n",
       "1      -2.466797 -17.640625 -17.468750 -15.304688  1.968750  104.750000   \n",
       "2       3.947266 -15.648438 -10.085938 -11.171875  8.367188  102.062500   \n",
       "3       9.867188 -14.906250  -7.378906  -8.742188  9.351562  100.562500   \n",
       "4       9.125000 -10.679688  -6.886719  -8.015625  8.125000   98.375000   \n",
       "...          ...        ...        ...        ...       ...         ...   \n",
       "224995 -4.441406  -4.718750  -4.671875  -3.400391 -4.429688   -4.406250   \n",
       "224996 -4.195312  -4.222656  -4.425781  -3.644531 -4.429688   -4.160156   \n",
       "224997 -4.441406  -3.974609  -4.425781  -3.400391 -4.429688   -4.160156   \n",
       "224998 -4.195312  -4.222656  -4.425781  -3.886719 -4.429688   -4.406250   \n",
       "224999 -3.947266  -4.222656  -4.179688  -3.400391 -4.183594   -4.406250   \n",
       "\n",
       "              rle        rhz  \n",
       "0      -39.500000  38.375000  \n",
       "1      -38.031250  41.281250  \n",
       "2      -33.656250  43.718750  \n",
       "3      -42.906250  37.406250  \n",
       "4      -43.875000  37.906250  \n",
       "...           ...        ...  \n",
       "224995  -3.900391  -4.371094  \n",
       "224996  -3.412109  -4.371094  \n",
       "224997  -3.900391  -4.859375  \n",
       "224998  -3.900391  -4.371094  \n",
       "224999  -3.412109  -4.371094  \n",
       "\n",
       "[225000 rows x 47 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eeg_epochs_df = get_demo_df(P3_1500_FEATHER)\n",
    "display(len(eeg_epochs_df[\"epoch_id\"].unique()))\n",
    "eeg_epochs_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>epoch_id</th>\n",
       "      <th>time_ms</th>\n",
       "      <th>sub_id</th>\n",
       "      <th>eeg_artifact</th>\n",
       "      <th>dblock_path</th>\n",
       "      <th>log_evcodes</th>\n",
       "      <th>log_ccodes</th>\n",
       "      <th>dblock_srate</th>\n",
       "      <th>ccode</th>\n",
       "      <th>instrument</th>\n",
       "      <th>...</th>\n",
       "      <th>RMOc</th>\n",
       "      <th>LLTe</th>\n",
       "      <th>RLTe</th>\n",
       "      <th>LLOc</th>\n",
       "      <th>RLOc</th>\n",
       "      <th>MiOc</th>\n",
       "      <th>A2</th>\n",
       "      <th>HEOG</th>\n",
       "      <th>rle</th>\n",
       "      <th>rhz</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>0 rows × 47 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [epoch_id, time_ms, sub_id, eeg_artifact, dblock_path, log_evcodes, log_ccodes, dblock_srate, ccode, instrument, bin, tone, stim, accuracy, acc_type, lle, lhz, MiPf, LLPf, RLPf, LMPf, RMPf, LDFr, RDFr, LLFr, RLFr, LMFr, RMFr, LMCe, RMCe, MiCe, MiPa, LDCe, RDCe, LDPa, RDPa, LMOc, RMOc, LLTe, RLTe, LLOc, RLOc, MiOc, A2, HEOG, rle, rhz]\n",
       "Index: []\n",
       "\n",
       "[0 rows x 47 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eeg_epochs_df.query(\"epoch_id == 392\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Always check the epoch x time format**\n",
    "\n",
    "When things go well the check quietly succeeds.\n",
    "\n",
    "When they don't the reason appears at the bottom of the messages."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example: This check of the simulated data **SUCCEEDS**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "epf.check_epochs(sim_epochs_df, ['channel0', 'channel1'], epoch_id=\"epoch_id\", time=\"days\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example: this checks **FAILS** because the data column named \"bogus_channel0\"  doesn't exist in the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "tags": [
     "raises-exception"
    ]
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "data_streams should all be present in the epochs dataframe, the following are missing: ['bogus_channel0']",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-6-eebb523a5162>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mepf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_epochs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msim_epochs_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'bogus_channel0'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'channel1'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"epoch_id\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"days\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/mnt/cube/home/turbach/TPU_Projects/spudtr/spudtr/epf.py\u001b[0m in \u001b[0;36mcheck_epochs\u001b[0;34m(epochs_df, data_streams, epoch_id, time)\u001b[0m\n\u001b[1;32m    183\u001b[0m     \"\"\"\n\u001b[1;32m    184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m     \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_epochs_QC\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepochs_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_streams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepoch_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    187\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/mnt/cube/home/turbach/TPU_Projects/spudtr/spudtr/epf.py\u001b[0m in \u001b[0;36m_epochs_QC\u001b[0;34m(epochs_df, data_streams, epoch_id, time)\u001b[0m\n\u001b[1;32m     48\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mmissing_channels\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     49\u001b[0m         raise ValueError(\n\u001b[0;32m---> 50\u001b[0;31m             \u001b[0;34m\"data_streams should all be present in the epochs dataframe, \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     51\u001b[0m             \u001b[0;34mf\"the following are missing: {list(missing_channels)}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     52\u001b[0m         )\n",
      "\u001b[0;31mValueError\u001b[0m: data_streams should all be present in the epochs dataframe, the following are missing: ['bogus_channel0']"
     ]
    }
   ],
   "source": [
    "epf.check_epochs(sim_epochs_df, ['bogus_channel0', 'channel1'], epoch_id=\"epoch_id\", time=\"days\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example: this checks **FAILS** because the `time` column named \"hours\" doesn't exist."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": [
     "raises-exception"
    ]
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "time column not found: hours",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-7-38894973e721>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mepf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_epochs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msim_epochs_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'channel0'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'channel1'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"epoch_id\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"hours\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/mnt/cube/home/turbach/TPU_Projects/spudtr/spudtr/epf.py\u001b[0m in \u001b[0;36mcheck_epochs\u001b[0;34m(epochs_df, data_streams, epoch_id, time)\u001b[0m\n\u001b[1;32m    183\u001b[0m     \"\"\"\n\u001b[1;32m    184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m     \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_epochs_QC\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepochs_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_streams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepoch_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    187\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/mnt/cube/home/turbach/TPU_Projects/spudtr/spudtr/epf.py\u001b[0m in \u001b[0;36m_epochs_QC\u001b[0;34m(epochs_df, data_streams, epoch_id, time)\u001b[0m\n\u001b[1;32m     58\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     59\u001b[0m     \u001b[0;31m# epoch_id and time must be the columns in the epochs_df\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 60\u001b[0;31m     \u001b[0m_validate_epochs_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepochs_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepoch_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     62\u001b[0m     \u001b[0;31m# check values of epoch_id in every time group are the same, and\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/mnt/cube/home/turbach/TPU_Projects/spudtr/spudtr/epf.py\u001b[0m in \u001b[0;36m_validate_epochs_df\u001b[0;34m(epochs_df, epoch_id, time)\u001b[0m\n\u001b[1;32m     28\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"epoch_id\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mepoch_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"time\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     29\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mval\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mepochs_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{key} column not found: {val}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     32\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: time column not found: hours"
     ]
    }
   ],
   "source": [
    "epf.check_epochs(sim_epochs_df, ['channel0', 'channel1'], epoch_id=\"epoch_id\", time=\"hours\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}