In [None]:
# Optional setup: uncomment the lines below to install the packages this
# notebook imports (they are left disabled so a normal run installs nothing).
#! pip install jupyter
#! pip install pandas
#! pip install matplotlib

In [None]:
import os
from pathlib import Path

import re
import datetime
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates


# Data preparation

The notebook expects the following directory layout:

```
.
├── data
│   ├── pee.csv
│   └── poo.csv
└── charts.ipynb
```

In [None]:
# Read both event logs with identical options; `timestamp` is parsed to datetimes.
pee, poo = (
    pd.read_csv(csv_path, sep=',', parse_dates=['timestamp'])
    for csv_path in ('data/pee.csv', 'data/poo.csv')
)

# Data Cleaning

## Remove the November 2023 and September 2024 data

In [None]:
# Time span and number of rows in the pee log (shown as the cell output).
(
    f"dates range from {pee.timestamp.min()} to {pee.timestamp.max()}. "
    f"{len(pee)} pee events"
)

In [None]:
# Keep December 2023 through August 2024 only.
# The lower bound is 1 December (inclusive): the previous `> '2023-11-30 00:00:00'`
# still let almost all of 30 November through, contradicting the section heading.
pee_in_range = (pee.timestamp >= '2023-12-01 00:00:00') & (pee.timestamp < '2024-09-01 00:00:00')
# .copy() makes the result an independent frame, so later column assignments
# do not trigger pandas' SettingWithCopyWarning.
pee = pee.loc[pee_in_range].copy()

In [None]:
# Same summary as before the filter, to confirm the new date bounds and row count.
(
    f"dates range from {pee.timestamp.min()} to {pee.timestamp.max()}. "
    f"{len(pee)} pee events"
)

In [None]:
# Time span and number of rows in the poo log (shown as the cell output).
(
    f"dates range from {poo.timestamp.min()} to {poo.timestamp.max()}. "
    f"{len(poo)} poo events"
)

In [None]:
# Keep December 2023 through August 2024 only.
# The lower bound is 1 December (inclusive): the previous `> '2023-11-30 00:00:00'`
# still let almost all of 30 November through, contradicting the section heading.
poo_in_range = (poo.timestamp >= '2023-12-01 00:00:00') & (poo.timestamp < '2024-09-01 00:00:00')
# .copy() makes the result an independent frame, so later column assignments
# do not trigger pandas' SettingWithCopyWarning.
poo = poo.loc[poo_in_range].copy()

In [None]:
# Same summary as before the filter, to confirm the new date bounds and row count.
(
    f"dates range from {poo.timestamp.min()} to {poo.timestamp.max()}. "
    f"{len(poo)} poo events"
)

# Plots

In [None]:
# (rows, columns) of the pee frame after the date filter above.
pee.shape

In [None]:
# (rows, columns) of the poo frame after the date filter above.
poo.shape

In [None]:
# Peek at the first rows to check the columns and the parsed timestamps.
pee.head()

In [None]:
def plot_per_hour(dataframe: pd.DataFrame, label: str):
    """Draw a bar chart of how many events fall in each hour of the day.

    Parameters
    ----------
    dataframe:
        Event log with a datetime ``timestamp`` column; every row is one event.
    label:
        Event name used in the y-axis label (e.g. ``"pee"``).

    Returns
    -------
    None. The chart is drawn on a new 12x4 figure.
    """
    # size() counts rows per hour. count() would instead give one count per
    # *column*, so the chart would show a bar series for every column of the frame.
    # reindex() keeps all 24 hours on the axis, with 0 for hours without events.
    per_hour = (
        dataframe.groupby(dataframe["timestamp"].dt.hour)
        .size()
        .reindex(range(24), fill_value=0)
    )
    fig, ax = plt.subplots(figsize=(12, 4))
    per_hour.plot(kind='bar', rot=0, ax=ax)
    ax.set_xlabel("Hour of the day")
    ax.set_ylabel(f"# {label} events per hour")
plot_per_hour(pee, "pee")
plot_per_hour(poo, "poo")

In [None]:
def plot_per_weekday(dataframe: pd.DataFrame, label: str):
    """Draw a bar chart of how many events fall on each day of the week.

    Parameters
    ----------
    dataframe:
        Event log with a datetime ``timestamp`` column; every row is one event.
    label:
        Event name used in the y-axis label (e.g. ``"pee"``).

    Returns
    -------
    None. The chart is drawn on a new 12x4 figure.
    """
    weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
    # size() counts rows per weekday (count() would yield one series per column).
    # reindex() keeps all seven days on the axis even when one has no events.
    per_weekday = (
        dataframe.groupby(dataframe["timestamp"].dt.dayofweek)
        .size()
        .reindex(range(7), fill_value=0)
    )
    # dt.dayofweek numbers Monday as 0, matching the order of weekday_names.
    per_weekday.index = weekday_names

    fig, ax = plt.subplots(figsize=(12, 4))
    per_weekday.plot(kind='bar', rot=0, ax=ax)
    ax.set_xlabel("Day of the week")
    ax.set_ylabel(f"# {label} events per weekday")
plot_per_weekday(pee, "pee")
plot_per_weekday(poo, "poo")

In [None]:
# Give every row an `event` value of 1 so that summing the column counts events.
# assign() returns a new frame rather than writing into a filtered slice, which
# avoids SettingWithCopyWarning and makes the cell safe to re-run.
pee = pee.assign(event=1)
poo = poo.assign(event=1)
pee.tail()

In [None]:
def plot_events_per_week(df: pd.DataFrame, label: str):
    """Draw a bar chart of the number of events per week.

    Parameters
    ----------
    df:
        Event log with a datetime ``timestamp`` column; every row is one event.
    label:
        Event name shown in the title and y-axis label (e.g. ``"pee"``).

    Returns
    -------
    None. The chart is shown on a new 18x4 figure.
    """
    # Counting rows per weekly bin gives the same numbers as summing a constant
    # `event == 1` column, without requiring that column and without the
    # groupby-on-the-grouping-column pattern deprecated in pandas 2.2.
    per_week = df.resample('W-Mon', on='timestamp').size()

    fig, ax = plt.subplots(figsize=(18, 4))
    ax.grid(True)
    # The x axis is datetime, so width=4 means each bar is four days wide.
    ax.bar(per_week.index, width=4, height=per_week.to_numpy())
    ax.set_title(f"{label} events per week")
    ax.set_xlabel("Week")
    ax.set_ylabel(f"# {label} events")
    plt.show()
plot_events_per_week(pee, "pee")
plot_events_per_week(poo, "poo")

In [None]:

def plot_events_per_day(df: pd.DataFrame, label: str):
    """Draw a bar chart of the number of events per calendar day.

    This cell previously defined a second ``plot_events_per_week`` that silently
    replaced the weekly version while actually resampling per day; it now has
    its own name so both charts stay available.

    Parameters
    ----------
    df:
        Event log with a datetime ``timestamp`` column; every row is one event.
    label:
        Event name shown in the title and y-axis label (e.g. ``"pee"``).

    Returns
    -------
    None. The chart is shown on a new 18x4 figure.
    """
    # Row count per daily bin; days without events inside the range count as 0.
    per_day = df.resample('D', on='timestamp').size()

    fig, ax = plt.subplots(figsize=(18, 4))
    ax.grid(True)
    ax.bar(per_day.index, height=per_day.to_numpy(), alpha=0.5)
    ax.set_title(f"{label} events per day")
    ax.set_xlabel("Day")
    ax.set_ylabel(f"# {label} events")
    plt.show()

plot_events_per_day(pee, "pee")
plot_events_per_day(poo, "poo")

## pee data


In [None]:
# Daily pee totals: one row per calendar day between the first and last event,
# indexed by day, with `event` holding that day's count (0 on days without events).
# Resampling the frame directly replaces groupby('event').resample(...).loc[1],
# which only worked because the grouping column itself was aggregated
# (behaviour deprecated in pandas 2.2).
# Requires the `event` column added to `pee` in an earlier cell.
daydf = pee.resample('D', on='timestamp')[['event']].sum()
print(daydf.shape)
daydf.head()

In [None]:
# First day, last day and number of daily rows in `daydf` (shown as the cell output).
(
    f"dates range from {daydf.index.min()} to {daydf.index.max()}. "
    f"{len(daydf)} days"
)

In [None]:
# Bar chart of the daily counts in `daydf`, one half-transparent bar per row.
fig, ax = plt.subplots(figsize=(18, 4))
ax.bar(daydf.index, daydf['event'], alpha=0.5)
ax.grid(True)
plt.show()

In [None]:
# average pee per month
# Move the day index into a `timestamp` column so rows are numbered 0..n-1.
# The guard keeps the cell re-runnable: an unconditional second reset_index()
# would add a stray `index` column to daydf.
if "timestamp" not in daydf.columns:
    daydf = daydf.reset_index()

# Calendar month (as a Period) of each daily row, used as the grouping key.
month_of_day = pd.PeriodIndex(daydf['timestamp'], freq="M")
# One row per month: `timestamp` is the month, `event` the mean daily count.
monthdf = daydf.groupby(month_of_day)['event'].mean().reset_index()
# Abbreviated month name (e.g. "Dec"), intended as a display label.
monthdf['month'] = monthdf['timestamp'].dt.strftime('%b')
monthdf.tail()

In [None]:
# Bar chart of the mean daily pee count for each month in `monthdf`.
fig, ax = plt.subplots(figsize=(18, 4))
ax.grid(True)
ax.bar(monthdf.index, height='event', data=monthdf, alpha=0.5)
# Bars sit at row positions 0..n-1; label them with the month names computed
# in `monthdf` instead of leaving bare integers on the axis.
ax.set_xticks(monthdf.index)
ax.set_xticklabels(monthdf['month'])
ax.set_xlabel("Month")
ax.set_ylabel("Mean pee events per day")
ax.set_title("Average amount of pee per day per month")
plt.show()

In [None]:
# Daily pee counts as bars, with one horizontal line per month at that month's
# mean daily count.
# Expects `daydf` as left by the monthly-average cell: rows numbered 0..n-1 and
# the day stored in a `timestamp` column.
fig, ax = plt.subplots(figsize=(18, 4))
ax.grid(True)

# Group the daily rows by calendar month so every line spans exactly the rows
# of its own month. Equal-width segments (len(daydf) / len(monthdf)) drift off
# the real month boundaries because months differ in length.
month_of_day = pd.PeriodIndex(daydf["timestamp"], freq="M")
tick_positions = []
tick_labels = []
for month, days in daydf.groupby(month_of_day):
    first_row, last_row = days.index.min(), days.index.max()
    # Bars are centred on integer positions, so pad by half a step on each side
    # to cover the month's first and last bar completely.
    ax.hlines(y=days["event"].mean(), xmin=first_row - 0.5, xmax=last_row + 0.5)
    tick_positions.append(first_row)
    tick_labels.append(month.strftime("%b %Y"))

ax.bar(daydf.index, height='event', data=daydf, alpha=0.5)

# One tick at the first day of each month, labelled with month and year.
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_labels)
ax.set_ylabel("# pee events per day")
ax.set_title("Pee events per day with monthly average")
plt.show()

In [None]:
# Distribution of the number of pee events per day.
# The cell referenced an undefined name `pe` (NameError on a fresh kernel);
# `pee` is the frame loaded and filtered above.
# size() counts rows per day directly, instead of overwriting `timestamp` with 1
# and summing every column (which also mixed the `event` column into the plot).
pee_per_day = pee.resample('D', on='timestamp').size()

# Leave out days without any event, as the original all-zero-row filter did.
ax = pee_per_day[pee_per_day > 0].plot.hist(bins=20, alpha=0.5)
ax.set_xlabel("# pee events per day")
ax.set_ylabel("Number of days")
plt.show()


In [None]:
# Distribution of the number of poo events per day.
# The cell referenced an undefined name `ke` (NameError on a fresh kernel).
# NOTE(review): `poo` is assumed to be the intended frame, as it is the only
# other event log in this notebook - confirm.
# size() counts rows per day directly, instead of overwriting `timestamp` with 1
# and summing every column (which also mixed the `event` column into the plot).
poo_per_day = poo.resample('D', on='timestamp').size()

# Leave out days without any event, as the original all-zero-row filter did.
ax = poo_per_day[poo_per_day > 0].plot.hist(alpha=0.5)
ax.set_xlabel("# poo events per day")
ax.set_ylabel("Number of days")
plt.show()