In [56]:
import datetime
import matplotlib.pyplot as plt
import pyexcel

# pip install matplotlib pyexcel pyexcel-xls pyexcel-xlsx --upgrade
In [57]:
# book = pyexcel.load_book("Downloads/sonderauswertung-sterbefaelle.xlsx")
In [58]:
import requests
from urllib.parse import urlparse, urlunparse
from bs4 import BeautifulSoup


url = urlparse(
    "https://www.destatis.de/DE/Themen/"
    "Gesellschaft-Umwelt/Bevoelkerung/"
    "Sterbefaelle-Lebenserwartung/"
    "Tabellen/sonderauswertung-sterbefaelle.html"
)
bs = BeautifulSoup(requests.get(url.geturl()).content, "html.parser")
links = [element.get("href") for element in bs.find_all("a", href=True, attrs={"class": "downloadLink"})]


download_link = urlunparse((
    url.scheme,
    url.netloc,
    links[0],
    '',
    '',
    '',
))
req = requests.get(download_link)
content = req.content
book = pyexcel.get_book(file_content=content, file_type="xlsx")
In [59]:
tage = [sheet for sheet in book.dict.keys() if sheet.startswith("D_") and sheet.endswith("_Tage")]
In [60]:
print(tage)
['D_2020_Tage', 'D_2019_Tage', 'D_2018_Tage', 'D_2017_Tage', 'D_2016_Tage']
In [61]:
def sort_by_tage(sheet):
    return int(sheet[2:-5])
In [62]:
tage.sort(key=sort_by_tage)
In [63]:
print(tage)
['D_2016_Tage', 'D_2017_Tage', 'D_2018_Tage', 'D_2019_Tage', 'D_2020_Tage']
In [64]:
def chain(book, sheets):
    x = []
    y = []
    for name in sheets:
        days = book[name].row[8][1:-1]
        dead = book[name].row[9][1:-1]
        x.extend(days)
        y.extend(dead)
    return x, y
In [65]:
x, y = chain(book, tage)
In [66]:
len(x) == len(y)
Out[66]:
True
In [67]:
all(isinstance(e, datetime.date) for e in x)
Out[67]:
True
In [68]:
[(e,n) for n,e in enumerate(x) if not isinstance(e, datetime.date)]
Out[68]:
[]
In [69]:
# pitfall, delete higher index first
# del x[1097]
# del y[1097]
# del x[731]
# del y[731]
In [70]:
def plot(x, y):
    plt.figure(figsize=(12, 5))
    plt.plot(x, y)
    plt.title("Tägliche Sterbefälle in Deutschland\nQuelle: www.destatis.de/.../sonderauswertung-sterbefaelle.html")
    plt.ylabel("Sterbefälle")
    plt.xlabel("Datum")
    plt.savefig("tägliche_sterbefälle_deutschland_insgesamt.png", dpi=300)
    plt.show()
    
In [71]:
plot(x,y)
In [72]:
import numpy as np
In [73]:
def plot_convolve(x, y):
    plt.figure(figsize=(12, 5))
    plt.plot(x[16:-16], np.convolve(y, np.ones(16)/16, "same")[16:-16])
    plt.title("Tägliche Sterbefälle in Deutschland\nQuelle: www.destatis.de/.../sonderauswertung-sterbefaelle.html")
    plt.ylabel("Sterbefälle")
    plt.xlabel("Datum")
    plt.savefig("tägliche_sterbefälle_deutschland_insgesamt_convolve.png", dpi=300)
    plt.show()
In [74]:
plot_convolve(x, y)
In [75]:
from nbconvert import PDFExporter, HTMLExporter
In [76]:
pdf_exporter = PDFExporter()
html_exporter = HTMLExporter()
In [77]:
#with open("Tägliche Sterbefälle in Deutschland.pdf", "wb") as fd:
#    result, metadata = pdf_exporter.from_filename("Tägliche Sterbefälle in Deutschland.ipynb")
#    fd.write(result)
#with open("Tägliche Sterbefälle in Deutschland.html", "wt") as fd:
#    result, metadata = html_exporter.from_filename("Tägliche Sterbefälle in Deutschland.ipynb")
#    fd.write(result)
In [78]:
import plotly.express as px
In [79]:
fig = px.line(
    x=x,
    y=y,
    labels={"x":"Datum", "y": "Sterbefälle"},
    render_mode="webgl",
    title="Sterberate",
    template="presentation",
)
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector = {
        "buttons": [
            {"count": 1, "label": "1m", "step": "month", "stepmode": "backward"},
            {"count": 6, "label": "6m", "step":"month", "stepmode": "backward"},
            {"count": 1, "label": "YTD", "step": "year", "stepmode": "todate"},
            {"count": 1, "label": "1y", "step": "year", "stepmode": "backward"},
            {"step": "all"},
        ]
    }
)
fig.show()
In [80]:
import statistics
yMean = statistics.mean(y)
In [81]:
plot_convolve(x, np.array(y) - np.array(yMean))
In [82]:
x[-1]
Out[82]:
datetime.date(2020, 8, 8)
In [ ]: