import pandas as pd
import os
import glob
# Load the aggregated baby-name records. Column names are supplied explicitly
# through `names=` rather than taken from the file.
# NOTE(review): this presumes agg.csv has no header row of its own -- confirm.
df = pd.read_csv(
    "agg.csv",
    names=["index1", "name", "sex", "occurences", "yearofbirth"],
)
df = df.drop(['index1'], axis=1)

To analyze this data, I pulled the aggregate CSV file from https://ssa.gov/oact/babynames/.
Then, I ran a preliminary analysis on the most common household tech products that had unique names. I excluded names that could have many origins (like Alexa or Echo).
# Keep only the rows whose name matches one of the tech-product names of interest.
tempdf = df[df["name"].isin(["Siri", "Cortana", "Kindle"])]
tempdf.groupby(["name"]).count()['occurences']

name
Cortana 15
Kindle 51
Siri 77
Name: occurences, dtype: int64
I zoomed in on girls named Kindle and charted the number of occurrences per year of birth.
# Narrow the data to female records for the name "Kindle".
tempdf = df.loc[(df["name"] == "Kindle") & (df["sex"] == "F")]
tempdf

| | name | sex | occurences | yearofbirth |
|---|---|---|---|---|
| 628404 | Kindle | F | 5 | 1964 |
| 718475 | Kindle | F | 7 | 1971 |
| 751282 | Kindle | F | 5 | 1973 |
| 799770 | Kindle | F | 6 | 1976 |
| 837255 | Kindle | F | 5 | 1978 |
| 875210 | Kindle | F | 5 | 1980 |
| 892000 | Kindle | F | 7 | 1981 |
| 910118 | Kindle | F | 9 | 1982 |
| 931133 | Kindle | F | 7 | 1983 |
| 947415 | Kindle | F | 13 | 1984 |
| 990687 | Kindle | F | 7 | 1986 |
| 1008865 | Kindle | F | 11 | 1987 |
| 1059555 | Kindle | F | 5 | 1989 |
| 1078745 | Kindle | F | 9 | 1990 |
| 1129684 | Kindle | F | 8 | 1992 |
| 1152413 | Kindle | F | 12 | 1993 |
| 1178001 | Kindle | F | 13 | 1994 |
| 1202241 | Kindle | F | 19 | 1995 |
| 1229122 | Kindle | F | 16 | 1996 |
| 1258334 | Kindle | F | 10 | 1997 |
| 1288386 | Kindle | F | 7 | 1998 |
| 1313674 | Kindle | F | 10 | 1999 |
| 1342609 | Kindle | F | 10 | 2000 |
| 1370378 | Kindle | F | 14 | 2001 |
| 1404795 | Kindle | F | 8 | 2002 |
| 1434580 | Kindle | F | 9 | 2003 |
| 1463384 | Kindle | F | 13 | 2004 |
| 1497494 | Kindle | F | 10 | 2005 |
| 1526831 | Kindle | F | 17 | 2006 |
| 1564101 | Kindle | F | 11 | 2007 |
| 1595269 | Kindle | F | 20 | 2008 |
| 1629469 | Kindle | F | 24 | 2009 |
| 1664457 | Kindle | F | 22 | 2010 |
| 1697111 | Kindle | F | 31 | 2011 |
| 1732426 | Kindle | F | 22 | 2012 |
| 1766554 | Kindle | F | 20 | 2013 |
| 1800676 | Kindle | F | 17 | 2014 |
| 1833161 | Kindle | F | 20 | 2015 |
| 1866532 | Kindle | F | 19 | 2016 |
| 1905264 | Kindle | F | 8 | 2017 |
| 1934607 | Kindle | F | 12 | 2018 |
| 1967350 | Kindle | F | 11 | 2019 |
| 2004347 | Kindle | F | 6 | 2020 |
| 2033196 | Kindle | F | 8 | 2021 |
| 2067673 | Kindle | F | 6 | 2022 |
import plotly.express as px
import plotly.graph_objects as go

# Draw the filtered records as a single line: year of birth on the x-axis,
# number of occurrences on the y-axis, on the plain white plotly template.
fig = px.line(
    tempdf,
    x="yearofbirth",
    y="occurences",
    template="plotly_white",
    color_discrete_sequence=["#a11D83"],
)

# Render the figure.
fig.show()
fig.write_image("./babynames.jpeg")

I finished the chart in Illustrator, adding context on major Kindle release dates.