import pandas as pd
import os
import glob
# Load the aggregated SSA baby-names CSV. Passing `names=` makes pandas treat
# the file as header-less and label the columns explicitly.
df = pd.read_csv(
    "agg.csv",
    names=["index1", "name", "sex", "occurences", "yearofbirth"],
)
# Drop the first column; presumably a leftover row index from when the
# aggregate file was written -- TODO confirm.
df = df.drop(["index1"], axis=1)
# Bare expression so the notebook renders the frame.
df
To analyze this data, I pulled the aggregate CSV file from https://ssa.gov/oact/babynames/.
Then, I ran a preliminary analysis on the most common household tech products that had unique names. I excluded names that could have many origins (like Alexa or Echo).
# Keep only the rows whose name matches one of the tech-product names.
tempdf = df[(df.name == "Siri") | (df.name == "Cortana") | (df.name == "Kindle")]
# Number of rows per name.
# NOTE(review): count() tallies rows, not the sum of `occurences`; switch to
# .sum() if total births per name is what is wanted -- confirm intent.
tempdf.groupby(["name"]).count()["occurences"]
name
Cortana 15
Kindle 51
Siri 77
Name: occurences, dtype: int64
I zoomed in on girls named Kindle and charted the number of occurrences per year of birth.
# Narrow the data to rows for the name "Kindle" recorded with sex "F".
tempdf = df[(df.name == "Kindle") & (df.sex == "F")]
# Bare expression so the notebook renders the filtered frame.
tempdf
| | name | sex | occurences | yearofbirth |
---|---|---|---|---|
628404 | Kindle | F | 5 | 1964 |
718475 | Kindle | F | 7 | 1971 |
751282 | Kindle | F | 5 | 1973 |
799770 | Kindle | F | 6 | 1976 |
837255 | Kindle | F | 5 | 1978 |
875210 | Kindle | F | 5 | 1980 |
892000 | Kindle | F | 7 | 1981 |
910118 | Kindle | F | 9 | 1982 |
931133 | Kindle | F | 7 | 1983 |
947415 | Kindle | F | 13 | 1984 |
990687 | Kindle | F | 7 | 1986 |
1008865 | Kindle | F | 11 | 1987 |
1059555 | Kindle | F | 5 | 1989 |
1078745 | Kindle | F | 9 | 1990 |
1129684 | Kindle | F | 8 | 1992 |
1152413 | Kindle | F | 12 | 1993 |
1178001 | Kindle | F | 13 | 1994 |
1202241 | Kindle | F | 19 | 1995 |
1229122 | Kindle | F | 16 | 1996 |
1258334 | Kindle | F | 10 | 1997 |
1288386 | Kindle | F | 7 | 1998 |
1313674 | Kindle | F | 10 | 1999 |
1342609 | Kindle | F | 10 | 2000 |
1370378 | Kindle | F | 14 | 2001 |
1404795 | Kindle | F | 8 | 2002 |
1434580 | Kindle | F | 9 | 2003 |
1463384 | Kindle | F | 13 | 2004 |
1497494 | Kindle | F | 10 | 2005 |
1526831 | Kindle | F | 17 | 2006 |
1564101 | Kindle | F | 11 | 2007 |
1595269 | Kindle | F | 20 | 2008 |
1629469 | Kindle | F | 24 | 2009 |
1664457 | Kindle | F | 22 | 2010 |
1697111 | Kindle | F | 31 | 2011 |
1732426 | Kindle | F | 22 | 2012 |
1766554 | Kindle | F | 20 | 2013 |
1800676 | Kindle | F | 17 | 2014 |
1833161 | Kindle | F | 20 | 2015 |
1866532 | Kindle | F | 19 | 2016 |
1905264 | Kindle | F | 8 | 2017 |
1934607 | Kindle | F | 12 | 2018 |
1967350 | Kindle | F | 11 | 2019 |
2004347 | Kindle | F | 6 | 2020 |
2033196 | Kindle | F | 8 | 2021 |
2067673 | Kindle | F | 6 | 2022 |
import plotly.express as px
# Line chart of the `occurences` column against `yearofbirth` for the
# rows currently held in `tempdf`.
fig = px.line(
    tempdf,
    x="yearofbirth",
    y="occurences",
    color_discrete_sequence=["#a11D83"],  # single line colour
    template="plotly_white",
    # opacity=1,
    # height=1200,
)
import plotly.graph_objects as go
# fig = go.Figure(data=go.Scatter(x=tempdf["yearofbirth"], y=tempdf["occurences"]))
# Render the chart, then export it as a static image next to the notebook.
fig.show()
fig.write_image("./babynames.jpeg")
I finished the chart in Illustrator, adding context on major Kindle release dates.