导入和数据
import pandas as pd
import numpy as np # for test data
import seaborn as sns # only for seaborn option
# Reproducible synthetic data: every row gets a random year and a random district.
np.random.seed(365)
rows = 100000
years = range(2014, 2021)
districts = [
    '05 - LUBBOCK', '15 - SAN ANTONIO', '18 - DALLAS', '04 - AMARILLO', '08 - ABILENE',
    '21 - PHARR', '25 - CHILDRESS', '20 - BEAUMONT', '22 - LAREDO', '24 - EL PASO',
]
# The year column is drawn before the district column; keep this order so the
# seeded random stream yields the same data.
data = {
    'YEAR': np.random.choice(years, size=rows),
    'RESPONSIBLE DISTRICT': np.random.choice(districts, size=rows),
}
df = pd.DataFrame(data)

# Tally the rows for every (year, district) pair ...
pair_counts = df.value_counts(subset=['YEAR', 'RESPONSIBLE DISTRICT'])
long_counts = pair_counts.reset_index(name='VC')
# ... then reshape to wide form: one row per year, one column per district.
dfp = long_counts.pivot(index='YEAR', columns='RESPONSIBLE DISTRICT', values='VC')
# display(dfp)
RESPONSIBLE DISTRICT 04 - AMARILLO 05 - LUBBOCK 08 - ABILENE 15 - SAN ANTONIO 18 - DALLAS 20 - BEAUMONT 21 - PHARR 22 - LAREDO 24 - EL PASO 25 - CHILDRESS
YEAR
2014 1407 1406 1485 1456 1392 1456 1499 1458 1394 1452
2015 1436 1423 1428 1441 1395 1400 1423 1442 1375 1399
2016 1480 1381 1393 1415 1446 1442 1414 1435 1452 1454
2017 1422 1388 1485 1447 1404 1401 1413 1470 1424 1426
2018 1479 1424 1384 1450 1390 1384 1445 1435 1478 1386
2019 1387 1317 1379 1457 1457 1476 1447 1459 1451 1406
2020 1462 1452 1454 1448 1441 1428 1411 1407 1402 1445
pandas.DataFrame.plot
# Draw a separate bar chart for each column of the pivoted frame, arranged on a
# 5x5 grid of subplots with per-axes legends turned off.
# NOTE: with subplots=True pandas returns the Axes objects rather than a Figure,
# despite the variable name.
subplot_options = dict(subplots=True, layout=(5, 5), figsize=(20, 20), legend=False)
fig = dfp.plot.bar(**subplot_options)
seaborn.catplot
- seaborn 是用于 matplotlib 的高级 API。
- 这是最简单的方法,因为数据帧不需要重塑。
# Count plot straight from the long-format frame: one facet per district,
# wrapped after five columns, with the years along the x axis.
p = sns.catplot(
    data=df,
    x='YEAR',
    kind='count',
    col='RESPONSIBLE DISTRICT',
    col_wrap=5,
    height=3.5,
)
# Shorten each facet title to just the facet value.
p.set_titles(col_template='{col_name}', row_template='{row_name}')