Py学习  »  Python

Python爬取腾讯新闻疫情数据,并用pyecharts将数据可视化!

python爱好者 • 3 年前 • 384 次点击  
阅读 6

Python爬取腾讯新闻疫情数据,并用pyecharts将数据可视化!

使用Python爬取腾讯新闻疫情数据,并使用pyecharts可视化,绘制国内、国际现有确诊人数地图,再用matplotlib绘制方寸图。

写在前面:这个已经不是什么新鲜的话题了,所以请大佬勿喷

导入相关模块

import time
import json
import requests
from datetime import datetime
import pandas as pd
import numpy as np
复制代码

1. 疫情数据抓取

通过腾讯新闻公布的数据进行爬取

网址:news.qq.com/zt2020/page…

对于静态网页,我们只需要把网页地址栏中的url传到get请求中,就可以轻松地获取到网页的数据。对于动态网页,抓取的关键是先分析网页数据获取和跳转的逻辑,再去写代码。

右击检查,选择Network,Ctrl+R即可

# 定义抓取数据函数:https://beishan.blog.csdn.net/
def Domestic():
    """Fetch the domestic (China) epidemic payload from the Tencent News endpoint.

    Returns:
        The object decoded from the JSON string stored under the ``'data'``
        key of the response envelope.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    # requests applies no timeout by default; without one a stalled server
    # would block this call indefinitely.
    response = requests.get(url=url, timeout=10)
    # Fail loudly on an HTTP error rather than trying to parse an error page.
    response.raise_for_status()
    # The envelope is JSON whose 'data' field is itself a JSON-encoded string.
    return json.loads(response.json()['data'])
def Oversea():
    """Fetch the overseas epidemic payload from the Tencent News endpoint.

    Returns:
        The object decoded from the JSON string stored under the ``'data'``
        key of the response envelope.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_foreign'
    # requests applies no timeout by default; without one a stalled server
    # would block this call indefinitely.
    response = requests.get(url=url, timeout=10)
    # Fail loudly on an HTTP error rather than trying to parse an error page.
    response.raise_for_status()
    # The envelope is JSON whose 'data' field is itself a JSON-encoded string.
    return json.loads(response.json()['data'])
# Download both payloads once, then list their top-level keys (one line each).
domestic = Domestic()
oversea = Oversea()
print(domestic.keys(), oversea.keys(), sep="\n")



dict_keys(['lastUpdateTime', 'chinaTotal', 'chinaAdd', 'isShowAdd', 'showAddSwitch', 'areaTree'])
dict_keys(['foreignList', 'globalStatis', 'globalDailyHistory', 'importStatis', 'countryAddConfirmRankList', 'countryConfirmWeekCompareRankList', 'continentStatis'])
复制代码

2. 初步分析

提取各地区数据明细

# Extract the per-region detail records from the domestic payload.
areaTree = domestic['areaTree']
# Bare expression: echoes the structure as notebook cell output for inspection.
areaTree
复制代码

提取国外地区数据明细

# Extract the per-country detail records from the overseas payload.
foreignList = oversea['foreignList']
# Bare expression: echoes the structure as notebook cell output for inspection.
foreignList
复制代码

就可以看到在json数据存储的结构了

3. 数据处理

3.1 国内各省疫情数据提取

# Address: https://beishan.blog.csdn.net/
# Build one record per province from the children of the first areaTree node.
# The current confirmed count is derived as confirmed - healed - dead.
china_list = [
    {
        'province': region['name'],
        'nowConfirm': (region['total']['confirm']
                       - region['total']['heal']
                       - region['total']['dead']),
    }
    for region in areaTree[0]['children']
]
china_data = pd.DataFrame(china_list)
# Persist the table as an Excel file (no index column).
china_data.to_excel("国内疫情.xlsx", index=False)
china_data.head()
复制代码

3.2 国际疫情数据提取

# Flatten each overseas record into the five columns used later; the key
# order here is the column order of the resulting table.
world_list = [
    {
        'country': entry['name'],
        'nowConfirm': entry['nowConfirm'],
        'confirm': entry['confirm'],
        'dead': entry['dead'],
        'heal': entry['heal'],
    }
    for entry in foreignList
]
world_data = pd.DataFrame(world_list)
# Persist the table as an Excel file (no index column).
world_data.to_excel("国外疫情.xlsx", index=False)
world_data.head()
复制代码

3.3 数据整合

将国内数据和海外数据合并

查询数据中是否含有中国疫情数据

# Select the rows whose country is "中国" to see whether China is already present.
world_data.loc[world_data['country'] == "中国"]
复制代码

从areaTree中提取中国数据,并添加至world_data

# Country-level totals for China live on the first areaTree node.
china_total = areaTree[0]['total']
confirm = china_total['confirm']  # cumulative confirmed
heal = china_total['heal']  # cumulative healed
dead = china_total['dead']  # cumulative deaths
nowConfirm = confirm - heal - dead  # current confirmed, derived from the totals
china_row = pd.DataFrame([{
    'country': "中国",
    'nowConfirm': nowConfirm,
    'confirm': confirm,
    'heal': heal,
    'dead': dead,
}])
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
# to add rows and works on older pandas versions as well.
world_data = pd.concat([world_data, china_row], ignore_index=True)
复制代码

再次查询数据中是否含有中国疫情数据

# Select the rows whose country is "中国" again to confirm the China row was added.
world_data.loc[world_data['country'] == "中国"]
复制代码

4. 可视化展示

4.1 国内疫情态势可视化

导入pyecharts相关库

import pyecharts.options as opts
from pyecharts.charts import Map
from pyecharts.globals import CurrentConfig, NotebookType
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB
复制代码

国内各地区现有确诊人数地图

# Map of China coloured by each province's current confirmed count.
m = Map()
m.add(
    "",
    # One [province, count] pair per row of china_data.
    [
        [name, count]
        for name, count in zip(china_data["province"], china_data["nowConfirm"])
    ],
    maptype="china",
    is_map_symbol_show=False,
)
m.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets are listed from highest to lowest and do not overlap.
        pieces=[
            # No "max" given: the bucket is open-ended upwards.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            # Label now matches the 500-999 bounds (it previously read '500-1000').
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
            # Upper bound is 0 (was 1) so a value of 1 belongs only to the
            # 1-9 bucket and this bucket matches its '0' label.
            {"max": 0, "label": '0', "color": "#ffffff"},
        ]))
m.render_notebook()
复制代码

4.2 国际疫情态势可视化

将各国的中文名称转换成英文名称,使用pandas中的merge方法

pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None)

how:可选 'left'、'right'、'outer'、'inner',默认为 'inner'。inner 取交集,outer 取并集。

# Load the Chinese/English country-name lookup table.
world_name = pd.read_excel("国家中英文对照表.xlsx")
# Inner join on the Chinese name: only countries present in both tables remain.
world_data_t = world_data.merge(
    world_name,
    how="inner",
    left_on="country",
    right_on="中文",
)
world_data_t
复制代码

169 rows × 7 columns

世界各国现有确诊人数地图

# World map coloured by each country's current confirmed count.
m2 = Map()
m2.add(
    "",
    # One [English country name, count] pair per row of world_data_t.
    [
        [name, count]
        for name, count in zip(world_data_t["英文"], world_data_t["nowConfirm"])
    ],
    maptype="world",
    is_map_symbol_show=False,
)
m2.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19世界各国现有确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets are listed from highest to lowest and do not overlap.
        pieces=[
            # No "max" given: the bucket is open-ended upwards.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            # Label now matches the 500-999 bounds (it previously read '500-1000').
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 0, "max": 9, "label": '0-9', "color": "#fff2d1"},
        ]))
# Hide the country-name labels on the map.
m2.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
m2.render_notebook()
复制代码

4.3 国内疫情方寸间

单独取出中国疫情数据

# Pull out the China row(s) from world_data and renumber them from 0 so the
# first match can be addressed as row 0.
China_data = (
    world_data
    .loc[world_data['country'] == "中国"]
    .reset_index(drop=True)
)
China_data
复制代码

提取China_data的累计确诊、累计治愈与累计死亡数据

# Read China's cumulative confirmed, healed and death counts from row 0.
# DataFrame.at[row, column] returns the single value at that position.
w_confirm, w_heal, w_dead = [
    China_data.at[0, column] for column in ('confirm', 'heal', 'dead')
]
复制代码

导入matplotlib相关库

import matplotlib.pyplot as plt
import matplotlib.patches as patches
复制代码

构建国内疫情方寸间图示

# -*- coding: utf-8 -*-
%matplotlib inline
fig1 = plt.figure()
ax1 = fig1.add_subplot(111, aspect='equal', facecolor='#fafaf0')
ax1.set_xlim(-w_confirm / 2, w_confirm / 2)
ax1.set_ylim(-w_confirm / 2, w_confirm / 2)
ax1.spines['top'].set_color('none')
ax1.spines['right'].set_color('none')
ax1.spines['bottom'].set_position(('data', 0))
ax1.spines['left'].set_position(('data', 0))
ax1.set_xticks([])
ax1.set_yticks([])
p0 = patches.Rectangle((-w_confirm / 2, -w_confirm / 2),
                       width=w_confirm,
                       height=w_confirm,
                       facecolor='#29648c',
                       label='confirm')
p1 = patches.Rectangle((-w_heal / 2, -w_heal / 2),
                       width=w_heal,
                       height=w_heal,
                       facecolor='#69c864',
                       label='heal')
p2 = patches.Rectangle((-w_dead / 2, -w_dead / 2),
                       width=w_dead,
                       height=w_dead,
                       facecolor='#000000',
                       label='dead')
plt.gca().add_patch(p0)
plt.gca().add_patch(p1)
plt.gca().add_patch(p2)
plt.title('COVID-19 Square - China', fontdict={'size': 20})
plt.legend(loc='best')
plt.show()
复制代码

4.4 国际疫情方寸间

重新排序数据

# Order countries by cumulative confirmed count, largest first, and renumber
# the rows from 0 so that row i is the i-th largest.
world_data = (
    world_data
    .sort_values("confirm", ascending=False)
    .reset_index(drop=True)
)
world_data
复制代码

162 rows × 5 columns

构建国际疫情方寸间图示




    
# One "square" panel per country for the countries at the top of world_data.
# Rows are read by position 0..top_n-1, so world_data is expected to be sorted
# by 'confirm' (descending) with a fresh 0-based index.
# SimHei is presumably selected so the Chinese country names in the titles
# render; confirm the font is installed on the machine.
plt.rcParams['font.sans-serif'] = [u'SimHei']
plt.rcParams['axes.unicode_minus'] = False
# Never ask for more panels than there are rows in the table.
top_n = min(20, len(world_data))
n_cols = 4
# Ceiling division; add_subplot requires integer grid dimensions, and the
# former `20 / 4` produced the float 5.0, which matplotlib rejects.
n_rows = -(-top_n // n_cols)
fig1 = plt.figure(figsize=(25, 25))
for a in range(top_n):
    w_confirm = world_data.at[a, 'confirm']
    w_heal = world_data.at[a, 'heal']
    w_dead = world_data.at[a, 'dead']
    ax1 = fig1.add_subplot(n_rows,
                           n_cols,
                           a + 1,
                           aspect='equal',
                           facecolor='#fafaf0')
    ax1.set_xlim(-w_confirm / 2, w_confirm / 2)
    ax1.set_ylim(-w_confirm / 2, w_confirm / 2)
    # Hide the top/right frame lines and move the axes through the origin.
    ax1.spines['top'].set_color('none')
    ax1.spines['right'].set_color('none')
    ax1.spines['bottom'].set_position(('data', 0))
    ax1.spines['left'].set_position(('data', 0))
    ax1.set_xticks([])
    ax1.set_yticks([])
    p0 = patches.Rectangle((-w_confirm / 2, -w_confirm / 2),
                           width=w_confirm,
                           height=w_confirm,
                           # Opacity scales with the case count but is capped
                           # at 1: matplotlib rejects alpha outside [0, 1].
                           alpha=min(w_confirm / 90000, 1),
                           facecolor='#29648c',
                           label='confirm')
    p1 = patches.Rectangle((-w_heal / 2, -w_heal / 2),
                           width=w_heal,
                           height=w_heal,
                           alpha=1,
                           facecolor='#69c864',
                           label='heal')
    p2 = patches.Rectangle((-w_dead / 2, -w_dead / 2),
                           width=w_dead,
                           height=w_dead,
                           alpha=1,
                           facecolor='black',
                           label='dead')
    # Largest square first so the smaller ones are drawn on top of it.
    ax1.add_patch(p0)
    ax1.add_patch(p1)
    ax1.add_patch(p2)
    ax1.set_title(world_data.at[a, 'country'], fontdict={'size': 20})
    ax1.legend(loc='best')
plt.show()
复制代码

这样就可以清楚地看到各个国家新冠确诊人数、治愈和死亡人数的关系了

Python社区是高质量的Python/Django开发社区
本文地址:http://www.python88.com/topic/110253
 
384 次点击