defget_html(url, params): uapools = [ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14' ]
defget_html(url, params): uapools = [ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14' ]
# Copy content.txt into the CSV: each line becomes one row, split on whitespace.
# `writer` and `csvFile` are created earlier in this script.
# The context manager closes content.txt even if a write fails part-way.
with open("content.txt", 'r', encoding='utf-8') as f:
    for line in f:
        writer.writerow(line.split())
csvFile.close()
3. 统计一天各个时间段内的评论数
py.py
# coding=gbk
# Tally comments by the first two characters of column 1 of data.csv and save
# the sorted (key, count) pairs to time2.csv.
import csv
from collections import Counter

from pyecharts import options as opts
from sympy.combinatorics import Subset
from wordcloud import WordCloud

# NOTE(review): column 1 presumably holds a time string starting with the
# two-digit hour -- confirm against the spider's output format.
with open('../Spiders/data.csv') as csvfile:
    reader = csv.reader(csvfile)
    # Rows with fewer than two columns (e.g. blank lines) carry no key; skip them.
    data1 = [str(row[1])[0:2] for row in reader if len(row) > 1]

print(data1)
print(type(data1))

# Counter tallies every key in a single pass; calling list.count() once per
# distinct key would rescan the whole list each time.
rst = sorted(Counter(data1).items())
print(type(rst))
print(rst)

with open("time2.csv", "w+", newline='', encoding='utf-8') as f:
    writer = csv.writer(f, delimiter=',')
    # One (key, count) pair per row, each element in its own column.
    writer.writerows(rst)

# Read the file back once, with the same encoding it was written in.
with open('time2.csv', newline='', encoding='utf-8') as csvfile:
    rows = list(csv.reader(csvfile))
x = [str(row[0]) for row in rows]
print(x)
y1 = [float(row[1]) for row in rows]
print(y1)
4. 统计最近评论数
py1.py
# coding=gbk
# Tally comments by the value in column 0 of data.csv and save the sorted
# (key, count) pairs to time1.csv.
import csv
from collections import Counter

from pyecharts import options as opts
from sympy.combinatorics import Subset
from wordcloud import WordCloud

# NOTE(review): column 0 presumably holds the comment date -- confirm against
# the spider's output format.
with open('../Spiders/data.csv') as csvfile:
    reader = csv.reader(csvfile)
    # Blank lines come back from csv.reader as empty rows; skip them.
    data1 = [str(row[0]) for row in reader if row]

print(type(data1))

# Counter tallies every key in a single pass; calling list.count() once per
# distinct key would rescan the whole list each time.
rst = sorted(Counter(data1).items())
print(type(rst))
print(rst)

with open("time1.csv", "w+", newline='', encoding='utf-8') as f:
    writer = csv.writer(f, delimiter=',')
    # One (key, count) pair per row, each element in its own column.
    writer.writerows(rst)

# Read the file back once, with the same encoding it was written in.
with open('time1.csv', newline='', encoding='utf-8') as csvfile:
    rows = list(csv.reader(csvfile))
x = [str(row[0]) for row in rows]
print(x)
y1 = [float(row[1]) for row in rows]
# encoding: utf-8 # encoding: utf-8 import csv import pyecharts.options as opts from pyecharts.charts import Bar from pyecharts.globals import ThemeType
# Render the (label, count) pairs stored in time1.csv as three side-by-side
# charts (pie, ring, Nightingale rose) in pie_pyecharts.html.
import csv
from pyecharts import options as opts
from pyecharts.charts import Pie
from random import randint
from pyecharts.globals import ThemeType

# Load the file once; UTF-8 matches the encoding used when time1.csv is written.
with open('time1.csv', newline='', encoding='utf-8') as csvfile:
    rows = list(csv.reader(csvfile))
x = [str(row[0]) for row in rows]
print(x)
y1 = [float(row[1]) for row in rows]
print(y1)

num = y1
lab = x
# The same (label, value) pairs feed all three series.
data_pair = list(zip(lab, num))

(
    Pie(init_opts=opts.InitOpts(width='1700px', height='450px', theme=ThemeType.LIGHT))  # default size is 900x600
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="雪中悍刀行近日评论统计",
            title_textstyle_opts=opts.TextStyleOpts(font_size=27),
        ),
        legend_opts=opts.LegendOpts(
            pos_top="10%", pos_left="1%",  # legend position
        ),
    )
    .add(series_name='', center=[280, 270], data_pair=data_pair)  # plain pie
    .add(series_name='', center=[845, 270], data_pair=data_pair, radius=['40%', '75%'])  # ring
    .add(series_name='', center=[1380, 270], data_pair=data_pair, rosetype='radius')  # Nightingale rose
).render('pie_pyecharts.html')
效果图
5. 制作每小时评论饼图
pie_pyecharts2.py
# Render the per-hour comment counts stored in time2.csv as three side-by-side
# charts (pie, ring, Nightingale rose) in pie_pyecharts2.html.
import csv
from pyecharts import options as opts
from pyecharts.charts import Pie
from random import randint
from pyecharts.globals import ThemeType

str_name1 = '点'  # suffix appended to every label

# Load the file once; UTF-8 matches the encoding used when time2.csv is written.
with open('time2.csv', newline='', encoding='utf-8') as csvfile:
    rows = list(csv.reader(csvfile))
x = [str(row[0] + str_name1) for row in rows]
print(x)
y1 = [int(row[1]) for row in rows]
print(y1)

num = y1
lab = x
# The same (label, value) pairs feed all three series.
data_pair = list(zip(lab, num))

(
    Pie(init_opts=opts.InitOpts(width='1650px', height='500px', theme=ThemeType.LIGHT,))  # default size is 900x600
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="雪中悍刀行每小时评论统计",
            title_textstyle_opts=opts.TextStyleOpts(font_size=27),
        ),
        legend_opts=opts.LegendOpts(
            pos_top="8%", pos_left="4%",  # legend position
        ),
    )
    .add(series_name='', center=[250, 300], data_pair=data_pair)  # plain pie
    .add(series_name='', center=[810, 300], data_pair=data_pair, radius=['40%', '75%'])  # ring
    .add(series_name='', center=[1350, 300], data_pair=data_pair, rosetype='radius')  # Nightingale rose
).render('pie_pyecharts2.html')
效果图
6. 制作观看时间区间评论统计饼图
pie_pyecharts3.py
# coding=gbk
# Count comments per hour, save the hourly tally to time2.csv, then chart the
# totals of four six-hour periods of the day in pie_pyecharts3.html.
import csv
from collections import Counter

from pyecharts import options as opts
from pyecharts.globals import ThemeType
from sympy.combinatorics import Subset
from wordcloud import WordCloud
from pyecharts.charts import Pie
from random import randint

# NOTE(review): path assumed to be the same '../Spiders/data.csv' the sibling
# scripts read -- confirm.
with open('../Spiders/data.csv') as csvfile:
    reader = csv.reader(csvfile)
    # The first two characters of column 1 are parsed as the hour; rows with
    # fewer than two columns (e.g. blank lines) are skipped.
    data2 = [int(row[1].strip()[0:2]) for row in reader if len(row) > 1]
print(type(data2))

# Sorted (hour, count) pairs, tallied in a single pass.
hourly = sorted(Counter(data2).items())
print(type(hourly))

with open("time2.csv", "w+", newline='', encoding='utf-8') as f:
    writer = csv.writer(f, delimiter=',')
    # One (hour, count) pair per row, each element in its own column.
    writer.writerows(hourly)

x = ['凌晨', '上午', '下午', '晚上']
# Bucket by hour // 6 rather than by position in the list, so an hour with no
# comments cannot shift later hours into the wrong period.
# Assumes hours are in 0-23; anything larger raises IndexError.
y1 = [0] * len(x)
for hour, count in hourly:
    y1[hour // 6] += count
print(y1)

num = y1
lab = x
# The same (label, value) pairs feed all three series.
data_pair = list(zip(lab, num))

(
    Pie(init_opts=opts.InitOpts(width='1500px', height='450px', theme=ThemeType.LIGHT))  # default size is 900x600
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="雪中悍刀行观看时间区间评论统计",
            title_textstyle_opts=opts.TextStyleOpts(font_size=30),
        ),
        legend_opts=opts.LegendOpts(
            pos_top="8%",  # legend position
        ),
    )
    .add(series_name='', center=[260, 270], data_pair=data_pair)  # plain pie
    .add(series_name='', center=[1230, 270], data_pair=data_pair, radius=['40%', '75%'])  # ring
    .add(series_name='', center=[750, 270], data_pair=data_pair, rosetype='radius')  # Nightingale rose
).render('pie_pyecharts3.html')
效果图
7. 制作雪中悍刀行主演提及占比饼图
pie_pyecharts4.py
# Count how often each lead actor's name occurs in content.txt and chart the
# shares as pie, ring and Nightingale rose in pie_pyecharts4.html.
import csv
from pyecharts import options as opts
from pyecharts.charts import Pie
from random import randint
from pyecharts.globals import ThemeType

# content.txt is the data source; the context manager closes it even if the
# read fails.
with open('content.txt', 'r', encoding='utf-8') as f:
    words = f.read()

name = ["张若昀", "李庚希", "胡军"]
print(name)
# Derive the counts from the name list so labels and values cannot drift apart.
count = [float(words.count(actor)) for actor in name]
print(count)

num = count
lab = name
# The same (label, value) pairs feed all three series.
data_pair = list(zip(lab, num))

(
    Pie(init_opts=opts.InitOpts(width='1650px', height='450px', theme=ThemeType.LIGHT))  # default size is 900x600
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="雪中悍刀行主演提及占比",
            title_textstyle_opts=opts.TextStyleOpts(font_size=27),
        ),
        legend_opts=opts.LegendOpts(
            pos_top="3%", pos_left="33%",  # legend position
        ),
    )
    .add(series_name='', center=[280, 270], data_pair=data_pair)  # plain pie
    .add(series_name='', center=[800, 270], data_pair=data_pair, radius=['40%', '75%'])  # ring
    .add(series_name='', center=[1300, 270], data_pair=data_pair, rosetype='radius')  # Nightingale rose
).render('pie_pyecharts4.html')
效果图
8. 评论内容情感分析
SnowNLP.py
import numpy as np from snownlp import SnowNLP import matplotlib.pyplot as plt
f = open('content.txt', 'r', encoding='UTF-8') list = f.readlines() sentimentslist = [] for i in list: s = SnowNLP(i)