一、前言
B站上的漂亮的小姐姐真的好多好多,利用 you-get 大法下载了一个 B 站上跳舞的小姐姐视频,利用视频中的弹幕来制作一个漂亮小姐姐词云跳舞视频,一起来看看吧。
二、实现思路
1. 下载视频
安装you-get
pip install you-get -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
利用 you-get 下载 B 站视频到本地
视频链接:
https://www.bilibili.com/video/BV1rD4y1Q7jc?from=search&seid=10634574434789745619
you-get -i "https://www.bilibili.com/video/BV1rD4y1Q7jc?from=search&seid=10634574434789745619"
you-get -o 本地保存路径 视频链接
更多 you-get 大法的详细使用,可以参考官方文档:
https://github.com/soimort/you-get/wiki/%E4%B8%AD%E6%96%87%E8%AF%B4%E6%98%8E
2. 获取弹幕内容
写 python 爬虫,解析网页、提取弹幕数据保存到txt,注意构造 URL 参数和伪装请求头。
导入用到的库
import requests
import pandas as pd
import re
import time
import random
from concurrent. futures import ThreadPoolExecutor
import datetime
from fake_useragent import UserAgent
# Shared User-Agent generator; Grab_barrage() reads `ua.random` for every request.
ua = UserAgent(
    verify_ssl=False,
    path='fake_useragent.json',
)

# Timestamp taken when the module is loaded; main() reports elapsed time against it.
start_time = datetime.datetime.now()
爬取弹幕数据
def Grab_barrage(date):
    """Download one day's danmaku for the video and append them to bullet.txt.

    The request goes to the module-level ``url`` with a browser-like header
    set whose User-Agent comes from the module-level ``ua``.  Every
    ``<d p="...">text</d>`` entry in the response body is extracted,
    de-duplicated, appended to ``bullet.txt`` (one entry per line) and echoed
    to stdout.  The function then sleeps 1-3 seconds to throttle requests.

    Args:
        date: Day to query; sent unchanged as the ``date`` query parameter
            (the caller in this file passes ``YYYY-MM-DD`` strings).

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server does not answer within the timeout.
    """
    headers = {
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "accept-encoding": "gzip",
        "origin": "https://www.bilibili.com",
        "referer": "https://www.bilibili.com/video/BV1rD4y1Q7jc?from=search&seid=10634574434789745619",
        "user-agent": ua.random,
        # NOTE(review): this is a personal login cookie committed to source.
        # Replace it with your own session cookie and keep it out of version control.
        "cookie": "_uuid=0EBFC9C8-19C3-66CC-4C2B-6A5D8003261093748infoc; buvid3=4169BA78-DEBD-44E2-9780-B790212CCE76155837infoc; sid=ae7q4ujj; rpdid=|(JJmlY|YukR0J'ulmumY~u~m; LIVE_BUVID=AUTO4315952457375679; CURRENT_QUALITY=80; blackside_state=1; CURRENT_FNVAL=80; PVID=1; fingerprint3=89f3acebeacc72a0a25afa9c05f6d87c; fingerprint=2c691e81ffde16dfbb39b8f6d20eb5f0; fingerprint_s=99dc5d2a438924de14f663d6a4cf9cc8; buivd_fp=4169BA78-DEBD-44E2-9780-B790212CCE76155837infoc; bp_video_offset_501048197=472333401972842834; buvid_fp=4169BA78-DEBD-44E2-9780-B790212CCE76155837infoc; buvid_fp_plain=0AE6F247-D84F-48C7-87A8-F8F35A578544184985infoc; bfe_id=1e33d9ad1cb29251013800c68af42315; DedeUserID=501048197; DedeUserID__ckMd5=1d04317f8f8f1021; SESSDATA=2ae431c2%2C1625306326%2Ca312a*11; bili_jct=d4edec1bd2ab84e0eb453adb3971b19a",
    }
    params = {
        'type': 1,
        'oid': '206344228',
        'date': date,
    }
    # Without a timeout a single stalled connection would block its worker
    # thread (and therefore the whole pool shutdown) indefinitely.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    response.encoding = 'utf-8'
    # A set drops danmaku that appear more than once within the same day.
    comment = set(re.findall('<d p=".*?">(.*?)</d>', response.text))
    # NOTE(review): the file is written with the platform default encoding and
    # the word-cloud script reads it back the same way; switch both to an
    # explicit encoding together if this must be portable.
    with open('bullet.txt', 'a+') as f:
        # One write per day's batch instead of one write per danmaku.
        f.write(''.join(con + '\n' for con in comment))
    for con in comment:
        print(con)
    # Random pause so consecutive requests are not fired back to back.
    time.sleep(random.randint(1, 3))
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
主函数
def main():
    """Fetch danmaku for every day in ``date_list`` using four worker threads.

    Reads the module-level ``date_list`` and ``start_time``.  Each date is
    handed to ``Grab_barrage``; a failure for one date is reported on stdout
    and does not stop the remaining dates.  Prints the elapsed time at the end.
    """
    with ThreadPoolExecutor(max_workers=4) as executor:
        # submit() + result() instead of a discarded executor.map(): the lazy
        # map iterator was never consumed, so worker exceptions vanished.
        futures = {executor.submit(Grab_barrage, day): day for day in date_list}
        for future, day in futures.items():
            try:
                future.result()
            except Exception as exc:  # top-level boundary: report and carry on
                print(f'{day} 弹幕抓取失败:{exc!r}')
    delta = (datetime.datetime.now() - start_time).total_seconds()
    print(f'用时:{delta}s -----------> 弹幕数据成功保存到本地txt')
主函数调用
if __name__ == '__main__':
    # Danmaku-history endpoint; read as a global by Grab_barrage().
    url = "https://api.bilibili.com/x/v2/dm/history"

    # Date window to crawl.
    start, end = '20201201', '20210128'

    # One 'YYYY-MM-DD' string per day in the window; read as a global by main().
    date_list = pd.date_range(start, end).strftime('%Y-%m-%d').tolist()
    print(date_list)

    count = 0
    main()
结果如下:
3. 从视频中提取图片
经过测试,这个视频完整分离出图片来有 3347 张,本文截取 800 张图片来做词云。
import os

import cv2

# Make sure the output folder exists: cv2.imwrite does not create directories
# and only reports the failure through its return value.
os.makedirs('./pictures', exist_ok=True)

cap = cv2.VideoCapture(r"beauty.flv")
if not cap.isOpened():
    raise RuntimeError('无法打开视频文件 beauty.flv')

# Frames are counted from 1; frames 88..887 (800 frames) are written to disk.
num = 1
try:
    # Stop as soon as the window is exhausted instead of decoding the rest of the video.
    while num < 888:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read error) before the window was complete.
            break
        if num >= 88:
            # Only announce success when the file was really written.
            if cv2.imwrite(f"./pictures/img_{num}.jpg", frame):
                print(f'========== 已成功保存第{num}张图片 ==========')
            else:
                print(f'========== 第{num}张图片保存失败 ==========')
        num += 1
finally:
    # Release the capture handle even if a write or decode step raises.
    cap.release()
结果如下:
4. 利用百度AI进行人像分割
创建一个人像分割的应用,记住你的AppID、API Key、Secret Key,后面会用到。
查看人像分割的Python SDK 文档,熟悉它的基本使用。
"""
Portrait segmentation of the extracted video frames via the Baidu AI SDK.

@File    : 人像分割.py
@Author  : 叶庭云
@CSDN    : https://yetingyun.blog.csdn.net/
@Baidu AI: https://ai.baidu.com/tech/body/seg

Reads every ./pictures/img_<n>.jpg, sends it to the body-segmentation API and
writes the returned label map, resized to the frame size, to
./mask_img/mask_<n>.png.
"""
import base64
import os
import random
import re
import time

import cv2
import numpy as np
from aip import AipBodyAnalysis

# NOTE(review): application credentials are hard-coded here; prefer loading
# them from the environment or an untracked config file.
APP_ID = '23485847'
API_KEY = 'VwGY053Y1A8ow3CFBTFrK0Pm'
SECRET_KEY = '**********************************'
client = AipBodyAnalysis(APP_ID, API_KEY, SECRET_KEY)

path = './mask_img/'
# cv2.imwrite does not create missing directories.
os.makedirs(path, exist_ok=True)

img_files = os.listdir('./pictures')
print(img_files)

# Take the frame numbers from the files that are actually present.  The former
# range(88, len(img_files) + 1) ended at 800 for 800 files numbered 88..887,
# so the last 87 frames never got a mask.
frames = []
for name in img_files:
    match = re.fullmatch(r'img_(\d+)\.jpg', name)
    if match:
        frames.append((int(match.group(1)), name))
frames.sort()

for num, name in frames:
    img = f'./pictures/{name}'
    img1 = cv2.imread(img)
    if img1 is None:
        # imread signals an unreadable file by returning None.
        print(f'======== 第{num}张图像读取失败,已跳过 ========')
        continue
    height, width, _ = img1.shape

    # The SDK call takes the raw encoded image bytes.
    with open(img, 'rb') as fp:
        img_info = fp.read()
    seg_res = client.bodySeg(img_info)

    if 'labelmap' in seg_res:
        # The label map arrives as a base64-encoded image.
        labelmap = base64.b64decode(seg_res['labelmap'])
        nparr = np.frombuffer(labelmap, np.uint8)
        labelimg = cv2.imdecode(nparr, 1)
        # Nearest-neighbour keeps label values intact while scaling to frame size.
        labelimg = cv2.resize(labelimg, (width, height), interpolation=cv2.INTER_NEAREST)
        # Pixels labelled 1 become 255 (presumably the person class — confirm
        # against the SDK documentation); every other value is kept as is.
        new_img = np.where(labelimg == 1, 255, labelimg)
        mask_name = path + 'mask_{}.png'.format(num)
        cv2.imwrite(mask_name, new_img)
        print(f'======== 第{num}张图像分割完成 ========')
    else:
        # No label map in the response: show what the service returned and
        # continue with the next frame instead of dying on a KeyError.
        print(f'======== 第{num}张图像分割失败:{seg_res} ========')

    # Pause between API calls.
    time.sleep(random.randint(1, 2))
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
结果如下:
5. 小姐姐跳舞词云生成
"""
Render one word-cloud image per segmentation mask.

@File   : 词云.py
@Author : 叶庭云
@CSDN   : https://yetingyun.blog.csdn.net/

Word frequencies come from the danmaku saved in bullet.txt; each mask in
./mask_img/ shapes one image written to ./wordcloud/.
"""
from wordcloud import WordCloud
import collections
import os
import jieba
import re
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# NOTE(review): bullet.txt is read with the platform default encoding, which
# matches how the crawler in this article writes it; switch both sides
# together if an explicit encoding is wanted.
with open('bullet.txt') as f:
    data = f.read()

# Keep only runs of Chinese characters, then join them for the tokenizer.
new_data = re.findall('[\u4e00-\u9fa5]+', data, re.S)
new_data = "/".join(new_data)

# cut_all=True selects jieba's full mode.
seg_list_exact = jieba.cut(new_data, cut_all=True)

# Stop words, one per line.
with open('stop_words.txt', encoding='utf-8') as f:
    stop_words = set(f.read().split('\n'))

# Drop stop words and single-character tokens.
result_list = [
    word for word in seg_list_exact
    if word not in stop_words and len(word) > 1
]
word_counts = collections.Counter(result_list)

path = './wordcloud/'
# WordCloud.to_file does not create missing directories.
os.makedirs(path, exist_ok=True)

for num in range(88, 888):
    # The segmentation step saves masks as mask_<n>.png; the extension was
    # missing here, so Image.open could never find the file.
    img = f'./mask_img/mask_{num}.png'
    with Image.open(img) as mask_img:
        # Invert the mask values before handing them to WordCloud.
        mask_ = 255 - np.array(mask_img)

    fig = plt.figure(figsize=(8, 5), dpi=200)
    my_cloud = WordCloud(
        background_color='black',
        mask=mask_,
        mode='RGBA',
        max_words=500,
        font_path='simhei.ttf',
    ).generate_from_frequencies(word_counts)
    plt.imshow(my_cloud)
    plt.axis('off')

    word_cloud_name = path + 'wordcloud_{}.png'.format(num)
    my_cloud.to_file(word_cloud_name)
    # Close the per-frame figure; otherwise 800 open figures pile up in memory.
    plt.close(fig)
    print(f'======== 第{num}张词云图生成 ========')
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
结果如下:
6. 合成跳舞视频
"""
Assemble the word-cloud images into a video.

@File   : 生成视频.py
@Author : 叶庭云
@CSDN   : https://yetingyun.blog.csdn.net/

Reads ./wordcloud/wordcloud_<n>.png for n in 88..887 in order and writes them
as frames of result.mp4.
"""
import cv2
import os

video_dir = 'result.mp4'
fps = 30
img_size = (1920, 1080)  # (width, height) of the output frames

fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
videoWriter = cv2.VideoWriter(video_dir, fourcc, fps, img_size)
if not videoWriter.isOpened():
    # Without this check every write() on an unopened writer silently does nothing.
    raise RuntimeError(f'无法创建视频文件 {video_dir}')

# Listing the folder fails immediately if ./wordcloud does not exist.
img_files = os.listdir('./wordcloud')

try:
    for i in range(88, 888):
        img_path = './wordcloud/' + 'wordcloud_{}.png'.format(i)
        frame = cv2.imread(img_path)
        if frame is None:
            # imread returns None for a missing or unreadable file; raise a
            # clear error instead of an opaque failure inside cv2.resize.
            raise FileNotFoundError(f'无法读取图片 {img_path}')
        # Every frame must match the size the writer was opened with.
        frame = cv2.resize(frame, img_size)
        videoWriter.write(frame)
        print(f'======== 按照视频顺序第{i}张图片合进视频 ========')
finally:
    # Finalize the container even when a frame is missing.
    videoWriter.release()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
20 21 22 23 24 25 26 27 28 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
效果如下:
7. 视频插入音频
漂亮小姐姐跳舞那么好看,再加上自己喜欢的背景音乐,岂不美哉。
"""
Add a background audio track to the generated video.

@File   : 插入音频.py
@Author : 叶庭云
@CSDN   : https://yetingyun.blog.csdn.net/
"""
import moviepy.editor as mpy

# Video produced by the frame-assembly step.
video = mpy.VideoFileClip('result.mp4')

# Audio of song.mp4 between second 17 and second 44.
song = mpy.AudioFileClip('song.mp4')
soundtrack = song.subclip(17, 44)

# Also keep the excerpt as a standalone mp3.
soundtrack.write_audiofile('vmt.mp3')

# Attach the excerpt to the video and render the final file.
video.set_audio(soundtrack).write_videofile('final_video.mp4')
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
结果如下:
作者:叶庭云
CSDN:
https://yetingyun.blog.csdn.net/
本文仅用于交流学习,未经作者允许,禁止转载,更勿做其他用途,违者必究。
热爱可抵岁月漫长,发现求知的乐趣,在不断总结和学习中进步,与诸君共勉。