import time

import requests
from bs4 import BeautifulSoup


class JD_crawl:
    def __init__(self, username, password):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36'
                          ' (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36',
            'Referer': 'https://www.jd.com/',
        }
        self.login_url = "https://passport.jd.com/new/login.aspx"
        self.post_url = "https://passport.jd.com/uc/loginService"
        self.auth_url = "https://passport.jd.com/uc/showAuthCode"
        self.session = requests.session()
        self.username = username
        self.password = password

    def get_login_info(self):
        """Read the hidden form fields from the login page and build the login payload."""
        html = self.session.get(self.login_url, headers=self.headers).content
        soup = BeautifulSoup(html, 'lxml')
        uuid = soup.select('#uuid')[0].get('value')
        eid = soup.select('#eid')[0].get('value')
        fp = soup.select('input[name="fp"]')[0].get('value')
        _t = soup.select('input[name="_t"]')[0].get('value')
        login_type = soup.select('input[name="loginType"]')[0].get('value')
        pub_key = soup.select('input[name="pubKey"]')[0].get('value')
        sa_token = soup.select('input[name="sa_token"]')[0].get('value')

        # Ask the server whether a captcha is required for this account.
        auth_page = self.session.post(
            self.auth_url,
            data={'loginName': self.username, 'nloginpwd': self.password}).text
        if 'true' in auth_page:
            auth_code_url = soup.select('#JD_Verification1')[0].get('src2')
            auth_code = str(self.get_auth_img(auth_code_url))
        else:
            auth_code = ''

        data = {
            'uuid': uuid,
            'eid': eid,
            'fp': fp,
            '_t': _t,
            'loginType': login_type,
            'loginname': self.username,
            'nloginpwd': self.password,
            'chkRememberMe': True,
            'pubKey': pub_key,
            'sa_token': sa_token,
            'authcode': auth_code
        }
        return data

    def get_auth_img(self, url):
        """Download the captcha image and ask the user to type it in."""
        auth_code_url = 'http:{}&yys={}'.format(url, str(int(time.time() * 1000)))
        auth_img = self.session.get(auth_code_url, headers=self.headers)
        with open('authcode.jpg', 'wb') as f:
            f.write(auth_img.content)
        code_typein = input('Please enter the captcha shown in authcode.jpg: ')
        return code_typein

    def login(self):
        data = self.get_login_info()
        headers = {
            'Referer': self.post_url,
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36'
                          ' (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest'
        }
        try:
            login_page = self.session.post(self.post_url, data=data, headers=headers)
            print(login_page.text)
        except Exception as e:
            print(e)

    def shopping(self):
        # The cart page only returns useful content after a successful login.
        login = self.session.post('https://cart.jd.com/cart.action', headers=self.headers)
        print(login.text)


if __name__ == '__main__':
    un = input('Please enter your JD account: ')
    pwd = input('Please enter your JD password: ')
    jd = JD_crawl(un, pwd)
    jd.login()
    jd.shopping()
import re

import requests
from lxml import etree

from onexima import Xima


def get_id():
    """Collect the id and title of every album on the ranking page."""
    main_url = "https://www.ximalaya.com/shangye/top/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
    }
    r = requests.get(main_url, headers=headers)
    html = etree.HTML(r.content.decode())
    div_list = html.xpath("//div[contains(@class,'e-2997888007 rrc-album-item')]")
    all_list = []
    for div in div_list:
        author = {}
        href = div.xpath("./a/@href")[0]
        print(href)
        # The album id sits between the second and third slash of the href.
        author['id'] = re.search(r'\/.*?\/(.*)\/', href).group(1)
        author['book_name'] = div.xpath("./a/div[3]/div[1]/span/text()")[0]
        all_list.append(author)
    print(all_list)
    return all_list


all_list = get_id()
for i in all_list:
    x = Xima(i['id'], i['book_name'])
    x.run()
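The loop above relies on a Xima class from a separate onexima module that is not included in this post; all the loop needs is a constructor taking an album id and a title, plus a run() method that downloads the album. The stand-in below is only a hedged sketch of that interface so the snippet can be tried end to end; the track-list endpoint and the playUrl64/title JSON fields are assumptions, not taken from the original module:

# onexima.py -- hypothetical stand-in for the helper module used above.
import os

import requests


class Xima:
    def __init__(self, album_id, book_name):
        self.album_id = album_id
        self.book_name = book_name
        # Assumed (not confirmed) track-list API for the first page of one album.
        self.track_url = ("http://mobile.ximalaya.com/mobile/v1/album/track"
                          "?albumId={}&pageSize=20&pageId=1".format(album_id))

    def run(self):
        # One folder per album, one audio file per track.
        os.makedirs(self.book_name, exist_ok=True)
        data = requests.get(self.track_url).json()
        for track in data.get('data', {}).get('list', []):
            audio = requests.get(track['playUrl64'])
            path = os.path.join(self.book_name, track['title'] + '.m4a')
            with open(path, 'wb') as f:
                f.write(audio.content)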
import os
import re

import requests
from bs4 import BeautifulSoup

Hostreferer = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    'Referer': 'http://www.mzitu.com'
}
Picreferer = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    'Referer': 'http://i.meizitu.net'
}


def get_page_name(url):
    """Return the page count and the title of one atlas."""
    html = get_html(url)
    soup = BeautifulSoup(html, 'lxml')
    span = soup.findAll('span')
    title = soup.find('h2', class_="main-title")
    # span[10] holds the number of pages in this atlas.
    return span[10].text, title.text


def get_html(url):
    req = requests.get(url, headers=Hostreferer)
    html = req.text
    return html


def get_img_url(url, name):
    html = get_html(url)
    soup = BeautifulSoup(html, 'lxml')
    img_url = soup.find('img', alt=name)
    return img_url['src']


def save_img(img_url, count, name):
    req = requests.get(img_url, headers=Picreferer)
    new_name = rename(name)
    with open(new_name + '/' + str(count) + '.jpg', 'wb') as f:
        f.write(req.content)


def rename(name):
    # Strip characters that Windows does not allow in directory names.
    rstr = r'[\/\\\:\*\?\\|]'
    new_name = re.sub(rstr, "", name)
    return new_name


def save_one_atlas(old_url):
    page, name = get_page_name(old_url)
    new_name = rename(name)
    os.mkdir(new_name)
    print("Atlas --" + name + "-- started saving")
    for i in range(1, int(page) + 1):
        url = old_url + "/" + str(i)
        img_url = get_img_url(url, name)
        save_img(img_url, i, name)
        print('Saving image ' + str(i))
    print("Atlas --" + name + " saved")


def get_atlas_list(url):
    req = requests.get(url, headers=Hostreferer)
    soup = BeautifulSoup(req.text, 'lxml')
    atlas = soup.find_all(attrs={'class': 'lazy'})
    atlas_list = []
    for atla in atlas:
        atlas_list.append(atla.parent['href'])
    return atlas_list


def save_one_page(start_url):
    atlas_url = get_atlas_list(start_url)
    for url in atlas_url:
        save_one_atlas(url)


if __name__ == '__main__':
    start_url = "http://www.mzitu.com/"
    for count in range(1, 3):
        url = start_url + "page/" + str(count) + "/"
        save_one_page(url)
    print("Crawl finished")
from PIL import Image


def fill_image(image):
    """Pad the image with white so it becomes a centred square."""
    width, height = image.size
    new_image_length = width if width > height else height
    new_image = Image.new(image.mode, (new_image_length, new_image_length), color='white')
    if width > height:
        new_image.paste(image, (0, int((new_image_length - height) / 2)))
    else:
        new_image.paste(image, (int((new_image_length - width) / 2), 0))
    return new_image


def cut_image(image):
    """Cut the square image into a 3x3 grid of equal tiles."""
    width, height = image.size
    item_width = int(width / 3)
    box_list = []
    for i in range(0, 3):
        for j in range(0, 3):
            box = (j * item_width, i * item_width, (j + 1) * item_width, (i + 1) * item_width)
            box_list.append(box)
    image_list = [image.crop(box) for box in box_list]
    return image_list


def save_images(image_list):
    index = 1
    for image in image_list:
        image.save(str(index) + '.png', 'PNG')
        index += 1


if __name__ == '__main__':
    file_path = "4.jpg"
    image = Image.open(file_path)
    image = fill_image(image)
    image_list = cut_image(image)
    save_images(image_list)
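The nine tiles come out in row-major order as 1.png through 9.png, so a quick way to verify the split is to paste them back into one square. This sanity check is not part of the original script; it only assumes the filenames produced by save_images above:

from PIL import Image


def rebuild():
    # Every tile is item_width pixels square, so read the size from the first one.
    tile_size = Image.open('1.png').width
    canvas = Image.new('RGB', (tile_size * 3, tile_size * 3), 'white')
    for index in range(1, 10):
        tile = Image.open(str(index) + '.png')
        row, col = divmod(index - 1, 3)
        canvas.paste(tile, (col * tile_size, row * tile_size))
    canvas.save('rebuilt.png')


rebuild()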
import json
import re

import requests


def getLOLImages():
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36'}
    url_js = 'http://lol.qq.com/biz/hero/champion.js'
    res_js = requests.get(url_js, headers=header).content
    html_js = res_js.decode()
    # Pull the champion id -> name mapping out of the "keys" object in champion.js.
    req = '"keys":(.*?),"data"'
    list_js = re.findall(req, html_js)
    dict_js = json.loads(list_js[0])

    # Build the skin image URLs: champion id plus a zero-padded skin index, e.g. big266000.jpg.
    pic_list = []
    for key in dict_js:
        for i in range(20):
            num = str(i)
            if len(num) == 1:
                hero_num = "00" + num
            else:
                hero_num = "0" + num
            numstr = key + hero_num
            url = "http://ossweb-img.qq.com/images/lol/web201310/skin/big" + numstr + ".jpg"
            pic_list.append(url)

    # Build a matching list of local file paths, one per candidate skin.
    list_filepath = []
    path = "E:\\文章\\LOL官网\\LOLpic\\"
    for name in dict_js.values():
        for i in range(20):
            file_path = path + name + str(i) + '.jpg'
            list_filepath.append(file_path)

    # Download each image; skip URLs that do not exist (not every champion has 20 skins).
    for n, picurl in enumerate(pic_list):
        res = requests.get(picurl, headers=header)
        if res.status_code == 200:
            print("Downloading %s" % list_filepath[n])
            with open(list_filepath[n], 'wb') as f:
                f.write(res.content)


getLOLImages()
Finally, if the code throws an error when you run it, or if anything is unclear, you can add me on WeChat [mmp9972].
I may be a little slow to accept requests, so please don't worry if I don't reply right away, thank you!
You can also leave a comment and I will answer it when I see it, though comments cannot carry screenshots.
One very last word......... Life is short, I use Python! Keep at it!