import requests
from bs4 import BeautifulSoup
import re
def Main():
    """Fetch one hipages business profile and print its contact details.

    Prints, one per line and in this order: the business name, the contact
    person, the address, and then the phone, mobile, ABN and website values.
    The first three come from HTML elements; the last four are pulled with
    regular expressions from backslash-escaped JSON text embedded in the
    page source.  A value that cannot be located is printed as ``N/A`` so
    that the remaining fields are still reported.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    r = requests.get(
        "https://hipages.com.au/connect/abcelectricservicespl/service/126298",
        timeout=30)  # without a timeout the call can block indefinitely
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    # NOTE(review): these class names look auto-generated and may change
    # whenever the site is redeployed - confirm before relying on them.
    heading = soup.find("h1", {'class': 'sc-AykKI'})
    print(heading.text if heading is not None else "N/A")

    # One query serves both values: the first matching span holds the
    # contact person, the second one the address.
    contact = soup.find_all(
        "span", {'class': 'Contact__Item-sc-1giw2l4-2 kBpGee'})
    print(contact[0].text.strip() if contact else "N/A")
    print(contact[1].text if len(contact) > 1 else "N/A")

    # Each field appears in the raw page as  name\":\"value\"  (quotes
    # escaped with a backslash), hence the literal backslashes in the pattern.
    for field in ("phone", "mobile", "abn", "website"):
        match = re.search(field + r'\\":\\"(.*?)\\"', r.text)
        print(match.group(1) if match else "N/A")
Main()
输出:
ABC Electric Services p/l
Mal
222 Henry Lawson DRV, Georges Hall NSW 2198
1800 801 828
0408 600 950
37137808989
www.abcelectricservices.com.au
import requests
from bs4 import BeautifulSoup
import pyjsparser
import json
import re
def Main():
    """Fetch a hipages profile page and print the data embedded in a script.

    The text of the sixth ``<script>`` element is parsed as JavaScript with
    pyjsparser.  The ``value`` of the right-hand side of the first statement
    is then decoded as JSON and printed.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: if the page contains fewer than six ``<script>`` elements.
    """
    r = requests.get(
        "https://hipages.com.au/connect/abcelectricservicespl/service/126298",
        timeout=30)  # without a timeout the call can block indefinitely
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    # NOTE(review): the script is selected purely by position; presumably it
    # is an assignment of a JSON string to a variable - confirm against the
    # live page, since a layout change would silently pick another script.
    script = soup.find_all("script")[5]
    tree = pyjsparser.parse(script.text)
    print(json.loads(tree["body"][0]["expression"]["right"]["value"]))
Main()
另一个版本:
import requests
from bs4 import BeautifulSoup
import re
import json
def Main():
    """Fetch a hipages profile page and pretty-print its ``__INITIAL_STATE__``.

    The text of the sixth ``<script>`` element is searched for
    ``__INITIAL_STATE__ = "{...}"``.  The captured object text has its
    backslash-escaped double quotes unescaped, is decoded as JSON, and is
    printed with a four-space indent.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: if the page contains fewer than six ``<script>`` elements.
        ValueError: if the selected script has no ``__INITIAL_STATE__``
            assignment of the expected form, or the text is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    r = requests.get(
        "https://hipages.com.au/connect/abcelectricservicespl/service/126298",
        timeout=30)  # without a timeout the call can block indefinitely
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    # NOTE(review): the script is selected purely by position - confirm the
    # index against the live page if this stops matching.
    data = soup.find_all("script")[5].text
    found = re.search(r'__INITIAL_STATE__\s*=\s*"({.*})', data)
    if found is None:
        raise ValueError(
            "no __INITIAL_STATE__ assignment found in the selected script")
    source = found.group(1)

    # Turn \" into " unless the backslash is itself preceded by a backslash.
    # A raw string keeps the pattern free of invalid escape sequences.
    kuku = json.loads(re.sub(r'(?<!\\)\\"', '"', source))
    print(json.dumps(kuku, indent=4))
Main()