一、BeautifulSoup解析数据
import requests
res = requests.get('https://localprod.pandateacher.com/python-manuscript/crawler-html/spider-men5.0.html')
print(res.status_code)
html = res.text
print(html)
import requests
from bs4 import BeautifulSoup
res = requests.get('https://localprod.pandateacher.com/python-manuscript/crawler-html/spider-men5.0.html')
html = res.text
soup = BeautifulSoup(html,'html.parser')
print(type(soup))
print(soup)
二、BeautifulSoup解析数据
1、find()与find_all()
import requests
from bs4 import BeautifulSoup
url = 'https://localprod.pandateacher.com/python-manuscript/crawler-html/spder-men0.0.html'
res = requests.get (url)
print(res.status_code)
soup = BeautifulSoup(res.text,'html.parser')
items = soup.find_all('div')
print(type(items))
print(items)
import requests
from bs4 import BeautifulSoup
res = requests.get('https://localprod.pandateacher.com/python-manuscript/crawler-html/spider-men5.0.html')
html = res.text
soup = BeautifulSoup( html,'html.parser')
items = soup.find_all(class_='books')
print(items)
print(type(items))
2、取出列表的每一个值
import requests
from bs4 import BeautifulSoup
res = requests.get('https://localprod.pandateacher.com/python-manuscript/crawler-html/spider-men5.0.html')
html= res.text
soup = BeautifulSoup( html,'html.parser')
items = soup.find_all(class_='books')
for item in items:
print('想找的数据都包含在这里了:\n',item)
3、Tag对象
import requests
from bs4 import BeautifulSoup
res = requests.get('https://localprod.pandateacher.com/python-manuscript/crawler-html/spider-men5.0.html')
html = res.text
soup = BeautifulSoup( html,'html.parser')
items = soup.find_all(class_='books')
for item in items:
kind = item.find('h2')
title = item.find(class_='title')
brief = item.find(class_='info')
print(kind,'\n',title,'\n',brief)
print(type(kind),type(title),type(brief))
print(kind.text,'\n',title.text,'\n',title['href'],'\n',brief.text)
三、对象的变化过程
|