核心代码:
# Extract one row per entry of `items` and dump all rows to a CSV file.
#
# `items` is defined outside this snippet; each element is only ever used via
# str(item), so anything whose string form is the card's HTML works
# (presumably BeautifulSoup tags -- TODO confirm against the caller).

# Patterns are compiled once here instead of on every loop iteration.
_TITLE_RE = re.compile(
    r'div class="channel-detail movie-item-title" title="(.*?)"')
_INTEGER_RE = re.compile(r'<i class="integer">(.*?)</i>')
_FRACTION_RE = re.compile(r'<i class="fraction">(.*?)</i>')
_IMG_RE = re.compile(r'class="movie-hover-img" src="(.*?)"')
_RELEASE_RE = re.compile(
    r'<span class="hover-tag">上映时间:</span>\n[\s]*([\s\S]*?)\n[\s]*</div>')
_CAST_RE = re.compile(
    r'<span class="hover-tag">主演:</span>\n[\s]*([\s\S]*?)\n[\s]*</div>')
_GENRE_RE = re.compile(
    r'<span class="hover-tag">类型:</span>\n[\s]*([\s\S]*?)\n[\s]*</div>')


def _first_match(pattern, text, default=''):
    """Return the first captured group of *pattern* in *text*.

    Falls back to *default* when the pattern does not occur, so a card that
    lacks one field no longer aborts the whole run with an IndexError.
    """
    found = pattern.search(text)
    return found.group(1) if found else default


target = []
for item in items:
    html = str(item)  # stringify once; every pattern scans the same text

    movieName = _first_match(_TITLE_RE, html)
    # The score is split across two <i> elements. When either part is absent
    # it falls back to '0', so a card with no score yields "00" exactly as
    # before.
    integer = _first_match(_INTEGER_RE, html, default='0')
    fraction = _first_match(_FRACTION_RE, html, default='0')
    score = integer + fraction
    img = _first_match(_IMG_RE, html)
    # Named release_time rather than `time` so the stdlib `time` module is not
    # shadowed if this file imports it.
    release_time = _first_match(_RELEASE_RE, html)
    act = _first_match(_CAST_RE, html)
    con = _first_match(_GENRE_RE, html)

    # Column order is kept as-is: title, score, release time, cast, image, genre.
    target.append([movieName, score, release_time, act, img, con])

# No column names are supplied, so the CSV header is pandas' default 0..5.
out = pd.DataFrame(target)
out.to_csv('猫眼电影.csv')
|