爬取结果
完整代码
主要代码: 在node环境下直接运行此代码
const request = require('request')
const fs = require('fs')
const {
fsRead,
fsWrite,
fsDir
} = require('./lcfs')
// Entry URL of the crawl; it is passed to getClassUrl() to start scraping.
let httpUrl = "https://www.1905.com/vod/list/n_1/o3p1.html"
/**
 * Promise wrapper around `request.get`.
 *
 * @param {string} url - Address to fetch with an HTTP GET.
 * @returns {Promise<{response: object, body: *}>} Resolves with the response
 *   object and the body handed to the `request.get` callback.
 *   Rejects with the error reported by `request.get`.
 */
function req(url) {
    return new Promise(function (resolve, reject) {
        request.get(url, function (err, response, body) {
            if (err) {
                reject(err);
                return;
            }
            // A promise can only be fulfilled with ONE value: any extra
            // argument given to resolve() is silently dropped. Bundle both
            // values so `const { response, body } = await req(url)` receives
            // what it asks for.
            resolve({ response, body });
        });
    });
}
/**
 * Reads the category ("类型") filter bar of a listing page, creates one
 * directory per category under ./movies/ and crawls every category.
 *
 * The "全部" (all) entry is skipped so only real categories are crawled.
 *
 * @param {string} targetUrl - Listing page that contains the category filter.
 * @returns {Promise<Array<{className: string, url: string}>>} The categories
 *   found, in page order, once every category crawl has settled.
 * @throws {Error} When the category section cannot be located in the page.
 */
async function getClassUrl(targetUrl) {
    const { body } = await req(targetUrl);

    const sectionPattern = /<span class="search-index-L">类型(.*?)<div class="grid-12x">/isg;
    const section = sectionPattern.exec(body);
    if (section === null) {
        // Fail with a readable message instead of "cannot read property of null".
        throw new Error('Category section not found in ' + targetUrl);
    }

    const linkPattern = /onclick="location\.href='(.*?)';return.*?>(.*?)<\/a>/isg;
    const arrClass = [];
    const crawls = [];
    let link = null;
    while ((link = linkPattern.exec(section[1])) !== null) {
        const url = link[1];
        const className = link[2];
        if (className === "全部") {
            continue;
        }
        arrClass.push({ className, url });
        await fsDir('./movies/' + className);
        // Categories are still crawled concurrently, but each crawl carries
        // its own rejection handler so one failing category neither becomes
        // an unhandled rejection nor aborts the others.
        crawls.push(
            getMovies(url, className).catch(function (err) {
                console.error('Failed to crawl category ' + className + ':', err);
            })
        );
    }

    await Promise.all(crawls);
    return arrClass;
}
// Start the crawl. getClassUrl() returns a promise, so attach a rejection
// handler: otherwise a network or parse failure surfaces only as an
// unhandled promise rejection.
getClassUrl(httpUrl).catch(function (err) {
    console.error('Crawl failed:', err);
    process.exitCode = 1;
});
/**
 * Crawls one category listing page: extracts every movie link on it and
 * hands each link to parsePage().
 *
 * @param {string} url - Category listing page to fetch.
 * @param {string} moviesType - Category name forwarded to parsePage().
 * @returns {Promise<string[]>} The movie page URLs found, in page order,
 *   once every page has been processed.
 */
async function getMovies(url, moviesType) {
    const { body } = await req(url);

    const linkPattern = /<a class="pic-pack-outer" target="_blank" href="(.*?)" title="(.*?)"><img/igs;
    const arrList = [];
    const pages = [];
    let link = null;
    while ((link = linkPattern.exec(body)) !== null) {
        const movieUrl = link[1];
        arrList.push(movieUrl);
        // Pages are processed concurrently; the per-page handler keeps a
        // single bad page from turning into an unhandled rejection.
        pages.push(
            parsePage(movieUrl, moviesType).catch(function (err) {
                console.error('Failed to parse ' + movieUrl + ':', err);
            })
        );
    }

    await Promise.all(pages);
    return arrList;
}
/**
 * Fetches one movie page, extracts its title and stores a small JSON record
 * at ./movies/<category>/<title>.json.
 *
 * @param {string} url - Movie page to fetch.
 * @param {string} mT - Category name; used as the directory name and stored
 *   in the record as `movieType`.
 * @returns {Promise<?{name: string, movieUrl: string, movieType: string}>}
 *   The stored record, or null when the page has no recognisable title.
 */
async function parsePage(url, mT) {
    const { body } = await req(url);

    const titlePattern = /<h1 class="playerBox-info-name playerBox-info-cnName">(.*?)<\/h1>/;
    const found = titlePattern.exec(body);
    if (found === null) {
        // Pages with a different layout are skipped rather than crashing
        // the crawl with a TypeError on `null[1]`.
        console.warn('No movie title found, skipping ' + url);
        return null;
    }

    const movie = {
        name: found[1],
        movieUrl: url,
        movieType: mT
    };

    // The title comes from remote HTML: replace characters that are path
    // separators or invalid in file names so the record always lands inside
    // the category directory. The unmodified title is kept in the record.
    const fileName = found[1].replace(/[\\/:*?"<>|]/g, '_');

    // Await the write so failures reach the caller instead of being lost.
    await fsWrite('./movies/' + mT + "/" + fileName + '.json', JSON.stringify(movie));
    return movie;
}
封装的文件操作代码
const fs = require('fs');
/**
 * Promise wrapper around `fs.writeFile` that appends to the target file
 * (flag 'a') using UTF-8 encoding.
 *
 * @param {string} path - File to append to.
 * @param {string} content - Text to write.
 * @returns {Promise<void>} Resolves once the write has completed.
 *   Rejects with the error reported by `fs.writeFile`.
 */
function fsWrite(path, content) {
    return new Promise(function (resolve, reject) {
        fs.writeFile(path, content, {
            flag: 'a',
            encoding: 'utf-8'
        }, function (err) {
            if (err) {
                // Without this branch the promise never settles on failure
                // and any caller awaiting it hangs forever.
                reject(err);
                return;
            }
            resolve();
        });
    });
}
/**
 * Promise wrapper around `fs.readFile`, opening the file with flag 'r' and
 * decoding it as UTF-8.
 *
 * @param {string} path - File to read.
 * @returns {Promise<string>} Resolves with the file contents.
 *   Rejects with the error reported by `fs.readFile`.
 */
function fsRead(path) {
    const readOptions = { flag: 'r', encoding: 'utf-8' };
    return new Promise((resolve, reject) => {
        const onDone = (error, text) => {
            if (error) {
                reject(error);
                return;
            }
            resolve(text);
        };
        fs.readFile(path, readOptions, onDone);
    });
}
|