/**
* Fetch the Baidu hot keywords and store them in the hot-words table.
*/
public function index(){
    // Pulls the keyword list from getBaiduHotKeyWord() and writes at most ten
    // of them to udc_out_hotwords as how_type 4 (howid 1..10, sort 1..10).
    $data = $this->getBaiduHotKeyWord();
    if (!is_array($data) || empty($data)) {
        // Nothing usable was fetched: keep whatever is stored instead of
        // overwriting it with blank keywords.
        Log::write('Baidu hot keywords: nothing fetched, stored rows left untouched');
        return;
    }
    $data = array_values($data);
    // The feed may deliver fewer than ten entries; never index past the end.
    $total = min(10, count($data));
    $now = date('Y-m-d H:i:s');

    $where = ['how_type' => 4];
    $saveData = [];
    for ($i = 0; $i < $total; $i++) {
        $saveData[$i] = [
            'howid' => $i + 1,
            'how_type' => 4,
            'how_words' => $data[$i],
            'sort' => $i + 1,
            // Encode the keyword so spaces, '&' or '#' cannot break the query string.
            'url' => 'https://www.baidu.com/s?wd=' . urlencode($data[$i]),
            'create_time' => $now,
            'update_time' => $now,
        ];
    }

    $model = M('udc_out_hotwords');
    // One matching row is enough to know this type has already been seeded.
    if ($model->where($where)->find()) {
        foreach ($saveData as $row) {
            $where['howid'] = $row['howid'];
            // howid is the lookup key, and create_time must survive updates.
            unset($row['howid'], $row['create_time']);
            $model->where($where)->save($row);
        }
    } else {
        $model->addAll($saveData);
    }

    // Only log when the model actually reported something.
    $error = $model->getError();
    if ($error) {
        Log::write($error);
    }
}
/**
* Fetch the Sogou hot keywords and store them in the hot-words table.
*/
public function souGou(){
    // Scrapes the Sogou hot list and writes at most ten keywords to
    // udc_out_hotwords as how_type 1 (howid 21..30, sort 1..10).
    $html = $this->getUrlContent("http://top.sogou.com/hot/shishi_1.html?fr=tph_righ");
    $rows = $this->getSouGouHtml($html);
    if (!is_array($rows)) {
        $rows = [];
    }

    $data = [];
    foreach (array_values($rows) as $position => $cells) {
        // The first three rows take their keyword from cell 2, cut to eight
        // characters; every later row takes it from cell 1.
        if ($position < 3) {
            $word = isset($cells[2]) ? mb_substr($cells[2], 0, 8) : '';
        } else {
            $word = isset($cells[1]) ? $cells[1] : '';
        }
        // Rows without the expected cell are skipped rather than stored blank.
        if ($word !== '' && $word !== false) {
            $data[] = $word;
        }
    }
    if (empty($data)) {
        Log::write('Sogou hot keywords: nothing parsed, stored rows left untouched');
        return;
    }

    // The page may yield fewer than ten entries; never index past the end.
    $total = min(10, count($data));
    $now = date('Y-m-d H:i:s');

    $where = ['how_type' => 1];
    $saveData = [];
    for ($i = 0; $i < $total; $i++) {
        $saveData[$i] = [
            'howid' => $i + 1 + 20,
            'how_type' => 1,
            'how_words' => $data[$i],
            'sort' => $i + 1,
            // Encode the keyword so spaces, '&' or '#' cannot break the query string.
            'url' => 'https://www.sogou.com/sogou?query=' . urlencode($data[$i]),
            'create_time' => $now,
            'update_time' => $now,
        ];
    }

    $model = M('udc_out_hotwords');
    // One matching row is enough to know this type has already been seeded.
    if ($model->where($where)->find()) {
        foreach ($saveData as $row) {
            $where['howid'] = $row['howid'];
            // howid is the lookup key, and create_time must survive updates.
            unset($row['howid'], $row['create_time']);
            $model->where($where)->save($row);
        }
    } else {
        $model->addAll($saveData);
    }

    // Only log when the model actually reported something.
    $error = $model->getError();
    if ($error) {
        Log::write($error);
    }
}
/**
* Fetch the Weibo hot keywords and store them in the hot-words table.
*/
public function wei(){
    // Scrapes the Weibo hot-search table and writes at most ten keywords to
    // udc_out_hotwords as how_type 2 (howid 11..20, sort 1..10).
    $html = $this->getUrlContent("https://s.weibo.com/top/summary?Refer=top_hot&topnav=1&wvr=6");
    $table = $this->getTable($html);
    if (!is_array($table)) {
        $table = [];
    }
    // The first two rows were marked by the original author as extraneous
    // leading rows; the keyword sits in the second cell of every other row.
    $cells = array_column(array_slice($table, 2), 1);

    $data = [];
    foreach ($cells as $cell) {
        // A cell holds the keyword followed by more whitespace-separated text;
        // keep the first token only. Splitting on whitespace directly (instead
        // of routing through '+') keeps keywords that contain a literal '+'.
        $tokens = preg_split('/\s+/', trim($cell));
        if (is_array($tokens) && $tokens[0] !== '') {
            $data[] = $tokens[0];
        }
    }
    if (empty($data)) {
        Log::write('Weibo hot keywords: nothing parsed, stored rows left untouched');
        return;
    }

    // The page may yield fewer than ten entries; never index past the end.
    $total = min(10, count($data));
    $now = date('Y-m-d H:i:s');

    $where = ['how_type' => 2];
    $saveData = [];
    for ($i = 0; $i < $total; $i++) {
        $saveData[$i] = [
            'howid' => $i + 1 + 10,
            'how_type' => 2,
            'how_words' => $data[$i],
            'sort' => $i + 1,
            // Encode the keyword so spaces, '&' or '#' cannot break the query string.
            'url' => 'https://s.weibo.com/weibo?q=' . urlencode($data[$i]),
            'create_time' => $now,
            'update_time' => $now,
        ];
    }

    $model = M('udc_out_hotwords');
    // One matching row is enough to know this type has already been seeded.
    if ($model->where($where)->find()) {
        foreach ($saveData as $row) {
            $where['howid'] = $row['howid'];
            // howid is the lookup key, and create_time must survive updates.
            unset($row['howid'], $row['create_time']);
            $model->where($where)->save($row);
        }
    } else {
        $model->addAll($saveData);
    }

    // Only log when the model actually reported something.
    $error = $model->getError();
    if ($error) {
        Log::write($error);
    }
}
/**
 * Download Baidu's top-10 feed and return the link text of each table row.
 *
 * The <table> fragment embedded in the feed is cut out, prefixed with a GBK
 * XML prolog and parsed with SimpleXML; the text of the first link in every
 * tbody row is collected.
 *
 * @return array List of keyword strings; empty when the download or the
 *               parse fails, or when no row carries a link.
 */
private function getBaiduHotKeyWord()
{
    $keyArray = [];

    $templateRss = file_get_contents('http://top.baidu.com/rss_xml.php?p=top10');
    if ($templateRss === false) {
        return $keyArray;
    }

    if (preg_match('/<table>(.*)<\/table>/is', $templateRss, $_description)) {
        $templateRss = $_description[0];
        // Escape every '&' that does not start a predefined XML entity or a
        // numeric character reference; a bare '&' (or an HTML-only entity)
        // would make the XML parser reject the whole fragment.
        $templateRss = preg_replace(
            '/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-f]+);)/i',
            '&amp;',
            $templateRss
        );
    }
    $templateRss = "<?xml version=\"1.0\" encoding=\"GBK\"?>" . $templateRss;

    // Collect parser errors internally so malformed markup does not emit
    // warnings, then restore the previous libxml setting.
    $previous = libxml_use_internal_errors(true);
    $xml = simplexml_load_string($templateRss);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if ($xml === false || !isset($xml->tbody->tr)) {
        return $keyArray;
    }

    foreach ($xml->tbody->tr as $temp) {
        if (!empty($temp->td->a)) {
            $keyArray[] = trim((string) $temp->td->a);
        }
    }
    return $keyArray;
}
/**
 * Download a URL with cURL and return the raw response text.
 *
 * CURLOPT_HEADER is enabled, so the returned string starts with the response
 * headers and is followed by the body; the parsers in this file only
 * regex-match the markup, so the header prefix does not disturb them.
 *
 * @param string $url Address to fetch.
 * @return string|false Response text, or false when the transfer fails.
 */
private function getUrlContent($url){
    $ch = curl_init();
    if ($ch === false) {
        Log::write('getUrlContent: curl_init failed for ' . $url);
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_USERAGENT => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1 )",
        CURLOPT_HEADER => 1,
        CURLOPT_RETURNTRANSFER => 1,
        // Bound both the connect phase and the whole transfer so a stalled
        // remote host cannot block the job indefinitely.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 30,
    ]);

    $output = curl_exec($ch);
    if ($output === false) {
        // Record the reason; the false return value is passed on unchanged.
        Log::write('getUrlContent: request to ' . $url . ' failed: ' . curl_error($ch));
    }
    curl_close($ch);

    return $output;
}
/**
 * Flatten the first attribute-less <table>...</table> of an HTML string into
 * rows of cell text.
 *
 * Each closing </tr> ends a row and each closing </td> ends a cell; all other
 * markup is stripped and spaces are removed. Newlines inside a cell are kept.
 * A row without any <td> cell (for example a header row made of <th>) comes
 * back as an empty array.
 *
 * @param string $html Markup to scan.
 * @return array List of rows, each a list of cell strings; empty when no
 *               table is found.
 */
private function getTable($html) {
    if (!is_string($html) || !preg_match('/<table>[\s\S]*?<\/table>/i', $html, $match)) {
        return [];
    }
    $table = $match[0];

    // Remove the opening table/row/cell tags, then turn the closing row and
    // cell tags into plain-text separators.
    $table = preg_replace(["'<table[^>]*?>'si", "'<tr[^>]*?>'si", "'<td[^>]*?>'si"], "", $table);
    $table = str_replace(["</tr>", "</td>"], ["{tr}", "{td}"], $table);

    // Strip every remaining HTML tag.
    $table = preg_replace("'<[/!]*?[^<>]*?>'si", "", $table);

    // The former "remove whitespace" regex was '([rn])[s]+': with its
    // backslashes missing it deleted the letters "rs"/"ns" from the text and
    // never touched a newline. It is dropped instead of repaired, because
    // wei() splits each cell on the whitespace that was always left in place.
    // Spaces are removed as before; the &nbsp; entity is removed as well.
    $table = str_replace([" ", "&nbsp;"], "", $table);

    $rows = explode('{tr}', $table);
    array_pop($rows); // text after the last row separator is not a row

    $td_array = [];
    foreach ($rows as $tr) {
        $td = explode('{td}', str_replace("\n\n", "", $tr));
        array_pop($td); // text after the last cell separator is not a cell
        $td_array[] = $td;
    }
    return $td_array;
}
/**
 * Flatten the first <ul class="pub-list">...</ul> of an HTML string into rows
 * of cell text.
 *
 * Each closing </li> ends a row and each closing </span> ends a cell; all
 * other markup is stripped and spaces are removed.
 *
 * @param string $html Markup to scan.
 * @return array List of rows, each a list of cell strings; empty when the
 *               list is not found.
 */
private function getSouGouHtml($html){
    if (!is_string($html) || !preg_match('/<ul class="pub-list">[\s\S]*?<\/ul>/i', $html, $match)) {
        return [];
    }
    $table = $match[0];

    // Remove the opening list/item/span tags, then turn the closing item and
    // span tags into plain-text separators.
    $table = preg_replace(["'<ul[^>]*?>'si", "'<li[^>]*?>'si", "'<span[^>]*?>'si"], "", $table);
    $table = str_replace(["</li>", "</span>"], ["{tr}", "{td}"], $table);

    // Strip every remaining HTML tag.
    $table = preg_replace("'<[/!]*?[^<>]*?>'si", "", $table);

    // The former "remove whitespace" regex was '([rn])[s]+': with its
    // backslashes missing it deleted the letters "rs"/"ns" from the text and
    // never touched a newline, so it is dropped. Spaces are removed as
    // before; the &nbsp; entity is removed as well.
    $table = str_replace([" ", "&nbsp;"], "", $table);

    $rows = explode('{tr}', $table);
    array_pop($rows); // text after the last row separator is not a row

    $td_array = [];
    foreach ($rows as $tr) {
        $td = explode('{td}', str_replace("\n\n", "", $tr));
        array_pop($td); // text after the last cell separator is not a cell
        $td_array[] = $td;
    }
    return $td_array;
}
|