草稿片:
<?php
/**
* Created by PhpStorm.
* User: yangpeng
* Date: 2021/7/22
* Time: 16:11
*/
namespace app;
// Directory that readExcel() resolves the CSV file name against.
define('PUBLIC_DIR',__DIR__.'/public');
// Root directory under which downloaded images are saved (the URL path is appended to it).
define('IMG_DIR',__DIR__.'/public/image');
use Swoole\Coroutine;
use function Swoole\Coroutine\run;
use Swoole\Coroutine\Channel;
/**
 * Reads a CSV of product image URLs and downloads every image, plus two
 * derived thumbnail sizes, that is not already stored under IMG_DIR.
 *
 * The work is done with Swoole coroutines: URLs are pushed onto a Channel and
 * a fixed pool of worker coroutines pops and downloads them.
 */
class ReadCsv {
    /**
     * Parse the CSV and download all images that are missing locally.
     *
     * @param string $fileName CSV file name, relative to PUBLIC_DIR.
     *
     * @throws \RuntimeException When the CSV file cannot be opened.
     */
    public function readExcel($fileName){
        $csvPath = PUBLIC_DIR.'/'.$fileName;
        $file = fopen($csvPath, 'r');
        if ($file === false) {
            throw new \RuntimeException(sprintf('Unable to open CSV file "%s"', $csvPath));
        }
        try {
            $goods_list = $this->collectImageUrls($file);
        } finally {
            fclose($file);
        }

        $downloadChan = new Channel(10000);
        foreach ($goods_list as $url) {
            $localPath = $this->localPathFor($url);
            if ($localPath === null || file_exists($localPath)) {
                continue;
            }
            // Push from a separate coroutine so that a full channel suspends
            // only that coroutine instead of this loop.
            go(function () use ($downloadChan, $url) {
                $downloadChan->push($url);
            });
        }
        $this->downloadImg($downloadChan);
    }

    /**
     * Start the worker coroutines that drain the channel and download images.
     *
     * Each worker stops as soon as it observes an empty channel, so the
     * channel is expected to be filled before this method is called.
     *
     * @param \Swoole\Coroutine\Channel $downloadChan Channel of absolute https image URLs.
     */
    public function downloadImg($downloadChan){
        for ($i = 1; $i <= 50; $i++) {
            go(function () use ($downloadChan) {
                while ($downloadChan->length() > 0) {
                    $imageUrl = $downloadChan->pop();
                    if (!is_string($imageUrl)) {
                        // pop() yields a non-string (false) when the channel was closed.
                        break;
                    }
                    $savePath = $this->localPathFor($imageUrl);
                    if ($savePath === null || file_exists($savePath)) {
                        continue;
                    }
                    $this->fetchImage($imageUrl, $savePath);
                }
            });
        }
    }

    /**
     * Collect the unique image URLs referenced by the CSV.
     *
     * Column 0 is compared against the header marker 'product_id'; column 1
     * holds one or more "http:" URLs. Every URL is rewritten to https and, when
     * it has a file extension, two thumbnail variants ("62x62" and "max-440")
     * are derived from the part of the URL that precedes "max-w".
     *
     * @param resource $file Open CSV file handle.
     *
     * @return string[] Unique https URLs.
     */
    private function collectImageUrls($file){
        // Keyed by URL so that duplicates collapse without a linear search.
        $urls = [];
        while (($data = fgetcsv($file)) !== false) {
            // Blank or short rows have no URL column; the header row is skipped.
            if (!isset($data[0], $data[1]) || $data[0] === 'product_id') {
                continue;
            }
            foreach (explode('http:', $data[1]) as $val) {
                $str = trim($val);
                if ($str === '') {
                    continue;
                }
                $ext = pathinfo($str, PATHINFO_EXTENSION);
                if ($ext !== '') {
                    $base = explode('max-w', $str)[0];
                    $urls['https:'.$base.'62x62.'.$ext] = true;
                    $urls['https:'.$base.'max-440.'.$ext] = true;
                }
                $urls['https:'.$str] = true;
            }
        }
        return array_keys($urls);
    }

    /**
     * Map an image URL to the local file it is stored in.
     *
     * @param string $url Absolute image URL.
     *
     * @return string|null Path below IMG_DIR, or null when the URL has no
     *                     host/path or its path contains a ".." segment.
     */
    private function localPathFor($url){
        $parts = parse_url($url);
        if (!is_array($parts) || empty($parts['host']) || empty($parts['path'])) {
            return null;
        }
        // The URL comes from the CSV; never let it point outside IMG_DIR.
        if (in_array('..', explode('/', $parts['path']), true)) {
            return null;
        }
        return IMG_DIR.$parts['path'];
    }

    /**
     * Download one image over HTTPS and store it at $savePath.
     *
     * A single GET is issued; the body is written only for a 200 response, so
     * failed requests leave no file or directory behind.
     *
     * @param string $imageUrl Absolute https URL, already accepted by localPathFor().
     * @param string $savePath Destination file path.
     *
     * @return bool True when the image was saved.
     */
    private function fetchImage($imageUrl, $savePath){
        $parts = parse_url($imageUrl);
        $domain = $parts['host'];
        // The client takes a request path, not an absolute URL.
        $target = $parts['path'].(isset($parts['query']) ? '?'.$parts['query'] : '');

        $cli = new \Swoole\Coroutine\Http\Client($domain, isset($parts['port']) ? $parts['port'] : 443, true);
        try {
            // NOTE(review): -1 disables the timeout, so a stalled server blocks
            // this worker indefinitely - consider a finite value.
            $cli->set(['timeout' => -1]);
            $cli->setHeaders([
                'host'=>$domain,
                'accept-encoding'=>'gzip, deflate',
                'accept-language'=>'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
                'accept'=>'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                'user-agent'=>'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36 Edg/79.0.309.54',
            ]);
            if (!$cli->get($target) || $cli->statusCode !== 200) {
                return false;
            }
            $body = $cli->body;
        } finally {
            // Always release the connection, including on failed requests.
            $cli->close();
        }

        $dir = dirname($savePath);
        // The trailing is_dir() covers another coroutine creating the directory first.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            return false;
        }
        if (file_put_contents($savePath, $body) === false) {
            return false;
        }
        echo sprintf("[%s] 下载完毕".PHP_EOL,$imageUrl);
        return true;
    }
}
修改版:
<?php
/**
* Created by PhpStorm.
* User: yangpeng
* Date: 2021/7/22
* Time: 16:11
*/
namespace app;
// Directory that readExcel() resolves the CSV file name against.
define('PUBLIC_DIR',__DIR__.'/public');
// Root directory under which downloaded images are saved (the URL path is appended to it).
define('IMG_DIR',__DIR__.'/public/image');
use Swoole\Coroutine;
use function Swoole\Coroutine\run;
use Swoole\Coroutine\Channel;
/**
 * Reads a CSV of product image URLs and downloads every image, plus two
 * derived thumbnail sizes, that is not already stored under IMG_DIR.
 *
 * All coroutine work runs inside a Swoole\Coroutine\run() container, so
 * readExcel() returns only after every download coroutine has finished.
 */
class ReadCsvNew {
    /**
     * Parse the CSV, download all missing images and print the elapsed time.
     *
     * @param string $fileName CSV file name, relative to PUBLIC_DIR.
     *
     * @throws \RuntimeException When the CSV file cannot be opened.
     */
    public function readExcel($fileName){
        $s = microtime(true);

        // The file is opened and parsed before entering the coroutine
        // container so that an open failure reaches the caller as a normal
        // exception instead of being thrown inside a coroutine.
        $csvPath = PUBLIC_DIR.'/'.$fileName;
        $file = fopen($csvPath, 'r');
        if ($file === false) {
            throw new \RuntimeException(sprintf('Unable to open CSV file "%s"', $csvPath));
        }
        try {
            $imageUrls = $this->collectImageUrls($file);
        } finally {
            fclose($file);
        }

        run(function () use ($imageUrls) {
            $downloadChan = new Channel(10000);
            foreach ($imageUrls as $url) {
                // Push from a separate coroutine so that a full channel
                // suspends only that coroutine instead of this loop.
                go(function () use ($downloadChan, $url) {
                    $downloadChan->push($url);
                });
            }
            $this->downloadImg($downloadChan);
        });
        echo 'use ' . (microtime(true) - $s) . ' s';
    }

    /**
     * Start the worker coroutines that drain the channel and download images.
     *
     * Each worker stops as soon as it observes an empty channel, so the
     * channel is expected to be filled before this method is called.
     *
     * @param \Swoole\Coroutine\Channel $downloadChan Channel of absolute https image URLs.
     */
    public function downloadImg($downloadChan){
        for ($i = 1; $i <= 100; $i++) {
            go(function () use ($downloadChan) {
                while ($downloadChan->length() > 0) {
                    $imageUrl = $downloadChan->pop();
                    if (!is_string($imageUrl)) {
                        // pop() yields a non-string (false) when the channel was closed.
                        break;
                    }
                    $savePath = $this->localPathFor($imageUrl);
                    if ($savePath === null || file_exists($savePath)) {
                        continue;
                    }
                    $this->fetchImage($imageUrl, $savePath);
                }
            });
        }
    }

    /**
     * Collect the unique image URLs referenced by the CSV.
     *
     * Column 0 is compared against the header marker 'product_id'; column 1
     * holds one or more "http:" URLs. Every URL is rewritten to https and, when
     * it has a file extension, two thumbnail variants ("62x62" and "max-440")
     * are derived from the part of the URL that precedes "max-w".
     *
     * @param resource $file Open CSV file handle.
     *
     * @return string[] Unique https URLs.
     */
    private function collectImageUrls($file){
        // Keyed by URL so that duplicates collapse without a linear search.
        $urls = [];
        while (($data = fgetcsv($file)) !== false) {
            // Blank or short rows have no URL column; the header row is skipped.
            if (!isset($data[0], $data[1]) || $data[0] === 'product_id') {
                continue;
            }
            foreach (explode('http:', $data[1]) as $val) {
                $str = trim($val);
                if ($str === '') {
                    continue;
                }
                $big = 'https:'.$str;
                $ext = pathinfo($str, PATHINFO_EXTENSION);
                if ($ext !== '') {
                    $base = explode('max-w', $str)[0];
                    $small = 'https:'.$base.'62x62.'.$ext;
                    $medium = 'https:'.$base.'max-440.'.$ext;
                    $urls[$small] = true;
                    $urls[$medium] = true;
                }
                $urls[$big] = true;
            }
        }
        return array_keys($urls);
    }

    /**
     * Map an image URL to the local file it is stored in.
     *
     * @param string $url Absolute image URL.
     *
     * @return string|null Path below IMG_DIR, or null when the URL has no
     *                     host/path or its path contains a ".." segment.
     */
    private function localPathFor($url){
        $parts = parse_url($url);
        if (!is_array($parts) || empty($parts['host']) || empty($parts['path'])) {
            return null;
        }
        // The URL comes from the CSV; never let it point outside IMG_DIR.
        if (in_array('..', explode('/', $parts['path']), true)) {
            return null;
        }
        return IMG_DIR.$parts['path'];
    }

    /**
     * Download one image over HTTPS and store it at $savePath.
     *
     * A single GET is issued; the body is written only for a 200 response, so
     * failed requests leave no file or directory behind.
     *
     * @param string $imageUrl Absolute https URL, already accepted by localPathFor().
     * @param string $savePath Destination file path.
     *
     * @return bool True when the image was saved.
     */
    private function fetchImage($imageUrl, $savePath){
        $parts = parse_url($imageUrl);
        $domain = $parts['host'];
        // The client takes a request path, not an absolute URL.
        $target = $parts['path'].(isset($parts['query']) ? '?'.$parts['query'] : '');

        $cli = new \Swoole\Coroutine\Http\Client($domain, isset($parts['port']) ? $parts['port'] : 443, true);
        try {
            // NOTE(review): -1 disables the timeout, so a stalled server blocks
            // this worker indefinitely - consider a finite value.
            $cli->set(['timeout' => -1]);
            $cli->setHeaders([
                'host'=>$domain,
                'accept-encoding'=>'gzip, deflate',
                'accept-language'=>'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
                'accept'=>'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                'user-agent'=>'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36 Edg/79.0.309.54',
            ]);
            if (!$cli->get($target) || $cli->statusCode !== 200) {
                return false;
            }
            $body = $cli->body;
        } finally {
            // Always release the connection, including when a call above throws.
            $cli->close();
        }

        $dir = dirname($savePath);
        // The trailing is_dir() covers another coroutine creating the directory first.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            return false;
        }
        if (file_put_contents($savePath, $body) === false) {
            return false;
        }
        echo sprintf("[%s] 下载完毕".PHP_EOL,$imageUrl);
        return true;
    }
}
|