ACM 老年人退役了。感谢 AcWing 算法全家桶,让我的算法水平提升了不少。之前就看到过一些统计做题数的爬虫,但通常只支持 HDU、CF、VJ 之类的站点,没看到有针对 AcWing 活动打卡页面的。刚好期末大作业准备做一个能统计各大 OJ 做题数的功能,于是在网上查了些资料,写了这个爬虫。
用的语言是 Java,可以统计活动打卡前 5 页的做题数(y总现在也只有 3 页),需要更多页可以在 for 循环那里修改。食用方法:在 main 函数里填入自己的 id,然后运行即可(id 的获取方式:点击右上角进入“我的空间”,页面 URL 里的那串数字就是 id)。不是专业搞爬虫的,仅供参考。
package acm.crawler;
import io.swagger.models.auth.In;
import org.apache.http.HttpEntity;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.HttpClientUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Small crawler that sums the per-activity numbers shown on a user's AcWing
 * activity pages.
 *
 * <p>The pages are downloaded with Apache HttpClient, concatenated into one HTML
 * string and parsed with jsoup. All methods are static; the class keeps no state.
 */
public class HttpClientDownPage {
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36";

    /** Number of pages fetched by {@link #sendGet(String)}. */
    private static final int DEFAULT_PAGE_COUNT = 5;

    /**
     * Charset used only when the response does not declare one itself.
     * NOTE(review): assumes the site serves UTF-8 - confirm against the live pages.
     */
    private static final String FALLBACK_CHARSET = "UTF-8";

    /** Connect and socket timeout for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 2000;

    /**
     * Downloads the first {@value #DEFAULT_PAGE_COUNT} pages below {@code url}.
     *
     * @param url base URL; the page number is appended as {@code url + "/" + page}
     * @return the concatenated bodies of all pages that answered with HTTP 200
     */
    public static String sendGet(String url) {
        return sendGet(url, DEFAULT_PAGE_COUNT);
    }

    /**
     * Downloads pages {@code 1..pages} below {@code url} and concatenates their bodies.
     *
     * <p>Pages that do not answer with HTTP 200 are reported on standard output and
     * skipped. If an I/O error occurs, the stack trace is printed and whatever was
     * downloaded up to that point is returned (best effort).
     *
     * @param url   base URL; the page number is appended as {@code url + "/" + page}
     * @param pages number of pages to request; values below 1 request nothing
     * @return the concatenated bodies of all pages that answered with HTTP 200,
     *         possibly empty, never {@code null}
     */
    public static String sendGet(String url, int pages) {
        RequestConfig requestConfig = RequestConfig.custom()
                .setCookieSpec(CookieSpecs.STANDARD)
                .setSocketTimeout(TIMEOUT_MILLIS)
                .setConnectTimeout(TIMEOUT_MILLIS)
                .build();
        StringBuilder html = new StringBuilder();

        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            for (int page = 1; page <= pages; page++) {
                HttpGet request = new HttpGet(url + "/" + page);
                request.setHeader("User-Agent", USER_AGENT);
                request.setConfig(requestConfig);

                // Close every response, not just the last one, so that each
                // connection is released before the next request is issued.
                try (CloseableHttpResponse response = httpClient.execute(request)) {
                    HttpEntity entity = response.getEntity();
                    String body = entity == null ? "" : EntityUtils.toString(entity, FALLBACK_CHARSET);
                    if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
                        html.append(body);
                    } else {
                        System.out.println("返回状态不是200");
                        System.out.println(body);
                    }
                }
            }
        } catch (IOException e) {
            // Also covers ClientProtocolException, which is an IOException.
            // Deliberately best effort: keep the pages fetched so far.
            e.printStackTrace();
        }
        return html.toString();
    }

    /**
     * Sums the integer found as the second space-separated token of every
     * {@code <span style="color: #6a737c;">} element in the document.
     *
     * <p>Spans that have fewer than two tokens, or whose second token is not an
     * integer, are ignored instead of aborting the whole count.
     *
     * @param document parsed HTML of the downloaded pages
     * @return the sum of all recognised numbers; 0 if none were found
     */
    private static Integer paraseList(Document document) {
        int total = 0;
        Elements spans = document.select("span[style=color: #6a737c;]");
        for (Element span : spans) {
            String[] tokens = span.text().split(" ");
            if (tokens.length < 2) {
                continue;
            }
            try {
                total += Integer.parseInt(tokens[1]);
            } catch (NumberFormatException ignored) {
                // The style selector may match spans that carry no count; skip them.
            }
        }
        return total;
    }

    /**
     * Fetches the activity pages of the given user and sums the numbers found on them.
     *
     * @param id numeric user id as it appears in the user's space URL
     * @return the summed count; 0 if nothing could be downloaded or parsed
     */
    public static Integer getAcwing(String id) {
        String html = sendGet("https://www.acwing.com/user/myspace/activity/" + id);
        return paraseList(Jsoup.parse(html));
    }

    /** Prints the count for a sample user id; replace {@code "1"} with your own id. */
    public static void main(String[] args) {
        System.out.println(getAcwing("1"));
    }
}
熟悉 Maven 的可以直接导入下面的依赖,就不用手动弄一堆 jar 包了:
<!-- jsoup: provides the org.jsoup classes the crawler uses to parse HTML and select elements -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<!-- Apache HttpClient: provides the org.apache.http classes the crawler uses to send the GET requests -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.9</version>
</dependency>
|