抓取Leetcode的每日一题信息
思路一(发送GraphQL Query获取数据)
参考文章:https://www.cnblogs.com/ZhaoxiCheung/p/9333476.html
接口分析
主要的数据存在于graphql/接口中:
https://leetcode-cn.com/graphql/
首页热门题目接口
是否AC状态查看接口
每日一题接口
构造 GraphQL Query来获取信息
在Headers下的Request Payload中我们可以看到一个query字段,这是我们要构造的 GraphQL Query 的一个重要信息。
利用Postman来分析接口
我们并不一开始就用代码来获取题目信息,而是先利用 Postman 来看看如何获取题目信息。右键 Network 下的 graphql 文件—>Copy—>Copy as cURL(bash)
接着我们打开Postman,点击左上角File里的import,然后找到Raw text栏
将copy下来的cURL粘贴到Raw text中,点击continue,就可以在Postman中查看
在这之前遇到了一个小问题:误选了 Copy all as cURL 而不是 Copy as cURL,导致 Postman 解析出错。
curl解析的结果如下:
从解析的结果看,和我们在Headers中看到的query字段类似,不过有一些细节需要更改。
当然,如果不想直接粘贴复制的 cURL,也可以自己在 Postman 中填写 Header 和 Body。需要注意的是,这里的 Content-Type 是 application/graphql,Body 中的 GraphQL 查询参照 Request Payload 中的 query 字段来构造。
利用 Java 的 Jsoup 和 OkHttp 库发送 HTTP 请求,并用 fastjson 解析 JSON 数据
package com.example.leetcode_card.utils;
import com.alibaba.fastjson.JSONObject;
import okhttp3.*;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
/**
 * Fetches question data from the LeetCode China (leetcode-cn.com) GraphQL endpoint.
 *
 * <p>Every query is sent in two steps: a regular question page is loaded with Jsoup to
 * obtain cookies, then the GraphQL query is posted with OkHttp using those cookies.
 */
public class GraphqlUtil {

    /** Site root; the other URLs are derived from it. */
    private static final String BASE_URL = "https://leetcode-cn.com";

    /** An ordinary question page, requested only to obtain cookies and reused as the Referer. */
    private static final String QUESTION_URL = BASE_URL + "/problems/two-sum/description/";

    /** The GraphQL endpoint all queries are posted to. */
    private static final String GRAPHQL_URL = BASE_URL + "/graphql";

    /** Query template for a single question; {@code %s} is the (escaped) title slug. */
    private static final String QUESTION_QUERY =
            "query{ question(titleSlug:\"%s\") { questionId translatedTitle translatedContent difficulty } }";

    /** Query returning the record of today's daily question. */
    private static final String TODAY_QUERY =
            "query questionOfToday { todayRecord { question { questionFrontendId questionTitleSlug __typename } lastSubmission { id __typename } date userStatus __typename }}";

    /** One shared client: OkHttp clients own connection and thread pools and are meant to be reused. */
    private static final OkHttpClient CLIENT = new OkHttpClient.Builder()
            .followRedirects(false)
            .followSslRedirects(false)
            .build();

    /** Kept public for backward compatibility; every member of this class is static. */
    public GraphqlUtil() {
    }

    /**
     * Queries id, translated title, translated content and difficulty of one question.
     *
     * @param title the question's title slug, e.g. {@code "two-sum"}; must not be {@code null}
     * @return the raw JSON response with {@code \\uXXXX} escapes decoded, or {@code null} if the
     *         response body is empty
     * @throws IOException if a request fails, the expected cookie is missing, or the server
     *         answers with a non-2xx status
     */
    public static String getContent(String title) throws IOException {
        Objects.requireNonNull(title, "title");
        // Escape the slug so it cannot terminate the surrounding GraphQL string literal.
        String postBody = String.format(QUESTION_QUERY, escapeGraphqlString(title));
        return unicodetoString(postGraphql(postBody));
    }

    /**
     * Looks up the title slug of today's daily question.
     *
     * @return the value of {@code data.todayRecord[0].question.questionTitleSlug}
     * @throws IOException if a request fails or the response lacks the expected structure
     */
    public static String getTitle() throws IOException {
        String body = postGraphql(TODAY_QUERY);
        // Parse the raw body: the JSON parser decodes \\uXXXX escapes itself, whereas decoding
        // them beforehand could inject quotes or backslashes and corrupt the document.
        JSONObject root = JSONObject.parseObject(body);
        JSONObject data = root == null ? null : root.getJSONObject("data");
        if (data == null
                || data.getJSONArray("todayRecord") == null
                || data.getJSONArray("todayRecord").isEmpty()) {
            throw new IOException("Unexpected GraphQL response, no todayRecord entry: " + body);
        }
        JSONObject record = data.getJSONArray("todayRecord").getJSONObject(0);
        JSONObject question = record == null ? null : record.getJSONObject("question");
        String slug = question == null ? null : question.getString("questionTitleSlug");
        if (slug == null) {
            throw new IOException("Unexpected GraphQL response, no questionTitleSlug: " + body);
        }
        return slug;
    }

    /**
     * Replaces every {@code \\uXXXX} escape (backslash, 'u', four hex digits) in the input by
     * the character it denotes. A {@code \\u} that is not followed by four hex digits is copied
     * through unchanged.
     *
     * @param unicode text that may contain {@code \\uXXXX} escapes
     * @return the decoded text, or {@code null} if the input is {@code null} or empty
     */
    public static String unicodetoString(String unicode) {
        if (unicode == null || unicode.isEmpty()) {
            return null;
        }
        StringBuilder sb = new StringBuilder(unicode.length());
        int pos = 0;
        int i;
        while ((i = unicode.indexOf("\\u", pos)) != -1) {
            sb.append(unicode, pos, i);
            int hexEnd = i + 6;
            if (hexEnd <= unicode.length() && isHexQuad(unicode, i + 2)) {
                sb.append((char) Integer.parseInt(unicode.substring(i + 2, hexEnd), 16));
                pos = hexEnd;
            } else {
                // Truncated or malformed escape: keep it literally and always move past it,
                // otherwise the search would find the same "\\u" again and never terminate.
                sb.append("\\u");
                pos = i + 2;
            }
        }
        sb.append(unicode, pos, unicode.length());
        return sb.toString();
    }

    /** Posts one GraphQL query using freshly fetched cookies and returns the raw response body. */
    private static String postGraphql(String query) throws IOException {
        Connection.Response page = Jsoup.connect(QUESTION_URL)
                .method(Connection.Method.GET)
                .execute();
        // NOTE(review): the value sent as "csrftoken" is read from the "aliyungf_tc" cookie,
        // exactly as the original code did - confirm this is the intended cookie.
        String csrftoken = page.cookie("aliyungf_tc");
        String cfduid = page.cookie("__cfduid");
        if (csrftoken == null) {
            // A real check instead of "assert", which is skipped unless the JVM runs with -ea.
            throw new IOException("Cookie 'aliyungf_tc' missing in response from " + QUESTION_URL);
        }

        StringBuilder cookie = new StringBuilder();
        if (cfduid != null) {
            // Only forward the cookie when the server actually set it, never the text "null".
            cookie.append("__cfduid=").append(cfduid).append(';');
        }
        cookie.append("csrftoken=").append(csrftoken);

        Request request = new Request.Builder()
                .addHeader("Content-Type", "application/graphql")
                .addHeader("Referer", QUESTION_URL)
                .addHeader("Cookie", cookie.toString())
                .addHeader("x-csrftoken", csrftoken)
                .url(GRAPHQL_URL)
                .post(RequestBody.create(MediaType.parse("application/graphql; charset=utf-8"), query))
                .build();

        // try-with-resources releases the connection on every path, including exceptions.
        try (Response response = CLIENT.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException("GraphQL request failed with HTTP status " + response.code());
            }
            return response.body().string();
        }
    }

    /** Escapes backslashes and double quotes so the value is safe inside a GraphQL string literal. */
    private static String escapeGraphqlString(String value) {
        return value.replace("\\", "\\\\").replace("\"", "\\\"");
    }

    /** Tells whether the four characters starting at {@code from} are all hexadecimal digits. */
    private static boolean isHexQuad(String text, int from) {
        for (int k = from; k < from + 4; k++) {
            if (Character.digit(text.charAt(k), 16) == -1) {
                return false;
            }
        }
        return true;
    }
}
引入的maven库:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the LeetcodeSpider example project. -->
<!-- NOTE(review): the GraphqlUtil sample imports com.alibaba.fastjson.JSONObject, but no
     fastjson dependency is declared below. Confirm it arrives transitively, or declare it
     explicitly. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>LeetcodeSpider</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- Jsoup: used by GraphqlUtil to load a question page and read its cookies. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
<!-- OkHttp: used by GraphqlUtil to POST the GraphQL query. -->
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.9.2</version>
</dependency>
<!-- NOTE(review): Apache HttpClient is not referenced by the GraphqlUtil sample; confirm
     whether other code needs it. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.12</version>
</dependency>
<!-- NOTE(review): not referenced directly by the GraphqlUtil sample; confirm why it is needed. -->
<dependency>
<groupId>top.jfunc.common</groupId>
<artifactId>converter</artifactId>
<version>1.8.0</version>
</dependency>
</dependencies>
</project>
思路二(利用python爬虫爬取GraphQL接口)
参考文章:https://blog.csdn.net/malloc_can/article/details/113004579
from datetime import datetime
import requests
import json
import smtplib
from email.mime.text import MIMEText
# Builds the HTML body of a "LeetCode daily question" message: looks up today's question
# through the LeetCode China GraphQL API, fetches its details, optionally fetches a
# "quote of the day", and assembles everything into `htmlText`.
base_url = 'https://leetcode-cn.com'
graphql_url = base_url + "/graphql"

# requests never times out by default; bound every call so the script cannot hang forever.
request_timeout = 10

# Endpoint of the "morning quote" API. The article left this URL blank, and requesting an
# empty URL always raises MissingSchema, so the quote is skipped until a URL is filled in.
quote_api_url = ""

# Step 1: ask which question is today's daily question (only its slug is needed).
response = requests.post(graphql_url, json={
    "operationName": "questionOfToday",
    "variables": {},
    "query": "query questionOfToday { todayRecord { question { questionFrontendId questionTitleSlug __typename } lastSubmission { id __typename } date userStatus __typename }}"
}, timeout=request_timeout)
response.raise_for_status()
leetcodeTitle = json.loads(response.text)['data']['todayRecord'][0]['question']['questionTitleSlug']

# Public page of the question, linked at the bottom of the message.
url = base_url + "/problems/" + leetcodeTitle

# Step 2: fetch the full details of that question by slug.
response = requests.post(graphql_url, json={
    "operationName": "questionData",
    "variables": {"titleSlug": leetcodeTitle},
    "query": "query questionData($titleSlug: String!) { question(titleSlug: $titleSlug) { questionId questionFrontendId boundTopicId title titleSlug content translatedTitle translatedContent isPaidOnly difficulty likes dislikes isLiked similarQuestions contributors { username profileUrl avatarUrl __typename } langToValidPlayground topicTags { name slug translatedName __typename } companyTagStats codeSnippets { lang langSlug code __typename } stats hints solution { id canSeeDetail __typename } status sampleTestCase metaData judgerAvailable judgeType mysqlSchemas enableRunCode envInfo book { id bookName pressName source shortDescription fullDescription bookImgUrl pressImgUrl productUrl __typename } isSubscribed isDailyQuestion dailyRecordStatus editorType ugcQuestionId style __typename }}"
}, timeout=request_timeout)
response.raise_for_status()
jsonText = json.loads(response.text)['data']['question']

no = jsonText.get('questionFrontendId')
leetcodeTitle = jsonText.get('translatedTitle')
level = jsonText.get('difficulty')
# Fall back to an empty body so the concatenation below cannot fail on a null field.
context = jsonText.get('translatedContent') or ''

# Step 3 (best effort): the quote is decorative, so any failure just leaves it empty.
ana = ""
if quote_api_url:
    try:
        response = requests.get(quote_api_url, timeout=request_timeout)
        response.raise_for_status()
        # A separate name keeps the `json` module usable (the original rebound `json` here).
        quote_payload = json.loads(response.text)
        ana = quote_payload['newslist'][0]['content']
    except (requests.RequestException, ValueError, LookupError, TypeError):
        ana = ""

face_url = 'http://wx3.sinaimg.cn/large/007hyfXLly1g0uj7x5jpaj301o02a0sw.jpg'

# Day counter shown in the footer, counted from the date the script was first deployed.
begin_time = datetime(2020, 12, 23)
info = "<span style='color:cornflowerblue'>本脚本已运行{0}天</span>".format(
    (datetime.today() - begin_time).days)

# The <img> tag now opens its src attribute with a quote; the original emitted only the
# closing quote, which produced a malformed attribute.
htmlText = """ <head>
<meta charset=UTF-8>
<link rel="stylesheet">
<style>
code {
color: blue;
font-size: larger;
}
</style>
</link>
</head>
<body>
<div> </B><BR></B><FONT
style="FONT-SIZE: 12pt; FILTER: shadow(color=#af2dco); WIDTH: 100%; COLOR: #730404; LINE-HEIGHT: 100%; FONT-FAMILY: 华文行楷"
size=6><span style="COLOR: cornflowerblue">早安语录:</span>""" + ana + '</FONT><img width="40px" src="' + face_url + """">
<div>
<h3>Leetcode-每日一题</h3>
<h4>""" + no + '.' + leetcodeTitle + '.' + level + """</h4>""" + context + '本题连接:<a href=' + url + ">" + url + "</a></div>" + info
文章内容仅限用于学习
|