本文共 8373 字,大约阅读时间需要 27 分钟。
前言:最近发现自己的csdn博客被机器人自动评论。这类机器人账号会大量评论别人的文章,被评论的博主就有可能回访、点赞或关注,因此它们的总积分往往非常高。为了覆盖掉这些机器评论,本篇用java爬虫实现对自己博客的所有文章进行自动评论。
工具:webmagic
材料:由文件加载的自动随机评论语
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Loads canned comment phrases from a classpath text file (one phrase per line).
 *
 * <p>The loaded list is cached and re-read from the file once the refresh interval
 * (20 seconds by default) has elapsed since the previous load.
 */
public class CommentLoad {

    // Default refresh interval: 20 seconds.
    private static final long DEFAULT_REFRESH_TIME = 20000L;

    private static final String DEFAULT_PATH = "comment.txt";

    // Shared generator; avoids allocating a new Random for every draw.
    private static final Random RANDOM = new Random();

    private final AtomicBoolean inited = new AtomicBoolean(false);

    // Cached phrases. volatile because loadComments() reads it without holding the lock.
    private volatile List<String> urls = new ArrayList<>();

    // Time of the last (attempted) load.
    private long beforeTime;

    // Time after which the cache is considered stale; read without the lock, hence volatile.
    private volatile long endTime;

    // Refresh interval in milliseconds.
    private long refreshTime = DEFAULT_REFRESH_TIME;

    public CommentLoad() {
    }

    /**
     * @param refreshTime refresh interval in milliseconds
     */
    public CommentLoad(long refreshTime) {
        super();
        this.refreshTime = refreshTime;
    }

    /**
     * Demo entry point: polls the loader once per second forever and prints the cached list,
     * which makes the periodic refresh observable.
     */
    public static void main(String[] args) throws InterruptedException, IOException {
        // Feature 1: load the comment list from the file.
        String path = DEFAULT_PATH;
        CommentLoad commentLoad = new CommentLoad();
        int i = 0;
        while (true) {
            Thread.sleep(1000L);
            List<String> list = commentLoad.loadComments(path);
            System.out.println("计数时间:" + ++i);
            System.out.println(list.size());
            System.out.println(list);
        }
        // Feature 2 (disabled): generate several random comment phrases into the file.
        // path =
        // CommentLoad.class.getClassLoader().getResource(path).getPath();
        //
        // System.out.println(path);
        //
        // // write the phrases into the comment file
        // PrintWriter printWriter = new PrintWriter(new FileWriter(path,
        // false));
        // String[] str = new String[] { "文章", "很好", "思路清晰,", "大佬", "66", "加油",
        // "学习了", "你真棒!" };
        // for (int i = 0; i < 50; i++) {
        // // System.out.println(flushArrToString(str));
        // printWriter.println(flushArrToString(str));
        // printWriter.flush();
        // }
        // printWriter.close();
    }

    /**
     * Shuffles {@code arr} in place and returns the concatenation of its elements.
     *
     * @param arr elements to shuffle; the array itself is reordered
     * @return the string form of all elements, joined in their shuffled order
     */
    public static <T> String flushArrToString(T[] arr) {
        // Fisher-Yates: the swap partner is drawn from [0, index] (inclusive), so an element
        // may stay where it is and every permutation is equally likely.
        for (int index = arr.length - 1; index > 0; index--) {
            int num = createRandom(index + 1);
            T temp = arr[num];
            arr[num] = arr[index];
            arr[index] = temp;
        }
        StringBuilder builder = new StringBuilder();
        for (T t : arr) {
            builder.append(t);
        }
        return builder.toString();
    }

    /**
     * Returns a random int in {@code [0, end)}.
     *
     * @param end exclusive upper bound, must be positive
     * @throws IllegalArgumentException if {@code end} is not positive
     */
    public static int createRandom(int end) {
        return RANDOM.nextInt(end);
    }

    /**
     * Returns the cached comment phrases, (re)loading them first when nothing has been loaded
     * yet or the refresh interval has elapsed.
     *
     * @param path classpath-relative resource name; {@code null} selects {@code comment.txt}
     * @return the cached phrases; empty when the resource could not be read
     */
    public List<String> loadComments(String path) {
        path = path == null ? DEFAULT_PATH : path;
        if (!inited.get() || System.currentTimeMillis() > this.endTime) {
            readComments(path);
        }
        return urls;
    }

    /**
     * Reloads the cache from {@code path}. The staleness check is repeated under the lock so
     * that threads queued behind a reload do not read the file again.
     */
    private synchronized void readComments(String path) {
        if (!inited.get() || System.currentTimeMillis() > this.endTime) {
            try {
                // Read the requested resource, not a hard-coded file name.
                urls = doReadComments(path);
            } catch (IOException e) {
                // Best effort: keep serving the previous list and retry after the next interval.
                System.err.println("Failed to load comments from " + path + ": " + e);
            }
            this.beforeTime = System.currentTimeMillis();
            this.endTime = beforeTime + this.refreshTime;
            inited.set(true);
        }
    }

    /**
     * Reads the resource as UTF-8 text and returns its non-blank lines, trimmed.
     *
     * @throws FileNotFoundException if the resource is not on the classpath
     * @throws IOException if reading fails
     */
    private List<String> doReadComments(String path) throws FileNotFoundException, IOException {
        // Opening the resource as a stream also works for jar entries and paths with spaces.
        InputStream in = CommentLoad.class.getClassLoader().getResourceAsStream(path);
        if (in == null) {
            throw new FileNotFoundException("Comment resource not found on classpath: " + path);
        }
        List<String> comments = new ArrayList<>();
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String comment = line.trim();
                // A blank line would otherwise be posted as an empty comment.
                if (!comment.isEmpty()) {
                    comments.add(comment);
                }
            }
        }
        return comments;
    }
}
此类的主要功能是从类路径下的评论文件(默认comment.txt)加载评论语到list列表,并按刷新时间(默认20秒)重新加载。
经过测试,只要知道文章id并带上登录态(cookie),即可对文章进行评论。
String content = "这个文章非常好啊"; // 评论内容 String articleId = "109261723"; // 评论文章id Request request = new Request("https://blog.csdn.net/phoenix/web/v1/comment/submit"); request.setMethod(HttpConstant.Method.POST); Mapparams = new HashMap<>(); params.put("commentId", ""); params.put("content", content); params.put("articleId", articleId); HttpRequestBody form = HttpRequestBody.form(params , "utf-8"); request.setRequestBody(form); Spider.create(new ComentTest()).addRequest(request).thread(1).run(); // 需要设置登陆cookie
文章id可以通过多种方式获取,比如从最近发表博客的列表中获取。本次批量评论采用对单个博主的文章列表全部评论的方式,单个博主的文章从其分页列表页开始采集。
/** * 自动评论---单个博主 */ public static void main(String[] args) { String user = "shuixiou1"; // csdn用户 int page = 3; // 此用户的文章分页数目 String[] alls = createInitUrls(user, page); Spider.create(new CsdnConmentSpider()).addUrl(alls).thread(1).run(); } /** * 创建初始时的url集合 */ private static String[] createInitUrls(String user, int page) { Listurls = new ArrayList<>(); for (int i = 1; i <= page; i++) { urls.add(String.format(listUrl, user) + i); } String[] result = urls.toArray(new String[urls.size()]); return result; }
package com.pc.demos.csdn;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.pc.util.CookieUtil;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.model.HttpRequestBody;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.utils.HttpConstant;

/**
 * Posts a randomly chosen comment on every article of a single csdn blogger.
 *
 * <p>Three kinds of responses pass through {@link #process(Page)}: article-list pages (each
 * article link is queued), article detail pages (a comment POST is queued), and anything
 * else, which is treated as the response of a comment POST.
 */
public class CsdnConmentSpider implements PageProcessor {

    Logger logger = LoggerFactory.getLogger(getClass());

    // Article-list URL template; %s is the user name, the page number is appended.
    private static final String listUrl = "https://blog.csdn.net/%s/article/list/";

    // Matches article-list page URLs.
    private static final String listUrlRegex = "https://blog\\.csdn\\.net/(.+)/article/list/(.*)";

    // Matches article detail page URLs.
    private static final String detailUrlRegex = "https://blog\\.csdn\\.net/(.+)/article/details/(.*)";

    // Comment submit endpoint.
    private static final String COMMENT_SUBMIT_URL = "https://blog.csdn.net/phoenix/web/v1/comment/submit";

    // Extracts only the numeric id, so a trailing query string or fragment is not sent as part of it.
    private static final Pattern ARTICLE_ID_PATTERN = Pattern.compile("/article/details/(\\d+)");

    private static final Random RANDOM = new Random();

    // Supplies the pool of comment phrases.
    private CommentLoad commentLoad = new CommentLoad();

    @Override
    public void process(Page page) {
        String url = page.getRequest().getUrl();
        if (url.matches(listUrlRegex)) {
            queueArticleLinks(page);
        } else if (url.matches(detailUrlRegex)) {
            queueCommentRequest(page, url);
        } else {
            // Response of the comment POST.
            // NOTE(review): this prints "success" without inspecting a status field in the
            // JSON — confirm the response schema before relying on it.
            String res = page.getJson().jsonPath("$..data").toString();
            System.out.println("评论成功:返回id是" + res);
        }
    }

    /** Queues every article link found on a list page. */
    private void queueArticleLinks(Page page) {
        List<String> list = page.getHtml()
                .xpath("//div[@class='article-item-box csdn-tracking-statistics']/h4/a").all();
        for (String string : list) {
            String link = Jsoup.parse(string).select("a").attr("href");
            // attr() returns "" when the anchor has no href; do not queue an empty URL.
            if (!link.isEmpty()) {
                page.addTargetRequest(link);
            }
        }
    }

    /** Queues the comment POST for the article shown on a detail page. */
    private void queueCommentRequest(Page page, String url) {
        System.out.println("详情页面加载:" + url);

        Matcher matcher = ARTICLE_ID_PATTERN.matcher(url);
        if (!matcher.find()) {
            logger.warn("Cannot extract an article id from {}, skipping", url);
            return;
        }
        String articleId = matcher.group(1);

        List<String> comments = commentLoad.loadComments(null);
        if (comments == null || comments.isEmpty()) {
            // Random.nextInt(0) would throw; with no phrases there is nothing to post.
            logger.warn("No comment phrases loaded, skipping article {}", articleId);
            return;
        }

        Map<String, Object> params = new HashMap<>();
        params.put("commentId", "");
        params.put("content", comments.get(RANDOM.nextInt(comments.size())));
        params.put("articleId", articleId);

        Request request = new Request(COMMENT_SUBMIT_URL);
        request.setMethod(HttpConstant.Method.POST);
        request.setRequestBody(HttpRequestBody.form(params, "utf-8"));

        Map<String, Object> extras = new HashMap<>();
        extras.put("articleId", articleId);
        request.setExtras(extras);

        page.addTargetRequest(request);
    }

    @Override
    public Site getSite() {
        Site site = Site.me().setCycleRetryTimes(3).setSleepTime(2000);
        // The HTTP/2 pseudo-headers (":authority", ":method", ":path", ":scheme") copied from
        // browser dev tools are deliberately not added: they are not valid ordinary header
        // names, and the Site headers apply to every request, including list and detail GETs.
        site.addHeader("accept", "application/json, text/javascript, */*; q=0.01");
        site.addHeader("accept-encoding", "gzip, deflate, br");
        site.addHeader("accept-language", "zh-CN,zh;q=0.9");
        site.addHeader("origin", "https://blog.csdn.net");
        site.addHeader("referer", "https://blog.csdn.net");
        // Cookie string of a logged-in session; replace the placeholder before running.
        String cookieSpec = "################";
        CookieUtil.setSiteCookies(site, cookieSpec);
        return site;
    }

    /**
     * Auto-comment entry point for a single blogger.
     */
    public static void main(String[] args) {
        String user = "shuixiou1"; // csdn user name
        int page = 3; // number of article-list pages this user has
        String[] alls = createInitUrls(user, page);
        Spider.create(new CsdnConmentSpider()).addUrl(alls).thread(1).run();
    }

    /**
     * Builds the seed URLs: one article-list URL per page, numbered 1..page.
     *
     * @param user csdn user name substituted into the list URL template
     * @param page number of list pages; values below 1 yield an empty array
     */
    private static String[] createInitUrls(String user, int page) {
        List<String> urls = new ArrayList<>();
        String prefix = String.format(listUrl, user);
        for (int i = 1; i <= page; i++) {
            urls.add(prefix + i);
        }
        return urls.toArray(new String[0]);
    }
}
经过一轮测试,没有被频率限制
1) 必须要设置登录后的cookie字符串(代码中已经替换成################)
2) 拿去使用时,注意把csdn博主名称改成自己的!