1、添加依赖
<!-- Maven dependencies for the crawler example; place these inside the <dependencies> element of pom.xml. -->
<!-- jsoup: HTML fetching/parsing library; the SpiderTests class in this file imports org.jsoup.Jsoup and org.jsoup.nodes.Document. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!-- NOTE(review): no org.apache.commons.io import appears in the code shown in this file; confirm this dependency is needed. -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.6</version>
</dependency>
<!-- NOTE(review): no org.apache.commons.lang import appears in the code shown in this file; confirm this dependency is needed. -->
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
2、具体实现
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.*;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Reads search keywords from a text file (one per line), queries Baidu for each keyword,
 * extracts the text of the {@code nums_text} element from the result page, and overwrites
 * the same file with the extracted values, one per line.
 */
@SpringBootTest
class SpiderTests {

    /** Part of the search URL that precedes the (URL-encoded) keyword. */
    private static final String SEARCH_URL_PREFIX =
            "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=62095104_41_oem_dg&wd=";

    /** Part of the search URL that follows the keyword. */
    private static final String SEARCH_URL_SUFFIX =
            "&oq=%25E6%259E%2597%25E6%25BD%2598%25E6%25AD%25A6&rsv_pq=c04cd2e0002906e5&rsv_t=0a47IpZ7ERpQPcIxllylZwrPOhgeiYs2DLK5Zm%2BB%2Ffe7BOM5ioHXJqKjvKdDPm8KSHdlWt6w%2BkJ9&rqlang=cn&rsv_dl=tb&rsv_enter=0&rsv_btype=t&inputT=2336&rsv_sug3=1582&rsv_n=2&rsv_sug1=1407&rsv_sug7=100&rsv_sug4=2680";

    /**
     * Number of leading characters dropped from the {@code nums_text} text.
     * Presumably the length of a fixed label in front of the number; verify against a live page.
     */
    private static final int COUNT_PREFIX_LENGTH = 11;

    /** Number of trailing characters dropped from the {@code nums_text} text. */
    private static final int COUNT_SUFFIX_LENGTH = 1;

    /**
     * Runs the crawler against the hard-coded keyword file.
     *
     * @throws IOException if the file cannot be read or written, or a search request fails
     */
    @Test
    void contextLoads() throws IOException {
        File keywordFile = new File("C:\\Users\\86139\\Desktop\\姓名 - 副本.txt");
        getData(keywordFile);
    }

    /**
     * Replaces every non-empty line of {@code txtFile} with the value scraped for that line.
     *
     * <p>Empty lines are skipped, so the output can have fewer lines than the input. The file is
     * only rewritten after every lookup has succeeded; a failure leaves the original untouched.
     *
     * @param txtFile file holding one keyword per line; overwritten with the results
     * @throws IOException if reading, fetching, parsing or writing fails
     */
    private void getData(File txtFile) throws IOException {
        Map<String, String> headers = browserHeaders();
        List<String> counts = new ArrayList<>();

        // Read phase: the reader is closed before the same file is reopened for writing.
        // NOTE(review): the platform default charset is used for the file, as before — confirm
        // it matches the encoding the keyword file was saved with.
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(new FileInputStream(txtFile)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty()) {
                    continue;
                }
                counts.add(fetchResultCount(line, headers));
            }
        }

        // Write phase: results are separated by the platform line separator with no trailing
        // newline, matching what BufferedWriter.newLine() between entries would produce.
        try (BufferedWriter writer =
                     new BufferedWriter(new OutputStreamWriter(new FileOutputStream(txtFile)))) {
            writer.write(String.join(System.lineSeparator(), counts));
        }
    }

    /**
     * Fetches the search page for one keyword and extracts the value from its
     * {@code nums_text} element.
     *
     * @param keyword raw keyword as read from the file
     * @param headers HTTP request headers to send
     * @return the element text without its first 11 and last 1 characters and without commas
     * @throws IOException if the request fails or the page does not have the expected shape
     */
    private String fetchResultCount(String keyword, Map<String, String> headers) throws IOException {
        // The keyword must be percent-encoded: it may contain non-ASCII characters, spaces,
        // '&' or '#', any of which would otherwise corrupt the query string.
        String encodedKeyword = URLEncoder.encode(keyword, StandardCharsets.UTF_8.name());
        String url = SEARCH_URL_PREFIX + encodedKeyword + SEARCH_URL_SUFFIX;

        Document doc = Jsoup.connect(url).headers(headers).get();

        Element counter = doc.getElementsByAttributeValue("class", "nums_text").first();
        if (counter == null) {
            throw new IOException(
                    "No element with class \"nums_text\" in the response for keyword: " + keyword);
        }

        String text = counter.text();
        if (text.length() < COUNT_PREFIX_LENGTH + COUNT_SUFFIX_LENGTH) {
            throw new IOException(
                    "Unexpected nums_text content \"" + text + "\" for keyword: " + keyword);
        }
        return text
                .substring(COUNT_PREFIX_LENGTH, text.length() - COUNT_SUFFIX_LENGTH)
                .replace(",", "");
    }

    /**
     * Builds the browser-like request headers sent with every search request.
     *
     * @return a new mutable header map
     */
    private static Map<String, String> browserHeaders() {
        Map<String, String> headers = new HashMap<>();
        headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
        // NOTE(review): this advertises "br"; confirm the jsoup version in use can decode a
        // Brotli-compressed response, otherwise drop "br" from this value.
        headers.put("Accept-Encoding", "gzip, deflate, br");
        headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
        headers.put("Cache-Control", "max-age=0");
        headers.put("Connection", "keep-alive");
        headers.put("sec-ch-ua", "\" Not;A Brand\";v=\"99\", \"Google Chrome\";v=\"91\", \"Chromium\";v=\"91\"");
        headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");
        return headers;
    }
}
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请发送邮件举报,一经查实,本站将立刻删除。
如需转载请保留出处:https://bianchenghao.cn/38606.html