Merge branch 'bug/crawl_news' into 'develop'
Update crawl. See merge request india/india_market_java!48
This commit is contained in:
@@ -249,10 +249,17 @@ public class StockService {
|
|||||||
String result = "";
|
String result = "";
|
||||||
try {
|
try {
|
||||||
// 使用Jsoup连接到网页
|
// 使用Jsoup连接到网页
|
||||||
Document doc = Jsoup.connect("https://www.business-standard.com/markets/news")
|
// Document doc = Jsoup.connect("https://www.business-standard.com/markets/news")
|
||||||
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
|
// .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
|
||||||
.header("Referer", "https://www.business-standard.com/")
|
// .header("Referer", "https://www.business-standard.com/")
|
||||||
|
// .header("Accept-Language", "en-US,en;q=0.9")
|
||||||
|
// .get();
|
||||||
|
String url = "https://www.business-standard.com/markets/news";
|
||||||
|
Document doc = Jsoup.connect(url)
|
||||||
|
.referrer("https://www.business-standard.com/")
|
||||||
.header("Accept-Language", "en-US,en;q=0.9")
|
.header("Accept-Language", "en-US,en;q=0.9")
|
||||||
|
.userAgent("Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1")
|
||||||
|
.timeout(5000) // timeout 5 seconds
|
||||||
.get();
|
.get();
|
||||||
// result = doc.html().substring(doc.html().indexOf("<div class=\"listingstyle_shortvideoimg__0TWuX shortvideoimg\">"),doc.html().lastIndexOf("<div class=\"listingstyle_shortvideoimg__0TWuX shortvideoimg\">")+500);
|
// result = doc.html().substring(doc.html().indexOf("<div class=\"listingstyle_shortvideoimg__0TWuX shortvideoimg\">"),doc.html().lastIndexOf("<div class=\"listingstyle_shortvideoimg__0TWuX shortvideoimg\">")+500);
|
||||||
Elements divElements = doc.select("div.listingstyle_cardlistlist__dfq57");
|
Elements divElements = doc.select("div.listingstyle_cardlistlist__dfq57");
|
||||||
@@ -272,10 +279,16 @@ public class StockService {
|
|||||||
List<String> list = new ArrayList<>();
|
List<String> list = new ArrayList<>();
|
||||||
try {
|
try {
|
||||||
// 使用Jsoup连接到网页
|
// 使用Jsoup连接到网页
|
||||||
|
// Document doc = Jsoup.connect(url)
|
||||||
|
// .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
|
||||||
|
// .header("Referer", "https://www.business-standard.com/")
|
||||||
|
// .header("Accept-Language", "en-US,en;q=0.9")
|
||||||
|
// .get();
|
||||||
Document doc = Jsoup.connect(url)
|
Document doc = Jsoup.connect(url)
|
||||||
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
|
.referrer("https://www.business-standard.com/")
|
||||||
.header("Referer", "https://www.business-standard.com/")
|
|
||||||
.header("Accept-Language", "en-US,en;q=0.9")
|
.header("Accept-Language", "en-US,en;q=0.9")
|
||||||
|
.userAgent("Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1")
|
||||||
|
.timeout(5000) // timeout 5 seconds
|
||||||
.get();
|
.get();
|
||||||
result = doc.html().substring(doc.html().indexOf("articleBody") + 14, doc.html().indexOf(",\"author\":") - 1);
|
result = doc.html().substring(doc.html().indexOf("articleBody") + 14, doc.html().indexOf(",\"author\":") - 1);
|
||||||
list.add(result);
|
list.add(result);
|
||||||
|
|||||||
Reference in New Issue
Block a user