爬取新闻数据测试

This commit is contained in:
dengli
2023-12-19 19:46:46 +08:00
parent 2ef3844fbb
commit 72517213a0
4 changed files with 74 additions and 21 deletions

View File

@@ -253,7 +253,7 @@ public class StockService {
return ServerResponse.createBySuccess(marketVO);
}
public ServerResponse getNews() {
public String getNews() {
String result = "";
try {
// 使用Jsoup连接到网页
@@ -262,11 +262,11 @@ public class StockService {
.header("Referer", "https://www.business-standard.com/")
.header("Accept-Language", "en-US,en;q=0.9")
.get();
result = doc.html().substring(doc.html().indexOf("<div class=\"short-video-img\">"),doc.html().lastIndexOf("<div class=\"short-video-img\">"));
result = doc.html().substring(doc.html().indexOf("<div class=\"short-video-img\">"),doc.html().lastIndexOf("<div class=\"short-video-img\">")+500);
} catch (Exception e) {
return ServerResponse.createByErrorMsg(e.toString());
return e.toString();
}
return ServerResponse.createBySuccessMsg(result);
return result;
}
public String getNewsInfo(String url) {

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,7 @@
package cn.stock.market.infrastructure.job;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
@@ -10,6 +11,11 @@ import java.util.concurrent.TimeUnit;
import javax.annotation.PostConstruct;
import cn.stock.market.domain.basic.entity.SiteNews;
import cn.stock.market.domain.basic.repository.SiteNewsRepository;
import cn.stock.market.domain.basic.service.SiteNewsService;
import cn.stock.market.domain.basic.service.StockService;
import cn.stock.market.infrastructure.db.po.QSiteNewsPO;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
@@ -41,6 +47,8 @@ import lombok.extern.slf4j.Slf4j;
public class StockTask {
@Autowired RealtimeRepository realtimeRepository;
@Autowired StockRepository stockRepository;
@Autowired StockService stockService;
@Autowired SiteNewsRepository newsRepository;
ThreadPoolExecutor pool;
@PostConstruct
@@ -339,4 +347,31 @@ public class StockTask {
//
// log.info("syncAStockList执行, 受影响数{}, 耗时:{}毫秒", count, stopwatch.elapsed(TimeUnit.MILLISECONDS));
// }
/**
 * News import job, scheduled with cron {@code 0 0 1 * * ?}.
 *
 * <p>Splits the markup returned by {@code stockService.getNews()} on {@code <a href=},
 * pulls the article link, source id, image URL, timestamp text and title out of each
 * segment using fixed marker offsets, and saves one {@link SiteNews} row per source id
 * that is not stored yet.
 *
 * <p>Segments that do not contain every expected marker (for example the text preceding
 * the first link) are skipped and counted instead of aborting the whole run with a
 * {@link StringIndexOutOfBoundsException}.
 */
@Scheduled(cron = "0 0 1 * * ?")
public void saveStockNews() {
    String news = stockService.getNews();
    if (news == null || news.isEmpty()) {
        log.warn("saveStockNews: no news markup returned, nothing to import");
        return;
    }

    int saved = 0;
    int skipped = 0;
    for (String segment : news.split("<a href=")) {
        // A usable segment starts with the quoted link followed by the anchor's class attribute.
        int classAt = segment.indexOf("class=\"img-smllnews\"");
        String imgUrl = sliceBetween(segment, "img src=", 9, "?", 0);
        String time = sliceBetween(segment, "Last Updated", 23, "IST", 9);
        String title = sliceBetween(segment, "html\">", 6, "<div class=\"short-video-img\">", 47);
        if (classAt < 3 || imgUrl == null || time == null || title == null) {
            skipped++;
            continue;
        }

        // Drop the opening quote and the closing quote + space in front of the class attribute.
        String contentUrl = segment.substring(1, classAt - 2);
        int idBegin = contentUrl.lastIndexOf("-") + 1;
        int idEnd = contentUrl.lastIndexOf("_");
        if (idEnd < idBegin) {
            skipped++;
            continue;
        }
        String id = contentUrl.substring(idBegin, idEnd);

        // Check for an existing row first so the article body is only fetched when it will be stored.
        if (!newsRepository.findAll(QSiteNewsPO.siteNewsPO.sourceId.eq(id)).isEmpty()) {
            continue;
        }

        SiteNews siteNews = new SiteNews();
        siteNews.setAddTime(new Date());
        siteNews.setSourceId(id);
        siteNews.setTitle(title);
        siteNews.setDescription(time);
        siteNews.setImgurl(imgUrl);
        siteNews.setContent(stockService.getNewsInfo(contentUrl));
        newsRepository.save(siteNews);
        saved++;
    }
    log.info("saveStockNews: saved {} news item(s), skipped {} unparseable segment(s)", saved, skipped);
}

/**
 * Returns the text between two markers, or {@code null} when it cannot be extracted.
 *
 * @param text        segment to search
 * @param startMarker marker whose first occurrence anchors the start
 * @param startSkip   number of characters to skip from the start marker's position
 * @param endMarker   marker whose first occurrence anchors the end
 * @param endTrim     number of characters to drop in front of the end marker's position
 * @return the extracted text, or {@code null} if a marker is missing or the range is empty-negative
 */
private static String sliceBetween(String text, String startMarker, int startSkip,
                                   String endMarker, int endTrim) {
    int startAt = text.indexOf(startMarker);
    int endAt = text.indexOf(endMarker);
    if (startAt < 0 || endAt < 0) {
        return null;
    }
    int begin = startAt + startSkip;
    int end = endAt - endTrim;
    if (begin > end) {
        return null;
    }
    return text.substring(begin, end);
}
}

View File

@@ -1,15 +1,15 @@
package cn.stock.market.web;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import javax.servlet.http.HttpServletRequest;
import cn.stock.market.domain.basic.entity.SiteNews;
import cn.stock.market.domain.basic.repository.SiteNewsRepository;
import cn.stock.market.infrastructure.db.po.QSiteNewsPO;
import cn.stock.market.web.annotations.EncryptFilter;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiImplicitParam;
@@ -65,6 +65,8 @@ public class StockApiController {
@Autowired
StockService stockService;
@Autowired
SiteNewsRepository newsRepository;
@RequestMapping({"getRawSinaStock.do"})
@ResponseBody
@@ -150,7 +152,28 @@ public class StockApiController {
@ApiOperation(value = "印度热门股票列表", httpMethod = "GET")
@ResponseBody
/**
 * Scrapes the news listing via {@code stockService.getNews()}, stores every item whose
 * source id is not in the repository yet, and reports what was stored.
 *
 * <p>The markup is split on {@code <a href=}; link, source id, image URL, timestamp text
 * and title are cut out of each segment with fixed marker offsets. Segments that do not
 * contain every expected marker are skipped instead of failing the request with a
 * {@link StringIndexOutOfBoundsException}.
 *
 * @return a success response carrying the list of items saved by this call (possibly empty)
 */
public ServerResponse getINDNews() {
    List<SiteNews> saved = new ArrayList<>();
    String news = stockService.getNews();
    if (news == null || news.isEmpty()) {
        return ServerResponse.createBySuccess(saved);
    }

    for (String segment : news.split("<a href=")) {
        // A usable segment starts with the quoted link followed by the anchor's class attribute.
        int classAt = segment.indexOf("class=\"img-smllnews\"");
        String imgUrl = sliceBetween(segment, "img src=", 9, "?", 0);
        String time = sliceBetween(segment, "Last Updated", 23, "IST", 9);
        String title = sliceBetween(segment, "html\">", 6, "<div class=\"short-video-img\">", 47);
        if (classAt < 3 || imgUrl == null || time == null || title == null) {
            continue;
        }

        // Drop the opening quote and the closing quote + space in front of the class attribute.
        String contentUrl = segment.substring(1, classAt - 2);
        int idBegin = contentUrl.lastIndexOf("-") + 1;
        int idEnd = contentUrl.lastIndexOf("_");
        if (idEnd < idBegin) {
            continue;
        }
        String id = contentUrl.substring(idBegin, idEnd);

        // Check for an existing row first so the article body is only fetched when it will be stored.
        if (!newsRepository.findAll(QSiteNewsPO.siteNewsPO.sourceId.eq(id)).isEmpty()) {
            continue;
        }

        SiteNews siteNews = new SiteNews();
        siteNews.setAddTime(new Date());
        siteNews.setSourceId(id);
        siteNews.setTitle(title);
        siteNews.setDescription(time);
        siteNews.setImgurl(imgUrl);
        siteNews.setContent(stockService.getNewsInfo(contentUrl));
        newsRepository.save(siteNews);
        saved.add(siteNews);
    }
    return ServerResponse.createBySuccess(saved);
}

/**
 * Returns the text between two markers, or {@code null} when it cannot be extracted.
 *
 * @param text        segment to search
 * @param startMarker marker whose first occurrence anchors the start
 * @param startSkip   number of characters to skip from the start marker's position
 * @param endMarker   marker whose first occurrence anchors the end
 * @param endTrim     number of characters to drop in front of the end marker's position
 * @return the extracted text, or {@code null} if a marker is missing or the range is empty-negative
 */
private static String sliceBetween(String text, String startMarker, int startSkip,
                                   String endMarker, int endTrim) {
    int startAt = text.indexOf(startMarker);
    int endAt = text.indexOf(endMarker);
    if (startAt < 0 || endAt < 0) {
        return null;
    }
    int begin = startAt + startSkip;
    int end = endAt - endTrim;
    if (begin > end) {
        return null;
    }
    return text.substring(begin, end);
}
@RequestMapping({"getINDNewsInfo.do"})