爬取新闻数据测试

This commit is contained in:
dengli
2023-12-19 19:46:46 +08:00
parent 2ef3844fbb
commit 72517213a0
4 changed files with 74 additions and 21 deletions

View File

@@ -1,15 +1,15 @@
package cn.stock.market.web;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import javax.servlet.http.HttpServletRequest;
import cn.stock.market.domain.basic.entity.SiteNews;
import cn.stock.market.domain.basic.repository.SiteNewsRepository;
import cn.stock.market.infrastructure.db.po.QSiteNewsPO;
import cn.stock.market.web.annotations.EncryptFilter;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiImplicitParam;
@@ -65,6 +65,8 @@ public class StockApiController {
// Service used by getINDNews to fetch the news listing (getNews) and the article content (getNewsInfo).
@Autowired
StockService stockService;
// Repository used by getINDNews to look up SiteNews records by sourceId and to save new ones.
@Autowired
SiteNewsRepository newsRepository;
@RequestMapping({"getRawSinaStock.do"})
@ResponseBody
@@ -150,7 +152,28 @@ public class StockApiController {
/**
 * Fetches the raw news listing from {@code stockService}, stores every entry whose source id
 * is not yet present in {@code newsRepository}, and returns the raw listing to the caller.
 *
 * <p>The listing is split on {@code <a href=}; each fragment is handed to
 * {@link #saveNewsFragment(String)}. Fragments that do not match the expected markup are
 * skipped so that one bad fragment does not abort the whole request.
 *
 * <p>NOTE(review): the Swagger summary below reads "Indian hot stock list" although this
 * handler deals with news — confirm whether the text should be updated.
 *
 * @return a success response wrapping the value returned by {@code stockService.getNews()}
 */
@ApiOperation(value = "印度热门股票列表", httpMethod = "GET")
@ResponseBody
public ServerResponse getINDNews() {
    String news = stockService.getNews();
    if (news != null) {
        for (String fragment : news.split("<a href=")) {
            saveNewsFragment(fragment);
        }
    }
    // Keep answering with the raw listing so clients of this endpoint still get a body.
    return ServerResponse.createBySuccess(news);
}

/**
 * Parses one fragment of the news listing and saves it when no record with the same
 * source id exists yet.
 *
 * <p>The numeric offsets are tied to the markup of the scraped page; they are kept exactly
 * as they were so the extracted values do not change.
 *
 * @param fragment text between two {@code <a href=} separators of the listing
 */
private void saveNewsFragment(String fragment) {
    String contentUrl;
    String id;
    String imgUrl;
    String time;
    String title;
    try {
        contentUrl = fragment.substring(1, fragment.indexOf("class=\"img-smllnews\"") - 2);
        id = contentUrl.substring(contentUrl.lastIndexOf("-") + 1, contentUrl.lastIndexOf("_"));
        imgUrl = fragment.substring(fragment.indexOf("img src=") + 9, fragment.indexOf("?"));
        time = fragment.substring(fragment.indexOf("Last Updated") + 23, fragment.indexOf("IST") - 9);
        title = fragment.substring(fragment.indexOf("html\">") + 6,
                fragment.indexOf("<div class=\"short-video-img\">") - 47);
    } catch (IndexOutOfBoundsException ignored) {
        // A marker is missing or out of order (always true for the text before the first
        // anchor), so this fragment is not a news entry; skip it.
        return;
    }

    // Check for an existing record first so the article body is only fetched for new entries.
    List<SiteNews> existing = newsRepository.findAll(QSiteNewsPO.siteNewsPO.sourceId.eq(id));
    if (!existing.isEmpty()) {
        return;
    }

    SiteNews siteNews = new SiteNews();
    siteNews.setAddTime(new Date());
    siteNews.setSourceId(id);
    siteNews.setTitle(title);
    // The "Last Updated" timestamp text is stored in the description field, as before.
    siteNews.setDescription(time);
    siteNews.setImgurl(imgUrl);
    siteNews.setContent(stockService.getNewsInfo(contentUrl));
    newsRepository.save(siteNews);
}
@RequestMapping({"getINDNewsInfo.do"})