爬取新闻数据测试

This commit is contained in:
dengli
2023-12-19 19:46:46 +08:00
parent 2ef3844fbb
commit 72517213a0
4 changed files with 74 additions and 21 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,7 @@
package cn.stock.market.infrastructure.job;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
@@ -10,6 +11,11 @@ import java.util.concurrent.TimeUnit;
import javax.annotation.PostConstruct;
import cn.stock.market.domain.basic.entity.SiteNews;
import cn.stock.market.domain.basic.repository.SiteNewsRepository;
import cn.stock.market.domain.basic.service.SiteNewsService;
import cn.stock.market.domain.basic.service.StockService;
import cn.stock.market.infrastructure.db.po.QSiteNewsPO;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
@@ -41,6 +47,8 @@ import lombok.extern.slf4j.Slf4j;
public class StockTask {
@Autowired RealtimeRepository realtimeRepository;
@Autowired StockRepository stockRepository;
@Autowired StockService stockService;
@Autowired SiteNewsRepository newsRepository;
ThreadPoolExecutor pool;
@PostConstruct
@@ -339,4 +347,31 @@ public class StockTask {
//
// log.info("syncAStockList执行, 受影响数{}, 耗时:{}毫秒", count, stopwatch.elapsed(TimeUnit.MILLISECONDS));
// }
/**
 * News interface: scheduled job that crawls the news list page and stores unseen articles.
 *
 * <p>Triggered by the cron expression {@code 0 0 1 * * ?}. The HTML returned by
 * {@code stockService.getNews()} is split on {@code <a href=}; each fragment that looks like a
 * news card is parsed with fixed string offsets into a {@link SiteNews} and saved when no row
 * with the same {@code sourceId} exists yet.
 *
 * <p>Fragments that are not news cards (for example the text preceding the first anchor) are
 * skipped, and a failure on one fragment is logged and does not stop the remaining ones.
 */
@Scheduled(cron = "0 0 1 * * ?")
public void saveStockNews() {
    String news = stockService.getNews();
    if (news == null || news.isEmpty()) {
        log.warn("saveStockNews: news list page is empty, nothing to parse");
        return;
    }
    for (String n : news.split("<a href=")) {
        // split() always yields the text before the first anchor as element 0, and other
        // anchors on the page may not be news cards; without this marker every indexOf()
        // below returns -1 and substring() would throw.
        if (!n.contains("class=\"img-smllnews\"")) {
            continue;
        }
        try {
            // NOTE(review): the numeric offsets below are tied to the exact markup of the
            // crawled page and are kept as-is — confirm against the live HTML if parsing drifts.
            String contentUrl = n.substring(1, n.indexOf("class=\"img-smllnews\"") - 2);
            String id = contentUrl.substring(contentUrl.lastIndexOf("-") + 1, contentUrl.lastIndexOf("_"));

            // Check for an existing row first so the article body is not fetched for duplicates.
            List<SiteNews> existing = newsRepository.findAll(QSiteNewsPO.siteNewsPO.sourceId.eq(id));
            if (!existing.isEmpty()) {
                continue;
            }

            String imgUrl = n.substring(n.indexOf("img src=") + 9, n.indexOf("?"));
            String time = n.substring(n.indexOf("Last Updated") + 23, n.indexOf("IST") - 9);
            String title = n.substring(n.indexOf("html\">") + 6, n.indexOf("<div class=\"short-video-img\">") - 47);

            SiteNews siteNews = new SiteNews();
            siteNews.setAddTime(new Date());
            siteNews.setSourceId(id);
            siteNews.setTitle(title);
            // The "Last Updated" timestamp text is stored in the description field.
            siteNews.setDescription(time);
            siteNews.setImgurl(imgUrl);
            siteNews.setContent(stockService.getNewsInfo(contentUrl));
            newsRepository.save(siteNews);
        } catch (RuntimeException e) {
            // Job boundary: one malformed or unreachable article must not abort the whole run.
            log.warn("saveStockNews: skipping news fragment that could not be processed", e);
        }
    }
}
}