update news

This commit is contained in:
vu-tran
2025-07-04 13:41:21 +07:00
parent 309516c40b
commit fc74d7c33f

View File

@@ -16,6 +16,10 @@ import com.alibaba.fastjson.JSONObject;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
@@ -28,11 +32,12 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.client.RestTemplate;
import javax.annotation.PostConstruct;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -183,7 +188,7 @@ public class InvestingTask {
}
/*德国新闻接口*/
@Scheduled(cron = "0 0 0/3 * * ?")
// @Scheduled(cron = "0 0 0/3 * * ?")
public void saveGerNews() {
log.info("德国股票新闻数据同步开始");
int savedCount = 0;
@@ -304,6 +309,88 @@ public class InvestingTask {
}
}
/** Pattern applied to the {@code datetime} attribute of list-page {@code <time>} tags. */
private static final DateTimeFormatter BOERSE_DATETIME_FORMAT =
        DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm");

/** Zone used to interpret the zone-less timestamps read from the list page. */
private static final ZoneId BOERSE_ZONE = ZoneId.of("Europe/Berlin");

/**
 * Scrapes the first news list page of boerse-online.de, downloads each linked
 * article's body and stores one {@code SiteNews} row per article through
 * {@code newsRepository}.
 *
 * <p>Failures are isolated per article: an unparsable timestamp, an unreachable
 * detail page or a rejected save is logged and the loop continues with the next
 * article. List entries without a headline link are skipped, because they would
 * otherwise be stored with a null title and a null source id.
 *
 * <p>NOTE(review): the method is annotated with both {@code @Scheduled} and
 * {@code @PostConstruct}, so it also runs while the bean is being initialised and
 * performs blocking HTTP calls at that point - confirm this is intended.
 * NOTE(review): no duplicate check is done here; the same article is passed to
 * {@code save} on every run - presumably uniqueness of the source id is enforced
 * by the repository or the database, verify.
 */
@Scheduled(cron = "0 0 0/3 * * ?")
@PostConstruct
public void getBoerseNews() {
    String url_request = "https://www.boerse-online.de";
    String listUrl = url_request + "/nachrichten/1";
    try {
        Document doc = Jsoup.connect(listUrl)
                .userAgent("Mozilla/5.0")
                .get();
        Elements articles = doc.select("article.article-list-item");
        for (Element article : articles) {
            Element aTag = article.selectFirst("h2 a");
            String href = aTag != null ? aTag.attr("href").trim() : "";
            if (href.isEmpty()) {
                // Without a link there is neither a source id nor a detail page to fetch.
                log.warn("Skipping Boerse list entry without headline link");
                continue;
            }
            String title = aTag.text().trim();
            // Only prefix the site root for relative links; absolute links are used as-is.
            String link = href.startsWith("http") ? href : url_request + href;

            Element imgTag = article.selectFirst("figure img");
            String image = imgTag != null ? imgTag.attr("data-src") : null;

            Date publishedDate = parseBoersePublishedDate(
                    article.selectFirst("small.article-info time"));

            String htmlContent = fetchBoerseArticleHtml(link);

            SiteNews siteNews = new SiteNews();
            siteNews.setAddTime(new Date());
            siteNews.setSourceId(link);
            siteNews.setTitle(title);
            siteNews.setSourceName("BOERSE");
            siteNews.setDescription(title);
            siteNews.setImgurl(image);
            siteNews.setContent(htmlContent);
            siteNews.setStatus(1);
            siteNews.setType(1); // Set as financial news type
            siteNews.setViews(0);
            siteNews.setShowTime(publishedDate);
            try {
                newsRepository.save(siteNews);
                log.info("Saved German news : {}", title);
            } catch (Exception e) {
                log.warn("Failed to save German news {}: {}", link, e.getMessage());
            }
        }
    } catch (Exception e) {
        // Broad catch on purpose: this runs from the scheduler and from @PostConstruct,
        // so nothing may escape. The throwable is passed to the logger to keep the stack trace.
        log.error("Error fetching Boerse news list {}: {}", listUrl, e.getMessage(), e);
    }
}

/**
 * Converts the {@code datetime} attribute of a list-page {@code <time>} element
 * into a {@link Date}, interpreting the value in the Europe/Berlin zone.
 *
 * @param timeTag the {@code <time>} element, or {@code null} when the article has none
 * @return the publication instant, or {@code null} when the element is missing or
 *         its attribute is empty or does not match {@code yyyy-MM-dd HH:mm}
 */
private Date parseBoersePublishedDate(Element timeTag) {
    if (timeTag == null) {
        return null;
    }
    String datetimeAttr = timeTag.attr("datetime").trim();
    if (datetimeAttr.isEmpty()) {
        return null;
    }
    try {
        LocalDateTime dateTime = LocalDateTime.parse(datetimeAttr, BOERSE_DATETIME_FORMAT);
        return Date.from(dateTime.atZone(BOERSE_ZONE).toInstant());
    } catch (java.time.format.DateTimeParseException e) {
        // A single malformed timestamp must not abort the whole sync run.
        log.warn("Unparsable Boerse publish time '{}'", datetimeAttr);
        return null;
    }
}

/**
 * Downloads an article detail page and returns the inner HTML of its
 * {@code div.article-body} element.
 *
 * @param link absolute URL of the article detail page
 * @return the inner HTML of the article body, or an empty string when the page
 *         cannot be fetched or contains no such element
 */
private String fetchBoerseArticleHtml(String link) {
    try {
        Document detailPage = Jsoup.connect(link)
                .userAgent("Mozilla/5.0")
                .get();
        Element body = detailPage.selectFirst("div.article-body");
        return body != null ? body.html() : "";
    } catch (Exception e) {
        // Best effort: the article is still stored, just without content.
        log.warn("Error fetching article detail: {}", link, e);
        return "";
    }
}
/**
* Test method to manually trigger German news sync
* This can be called via REST API or scheduled task