update get news

This commit is contained in:
vu-tran
2025-08-05 15:51:59 +07:00
parent 7deabedfa3
commit 7caaccb580

View File

@@ -36,6 +36,7 @@ import javax.annotation.PostConstruct;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.TimeUnit;
@@ -310,85 +311,98 @@ public class InvestingTask {
}
// NOTE(review): this span is a rendered diff hunk ("@@ -310,85 +311,98 @@") in which the
// removed and the added lines are interleaved without +/- markers. It therefore contains two
// method headers (getBoerseNews and getCincoDiasNews) and several duplicated local
// declarations; it is not compilable as-is. Lines referring to boerse-online.de /
// url_request / "German news" appear to be the removed side, lines referring to
// cincodias.elpais.com / baseUrl / "Spanish news" the added side — confirm against the
// actual commit before editing.
//
// Purpose (as far as the visible code shows): scrape a news listing page with Jsoup, and for
// each <article> element build a SiteNews (title, link, image, published date, description,
// detail-page HTML) and persist it through newsRepository.save(...).
//
// Cron "0 0 0/3 * * ?": second 0, minute 0, every third hour starting at hour 0
// (presumably Spring's six-field @Scheduled cron syntax — TODO confirm).
@Scheduled(cron = "0 0 0/3 * * ?")
// @PostConstruct
public void getBoerseNews(){
String url_request = "https://www.boerse-online.de";
// NOTE(review): @PostConstruct is active (not commented out) on this side of the diff, so the
// scrape presumably also runs once when the bean is initialised — confirm this is intended
// and not a leftover from local testing.
@PostConstruct
public void getCincoDiasNews() {
String baseUrl = "https://cincodias.elpais.com";
try {
// NOTE(review): `results` is never read or added to in the visible code.
List<SiteNews> results = new ArrayList<>();
String listUrl = url_request + "/nachrichten/1";
String listUrl = baseUrl + "/ultimas-noticias/";
// Download and parse the listing page; any failure here is handled by the outer catch.
Document doc = Jsoup.connect(listUrl)
.userAgent("Mozilla/5.0")
.get();
Elements articles = doc.select("article.article-list-item");
Elements articles = doc.select("article.c");
for (Element article : articles) {
Element aTag = article.selectFirst("h2 a");
// Title and Link
Element aTag = article.selectFirst("h2.c_t a");
// title and link are both null when the article has no matching headline anchor.
String title = aTag != null ? aTag.text().trim() : null;
String link = aTag != null ? url_request + aTag.attr("href") : null;
// absUrl resolves the href against the document's base URI; per the jsoup docs it yields
// an empty string (not null) when no absolute URL can be built — verify downstream handling.
String link = aTag != null ? aTag.absUrl("href") : null;
Element imgTag = article.selectFirst("figure a picture img");
String image = imgTag != null ? imgTag.attr("src") : null;
Element timeTag = article.selectFirst("small.article-info time");
Date publishedDate = null;
if (timeTag != null) {
String datetimeAttr = timeTag.attr("datetime");
// This variant parses a zone-less "yyyy-MM-dd HH:mm" value and interprets it in Europe/Berlin.
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm");
LocalDateTime dateTime = LocalDateTime.parse(datetimeAttr, formatter);
ZoneId berlinZone = ZoneId.of("Europe/Berlin");
publishedDate = Date.from(dateTime.atZone(berlinZone).toInstant());
}
Element authorTag = article.selectFirst("small.article-info strong");
// Author
Element authorTag = article.selectFirst("a.c_a_a");
// NOTE(review): `author` is computed but never stored on the SiteNews in the visible code.
String author = authorTag != null ? authorTag.text().trim() : null;
// Fetch article detail page
Element figure = article.selectFirst("figure.c_m a img");
String imageUrl = null;
if (figure != null) {
// Raw attribute value (attr, not absUrl), so it is stored exactly as written in the page.
imageUrl = figure.attr("src");
}
// Date
Date publishedDate = null;
// Date parsing is best-effort: on any failure publishedDate stays null and only a warning is logged.
try {
Element timeTag = article.selectFirst("time");
if (timeTag != null) {
String datetimeAttr = timeTag.attr("datetime");
// ISO_OFFSET_DATE_TIME expects an explicit UTC offset, e.g. 2011-12-03T10:15:30+01:00.
DateTimeFormatter formatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
ZonedDateTime zonedDateTime = ZonedDateTime.parse(datetimeAttr, formatter);
publishedDate = Date.from(zonedDateTime.toInstant());
}
} catch (Exception e) {
log.warn("Failed to parse published date for article: {}", link);
}
// Summary
// NOTE(review): selectFirst("p.c_d") is evaluated twice; result could be held in a local.
String summary = article.selectFirst("p.c_d") != null ? article.selectFirst("p.c_d").text() : null;
// Optional: Get full content from article detail page
// htmlContent stays "" when link is null, the fetch fails, or the body selector matches nothing.
String htmlContent = "";
if (link != null) {
try {
Document detailPage = Jsoup.connect(link)
Document detailDoc = Jsoup.connect(link)
.userAgent("Mozilla/5.0")
.get();
Element body = detailPage.selectFirst("div.article-body");
// ✅ Extract article main content
Element body = detailDoc.selectFirst("div.a_c.clearfix[data-dtm-region=articulo_cuerpo]");
if (body != null) {
htmlContent = body.html(); // ✅ inner HTML only
htmlContent = body.html();
}
} catch (Exception e) {
// A failed detail fetch does not abort the article; it is still saved with empty content.
System.err.println("Error fetching article detail: " + link);
log.warn("Error fetching detail page: {}", link);
// NOTE(review): printStackTrace bypasses the logger; consider passing `e` to log.warn instead.
e.printStackTrace();
}
}
// Build SiteNews object
SiteNews siteNews = new SiteNews();
siteNews.setAddTime(new Date());
// NOTE(review): link may be null here (no headline anchor); the record is still saved with a
// null sourceId. Whether sourceId is used for de-duplication is not visible — verify.
siteNews.setSourceId(link);
siteNews.setTitle(title);
siteNews.setSourceName("BOERSE");
siteNews.setDescription(title);
siteNews.setImgurl(image);
siteNews.setSourceName("CINCO_DIAS");
// Falls back to the title when the listing entry has no summary paragraph.
siteNews.setDescription(summary != null ? summary : title);
siteNews.setImgurl(imageUrl);
siteNews.setContent(htmlContent);
siteNews.setStatus(1);
siteNews.setType(1); // Set as financial news type
siteNews.setType(1);
siteNews.setViews(0);
// showTime is the parsed publication date, or null when it was absent or unparseable.
siteNews.setShowTime(publishedDate);
// Save failures are logged per article and do not stop the loop.
try {
newsRepository.save(siteNews);
log.info("Saved German news : {}", title);
log.info("Saved Spanish news: {}", title);
} catch (Exception e) {
log.warn("Failed to save German news {}: {}", link, e.getMessage());
log.warn("Failed to save Spanish news {}: {}", link, e.getMessage());
}
}
} catch (Exception e) {
// Reached when the listing page itself cannot be fetched/parsed (or on any uncaught error
// inside the loop); the whole run ends here.
log.error("Error fetching article detail: {}", e.getMessage());
log.error("Error fetching Spanish news: {}", e.getMessage());
e.printStackTrace();
}
}
/**