update get news

This commit is contained in:
vu-tran
2025-08-05 15:51:59 +07:00
parent 7deabedfa3
commit 7caaccb580

View File

@@ -36,6 +36,7 @@ import javax.annotation.PostConstruct;
import java.io.IOException; import java.io.IOException;
import java.time.LocalDateTime; import java.time.LocalDateTime;
import java.time.ZoneId; import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.util.*; import java.util.*;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@@ -310,85 +311,98 @@ public class InvestingTask {
} }
@Scheduled(cron = "0 0 0/3 * * ?") @Scheduled(cron = "0 0 0/3 * * ?")
// @PostConstruct @PostConstruct
public void getBoerseNews(){ public void getCincoDiasNews() {
String url_request = "https://www.boerse-online.de"; String baseUrl = "https://cincodias.elpais.com";
try { try {
List<SiteNews> results = new ArrayList<>(); List<SiteNews> results = new ArrayList<>();
String listUrl = baseUrl + "/ultimas-noticias/";
String listUrl = url_request + "/nachrichten/1";
Document doc = Jsoup.connect(listUrl) Document doc = Jsoup.connect(listUrl)
.userAgent("Mozilla/5.0") .userAgent("Mozilla/5.0")
.get(); .get();
Elements articles = doc.select("article.article-list-item"); Elements articles = doc.select("article.c");
for (Element article : articles) { for (Element article : articles) {
Element aTag = article.selectFirst("h2 a"); // Title and Link
Element aTag = article.selectFirst("h2.c_t a");
String title = aTag != null ? aTag.text().trim() : null; String title = aTag != null ? aTag.text().trim() : null;
String link = aTag != null ? url_request + aTag.attr("href") : null; String link = aTag != null ? aTag.absUrl("href") : null;
Element imgTag = article.selectFirst("figure a picture img"); // Author
String image = imgTag != null ? imgTag.attr("src") : null; Element authorTag = article.selectFirst("a.c_a_a");
Element timeTag = article.selectFirst("small.article-info time");
Date publishedDate = null;
if (timeTag != null) {
String datetimeAttr = timeTag.attr("datetime");
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm");
LocalDateTime dateTime = LocalDateTime.parse(datetimeAttr, formatter);
ZoneId berlinZone = ZoneId.of("Europe/Berlin");
publishedDate = Date.from(dateTime.atZone(berlinZone).toInstant());
}
Element authorTag = article.selectFirst("small.article-info strong");
String author = authorTag != null ? authorTag.text().trim() : null; String author = authorTag != null ? authorTag.text().trim() : null;
// Fetch article detail page Element figure = article.selectFirst("figure.c_m a img");
String imageUrl = null;
if (figure != null) {
imageUrl = figure.attr("src");
}
// Date
Date publishedDate = null;
try {
Element timeTag = article.selectFirst("time");
if (timeTag != null) {
String datetimeAttr = timeTag.attr("datetime");
DateTimeFormatter formatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
ZonedDateTime zonedDateTime = ZonedDateTime.parse(datetimeAttr, formatter);
publishedDate = Date.from(zonedDateTime.toInstant());
}
} catch (Exception e) {
log.warn("Failed to parse published date for article: {}", link);
}
// Summary
String summary = article.selectFirst("p.c_d") != null ? article.selectFirst("p.c_d").text() : null;
// Optional: Get full content from article detail page
String htmlContent = ""; String htmlContent = "";
if (link != null) { if (link != null) {
try { try {
Document detailPage = Jsoup.connect(link) Document detailDoc = Jsoup.connect(link)
.userAgent("Mozilla/5.0") .userAgent("Mozilla/5.0")
.get(); .get();
Element body = detailPage.selectFirst("div.article-body"); // ✅ Extract article main content
Element body = detailDoc.selectFirst("div.a_c.clearfix[data-dtm-region=articulo_cuerpo]");
if (body != null) { if (body != null) {
htmlContent = body.html(); // ✅ inner HTML only htmlContent = body.html();
} }
} catch (Exception e) { } catch (Exception e) {
System.err.println("Error fetching article detail: " + link); log.warn("Error fetching detail page: {}", link);
e.printStackTrace(); e.printStackTrace();
} }
} }
// Build SiteNews object
SiteNews siteNews = new SiteNews(); SiteNews siteNews = new SiteNews();
siteNews.setAddTime(new Date()); siteNews.setAddTime(new Date());
siteNews.setSourceId(link); siteNews.setSourceId(link);
siteNews.setTitle(title); siteNews.setTitle(title);
siteNews.setSourceName("BOERSE"); siteNews.setSourceName("CINCO_DIAS");
siteNews.setDescription(title); siteNews.setDescription(summary != null ? summary : title);
siteNews.setImgurl(image); siteNews.setImgurl(imageUrl);
siteNews.setContent(htmlContent); siteNews.setContent(htmlContent);
siteNews.setStatus(1); siteNews.setStatus(1);
siteNews.setType(1); // Set as financial news type siteNews.setType(1);
siteNews.setViews(0); siteNews.setViews(0);
siteNews.setShowTime(publishedDate); siteNews.setShowTime(publishedDate);
try { try {
newsRepository.save(siteNews); newsRepository.save(siteNews);
log.info("Saved German news : {}", title); log.info("Saved Spanish news: {}", title);
} catch (Exception e) { } catch (Exception e) {
log.warn("Failed to save German news {}: {}", link, e.getMessage()); log.warn("Failed to save Spanish news {}: {}", link, e.getMessage());
} }
} }
}catch (Exception e){
log.error("Error fetching article detail: {}", e.getMessage()); } catch (Exception e) {
log.error("Error fetching Spanish news: {}", e.getMessage());
e.printStackTrace(); e.printStackTrace();
} }
} }
/** /**