From 7caaccb580cefc65ae5d57c3cc809b71f16fe992 Mon Sep 17 00:00:00 2001 From: vu-tran Date: Tue, 5 Aug 2025 15:51:59 +0700 Subject: [PATCH] update get news --- .../infrastructure/job/InvestingTask.java | 90 +++++++++++-------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/src/main/java/cn/stock/market/infrastructure/job/InvestingTask.java b/src/main/java/cn/stock/market/infrastructure/job/InvestingTask.java index d807405..ad74eca 100644 --- a/src/main/java/cn/stock/market/infrastructure/job/InvestingTask.java +++ b/src/main/java/cn/stock/market/infrastructure/job/InvestingTask.java @@ -36,6 +36,7 @@ import javax.annotation.PostConstruct; import java.io.IOException; import java.time.LocalDateTime; import java.time.ZoneId; +import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.*; import java.util.concurrent.TimeUnit; @@ -310,85 +311,98 @@ public class InvestingTask { } @Scheduled(cron = "0 0 0/3 * * ?") -// @PostConstruct - public void getBoerseNews(){ - String url_request = "https://www.boerse-online.de"; + @PostConstruct + public void getCincoDiasNews() { + String baseUrl = "https://cincodias.elpais.com"; try { List results = new ArrayList<>(); - - String listUrl = url_request + "/nachrichten/1"; + String listUrl = baseUrl + "/ultimas-noticias/"; Document doc = Jsoup.connect(listUrl) .userAgent("Mozilla/5.0") .get(); - Elements articles = doc.select("article.article-list-item"); + Elements articles = doc.select("article.c"); for (Element article : articles) { - Element aTag = article.selectFirst("h2 a"); + // Title and Link + Element aTag = article.selectFirst("h2.c_t a"); String title = aTag != null ? aTag.text().trim() : null; - String link = aTag != null ? url_request + aTag.attr("href") : null; + String link = aTag != null ? aTag.absUrl("href") : null; - Element imgTag = article.selectFirst("figure a picture img"); - String image = imgTag != null ? imgTag.attr("src") : null; - - Element timeTag = article.selectFirst("small.article-info time"); - Date publishedDate = null; - - if (timeTag != null) { - String datetimeAttr = timeTag.attr("datetime"); - DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm"); - LocalDateTime dateTime = LocalDateTime.parse(datetimeAttr, formatter); - ZoneId berlinZone = ZoneId.of("Europe/Berlin"); - publishedDate = Date.from(dateTime.atZone(berlinZone).toInstant()); - } - - Element authorTag = article.selectFirst("small.article-info strong"); + // Author + Element authorTag = article.selectFirst("a.c_a_a"); String author = authorTag != null ? authorTag.text().trim() : null; - // Fetch article detail page + Element figure = article.selectFirst("figure.c_m a img"); + + String imageUrl = null; + if (figure != null) { + imageUrl = figure.attr("src"); + } + // Date + Date publishedDate = null; + try { + Element timeTag = article.selectFirst("time"); + if (timeTag != null) { + String datetimeAttr = timeTag.attr("datetime"); + DateTimeFormatter formatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME; + ZonedDateTime zonedDateTime = ZonedDateTime.parse(datetimeAttr, formatter); + publishedDate = Date.from(zonedDateTime.toInstant()); + } + } catch (Exception e) { + log.warn("Failed to parse published date for article: {}", link); + } + + // Summary + String summary = article.selectFirst("p.c_d") != null ? article.selectFirst("p.c_d").text() : null; + + // Optional: Get full content from article detail page String htmlContent = ""; if (link != null) { try { - Document detailPage = Jsoup.connect(link) + Document detailDoc = Jsoup.connect(link) .userAgent("Mozilla/5.0") .get(); - Element body = detailPage.selectFirst("div.article-body"); + // ✅ Extract article main content + Element body = detailDoc.selectFirst("div.a_c.clearfix[data-dtm-region=articulo_cuerpo]"); if (body != null) { - htmlContent = body.html(); // ✅ inner HTML only + htmlContent = body.html(); } } catch (Exception e) { - System.err.println("Error fetching article detail: " + link); + log.warn("Error fetching detail page: {}", link); e.printStackTrace(); } } + + // Build SiteNews object SiteNews siteNews = new SiteNews(); siteNews.setAddTime(new Date()); siteNews.setSourceId(link); siteNews.setTitle(title); - siteNews.setSourceName("BOERSE"); - siteNews.setDescription(title); - siteNews.setImgurl(image); + siteNews.setSourceName("CINCO_DIAS"); + siteNews.setDescription(summary != null ? summary : title); + siteNews.setImgurl(imageUrl); siteNews.setContent(htmlContent); siteNews.setStatus(1); - siteNews.setType(1); // Set as financial news type + siteNews.setType(1); siteNews.setViews(0); siteNews.setShowTime(publishedDate); + try { newsRepository.save(siteNews); - log.info("Saved German news : {}", title); + log.info("Saved Spanish news: {}", title); } catch (Exception e) { - log.warn("Failed to save German news {}: {}", link, e.getMessage()); + log.warn("Failed to save Spanish news {}: {}", link, e.getMessage()); } } - }catch (Exception e){ - log.error("Error fetching article detail: {}", e.getMessage()); + + } catch (Exception e) { + log.error("Error fetching Spanish news: {}", e.getMessage()); e.printStackTrace(); } - - } /**