update get news
This commit is contained in:
@@ -36,6 +36,7 @@ import javax.annotation.PostConstruct;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.time.ZoneId;
|
import java.time.ZoneId;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
@@ -310,85 +311,98 @@ public class InvestingTask {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Scheduled(cron = "0 0 0/3 * * ?")
|
@Scheduled(cron = "0 0 0/3 * * ?")
|
||||||
// @PostConstruct
|
@PostConstruct
|
||||||
public void getBoerseNews(){
|
public void getCincoDiasNews() {
|
||||||
String url_request = "https://www.boerse-online.de";
|
String baseUrl = "https://cincodias.elpais.com";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
List<SiteNews> results = new ArrayList<>();
|
List<SiteNews> results = new ArrayList<>();
|
||||||
|
String listUrl = baseUrl + "/ultimas-noticias/";
|
||||||
String listUrl = url_request + "/nachrichten/1";
|
|
||||||
Document doc = Jsoup.connect(listUrl)
|
Document doc = Jsoup.connect(listUrl)
|
||||||
.userAgent("Mozilla/5.0")
|
.userAgent("Mozilla/5.0")
|
||||||
.get();
|
.get();
|
||||||
|
|
||||||
Elements articles = doc.select("article.article-list-item");
|
Elements articles = doc.select("article.c");
|
||||||
|
|
||||||
for (Element article : articles) {
|
for (Element article : articles) {
|
||||||
Element aTag = article.selectFirst("h2 a");
|
// Title and Link
|
||||||
|
Element aTag = article.selectFirst("h2.c_t a");
|
||||||
String title = aTag != null ? aTag.text().trim() : null;
|
String title = aTag != null ? aTag.text().trim() : null;
|
||||||
String link = aTag != null ? url_request + aTag.attr("href") : null;
|
String link = aTag != null ? aTag.absUrl("href") : null;
|
||||||
|
|
||||||
Element imgTag = article.selectFirst("figure a picture img");
|
// Author
|
||||||
String image = imgTag != null ? imgTag.attr("src") : null;
|
Element authorTag = article.selectFirst("a.c_a_a");
|
||||||
|
|
||||||
Element timeTag = article.selectFirst("small.article-info time");
|
|
||||||
Date publishedDate = null;
|
|
||||||
|
|
||||||
if (timeTag != null) {
|
|
||||||
String datetimeAttr = timeTag.attr("datetime");
|
|
||||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm");
|
|
||||||
LocalDateTime dateTime = LocalDateTime.parse(datetimeAttr, formatter);
|
|
||||||
ZoneId berlinZone = ZoneId.of("Europe/Berlin");
|
|
||||||
publishedDate = Date.from(dateTime.atZone(berlinZone).toInstant());
|
|
||||||
}
|
|
||||||
|
|
||||||
Element authorTag = article.selectFirst("small.article-info strong");
|
|
||||||
String author = authorTag != null ? authorTag.text().trim() : null;
|
String author = authorTag != null ? authorTag.text().trim() : null;
|
||||||
|
|
||||||
// Fetch article detail page
|
Element figure = article.selectFirst("figure.c_m a img");
|
||||||
|
|
||||||
|
String imageUrl = null;
|
||||||
|
if (figure != null) {
|
||||||
|
imageUrl = figure.attr("src");
|
||||||
|
}
|
||||||
|
// Date
|
||||||
|
Date publishedDate = null;
|
||||||
|
try {
|
||||||
|
Element timeTag = article.selectFirst("time");
|
||||||
|
if (timeTag != null) {
|
||||||
|
String datetimeAttr = timeTag.attr("datetime");
|
||||||
|
DateTimeFormatter formatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
|
||||||
|
ZonedDateTime zonedDateTime = ZonedDateTime.parse(datetimeAttr, formatter);
|
||||||
|
publishedDate = Date.from(zonedDateTime.toInstant());
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to parse published date for article: {}", link);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Summary
|
||||||
|
String summary = article.selectFirst("p.c_d") != null ? article.selectFirst("p.c_d").text() : null;
|
||||||
|
|
||||||
|
// Optional: Get full content from article detail page
|
||||||
String htmlContent = "";
|
String htmlContent = "";
|
||||||
if (link != null) {
|
if (link != null) {
|
||||||
try {
|
try {
|
||||||
Document detailPage = Jsoup.connect(link)
|
Document detailDoc = Jsoup.connect(link)
|
||||||
.userAgent("Mozilla/5.0")
|
.userAgent("Mozilla/5.0")
|
||||||
.get();
|
.get();
|
||||||
|
|
||||||
Element body = detailPage.selectFirst("div.article-body");
|
// ✅ Extract article main content
|
||||||
|
Element body = detailDoc.selectFirst("div.a_c.clearfix[data-dtm-region=articulo_cuerpo]");
|
||||||
if (body != null) {
|
if (body != null) {
|
||||||
htmlContent = body.html(); // ✅ inner HTML only
|
htmlContent = body.html();
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
System.err.println("Error fetching article detail: " + link);
|
log.warn("Error fetching detail page: {}", link);
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build SiteNews object
|
||||||
SiteNews siteNews = new SiteNews();
|
SiteNews siteNews = new SiteNews();
|
||||||
siteNews.setAddTime(new Date());
|
siteNews.setAddTime(new Date());
|
||||||
siteNews.setSourceId(link);
|
siteNews.setSourceId(link);
|
||||||
siteNews.setTitle(title);
|
siteNews.setTitle(title);
|
||||||
siteNews.setSourceName("BOERSE");
|
siteNews.setSourceName("CINCO_DIAS");
|
||||||
siteNews.setDescription(title);
|
siteNews.setDescription(summary != null ? summary : title);
|
||||||
siteNews.setImgurl(image);
|
siteNews.setImgurl(imageUrl);
|
||||||
siteNews.setContent(htmlContent);
|
siteNews.setContent(htmlContent);
|
||||||
siteNews.setStatus(1);
|
siteNews.setStatus(1);
|
||||||
siteNews.setType(1); // Set as financial news type
|
siteNews.setType(1);
|
||||||
siteNews.setViews(0);
|
siteNews.setViews(0);
|
||||||
siteNews.setShowTime(publishedDate);
|
siteNews.setShowTime(publishedDate);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
newsRepository.save(siteNews);
|
newsRepository.save(siteNews);
|
||||||
log.info("Saved German news : {}", title);
|
log.info("Saved Spanish news: {}", title);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.warn("Failed to save German news {}: {}", link, e.getMessage());
|
log.warn("Failed to save Spanish news {}: {}", link, e.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}catch (Exception e){
|
|
||||||
log.error("Error fetching article detail: {}", e.getMessage());
|
} catch (Exception e) {
|
||||||
|
log.error("Error fetching Spanish news: {}", e.getMessage());
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user