update get news
This commit is contained in:
@@ -36,6 +36,7 @@ import javax.annotation.PostConstruct;
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneId;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
@@ -310,85 +311,98 @@ public class InvestingTask {
|
||||
}
|
||||
|
||||
@Scheduled(cron = "0 0 0/3 * * ?")
|
||||
// @PostConstruct
|
||||
public void getBoerseNews(){
|
||||
String url_request = "https://www.boerse-online.de";
|
||||
@PostConstruct
|
||||
public void getCincoDiasNews() {
|
||||
String baseUrl = "https://cincodias.elpais.com";
|
||||
|
||||
try {
|
||||
List<SiteNews> results = new ArrayList<>();
|
||||
|
||||
String listUrl = url_request + "/nachrichten/1";
|
||||
String listUrl = baseUrl + "/ultimas-noticias/";
|
||||
Document doc = Jsoup.connect(listUrl)
|
||||
.userAgent("Mozilla/5.0")
|
||||
.get();
|
||||
|
||||
Elements articles = doc.select("article.article-list-item");
|
||||
Elements articles = doc.select("article.c");
|
||||
|
||||
for (Element article : articles) {
|
||||
Element aTag = article.selectFirst("h2 a");
|
||||
// Title and Link
|
||||
Element aTag = article.selectFirst("h2.c_t a");
|
||||
String title = aTag != null ? aTag.text().trim() : null;
|
||||
String link = aTag != null ? url_request + aTag.attr("href") : null;
|
||||
String link = aTag != null ? aTag.absUrl("href") : null;
|
||||
|
||||
Element imgTag = article.selectFirst("figure a picture img");
|
||||
String image = imgTag != null ? imgTag.attr("src") : null;
|
||||
|
||||
Element timeTag = article.selectFirst("small.article-info time");
|
||||
Date publishedDate = null;
|
||||
|
||||
if (timeTag != null) {
|
||||
String datetimeAttr = timeTag.attr("datetime");
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm");
|
||||
LocalDateTime dateTime = LocalDateTime.parse(datetimeAttr, formatter);
|
||||
ZoneId berlinZone = ZoneId.of("Europe/Berlin");
|
||||
publishedDate = Date.from(dateTime.atZone(berlinZone).toInstant());
|
||||
}
|
||||
|
||||
Element authorTag = article.selectFirst("small.article-info strong");
|
||||
// Author
|
||||
Element authorTag = article.selectFirst("a.c_a_a");
|
||||
String author = authorTag != null ? authorTag.text().trim() : null;
|
||||
|
||||
// Fetch article detail page
|
||||
Element figure = article.selectFirst("figure.c_m a img");
|
||||
|
||||
String imageUrl = null;
|
||||
if (figure != null) {
|
||||
imageUrl = figure.attr("src");
|
||||
}
|
||||
// Date
|
||||
Date publishedDate = null;
|
||||
try {
|
||||
Element timeTag = article.selectFirst("time");
|
||||
if (timeTag != null) {
|
||||
String datetimeAttr = timeTag.attr("datetime");
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
|
||||
ZonedDateTime zonedDateTime = ZonedDateTime.parse(datetimeAttr, formatter);
|
||||
publishedDate = Date.from(zonedDateTime.toInstant());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to parse published date for article: {}", link);
|
||||
}
|
||||
|
||||
// Summary
|
||||
String summary = article.selectFirst("p.c_d") != null ? article.selectFirst("p.c_d").text() : null;
|
||||
|
||||
// Optional: Get full content from article detail page
|
||||
String htmlContent = "";
|
||||
if (link != null) {
|
||||
try {
|
||||
Document detailPage = Jsoup.connect(link)
|
||||
Document detailDoc = Jsoup.connect(link)
|
||||
.userAgent("Mozilla/5.0")
|
||||
.get();
|
||||
|
||||
Element body = detailPage.selectFirst("div.article-body");
|
||||
// ✅ Extract article main content
|
||||
Element body = detailDoc.selectFirst("div.a_c.clearfix[data-dtm-region=articulo_cuerpo]");
|
||||
if (body != null) {
|
||||
htmlContent = body.html(); // ✅ inner HTML only
|
||||
htmlContent = body.html();
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("Error fetching article detail: " + link);
|
||||
log.warn("Error fetching detail page: {}", link);
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
// Build SiteNews object
|
||||
SiteNews siteNews = new SiteNews();
|
||||
siteNews.setAddTime(new Date());
|
||||
siteNews.setSourceId(link);
|
||||
siteNews.setTitle(title);
|
||||
siteNews.setSourceName("BOERSE");
|
||||
siteNews.setDescription(title);
|
||||
siteNews.setImgurl(image);
|
||||
siteNews.setSourceName("CINCO_DIAS");
|
||||
siteNews.setDescription(summary != null ? summary : title);
|
||||
siteNews.setImgurl(imageUrl);
|
||||
siteNews.setContent(htmlContent);
|
||||
siteNews.setStatus(1);
|
||||
siteNews.setType(1); // Set as financial news type
|
||||
siteNews.setType(1);
|
||||
siteNews.setViews(0);
|
||||
siteNews.setShowTime(publishedDate);
|
||||
|
||||
try {
|
||||
newsRepository.save(siteNews);
|
||||
log.info("Saved German news : {}", title);
|
||||
log.info("Saved Spanish news: {}", title);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to save German news {}: {}", link, e.getMessage());
|
||||
log.warn("Failed to save Spanish news {}: {}", link, e.getMessage());
|
||||
}
|
||||
}
|
||||
}catch (Exception e){
|
||||
log.error("Error fetching article detail: {}", e.getMessage());
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error fetching Spanish news: {}", e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user