");
+ int end = rawHtml.lastIndexOf("
") + 500;
+ String result = (start != -1 && end > start) ? rawHtml.substring(start, end) : "";
+
+ // Extract article list using CSS selector
+ Elements divElements = doc.select("div.listingstyle_cardlistlist__dfq57");
+ StringBuilder sb = new StringBuilder();
+ for (Element divElement : divElements) {
+ sb.append(divElement.outerHtml()).append("\n");
+ }
+
+ // If you only want the div content:
+ result = sb.toString();
+ return result;
+ }
+
public List getNewsInfo(String url) {
+// String result = "";
+// List list = new ArrayList<>();
+// try {
+// // Connect to the web page using Jsoup
+//// Document doc = Jsoup.connect(url)
+//// .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
+//// .header("Referer", "https://www.business-standard.com/")
+//// .header("Accept-Language", "en-US,en;q=0.9")
+//// .get();
+// Document doc = Jsoup.connect(url)
+// .referrer("https://www.business-standard.com/")
+// .header("Accept-Language", "en-US,en;q=0.9")
+// .userAgent("Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1")
+// .timeout(5000) // timeout 5 seconds
+// .get();
+// result = doc.html().substring(doc.html().indexOf("articleBody") + 14, doc.html().indexOf(",\"author\":") - 1);
+// list.add(result);
+// list.add(doc.html().substring(doc.html().indexOf("og:title") + 19, doc.html().indexOf(" list = new ArrayList<>();
try {
- // 使用Jsoup连接到网页
-// Document doc = Jsoup.connect(url)
-// .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
-// .header("Referer", "https://www.business-standard.com/")
-// .header("Accept-Language", "en-US,en;q=0.9")
-// .get();
- Document doc = Jsoup.connect(url)
- .referrer("https://www.business-standard.com/")
- .header("Accept-Language", "en-US,en;q=0.9")
- .userAgent("Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1")
- .timeout(5000) // timeout 5 seconds
- .get();
- result = doc.html().substring(doc.html().indexOf("articleBody") + 14, doc.html().indexOf(",\"author\":") - 1);
+ // Step 1: Fetch the raw HTML through HttpClientRequest.doGetNews (replaces the former Jsoup.connect(url) fetch)
+ String rawHtml = HttpClientRequest.doGetNews(url);
+
+ // Step 2: Parse the fetched HTML with Jsoup
+ Document doc = Jsoup.parse(rawHtml); // NOTE(review): doc looks unused in the visible added lines - confirm it is still needed
+
+ // Step 3: Slice the article text out of the raw HTML, from 14 chars past "articleBody" to 1 char before ,"author":
+ int startIdx = rawHtml.indexOf("articleBody") + 14; // NOTE(review): a missing marker (indexOf == -1) gives 13, which still passes the startIdx > 0 guard below
+ int endIdx = rawHtml.indexOf(",\"author\":") - 1; // a missing marker gives -2, which the endIdx > startIdx guard rejects
+
+ if (startIdx > 0 && endIdx > startIdx) {
+ result = rawHtml.substring(startIdx, endIdx);
+ } else {
+ result = "[articleBody not found]"; // placeholder is returned to the caller as the first list element
+ }
list.add(result);
- list.add(doc.html().substring(doc.html().indexOf("og:title") + 19, doc.html().indexOf(" 0 && titleEnd > titleStart) {
+ list.add(rawHtml.substring(titleStart, titleEnd));
+ } else {
+ list.add("[title not found]");
+ }
+
} catch (Exception e) {
- list.add(e.toString());
- return list;
+ list.add("[Error] " + e.getMessage()); // NOTE(review): getMessage() can be null; the replaced line used e.toString(), which keeps the exception type
}
return list;
}
diff --git a/src/main/java/cn/stock/market/utils/HttpClientRequest.java b/src/main/java/cn/stock/market/utils/HttpClientRequest.java
index e6c7c9c..50498da 100644
--- a/src/main/java/cn/stock/market/utils/HttpClientRequest.java
+++ b/src/main/java/cn/stock/market/utils/HttpClientRequest.java
@@ -265,6 +265,56 @@ public class HttpClientRequest {
return result;
}
+ public static String doGetNews(String url) {
+ CloseableHttpClient httpClient = null;
+ CloseableHttpResponse response = null;
+ String result = "";
+
+ try {
+ httpClient = HttpClients.createDefault();
+
+ HttpGet httpGet = new HttpGet(url);
+
+ // Spoof real browser headers
+ httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+ httpGet.setHeader("Accept-Language", "en-US,en;q=0.9,vi;q=0.8,ug;q=0.7,fr;q=0.6");
+ httpGet.setHeader("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1");
+ httpGet.setHeader("Priority", "u=0, i");
+ httpGet.setHeader("Sec-Fetch-Dest", "document");
+ httpGet.setHeader("Sec-Fetch-Mode", "navigate");
+ httpGet.setHeader("Sec-Fetch-Site", "none");
+ httpGet.setHeader("Sec-Fetch-User", "?1");
+ httpGet.setHeader("Upgrade-Insecure-Requests", "1");
+
+ // Set cookies exactly like in curl
+ httpGet.setHeader("Cookie", "userUid=1747102033185-d570fba9-62fd-40be-93ca-ed08b4de57d4; _sid=MTc0NzEwMjAzMzE4Ni4ycTU%3D; _scor_uid=135c13065ff84620b5318b489af93e87; _gcl_au=1.1.249135292.1747102036; _ga=GA1.1.1031614211.1747102037; WZRK_G=28895afb56ff48dda59fe8de0af746bf; FCNEC=%5B%5B%22AKsRol8sHYeSYz_FYPkInYXN3P4ZDPfVKbsRfILfDuOMLhDtkTuoCJP5MlvT9gIbOe7IlDfY8ZeHszhwdVtAoKF1gWv0pLAq5EqpLpse8CEm_ZNv-bUSs6zEyqpOkeKWFI_Ei6VfNAvnZAO8PcXdF8_ncsaO902X7g%3D%3D%22%5D%5D; _ga_KRGL1M61LX=GS2.1.s1747105135$o2$g0$t1747105135$j60$l0$h0");
+
+ RequestConfig requestConfig = RequestConfig.custom()
+ .setConnectTimeout(10000)
+ .setSocketTimeout(15000)
+ .setConnectionRequestTimeout(10000)
+ .build();
+ httpGet.setConfig(requestConfig);
+
+ response = httpClient.execute(httpGet);
+ HttpEntity entity = response.getEntity();
+
+ if (entity != null) {
+ result = EntityUtils.toString(entity);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ } finally {
+ try {
+ if (response != null) response.close();
+ if (httpClient != null) httpClient.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ return result;
+ }
+
public static void main(String[] args) {
String url = "https://marketapi.intoday.in/widget/topgainer/view?exchange=nse";
String str = doGet(url);
diff --git a/src/main/resources/application-base-alpha.yml b/src/main/resources/application-base-alpha.yml
index 590859e..9b896bb 100644
--- a/src/main/resources/application-base-alpha.yml
+++ b/src/main/resources/application-base-alpha.yml
@@ -3,9 +3,9 @@ spring:
show-sql: true
# Redis配置
redis:
- host: 43.156.40.39
+ host: 43.153.174.179
password: a5v8b86P4mVzFlUqJV
- port: 30031
+ port: 30001
database: 1
lettuce:
pool:
@@ -17,7 +17,7 @@ spring:
datasource:
stock-market:
driver-class-name: com.mysql.cj.jdbc.Driver
- url: jdbc:mysql://43.156.40.39:30030/india_stock?useUnicode=true&characterEncoding=utf-8
+ url: jdbc:mysql://43.153.174.179:30000/india_stock?useUnicode=true&characterEncoding=utf-8
username: root
password: uNejHIFQGJOUtYTmE
maxActive: 500