bToday代码提交
This commit is contained in:
@@ -4,10 +4,13 @@ import java.lang.Integer;
|
||||
import java.lang.String;
|
||||
import java.util.Date;
|
||||
import javax.persistence.Entity;
|
||||
import javax.persistence.GeneratedValue;
|
||||
import javax.persistence.GenerationType;
|
||||
import javax.persistence.Id;
|
||||
import javax.persistence.Table;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.Generated;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
import org.hibernate.annotations.DynamicInsert;
|
||||
@@ -34,6 +37,9 @@ public class BtodayStockPO {
|
||||
/**
|
||||
* 主键 */
|
||||
@Id
|
||||
@GeneratedValue(
|
||||
strategy = GenerationType.IDENTITY
|
||||
)
|
||||
Integer id;
|
||||
|
||||
/**
|
||||
|
||||
217
src/main/java/cn/stock/market/infrastructure/job/Scraper.java
Normal file
217
src/main/java/cn/stock/market/infrastructure/job/Scraper.java
Normal file
@@ -0,0 +1,217 @@
|
||||
package cn.stock.market.infrastructure.job;
|
||||
|
||||
import cn.stock.market.domain.basic.entity.BtodayStock;
|
||||
import cn.stock.market.domain.basic.repository.BtodayStockRepository;
|
||||
import cn.stock.market.infrastructure.db.repo.BtodayStockRepo;
|
||||
import com.alibaba.fastjson.JSONArray;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
@RestController
|
||||
public class Scraper {
|
||||
|
||||
@Autowired
|
||||
private BtodayStockRepository btodayStockRepo;
|
||||
|
||||
private final ExecutorService executorService = Executors.newFixedThreadPool(5);
|
||||
|
||||
|
||||
@Scheduled(cron = "0 0 1 */2 * ?")
|
||||
@RequestMapping("/testScraperGetBusinessToday")
|
||||
public void schedule() {
|
||||
String BASE_URL = "https://akm-img-a-in.tosshub.com/businesstoday/resource/market-widgets/prod/company-master-23-01-2023.json";
|
||||
String company_name = "Bhagawati Oxygen Ltd";
|
||||
|
||||
try {
|
||||
// 获取 JSON 数据
|
||||
String json_data = scrapePage(BASE_URL);
|
||||
// 解析 JSON 数据
|
||||
if (json_data != null) {
|
||||
List<BtodayStock> all = btodayStockRepo.findAll();
|
||||
Map<String, String> sefUrlList = getSefUrl(json_data, company_name);
|
||||
sefUrlList = sefUrlList.entrySet().stream()
|
||||
.filter(entry -> all.stream().noneMatch(stock -> stock.getStockName().equals(entry.getKey())))
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
|
||||
|
||||
// 将 Map 中的数据分成 5 个线程处理
|
||||
int batchSize = sefUrlList.size() / 5; // 假设分成 5 个线程
|
||||
int threadCount = 5;
|
||||
CompletableFuture<Void>[] futures = new CompletableFuture[threadCount];
|
||||
for (int i = 0; i < threadCount; i++) {
|
||||
int startIndex = i * batchSize;
|
||||
int endIndex = (i == threadCount - 1) ? sefUrlList.size() : (i + 1) * batchSize;
|
||||
|
||||
Map<String, String> subMap = sefUrlList.entrySet().stream()
|
||||
.skip(startIndex)
|
||||
.limit(endIndex - startIndex)
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
|
||||
|
||||
futures[i] = CompletableFuture.runAsync(() -> processSubMap(subMap), executorService);
|
||||
}
|
||||
// 等待所有异步任务完成
|
||||
CompletableFuture.allOf(futures).get();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("IOException occurred while processing the JSON data", e);
|
||||
}finally {
|
||||
// 关闭线程池
|
||||
executorService.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
private void processSubMap(Map<String, String> sefUrlList) {
|
||||
for (Map.Entry<String, String> entry : sefUrlList.entrySet()) {
|
||||
String companyName = entry.getKey();
|
||||
String url = entry.getValue();
|
||||
|
||||
// 获取网页 HTML
|
||||
String webHtml = null;
|
||||
int maxRetries = 5;
|
||||
int retryCount = 0;
|
||||
|
||||
while (retryCount < maxRetries) {
|
||||
try {
|
||||
webHtml = getWebsiteHtml(url);
|
||||
break; // If successful, break out of the loop
|
||||
} catch (java.net.SocketTimeoutException e) {
|
||||
log.warn("Socket timeout exception occurred. Retrying... (" + (retryCount + 1) + "/" + maxRetries + ")");
|
||||
retryCount++;
|
||||
} catch (IOException e) {
|
||||
log.warn("IOException occurred. Retrying... (" + (retryCount + 1) + "/" + maxRetries + ")");
|
||||
retryCount++;
|
||||
}
|
||||
}
|
||||
if (webHtml != null) {
|
||||
// 获取公司代码
|
||||
String coCode = getCompanyCode(webHtml);
|
||||
|
||||
if (coCode != null) {
|
||||
// 获取股票市场列表
|
||||
String[] stockMarketList = getStockMarket(webHtml);
|
||||
|
||||
for (String stockMarket : stockMarketList) {
|
||||
// 获取网页详情
|
||||
String detailUrl = buildWebDetailUrl(coCode, stockMarket);
|
||||
// String webInfo = getWebDetail(detailUrl);
|
||||
log.info("Stock detail coCode:{}, stockMarket:{}: ,detailUrl:{}", coCode, stockMarket,detailUrl);
|
||||
|
||||
BtodayStock btodayStock = new BtodayStock();
|
||||
btodayStock.setStockName(companyName);
|
||||
btodayStock.setCoCode(coCode);
|
||||
btodayStock.setStockType(stockMarket);
|
||||
btodayStock.setSelfUrl(url);
|
||||
btodayStock.setUrl(detailUrl);
|
||||
btodayStock.setLastUpdateTime(new Date());
|
||||
btodayStockRepo.save(btodayStock);
|
||||
|
||||
/* if (webInfo != null) {
|
||||
log.info("Stock detail for {} in {}: {}", coCode, stockMarket, webInfo);
|
||||
log.info(webInfo);
|
||||
|
||||
} else {
|
||||
log.warn("Failed to retrieve web detail information.");
|
||||
}*/
|
||||
}
|
||||
} else {
|
||||
log.warn("Failed to retrieve company code.");
|
||||
}
|
||||
} else {
|
||||
log.warn("Failed to retrieve website HTML.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static String buildWebDetailUrl(String coCode, String stockMarket) {
|
||||
return "https://marketapi.intoday.in/widget/stockdetail/pullview?co_code=" + coCode + "&exchange=" + stockMarket;
|
||||
}
|
||||
|
||||
private static String scrapePage(String url) throws IOException {
|
||||
log.info("Scraping " + url + "...");
|
||||
return Jsoup.connect(url).ignoreContentType(true).execute().body();
|
||||
}
|
||||
|
||||
private static Map<String, String> getSefUrl(String json_data, String companyName) {
|
||||
Map<String, String> sefUrls = new HashMap<>();
|
||||
|
||||
// 在这里放入你的模糊匹配逻辑
|
||||
JSONArray jsonArray = JSONArray.parseArray(json_data);
|
||||
|
||||
for (int i = 0; i < jsonArray.size(); i++) {
|
||||
JSONObject item = jsonArray.getJSONObject(i);
|
||||
|
||||
// 在这里放入你的模糊匹配逻辑
|
||||
String sef_url = item.getString("sef_url");
|
||||
String company_name = item.getString("companyname");
|
||||
|
||||
if (company_name != null && sef_url != null/* && company_name.equals(companyName)*/) {
|
||||
sefUrls.put(company_name, sef_url);
|
||||
}
|
||||
}
|
||||
|
||||
return sefUrls;
|
||||
}
|
||||
|
||||
private static String getWebsiteHtml(String url) throws IOException {
|
||||
log.info("Getting website URL: " + url + "...");
|
||||
return Jsoup.connect(url).timeout(10000).get().html();
|
||||
}
|
||||
|
||||
private static String getCompanyCode(String text) {
|
||||
Document document = Jsoup.parse(text);
|
||||
Element companyCodeInput = document.selectFirst("input[id=comapnyCodeId]");
|
||||
|
||||
if (companyCodeInput != null) {
|
||||
return companyCodeInput.attr("value");
|
||||
} else {
|
||||
log.warn("No <input> with id=\"companyCodeId\" found on the website.");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static String[] getStockMarket(String text) {
|
||||
Document document = Jsoup.parse(text);
|
||||
Elements ulElements = document.select("ul[class*=wdg_rhs_hdr_ul]");
|
||||
|
||||
List<String> stockMarketList = new ArrayList<>();
|
||||
|
||||
for (Element ulElement : ulElements) {
|
||||
Elements liElements = ulElement.select("li");
|
||||
for (Element liElement : liElements) {
|
||||
Element spanElement = liElement.selectFirst("span[class=wdg_rhs_hdr_lnk]");
|
||||
if (spanElement != null) {
|
||||
stockMarketList.add(spanElement.text());
|
||||
} else {
|
||||
log.warn("Invalid status code while scraping.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return stockMarketList.toArray(new String[0]);
|
||||
}
|
||||
|
||||
private static String getWebDetail(String url) throws IOException {
|
||||
log.info("Getting web detail URL: " + url + "...");
|
||||
return Jsoup.connect(url).ignoreContentType(true).execute().body();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user