From d085d8de121170d1f3e91f75dbb62ef7620c0f0c Mon Sep 17 00:00:00 2001 From: vu-tran Date: Wed, 11 Dec 2024 12:39:15 +0700 Subject: [PATCH 1/5] update crawl ipo --- .../market/infrastructure/job/Scraper.java | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/main/java/cn/stock/market/infrastructure/job/Scraper.java b/src/main/java/cn/stock/market/infrastructure/job/Scraper.java index 54e0948..7e9ffbd 100644 --- a/src/main/java/cn/stock/market/infrastructure/job/Scraper.java +++ b/src/main/java/cn/stock/market/infrastructure/job/Scraper.java @@ -144,9 +144,13 @@ public class Scraper { JSONObject jsonObject = JSONObject.parseObject(jsonDataStr); log.info("获取到新股的json信息:"+jsonObject.toJSONString()); + + JSONObject pageProps = jsonObject.getJSONObject("props").getJSONObject("pageProps"); JSONObject ipoTableData = pageProps.getJSONObject("ipoTableData"); + + // 解析 openData 和 upcomingData JSONArray openData = ipoTableData.getJSONArray("openData"); JSONArray upcomingData = ipoTableData.getJSONArray("upcomingData"); @@ -175,8 +179,8 @@ public class Scraper { if (stockIpo.getStockCode() == null || stockIpo.getStockName() == null){ continue; } - - List exchanges = getIpoExchange(stockIpo.getStockName(), stockIpo.getStockCode()); + String codeDetail = entry.getString("url").substring(entry.getString("url").lastIndexOf('/') + 1);; + List exchanges = getIpoExchange(stockIpo.getStockName(), codeDetail); for (String exchange : exchanges) { StockIpo ipo = new StockIpo(); @@ -190,7 +194,7 @@ public class Scraper { ipo.setCreateDate(stockIpo.getCreateDate()); ipo.setUpdateDate(stockIpo.getUpdateDate()); ipo.setExchangeType(exchange); - + ipo.setSourceType("3"); listStockIpoList.add(ipo); } } @@ -234,7 +238,7 @@ public class Scraper { ipo.setCreateDate(stockIpo.getCreateDate()); ipo.setUpdateDate(stockIpo.getUpdateDate()); ipo.setExchangeType(exchange); - + ipo.setSourceType("3"); listStockIpoList.add(ipo); } } @@ -253,9 +257,18 @@ public class Scraper { .map(StockIpo::getStockCode) .collect(Collectors.toList()); - listStockIpoList = listStockIpoList.stream() - .filter(stockIpos -> !existingStockScIds.contains(stockIpos.getStockCode())).filter(stockIpo -> StringUtils.isNotBlank(stockIpo.getStockName())) - .collect(Collectors.toList()); + for (StockIpo stockIpo : listStockIpoList) { + StockIpo existIpo = existStockIpoList.stream().filter(a -> a.getStockCode().equals(stockIpo.getStockCode()) && (a.getExchangeType() == null || a.getExchangeType().isEmpty() || a.getExchangeType().equals(stockIpo.getExchangeType()) )).findFirst().orElse(null); + if (existIpo != null) { + if (existIpo.getExchangeType() == null || existIpo.getExchangeType().isEmpty()){ + existIpo.setExchangeType(stockIpo.getExchangeType()); + } + stockIpo.setId(existIpo.getId()); + } + } +// listStockIpoList = listStockIpoList.stream() +// .filter(stockIpos -> !existingStockScIds.contains(stockIpos.getStockCode())).filter(stockIpo -> StringUtils.isNotBlank(stockIpo.getStockName())) +// .collect(Collectors.toList()); //保存全部的新股 if(CollectionUtil.isNotEmpty(listStockIpoList)){ From b26d3962c453efbb993f5680a6af4ed3a2a19222 Mon Sep 17 00:00:00 2001 From: vu-tran Date: Wed, 11 Dec 2024 14:23:08 +0700 Subject: [PATCH 2/5] update crawl ipo --- .../java/cn/stock/market/infrastructure/db/po/StockIpoPO.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/cn/stock/market/infrastructure/db/po/StockIpoPO.java b/src/main/java/cn/stock/market/infrastructure/db/po/StockIpoPO.java index abedbed..d93d4d3 100644 --- a/src/main/java/cn/stock/market/infrastructure/db/po/StockIpoPO.java +++ b/src/main/java/cn/stock/market/infrastructure/db/po/StockIpoPO.java @@ -61,11 +61,11 @@ public class StockIpoPO { /** * 是否显示【1 显示,2 不显示】 */ - Integer isShow; + Integer isShow = 1; /** * 是否上市【1 未上市,2 已上市】 */ - Integer isList; + Integer isList = 1; Date createDate; From 3e1ff24bb6647551eaf4fa42ef07c30b6f848403 Mon Sep 17 00:00:00 2001 From: vu-tran Date: Wed, 15 Jan 2025 11:27:40 +0700 Subject: [PATCH 3/5] update link ipo --- src/main/java/cn/stock/market/infrastructure/job/Scraper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/cn/stock/market/infrastructure/job/Scraper.java b/src/main/java/cn/stock/market/infrastructure/job/Scraper.java index 7e9ffbd..261befe 100644 --- a/src/main/java/cn/stock/market/infrastructure/job/Scraper.java +++ b/src/main/java/cn/stock/market/infrastructure/job/Scraper.java @@ -116,7 +116,7 @@ public class Scraper { public void getMoneyControllerNewIPOSchedule() { log.info("定时任务执行获取新股ipo的方法开始执行"); // 目标 URL - String url = "https://www.moneycontrol.com/ipo/open-upcoming-ipos"; + String url = "https://www.moneycontrol.com/ipo/upcoming-ipos/"; // 创建 HttpClient 实例 HttpClient client = HttpClients.createDefault(); // 创建 HttpGet 请求 From 07a69f29b98cf6439e2902d50ad9efa1dd7a43ca Mon Sep 17 00:00:00 2001 From: vu-tran Date: Mon, 10 Feb 2025 09:37:39 +0700 Subject: [PATCH 4/5] update get data open ipo --- .../market/infrastructure/job/Scraper.java | 298 +++++++++--------- 1 file changed, 152 insertions(+), 146 deletions(-) diff --git a/src/main/java/cn/stock/market/infrastructure/job/Scraper.java b/src/main/java/cn/stock/market/infrastructure/job/Scraper.java index 261befe..a174531 100644 --- a/src/main/java/cn/stock/market/infrastructure/job/Scraper.java +++ b/src/main/java/cn/stock/market/infrastructure/job/Scraper.java @@ -116,182 +116,188 @@ public class Scraper { public void getMoneyControllerNewIPOSchedule() { log.info("定时任务执行获取新股ipo的方法开始执行"); // 目标 URL - String url = "https://www.moneycontrol.com/ipo/upcoming-ipos/"; + List urls = Arrays.asList("https://www.moneycontrol.com/ipo/upcoming-ipos/", "https://www.moneycontrol.com/ipo/open-ipos/"); // 创建 HttpClient 实例 HttpClient client = HttpClients.createDefault(); // 创建 HttpGet 请求 - HttpGet request = new HttpGet(url); - try { - // 执行请求 - HttpResponse response = client.execute(request); + for (String url : urls) { + HttpGet request = new HttpGet(url); + try { + // 执行请求 + HttpResponse response = client.execute(request); - // 检查请求是否成功 - if (response.getStatusLine().getStatusCode() == 200) { - // 获取响应体 - String responseBody = EntityUtils.toString(response.getEntity()); + // 检查请求是否成功 + if (response.getStatusLine().getStatusCode() == 200) { + // 获取响应体 + String responseBody = EntityUtils.toString(response.getEntity()); - // 使用 Jsoup 解析 HTML - Document doc = Jsoup.parse(responseBody); + // 使用 Jsoup 解析 HTML + Document doc = Jsoup.parse(responseBody); - // 找到包含 JSON 数据的