money control代码提交
This commit is contained in:
@@ -29,6 +29,7 @@ import java.util.concurrent.ExecutionException;
|
|||||||
import java.util.concurrent.LinkedBlockingQueue;
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
import java.util.concurrent.ThreadPoolExecutor;
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author gs
|
* @author gs
|
||||||
@@ -91,9 +92,18 @@ public class MoneyScraper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 带有A B 分组的url
|
||||||
|
* @param url
|
||||||
|
* @param httpClient
|
||||||
|
* @param letter
|
||||||
|
* @return
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
private List<String> sendHttpRequest(String url, HttpClient httpClient, String letter) throws IOException {
|
private List<String> sendHttpRequest(String url, HttpClient httpClient, String letter) throws IOException {
|
||||||
|
List<MoneyStock> allMoneyStock = moneyStockRepository.findAll();
|
||||||
Document document = fetchStockDetails(url);
|
Document document = fetchStockDetails(url);
|
||||||
extractExchangeDetails(document);
|
extractExchangeDetails(document,allMoneyStock);
|
||||||
List<String> result = new ArrayList<>();
|
List<String> result = new ArrayList<>();
|
||||||
result.add("Thread " + Thread.currentThread().getName() + " processed letters: " + letter);
|
result.add("Thread " + Thread.currentThread().getName() + " processed letters: " + letter);
|
||||||
return result;
|
return result;
|
||||||
@@ -137,6 +147,11 @@ public class MoneyScraper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取全部股票url_self
|
||||||
|
* @param url
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
public static Document fetchStockDetails(String url) {
|
public static Document fetchStockDetails(String url) {
|
||||||
return fetchDocumentWithRetry(url);
|
return fetchDocumentWithRetry(url);
|
||||||
}
|
}
|
||||||
@@ -173,16 +188,19 @@ public class MoneyScraper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void extractExchangeDetails(Document soup) {
|
public void extractExchangeDetails(Document soup,List<MoneyStock> moneyStockHadSavedList) {
|
||||||
Elements companies = soup.select("table.pcq_tbl.MT10");
|
Elements companies = soup.select("table.pcq_tbl.MT10");
|
||||||
|
List<String> hadSaveUrl = moneyStockHadSavedList.stream().map(MoneyStock::getSelfUrl).collect(Collectors.toList());
|
||||||
for (Element company : companies) {
|
for (Element company : companies) {
|
||||||
Elements elements = company.select("tr > td > a");
|
Elements elements = company.select("tr > td > a");
|
||||||
|
|
||||||
for (Element element : elements) {
|
for (Element element : elements) {
|
||||||
String textContent = element.text().trim();
|
String textContent = element.text().trim();
|
||||||
String linkAttribute = element.attr("href");
|
String linkAttribute = element.attr("href");
|
||||||
|
if(hadSaveUrl.contains(linkAttribute)){
|
||||||
|
log.error(Thread.currentThread().getName()+"已经存在了不需要重复保存,company_name: " + textContent + ", Link Attribute: " + linkAttribute);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
log.info(Thread.currentThread().getName()+",Text Content: " + textContent + ", Link Attribute: " + linkAttribute);
|
log.info(Thread.currentThread().getName()+",Text Content: " + textContent + ", Link Attribute: " + linkAttribute);
|
||||||
Document soup2 = fetchCompanyDetails(linkAttribute);
|
Document soup2 = fetchCompanyDetails(linkAttribute);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user