money control代码提交
This commit is contained in:
@@ -29,6 +29,7 @@ import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* @author gs
|
||||
@@ -91,9 +92,18 @@ public class MoneyScraper {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 带有A B 分组的url
|
||||
* @param url
|
||||
* @param httpClient
|
||||
* @param letter
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private List<String> sendHttpRequest(String url, HttpClient httpClient, String letter) throws IOException {
|
||||
List<MoneyStock> allMoneyStock = moneyStockRepository.findAll();
|
||||
Document document = fetchStockDetails(url);
|
||||
extractExchangeDetails(document);
|
||||
extractExchangeDetails(document,allMoneyStock);
|
||||
List<String> result = new ArrayList<>();
|
||||
result.add("Thread " + Thread.currentThread().getName() + " processed letters: " + letter);
|
||||
return result;
|
||||
@@ -137,6 +147,11 @@ public class MoneyScraper {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fetches the document for the given URL by delegating to fetchDocumentWithRetry (original note: get all stock url_self).
|
||||
* @param url the address passed unchanged to fetchDocumentWithRetry
|
||||
* @return the Document returned by fetchDocumentWithRetry for that URL
|
||||
*/
|
||||
public static Document fetchStockDetails(String url) {
|
||||
return fetchDocumentWithRetry(url);
|
||||
}
|
||||
@@ -173,16 +188,19 @@ public class MoneyScraper {
|
||||
}
|
||||
|
||||
|
||||
public void extractExchangeDetails(Document soup) {
|
||||
public void extractExchangeDetails(Document soup,List<MoneyStock> moneyStockHadSavedList) {
|
||||
Elements companies = soup.select("table.pcq_tbl.MT10");
|
||||
|
||||
List<String> hadSaveUrl = moneyStockHadSavedList.stream().map(MoneyStock::getSelfUrl).collect(Collectors.toList());
|
||||
for (Element company : companies) {
|
||||
Elements elements = company.select("tr > td > a");
|
||||
|
||||
for (Element element : elements) {
|
||||
String textContent = element.text().trim();
|
||||
String linkAttribute = element.attr("href");
|
||||
|
||||
if(hadSaveUrl.contains(linkAttribute)){
|
||||
log.error(Thread.currentThread().getName()+"已经存在了不需要重复保存,company_name: " + textContent + ", Link Attribute: " + linkAttribute);
|
||||
continue;
|
||||
}
|
||||
log.info(Thread.currentThread().getName()+",Text Content: " + textContent + ", Link Attribute: " + linkAttribute);
|
||||
Document soup2 = fetchCompanyDetails(linkAttribute);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user