diff --git a/README.md b/README.md index 29a2b68..411452b 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ news_service/ mainnews.json ranknews.json worldnews.json + detail-cache.json + images/ src/ app/ config/ @@ -110,6 +112,12 @@ POST /api/news/refresh POST /api/news/refresh?category=flashnews ``` +## 缓存 + +- 分类列表和前端返回数据会落盘到 `data/.json`。 +- 详情正文会按详情页 URL 去重缓存到 `data/detail-cache.json`,同一篇新闻后续刷新不会重复抓详情页。 +- 正文图片会下载到 `data/images/`,正文 HTML 里的图片地址会替换成 `/api/news/images/`。 + ## Docker ```bash diff --git a/data/images/057cf7c518c5c90b9872b761.jpg b/data/images/057cf7c518c5c90b9872b761.jpg new file mode 100644 index 0000000..bac1cf3 Binary files /dev/null and b/data/images/057cf7c518c5c90b9872b761.jpg differ diff --git a/data/images/07358bd3081afa9776ebb30e.jpg b/data/images/07358bd3081afa9776ebb30e.jpg new file mode 100644 index 0000000..dd60596 Binary files /dev/null and b/data/images/07358bd3081afa9776ebb30e.jpg differ diff --git a/data/images/0a2aebe8223c8c0b6b2c57ec.jpg b/data/images/0a2aebe8223c8c0b6b2c57ec.jpg new file mode 100644 index 0000000..aaae25e Binary files /dev/null and b/data/images/0a2aebe8223c8c0b6b2c57ec.jpg differ diff --git a/data/images/0ce10dd9775dfaf0b9175291.jpg b/data/images/0ce10dd9775dfaf0b9175291.jpg new file mode 100644 index 0000000..5893a62 Binary files /dev/null and b/data/images/0ce10dd9775dfaf0b9175291.jpg differ diff --git a/data/images/17fc7495589464ded13521fd.jpg b/data/images/17fc7495589464ded13521fd.jpg new file mode 100644 index 0000000..072c423 Binary files /dev/null and b/data/images/17fc7495589464ded13521fd.jpg differ diff --git a/data/images/21b637d8c89911d6e2cbe5e6.jpg b/data/images/21b637d8c89911d6e2cbe5e6.jpg new file mode 100644 index 0000000..85185be Binary files /dev/null and b/data/images/21b637d8c89911d6e2cbe5e6.jpg differ diff --git a/data/images/298c421db9a631a7bc12af0e.jpg b/data/images/298c421db9a631a7bc12af0e.jpg new file mode 100644 index 0000000..a54f01e Binary files /dev/null and b/data/images/298c421db9a631a7bc12af0e.jpg differ diff --git a/data/images/2c03e4af6b5199ab119ec2dd.jpg b/data/images/2c03e4af6b5199ab119ec2dd.jpg new file mode 100644 index 0000000..dc9e145 Binary files /dev/null and b/data/images/2c03e4af6b5199ab119ec2dd.jpg differ diff --git a/data/images/2f52c8c6e9488bcbc6693247.jpg b/data/images/2f52c8c6e9488bcbc6693247.jpg new file mode 100644 index 0000000..1ff7c21 Binary files /dev/null and b/data/images/2f52c8c6e9488bcbc6693247.jpg differ diff --git a/data/images/3361465e178fa37547179416.jpg b/data/images/3361465e178fa37547179416.jpg new file mode 100644 index 0000000..ce915a7 Binary files /dev/null and b/data/images/3361465e178fa37547179416.jpg differ diff --git a/data/images/38c44c652b2c26e47b2652ef.jpg b/data/images/38c44c652b2c26e47b2652ef.jpg new file mode 100644 index 0000000..d0f2ffb Binary files /dev/null and b/data/images/38c44c652b2c26e47b2652ef.jpg differ diff --git a/data/images/43cea879cae0ed31788cd999.jpg b/data/images/43cea879cae0ed31788cd999.jpg new file mode 100644 index 0000000..5437631 Binary files /dev/null and b/data/images/43cea879cae0ed31788cd999.jpg differ diff --git a/data/images/4484305978d7bdef781bb19f.jpg b/data/images/4484305978d7bdef781bb19f.jpg new file mode 100644 index 0000000..f8305fb Binary files /dev/null and b/data/images/4484305978d7bdef781bb19f.jpg differ diff --git a/data/images/472538488154c075b8f5fdf5.png b/data/images/472538488154c075b8f5fdf5.png new file mode 100644 index 0000000..1cfc4e4 Binary files /dev/null and b/data/images/472538488154c075b8f5fdf5.png differ diff --git a/data/images/4d73b4900255bdce72644572.jpg b/data/images/4d73b4900255bdce72644572.jpg new file mode 100644 index 0000000..3c5648b Binary files /dev/null and b/data/images/4d73b4900255bdce72644572.jpg differ diff --git a/data/images/4ea2ffbe12941c186797e69e.jpg b/data/images/4ea2ffbe12941c186797e69e.jpg new file mode 100644 index 0000000..f00e4ff Binary files /dev/null and b/data/images/4ea2ffbe12941c186797e69e.jpg differ diff --git a/data/images/50b1b779f08c8aa4c878c5e8.jpg b/data/images/50b1b779f08c8aa4c878c5e8.jpg new file mode 100644 index 0000000..7271f45 Binary files /dev/null and b/data/images/50b1b779f08c8aa4c878c5e8.jpg differ diff --git a/data/images/55f7992c5beed3020d27b556.jpg b/data/images/55f7992c5beed3020d27b556.jpg new file mode 100644 index 0000000..1d81a28 Binary files /dev/null and b/data/images/55f7992c5beed3020d27b556.jpg differ diff --git a/data/images/5c82173a788a73e758f45554.jpg b/data/images/5c82173a788a73e758f45554.jpg new file mode 100644 index 0000000..45f9fc3 Binary files /dev/null and b/data/images/5c82173a788a73e758f45554.jpg differ diff --git a/data/images/6152923b4cbf932d3e25460d.jpg b/data/images/6152923b4cbf932d3e25460d.jpg new file mode 100644 index 0000000..2f68780 Binary files /dev/null and b/data/images/6152923b4cbf932d3e25460d.jpg differ diff --git a/data/images/618b9a6caa15f3fa53c5c12a.jpg b/data/images/618b9a6caa15f3fa53c5c12a.jpg new file mode 100644 index 0000000..e27999e Binary files /dev/null and b/data/images/618b9a6caa15f3fa53c5c12a.jpg differ diff --git a/data/images/640c25936e625bb4c17cedd6.jpg b/data/images/640c25936e625bb4c17cedd6.jpg new file mode 100644 index 0000000..a157b44 Binary files /dev/null and b/data/images/640c25936e625bb4c17cedd6.jpg differ diff --git a/data/images/64e094cb88473c3ab4368a4c.jpg b/data/images/64e094cb88473c3ab4368a4c.jpg new file mode 100644 index 0000000..315f21a Binary files /dev/null and b/data/images/64e094cb88473c3ab4368a4c.jpg differ diff --git a/data/images/67930b0f4022206ab76097bc.png b/data/images/67930b0f4022206ab76097bc.png new file mode 100644 index 0000000..4816f88 Binary files /dev/null and b/data/images/67930b0f4022206ab76097bc.png differ diff --git a/data/images/6a3b35edb78fc369e2a8327f.jpg b/data/images/6a3b35edb78fc369e2a8327f.jpg new file mode 100644 index 0000000..b61e860 Binary files /dev/null and b/data/images/6a3b35edb78fc369e2a8327f.jpg differ diff --git a/data/images/6db99101a6561c13cb1212fe.jpg b/data/images/6db99101a6561c13cb1212fe.jpg new file mode 100644 index 0000000..5a8f88f Binary files /dev/null and b/data/images/6db99101a6561c13cb1212fe.jpg differ diff --git a/data/images/7216e1c17092ea09f53d7113.jpg b/data/images/7216e1c17092ea09f53d7113.jpg new file mode 100644 index 0000000..b5ce56c Binary files /dev/null and b/data/images/7216e1c17092ea09f53d7113.jpg differ diff --git a/data/images/737727388097aab3577185a7.jpg b/data/images/737727388097aab3577185a7.jpg new file mode 100644 index 0000000..21df7d4 Binary files /dev/null and b/data/images/737727388097aab3577185a7.jpg differ diff --git a/data/images/7a4d1d68fc708d25ebeab5bc.jpg b/data/images/7a4d1d68fc708d25ebeab5bc.jpg new file mode 100644 index 0000000..b46ade2 Binary files /dev/null and b/data/images/7a4d1d68fc708d25ebeab5bc.jpg differ diff --git a/data/images/7ca8bef725e5eb2b6c3c0e6e.jpg b/data/images/7ca8bef725e5eb2b6c3c0e6e.jpg new file mode 100644 index 0000000..0a2daf3 Binary files /dev/null and b/data/images/7ca8bef725e5eb2b6c3c0e6e.jpg differ diff --git a/data/images/85a5c95a9e0bf8a0a135a12e.jpg b/data/images/85a5c95a9e0bf8a0a135a12e.jpg new file mode 100644 index 0000000..39b116e Binary files /dev/null and b/data/images/85a5c95a9e0bf8a0a135a12e.jpg differ diff --git a/data/images/86371883ed5412b70070d7c3.jpg b/data/images/86371883ed5412b70070d7c3.jpg new file mode 100644 index 0000000..8a924ce Binary files /dev/null and b/data/images/86371883ed5412b70070d7c3.jpg differ diff --git a/data/images/86cfd64e12b1331018ae9215.jpg b/data/images/86cfd64e12b1331018ae9215.jpg new file mode 100644 index 0000000..0e2c896 Binary files /dev/null and b/data/images/86cfd64e12b1331018ae9215.jpg differ diff --git a/data/images/8a3cfef56d348f821b126b23.jpg b/data/images/8a3cfef56d348f821b126b23.jpg new file mode 100644 index 0000000..40b8d66 Binary files /dev/null and b/data/images/8a3cfef56d348f821b126b23.jpg differ diff --git a/data/images/8bb0f18cc423f23d2b2364c0.jpg b/data/images/8bb0f18cc423f23d2b2364c0.jpg new file mode 100644 index 0000000..a9ff694 Binary files /dev/null and b/data/images/8bb0f18cc423f23d2b2364c0.jpg differ diff --git a/data/images/8d2f1a60d05470ebc619f4d3.jpg b/data/images/8d2f1a60d05470ebc619f4d3.jpg new file mode 100644 index 0000000..d708958 Binary files /dev/null and b/data/images/8d2f1a60d05470ebc619f4d3.jpg differ diff --git a/data/images/8dc7002585325a91cdfee9c1.jpg b/data/images/8dc7002585325a91cdfee9c1.jpg new file mode 100644 index 0000000..bc6ab56 Binary files /dev/null and b/data/images/8dc7002585325a91cdfee9c1.jpg differ diff --git a/data/images/9034de78cd8b59d7690944c8.jpg b/data/images/9034de78cd8b59d7690944c8.jpg new file mode 100644 index 0000000..6d8d9ab Binary files /dev/null and b/data/images/9034de78cd8b59d7690944c8.jpg differ diff --git a/data/images/996f2c9469f585ddf44bf7d3.jpg b/data/images/996f2c9469f585ddf44bf7d3.jpg new file mode 100644 index 0000000..ddd74c9 Binary files /dev/null and b/data/images/996f2c9469f585ddf44bf7d3.jpg differ diff --git a/data/images/99e6203c19be9c8d4de54570.jpg b/data/images/99e6203c19be9c8d4de54570.jpg new file mode 100644 index 0000000..a22aa8b Binary files /dev/null and b/data/images/99e6203c19be9c8d4de54570.jpg differ diff --git a/data/images/99f3c6f2eb7400115d29ff20.jpg b/data/images/99f3c6f2eb7400115d29ff20.jpg new file mode 100644 index 0000000..6a94137 Binary files /dev/null and b/data/images/99f3c6f2eb7400115d29ff20.jpg differ diff --git a/data/images/9a9f987cb4a97099133fe042.jpg b/data/images/9a9f987cb4a97099133fe042.jpg new file mode 100644 index 0000000..9bce202 Binary files /dev/null and b/data/images/9a9f987cb4a97099133fe042.jpg differ diff --git a/data/images/9e316f3b71dedf4ebb5a5fe3.jpg b/data/images/9e316f3b71dedf4ebb5a5fe3.jpg new file mode 100644 index 0000000..98af3f1 Binary files /dev/null and b/data/images/9e316f3b71dedf4ebb5a5fe3.jpg differ diff --git a/data/images/9ff7fa078865364fd15d907a.jpg b/data/images/9ff7fa078865364fd15d907a.jpg new file mode 100644 index 0000000..e5a31a3 Binary files /dev/null and b/data/images/9ff7fa078865364fd15d907a.jpg differ diff --git a/data/images/a0de36461eb3bd34d158b664.jpg b/data/images/a0de36461eb3bd34d158b664.jpg new file mode 100644 index 0000000..f589aad Binary files /dev/null and b/data/images/a0de36461eb3bd34d158b664.jpg differ diff --git a/data/images/a6eef42ca54c3db96826ed26.jpg b/data/images/a6eef42ca54c3db96826ed26.jpg new file mode 100644 index 0000000..7f9be3d Binary files /dev/null and b/data/images/a6eef42ca54c3db96826ed26.jpg differ diff --git a/data/images/ab1836c9a3ad11e299536347.jpg b/data/images/ab1836c9a3ad11e299536347.jpg new file mode 100644 index 0000000..58325e7 Binary files /dev/null and b/data/images/ab1836c9a3ad11e299536347.jpg differ diff --git a/data/images/ab2d3bd84fc582636b3f0356.jpg b/data/images/ab2d3bd84fc582636b3f0356.jpg new file mode 100644 index 0000000..2e7bd62 Binary files /dev/null and b/data/images/ab2d3bd84fc582636b3f0356.jpg differ diff --git a/data/images/abaf2c5a02d4220072877791.jpg b/data/images/abaf2c5a02d4220072877791.jpg new file mode 100644 index 0000000..b25f4cd Binary files /dev/null and b/data/images/abaf2c5a02d4220072877791.jpg differ diff --git a/data/images/b0e6bd3a5e868162e132f64c.png b/data/images/b0e6bd3a5e868162e132f64c.png new file mode 100644 index 0000000..be105cd Binary files /dev/null and b/data/images/b0e6bd3a5e868162e132f64c.png differ diff --git a/data/images/b42de033ff495471a4d58e01.jpg b/data/images/b42de033ff495471a4d58e01.jpg new file mode 100644 index 0000000..274b8ab Binary files /dev/null and b/data/images/b42de033ff495471a4d58e01.jpg differ diff --git a/data/images/b5658c725dac90e1ae0b884b.jpg b/data/images/b5658c725dac90e1ae0b884b.jpg new file mode 100644 index 0000000..a736797 Binary files /dev/null and b/data/images/b5658c725dac90e1ae0b884b.jpg differ diff --git a/data/images/b6fbc2f6f185cefc1f3a668f.jpg b/data/images/b6fbc2f6f185cefc1f3a668f.jpg new file mode 100644 index 0000000..4f4129e Binary files /dev/null and b/data/images/b6fbc2f6f185cefc1f3a668f.jpg differ diff --git a/data/images/bbc73c47d365564bf07c1626.jpg b/data/images/bbc73c47d365564bf07c1626.jpg new file mode 100644 index 0000000..2d09b7f Binary files /dev/null and b/data/images/bbc73c47d365564bf07c1626.jpg differ diff --git a/data/images/bce678840415a2c56cb64ad0.jpg b/data/images/bce678840415a2c56cb64ad0.jpg new file mode 100644 index 0000000..e71828b Binary files /dev/null and b/data/images/bce678840415a2c56cb64ad0.jpg differ diff --git a/data/images/c25a8b0a6b01c5032085e827.jpg b/data/images/c25a8b0a6b01c5032085e827.jpg new file mode 100644 index 0000000..c562f64 Binary files /dev/null and b/data/images/c25a8b0a6b01c5032085e827.jpg differ diff --git a/data/images/c7398566cb6f4d1e6e8a5a8b.jpg b/data/images/c7398566cb6f4d1e6e8a5a8b.jpg new file mode 100644 index 0000000..4a65dd2 Binary files /dev/null and b/data/images/c7398566cb6f4d1e6e8a5a8b.jpg differ diff --git a/data/images/c8434f9416a326220d631588.jpg b/data/images/c8434f9416a326220d631588.jpg new file mode 100644 index 0000000..f8ac782 Binary files /dev/null and b/data/images/c8434f9416a326220d631588.jpg differ diff --git a/data/images/ca805ef5c0c9929963251025.jpg b/data/images/ca805ef5c0c9929963251025.jpg new file mode 100644 index 0000000..cacff03 Binary files /dev/null and b/data/images/ca805ef5c0c9929963251025.jpg differ diff --git a/data/images/cc871c0b3c90fbfbe2863095.jpg b/data/images/cc871c0b3c90fbfbe2863095.jpg new file mode 100644 index 0000000..317ca3a Binary files /dev/null and b/data/images/cc871c0b3c90fbfbe2863095.jpg differ diff --git a/data/images/cead6f05f85d660fe6a7c687.jpg b/data/images/cead6f05f85d660fe6a7c687.jpg new file mode 100644 index 0000000..2d09b7f Binary files /dev/null and b/data/images/cead6f05f85d660fe6a7c687.jpg differ diff --git a/data/images/d421a50e572140c48803fcc3.jpg b/data/images/d421a50e572140c48803fcc3.jpg new file mode 100644 index 0000000..eaa9fe0 Binary files /dev/null and b/data/images/d421a50e572140c48803fcc3.jpg differ diff --git a/data/images/d4df2d5249f16f89d4f098ed.jpg b/data/images/d4df2d5249f16f89d4f098ed.jpg new file mode 100644 index 0000000..b60501d Binary files /dev/null and b/data/images/d4df2d5249f16f89d4f098ed.jpg differ diff --git a/data/images/d59dc53a15053b49f972308f.jpg b/data/images/d59dc53a15053b49f972308f.jpg new file mode 100644 index 0000000..1e4e8c4 Binary files /dev/null and b/data/images/d59dc53a15053b49f972308f.jpg differ diff --git a/data/images/d6a3f2676adf39507814e1e3.png b/data/images/d6a3f2676adf39507814e1e3.png new file mode 100644 index 0000000..5117ab1 Binary files /dev/null and b/data/images/d6a3f2676adf39507814e1e3.png differ diff --git a/data/images/d7610a640f6736464603dd6e.jpg b/data/images/d7610a640f6736464603dd6e.jpg new file mode 100644 index 0000000..aa7cb6f Binary files /dev/null and b/data/images/d7610a640f6736464603dd6e.jpg differ diff --git a/data/images/db7bc4a73eed7c064526b02d.jpg b/data/images/db7bc4a73eed7c064526b02d.jpg new file mode 100644 index 0000000..97a6305 Binary files /dev/null and b/data/images/db7bc4a73eed7c064526b02d.jpg differ diff --git a/data/images/dc417c335d55f3a4fc59ed10.jpg b/data/images/dc417c335d55f3a4fc59ed10.jpg new file mode 100644 index 0000000..55c8031 Binary files /dev/null and b/data/images/dc417c335d55f3a4fc59ed10.jpg differ diff --git a/data/images/e11409a796ec7029bc5f8ef1.jpg b/data/images/e11409a796ec7029bc5f8ef1.jpg new file mode 100644 index 0000000..fe3d7fc Binary files /dev/null and b/data/images/e11409a796ec7029bc5f8ef1.jpg differ diff --git a/data/images/e3ed134b9f909c3893e7f2e7.jpg b/data/images/e3ed134b9f909c3893e7f2e7.jpg new file mode 100644 index 0000000..998be0a Binary files /dev/null and b/data/images/e3ed134b9f909c3893e7f2e7.jpg differ diff --git a/data/images/f2b0dfc4fc0598bf63dcb909.jpg b/data/images/f2b0dfc4fc0598bf63dcb909.jpg new file mode 100644 index 0000000..9691eeb Binary files /dev/null and b/data/images/f2b0dfc4fc0598bf63dcb909.jpg differ diff --git a/data/images/f2cc419b2830a0c334fc85c4.jpg b/data/images/f2cc419b2830a0c334fc85c4.jpg new file mode 100644 index 0000000..415c786 Binary files /dev/null and b/data/images/f2cc419b2830a0c334fc85c4.jpg differ diff --git a/data/images/f8d252be8716852ddbffd16e.jpg b/data/images/f8d252be8716852ddbffd16e.jpg new file mode 100644 index 0000000..8ee7089 Binary files /dev/null and b/data/images/f8d252be8716852ddbffd16e.jpg differ diff --git a/data/images/fa6f858e83ce9fb81e3ec434.jpg b/data/images/fa6f858e83ce9fb81e3ec434.jpg new file mode 100644 index 0000000..7112085 Binary files /dev/null and b/data/images/fa6f858e83ce9fb81e3ec434.jpg differ diff --git a/data/images/fbece72ff8d5fa2eae8bdf5f.jpg b/data/images/fbece72ff8d5fa2eae8bdf5f.jpg new file mode 100644 index 0000000..2f35ff4 Binary files /dev/null and b/data/images/fbece72ff8d5fa2eae8bdf5f.jpg differ diff --git a/src/services/NewsApiClient.js b/src/services/NewsApiClient.js index f447c97..d09a429 100644 --- a/src/services/NewsApiClient.js +++ b/src/services/NewsApiClient.js @@ -20,6 +20,7 @@ class NewsApiClient { this._pageSize = options.pageSize || 20 this._fetch = options.fetchImplementation || fetch this._imageStorage = options.imageStorage || new NewsImageStorage(options.dataDirectory) + this._detailCache = options.detailCache || new NewsDetailCache(options.dataDirectory) } /** @@ -172,6 +173,11 @@ class NewsApiClient { * @returns {Promise} */ async _fetchArticleContent(detailUrl, worldNews) { + const cachedContent = await this._detailCache.get(detailUrl) + if (cachedContent) { + return cachedContent + } + let response try { response = await this._fetch(detailUrl) @@ -186,7 +192,11 @@ class NewsApiClient { ? this._extractElementHtmlById(html, 'content') : this._extractTagHtml(html, 'article') - return this._replaceContentImages(content || '', detailUrl) + const normalizedContent = await this._replaceContentImages(content || '', detailUrl) + if (normalizedContent) { + await this._detailCache.set(detailUrl, normalizedContent) + } + return normalizedContent } /** @@ -317,6 +327,60 @@ class NewsImageStorage { } } +class NewsDetailCache { + constructor(dataDirectory) { + this._cachePath = path.resolve(dataDirectory || './data', 'detail-cache.json') + this._entries = null + } + + /** + * @param {string} detailUrl + * @returns {Promise} + */ + async get(detailUrl) { + const entries = await this._load() + const entry = entries[detailUrl] + return entry?.content || '' + } + + /** + * @param {string} detailUrl + * @param {string} content + * @returns {Promise} + */ + async set(detailUrl, content) { + const entries = await this._load() + entries[detailUrl] = { + content, + cachedAt: new Date().toISOString() + } + await fs.mkdir(path.dirname(this._cachePath), { recursive: true }) + await fs.writeFile(this._cachePath, `${JSON.stringify(entries, null, 2)}\n`) + } + + /** + * @returns {Promise>} + */ + async _load() { + if (this._entries) { + return this._entries + } + + try { + const raw = await fs.readFile(this._cachePath, 'utf8') + const parsed = JSON.parse(raw) + this._entries = parsed && typeof parsed === 'object' && !Array.isArray(parsed) ? parsed : {} + } catch (error) { + if (error.code !== 'ENOENT') { + this._entries = {} + return this._entries + } + this._entries = {} + } + return this._entries + } +} + function extensionFromContentType(contentType) { const normalized = String(contentType || '').split(';')[0].trim().toLowerCase() const byContentType = { @@ -370,5 +434,6 @@ function escapeRegExp(value) { module.exports = { NewsApiClient, - NewsImageStorage + NewsImageStorage, + NewsDetailCache }