feat: implement detail caching for news articles to optimize content fetching
All checks were successful
Docker Build and Push - News Service / Build and Push News Service Image (push) Successful in 42s
Docker Build and Push - News Service / Build Summary (push) Successful in 0s

This commit is contained in:
2026-04-30 16:38:02 +08:00
parent 9a0b41459c
commit b57b64ef95
77 changed files with 75 additions and 2 deletions

View File

@@ -26,6 +26,8 @@ news_service/
mainnews.json
ranknews.json
worldnews.json
detail-cache.json
images/
src/
app/
config/
@@ -110,6 +112,12 @@ POST /api/news/refresh
POST /api/news/refresh?category=flashnews
```
## 缓存
- 分类列表和前端返回数据会落盘到 `data/<category>.json`。
- 详情正文会按详情页 URL 去重缓存到 `data/detail-cache.json`,同一篇新闻后续刷新不会重复抓取详情页。
- 正文图片会下载到 `data/images/`,正文 HTML 里的图片地址会替换成 `/api/news/images/<file>`。
## Docker
```bash

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 815 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 221 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 331 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 459 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 481 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 442 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 490 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 459 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 157 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 187 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 252 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 193 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 307 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 363 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 651 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 179 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 126 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 385 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 972 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 141 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 327 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 141 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 379 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 441 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 436 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 301 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 419 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 881 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 387 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

View File

@@ -20,6 +20,7 @@ class NewsApiClient {
this._pageSize = options.pageSize || 20
this._fetch = options.fetchImplementation || fetch
this._imageStorage = options.imageStorage || new NewsImageStorage(options.dataDirectory)
this._detailCache = options.detailCache || new NewsDetailCache(options.dataDirectory)
}
/**
@@ -172,6 +173,11 @@ class NewsApiClient {
* @returns {Promise<string>}
*/
async _fetchArticleContent(detailUrl, worldNews) {
const cachedContent = await this._detailCache.get(detailUrl)
if (cachedContent) {
return cachedContent
}
let response
try {
response = await this._fetch(detailUrl)
@@ -186,7 +192,11 @@ class NewsApiClient {
? this._extractElementHtmlById(html, 'content')
: this._extractTagHtml(html, 'article')
return this._replaceContentImages(content || '', detailUrl)
const normalizedContent = await this._replaceContentImages(content || '', detailUrl)
if (normalizedContent) {
await this._detailCache.set(detailUrl, normalizedContent)
}
return normalizedContent
}
/**
@@ -317,6 +327,60 @@ class NewsImageStorage {
}
}
/**
 * File-backed cache of article detail content, keyed by detail-page URL.
 *
 * All entries live in a single JSON file (`detail-cache.json`) inside the data
 * directory. The file is read lazily on first access and kept in memory
 * afterwards; every `set` rewrites the whole file.
 */
class NewsDetailCache {
  /**
   * @param {string} [dataDirectory] Directory holding the cache file; falls back to `./data`.
   */
  constructor(dataDirectory) {
    this._cachePath = path.resolve(dataDirectory || './data', 'detail-cache.json')
    this._entries = null
    // In-flight load shared by concurrent callers so they all receive the SAME
    // entries object instead of each installing their own copy.
    this._loading = null
    // Tail of the write chain; keeps file writes strictly sequential.
    this._writeQueue = Promise.resolve()
  }

  /**
   * Look up cached content for a detail URL.
   *
   * @param {string} detailUrl
   * @returns {Promise<string>} Cached content, or `''` when nothing usable is cached.
   */
  async get(detailUrl) {
    const entries = await this._load()
    // Own-property check so inherited keys (e.g. "constructor") never count as hits.
    if (!Object.hasOwn(entries, detailUrl)) {
      return ''
    }
    const content = entries[detailUrl]?.content
    // A hand-edited or damaged cache file may hold non-string content; treat it as a miss.
    return typeof content === 'string' ? content : ''
  }

  /**
   * Store content for a detail URL and persist the whole cache to disk.
   *
   * @param {string} detailUrl
   * @param {string} content
   * @returns {Promise<void>} Resolves once this update has been written; rejects if the write fails.
   */
  async set(detailUrl, content) {
    const entries = await this._load()
    entries[detailUrl] = {
      content,
      cachedAt: new Date().toISOString()
    }
    const write = this._writeQueue.then(() => this._persist(entries))
    // Keep the chain alive after a failed write; the failure is still reported
    // to this caller through `write`.
    this._writeQueue = write.catch(() => {})
    await write
  }

  /**
   * Return the in-memory entries, reading the cache file on first use.
   *
   * @returns {Promise<Record<string, {content: string, cachedAt: string}>>}
   */
  async _load() {
    if (this._entries) {
      return this._entries
    }
    if (!this._loading) {
      this._loading = this._readEntries().then((entries) => {
        this._entries = entries
        return entries
      })
    }
    return this._loading
  }

  /**
   * Read and parse the cache file. Never rejects: a missing, unreadable or
   * malformed file yields an empty cache so content is simply fetched again.
   *
   * @returns {Promise<Record<string, {content: string, cachedAt: string}>>}
   */
  async _readEntries() {
    try {
      const raw = await fs.readFile(this._cachePath, 'utf8')
      const parsed = JSON.parse(raw)
      const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)
      return isRecord ? parsed : {}
    } catch (error) {
      // A missing file is the normal first-run case; anything else (bad JSON,
      // permissions) is worth surfacing even though we still start empty.
      if (error?.code !== 'ENOENT') {
        console.warn(`NewsDetailCache: ignoring unreadable cache file ${this._cachePath}: ${error?.message}`)
      }
      return {}
    }
  }

  /**
   * Write the given entries to the cache file, creating the directory if needed.
   * Serialization happens at write time, so queued writes include later updates.
   *
   * @param {Record<string, {content: string, cachedAt: string}>} entries
   * @returns {Promise<void>}
   */
  async _persist(entries) {
    await fs.mkdir(path.dirname(this._cachePath), { recursive: true })
    await fs.writeFile(this._cachePath, `${JSON.stringify(entries, null, 2)}\n`)
  }
}
function extensionFromContentType(contentType) {
const normalized = String(contentType || '').split(';')[0].trim().toLowerCase()
const byContentType = {
@@ -370,5 +434,6 @@ function escapeRegExp(value) {
module.exports = {
NewsApiClient,
NewsImageStorage
NewsImageStorage,
NewsDetailCache
}