feat: update news service to fetch Naver Stock news and enhance category handling
@@ -1,7 +1,4 @@
|
||||
PORT=3100
|
||||
NEWS_API_KEY=03f614876f0645948cb9bbce1661f4b2
|
||||
NEWS_API_BASE_URL=https://newsapi.org/v2/everything
|
||||
NEWS_API_LANGUAGE=ko
|
||||
NEWS_PAGE_SIZE=20
|
||||
NEWS_REFRESH_CRON=0 * * * *
|
||||
DATA_DIR=./data
|
||||
8
.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
# 默认忽略的文件
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# 基于编辑器的 HTTP 客户端请求
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
6
.idea/copilot.data.migration.ask2agent.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Ask2AgentMigrationStateService">
|
||||
<option name="migrationStatus" value="COMPLETED" />
|
||||
</component>
|
||||
</project>
|
||||
30
README.md
@@ -1,6 +1,6 @@
|
||||
# News Service
|
||||
|
||||
独立新闻微服务,负责周期性从 NewsAPI 拉取新闻并落盘为 JSON 文件,再通过 HTTP 接口提供给前端。
|
||||
独立新闻微服务,负责周期性从 Naver Stock 新闻接口拉取新闻并落盘为 JSON 文件,再通过 HTTP 接口提供给前端。
|
||||
|
||||
## 功能
|
||||
|
||||
@@ -12,20 +12,20 @@
|
||||
|
||||
## 分类
|
||||
|
||||
- finance
|
||||
- business
|
||||
- technology
|
||||
- market
|
||||
- flashnews
|
||||
- mainnews
|
||||
- ranknews
|
||||
- worldnews
|
||||
|
||||
## 目录
|
||||
|
||||
```text
|
||||
news_service/
|
||||
data/
|
||||
finance.json
|
||||
business.json
|
||||
technology.json
|
||||
market.json
|
||||
flashnews.json
|
||||
mainnews.json
|
||||
ranknews.json
|
||||
worldnews.json
|
||||
src/
|
||||
app/
|
||||
config/
|
||||
@@ -76,15 +76,15 @@ GET /api/news/categories
|
||||
### 获取指定分类新闻
|
||||
|
||||
```http
|
||||
GET /api/news?category=finance&limit=10
|
||||
GET /api/news/finance?limit=10
|
||||
GET /api/news?category=flashnews&limit=10
|
||||
GET /api/news/flashnews?limit=10
|
||||
```
|
||||
|
||||
### 兼容前端现有 NewsAPI 调用
|
||||
|
||||
```http
|
||||
GET /v2/everything?q=finance&language=ko&pageSize=10&page=1
|
||||
GET /v2/top-headlines?category=business&country=ko&pageSize=10
|
||||
GET /v2/everything?q=flashnews&language=ko&pageSize=10&page=1
|
||||
GET /v2/top-headlines?category=mainnews&country=ko&pageSize=10
|
||||
```
|
||||
|
||||
返回结构与前端当前使用的 NewsAPI 结构保持一致:
|
||||
@@ -107,7 +107,7 @@ GET /api/news/all?limit=10
|
||||
|
||||
```http
|
||||
POST /api/news/refresh
|
||||
POST /api/news/refresh?category=finance
|
||||
POST /api/news/refresh?category=flashnews
|
||||
```
|
||||
|
||||
## Docker
|
||||
@@ -123,7 +123,7 @@ cd /Users/wjp/Projects/juYou
|
||||
docker compose -f docker-compose.news-stack.yml up -d --build
|
||||
```
|
||||
|
||||
这样前端容器中的 Nginx 会把同源路径 `/newsapi/*` 代理到容器网络中的 `news-service:3100`,浏览器不会直接访问 NewsAPI,因此不会触发跨域限制。
|
||||
这样前端容器中的 Nginx 会把同源路径 `/newsapi/*` 代理到容器网络中的 `news-service:3100`,浏览器不会直接访问外部新闻接口,因此不会触发跨域限制。
|
||||
|
||||
## 前端接入建议
|
||||
|
||||
|
||||
BIN
data/images/00423321d15041b5393762d8.jpg
Normal file
|
After Width: | Height: | Size: 89 KiB |
BIN
data/images/03af58329dd7bd7fcd08caad.jpg
Normal file
|
After Width: | Height: | Size: 97 KiB |
BIN
data/images/0445b97587e4c18101545440.jpg
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
data/images/0f96cb3f70a4acd146221745.jpg
Normal file
|
After Width: | Height: | Size: 101 KiB |
BIN
data/images/11778425c7e41f364095c173.jpg
Normal file
|
After Width: | Height: | Size: 163 KiB |
BIN
data/images/1bd919fe25561f244ba4bcde.jpg
Normal file
|
After Width: | Height: | Size: 183 KiB |
BIN
data/images/1e692bb0a695d254ec4c5b52.png
Normal file
|
After Width: | Height: | Size: 707 KiB |
BIN
data/images/21a2fb69ddc27788a8f90a2d.png
Normal file
|
After Width: | Height: | Size: 210 KiB |
BIN
data/images/29a20d669f90c51ca45028eb.jpg
Normal file
|
After Width: | Height: | Size: 40 KiB |
BIN
data/images/2c1f2057e6af366bf3015f4e.jpg
Normal file
|
After Width: | Height: | Size: 91 KiB |
BIN
data/images/2cf8458e423bfdbf34c2acee.png
Normal file
|
After Width: | Height: | Size: 781 KiB |
BIN
data/images/2d3065fcd29569d3e701f490.jpg
Normal file
|
After Width: | Height: | Size: 152 KiB |
BIN
data/images/2d963b28def0c168195ad9d8.jpg
Normal file
|
After Width: | Height: | Size: 209 KiB |
BIN
data/images/34197e5f6ef10ba5374ba06c.jpg
Normal file
|
After Width: | Height: | Size: 226 KiB |
BIN
data/images/343eccb6d55919e039fc623a.jpg
Normal file
|
After Width: | Height: | Size: 165 KiB |
BIN
data/images/348dc9cbc9f3bfcfb69faaf3.jpg
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
data/images/36fd3a527a390add0e94e45a.png
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
data/images/39d36e714c455c22a8ae33e8.jpg
Normal file
|
After Width: | Height: | Size: 32 KiB |
BIN
data/images/3b031723f9b8f70495cbe385.jpg
Normal file
|
After Width: | Height: | Size: 146 KiB |
BIN
data/images/3e05220de047caa3995223f7.jpg
Normal file
|
After Width: | Height: | Size: 194 KiB |
BIN
data/images/40f0e5b20b96a98d559b2393.jpg
Normal file
|
After Width: | Height: | Size: 479 KiB |
BIN
data/images/4e61d48bc97636e335c58571.jpg
Normal file
|
After Width: | Height: | Size: 512 KiB |
BIN
data/images/593d13ceade737dab3554b79.jpg
Normal file
|
After Width: | Height: | Size: 635 KiB |
BIN
data/images/594e056c755a3d8896657cb5.jpg
Normal file
|
After Width: | Height: | Size: 170 KiB |
BIN
data/images/6747eff8ce40e840a4bcc543.jpg
Normal file
|
After Width: | Height: | Size: 44 KiB |
BIN
data/images/6e1153f65f086156ab4507ff.jpg
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
data/images/774506d32ba3ebee62d98b73.jpg
Normal file
|
After Width: | Height: | Size: 146 KiB |
BIN
data/images/8042891de0f32ce59c98bd0c.jpg
Normal file
|
After Width: | Height: | Size: 68 KiB |
BIN
data/images/8091fc1d13183104e622930b.png
Normal file
|
After Width: | Height: | Size: 331 KiB |
BIN
data/images/8425db5d40cdea4ebc5e48f3.png
Normal file
|
After Width: | Height: | Size: 262 KiB |
BIN
data/images/878e7cffa8e291d9522789eb.jpg
Normal file
|
After Width: | Height: | Size: 121 KiB |
BIN
data/images/8b832a8f758e51991a9d806b.jpg
Normal file
|
After Width: | Height: | Size: 13 KiB |
BIN
data/images/8c36e3ba8584a98cf35d5cc1.jpg
Normal file
|
After Width: | Height: | Size: 27 KiB |
BIN
data/images/91beb422e27fa644a3ded9d8.jpg
Normal file
|
After Width: | Height: | Size: 162 KiB |
BIN
data/images/a3b1c1c137445c6a9f013faa.png
Normal file
|
After Width: | Height: | Size: 879 KiB |
BIN
data/images/a65420883cb64d52d995bf05.jpg
Normal file
|
After Width: | Height: | Size: 36 KiB |
BIN
data/images/a6c2b277c006cae54ecf9693.jpg
Normal file
|
After Width: | Height: | Size: 123 KiB |
BIN
data/images/a9ea4d406363057edacb7374.jpg
Normal file
|
After Width: | Height: | Size: 100 KiB |
BIN
data/images/aa72b4d241ff62d6db375671.jpg
Normal file
|
After Width: | Height: | Size: 367 KiB |
BIN
data/images/ad8ece98c5c1b89ba27a8d25.jpg
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
data/images/adea914ec8333f341be544a9.jpg
Normal file
|
After Width: | Height: | Size: 59 KiB |
BIN
data/images/b3d584f58bafaab635d7f163.png
Normal file
|
After Width: | Height: | Size: 405 KiB |
BIN
data/images/b8c0278d1658dbac496e0ec9.jpg
Normal file
|
After Width: | Height: | Size: 614 KiB |
BIN
data/images/be51644c19d367d060b5d000.jpg
Normal file
|
After Width: | Height: | Size: 12 KiB |
BIN
data/images/c13b05772b18a142f0336ec7.jpg
Normal file
|
After Width: | Height: | Size: 323 KiB |
BIN
data/images/c14aad170cd82cb71f8be5bd.jpg
Normal file
|
After Width: | Height: | Size: 330 KiB |
BIN
data/images/c1ae0a2f3988bd10c1cdde1a.jpg
Normal file
|
After Width: | Height: | Size: 849 KiB |
BIN
data/images/c2821ab7c799d75c5500802b.jpg
Normal file
|
After Width: | Height: | Size: 584 KiB |
BIN
data/images/ce3466877b5e2c28cade8501.jpg
Normal file
|
After Width: | Height: | Size: 46 KiB |
BIN
data/images/d55f68fc3dbf94434e03cff2.jpg
Normal file
|
After Width: | Height: | Size: 136 KiB |
BIN
data/images/d57b617286902ce0da1bb251.jpg
Normal file
|
After Width: | Height: | Size: 99 KiB |
BIN
data/images/d624bea44fe62b165261612a.jpg
Normal file
|
After Width: | Height: | Size: 998 KiB |
BIN
data/images/da3e038ca74a61db5394bc61.jpg
Normal file
|
After Width: | Height: | Size: 47 KiB |
BIN
data/images/da744f3531167c5d7001261e.jpg
Normal file
|
After Width: | Height: | Size: 133 KiB |
BIN
data/images/dee3d95f4cbedbc4994f979c.jpg
Normal file
|
After Width: | Height: | Size: 63 KiB |
BIN
data/images/e19e8ee8bb7402e20565e131.jpg
Normal file
|
After Width: | Height: | Size: 1.1 MiB |
BIN
data/images/e57fdc4ec3fe336171017972.jpg
Normal file
|
After Width: | Height: | Size: 108 KiB |
BIN
data/images/e5bf9b4dd404f89b7d348c12.jpg
Normal file
|
After Width: | Height: | Size: 660 KiB |
BIN
data/images/e6678a79c0572cde599ee828.jpg
Normal file
|
After Width: | Height: | Size: 103 KiB |
BIN
data/images/e6702a617db5c421a3510f48.jpg
Normal file
|
After Width: | Height: | Size: 428 KiB |
BIN
data/images/e74737997b7c3f4c8ffa7ea7.jpg
Normal file
|
After Width: | Height: | Size: 155 KiB |
BIN
data/images/e7b005f7b80c0ba4e53e97b3.jpg
Normal file
|
After Width: | Height: | Size: 196 KiB |
BIN
data/images/e7bfe5e2f987a7673da6471a.png
Normal file
|
After Width: | Height: | Size: 29 KiB |
BIN
data/images/ee9eab40f9d168aa9c549a84.png
Normal file
|
After Width: | Height: | Size: 12 KiB |
BIN
data/images/eedd7a47590307fe54239b5e.jpg
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
data/images/efa91e02e3651fb87da539ea.jpg
Normal file
|
After Width: | Height: | Size: 33 KiB |
BIN
data/images/f1c24f189a00d53ffee40fdc.jpg
Normal file
|
After Width: | Height: | Size: 69 KiB |
BIN
data/images/f37e986168453dc4ca027ad0.png
Normal file
|
After Width: | Height: | Size: 5.4 KiB |
BIN
data/images/f6af6c96c9b014194de9d35b.jpg
Normal file
|
After Width: | Height: | Size: 80 KiB |
BIN
data/images/fd277d3e4a2f19326a5eb13f.png
Normal file
|
After Width: | Height: | Size: 332 KiB |
BIN
data/images/fe45b775cea5b0a60432da73.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
data/images/ff314f9050050192460333b5.png
Normal file
|
After Width: | Height: | Size: 962 KiB |
@@ -10,9 +10,6 @@ services:
|
||||
- "3100:3100"
|
||||
environment:
|
||||
PORT: 3100
|
||||
NEWS_API_KEY: ${NEWS_API_KEY}
|
||||
NEWS_API_BASE_URL: ${NEWS_API_BASE_URL:-https://newsapi.org/v2/everything}
|
||||
NEWS_API_LANGUAGE: ${NEWS_API_LANGUAGE:-en}
|
||||
NEWS_PAGE_SIZE: ${NEWS_PAGE_SIZE:-20}
|
||||
NEWS_REFRESH_CRON: ${NEWS_REFRESH_CRON:-0 * * * *}
|
||||
DATA_DIR: ${DATA_DIR:-/app/data}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
const express = require('express')
|
||||
const path = require('path')
|
||||
const { CategoryNewsRepository } = require('../repositories/CategoryNewsRepository')
|
||||
const { NewsApiClient } = require('../services/NewsApiClient')
|
||||
const { NewsStorageService } = require('../services/NewsStorageService')
|
||||
@@ -19,12 +20,10 @@ class NewsApplication {
|
||||
this._logger = new ConsoleLogger()
|
||||
this._categoryCatalog = new NewsCategoryCatalog()
|
||||
this._repository = new CategoryNewsRepository(runtimeConfig.dataDirectory)
|
||||
this._apiClient = new NewsApiClient(
|
||||
runtimeConfig.apiBaseUrl,
|
||||
runtimeConfig.apiKey,
|
||||
runtimeConfig.language,
|
||||
runtimeConfig.pageSize
|
||||
)
|
||||
this._apiClient = new NewsApiClient({
|
||||
pageSize: runtimeConfig.pageSize,
|
||||
dataDirectory: runtimeConfig.dataDirectory
|
||||
})
|
||||
this._storageService = new NewsStorageService(this._categoryCatalog, this._repository, this._apiClient)
|
||||
this._scheduler = new NewsRefreshScheduler(this._storageService, runtimeConfig.refreshCron, this._logger)
|
||||
this._express = express()
|
||||
@@ -69,6 +68,7 @@ class NewsApplication {
|
||||
*/
|
||||
_configureHttpPipeline() {
|
||||
this._express.use(express.json())
|
||||
this._express.use('/api/news/images', express.static(path.join(this._runtimeConfig.dataDirectory, 'images')))
|
||||
|
||||
this._express.get('/health', (_request, response) => {
|
||||
response.json({
|
||||
|
||||
@@ -9,12 +9,16 @@ class NewsCategory {
|
||||
* @param {string} query
|
||||
* @param {string} fileName
|
||||
* @param {string} label
|
||||
* @param {string} apiUrl
|
||||
* @param {boolean} worldNews
|
||||
*/
|
||||
constructor(key, query, fileName, label) {
|
||||
constructor(key, query, fileName, label, apiUrl, worldNews = false) {
|
||||
this.key = key
|
||||
this.query = query
|
||||
this.fileName = fileName
|
||||
this.label = label
|
||||
this.apiUrl = apiUrl
|
||||
this.worldNews = worldNews
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,11 +30,22 @@ class NewsCategory {
|
||||
class NewsCategoryCatalog {
|
||||
constructor() {
|
||||
this._categories = [
|
||||
new NewsCategory('finance', 'finance', 'finance.json', 'Finance'),
|
||||
new NewsCategory('business', 'business', 'business.json', 'Business'),
|
||||
new NewsCategory('technology', 'technology', 'technology.json', 'Technology'),
|
||||
new NewsCategory('market', 'market', 'market.json', 'Market')
|
||||
new NewsCategory('flashnews', 'flashnews', 'flashnews.json', 'Realtime News', 'https://m.stock.naver.com/front-api/news/category?category=flashnews&pageSize=20&page=1'),
|
||||
new NewsCategory('mainnews', 'mainnews', 'mainnews.json', 'Main News', 'https://m.stock.naver.com/front-api/news/category?category=mainnews&pageSize=20&page=1'),
|
||||
new NewsCategory('ranknews', 'ranknews', 'ranknews.json', 'Rank News', 'https://m.stock.naver.com/front-api/news/category?category=ranknews&pageSize=20&page=1'),
|
||||
new NewsCategory('worldnews', 'worldnews', 'worldnews.json', 'World News', 'https://m.stock.naver.com/front-api/news/worldnews?pageSize=20&page=1', true)
|
||||
]
|
||||
this._aliases = new Map([
|
||||
['finance', 'flashnews'],
|
||||
['business', 'mainnews'],
|
||||
['technology', 'ranknews'],
|
||||
['market', 'worldnews'],
|
||||
['flash', 'flashnews'],
|
||||
['main', 'mainnews'],
|
||||
['rank', 'ranknews'],
|
||||
['ranks', 'ranknews'],
|
||||
['world', 'worldnews']
|
||||
])
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -45,7 +60,8 @@ class NewsCategoryCatalog {
|
||||
* @returns {NewsCategory | null}
|
||||
*/
|
||||
getByKey(key) {
|
||||
return this._categories.find((item) => item.key === key) || null
|
||||
const normalizedKey = this._aliases.get(key) || key
|
||||
return this._categories.find((item) => item.key === normalizedKey) || null
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -11,14 +11,11 @@ class ServiceConfig {
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns {{port: number, apiKey: string, apiBaseUrl: string, language: string, pageSize: number, refreshCron: string, dataDirectory: string}}
|
||||
* @returns {{port: number, pageSize: number, refreshCron: string, dataDirectory: string}}
|
||||
*/
|
||||
toRuntimeConfig() {
|
||||
return {
|
||||
port: Number(this._env.PORT || 3100),
|
||||
apiKey: this._env.NEWS_API_KEY || '',
|
||||
apiBaseUrl: this._env.NEWS_API_BASE_URL || 'https://newsapi.org/v2/everything',
|
||||
language: this._env.NEWS_API_LANGUAGE || 'en',
|
||||
pageSize: Number(this._env.NEWS_PAGE_SIZE || 20),
|
||||
refreshCron: this._env.NEWS_REFRESH_CRON || '0 * * * *',
|
||||
dataDirectory: path.resolve(process.cwd(), this._env.DATA_DIR || './data')
|
||||
|
||||
@@ -1,14 +1,25 @@
|
||||
const crypto = require('crypto')
|
||||
const fs = require('fs/promises')
|
||||
const path = require('path')
|
||||
|
||||
const NAVER_ARTICLE_URL = 'https://n.news.naver.com/article/'
|
||||
const NAVER_WORLD_NEWS_URL = 'https://m.stock.naver.com/investment/news/worldnews/'
|
||||
|
||||
/**
|
||||
* NewsApiClient
|
||||
*
|
||||
* Wraps outbound requests to NewsAPI.
|
||||
* Fetches Naver Stock news and normalizes it to the existing NewsAPI-shaped
|
||||
* article contract consumed by the front-end compatibility routes.
|
||||
*/
|
||||
class NewsApiClient {
|
||||
constructor(apiBaseUrl, apiKey, language, pageSize) {
|
||||
this._apiBaseUrl = apiBaseUrl
|
||||
this._apiKey = apiKey
|
||||
this._language = language
|
||||
this._pageSize = pageSize
|
||||
constructor(apiBaseUrlOrOptions, _apiKey, _language, pageSize) {
|
||||
const options = typeof apiBaseUrlOrOptions === 'object'
|
||||
? apiBaseUrlOrOptions
|
||||
: { pageSize }
|
||||
|
||||
this._pageSize = options.pageSize || 20
|
||||
this._fetch = options.fetchImplementation || fetch
|
||||
this._imageStorage = options.imageStorage || new NewsImageStorage(options.dataDirectory)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -16,54 +27,348 @@ class NewsApiClient {
|
||||
* @returns {Promise<object[]>}
|
||||
*/
|
||||
async fetchArticlesByCategory(category) {
|
||||
if (!this._apiKey) {
|
||||
throw new Error('NEWS_API_KEY is required')
|
||||
}
|
||||
|
||||
const url = new URL(this._apiBaseUrl)
|
||||
url.searchParams.set('q', category.query)
|
||||
url.searchParams.set('language', this._language)
|
||||
url.searchParams.set('pageSize', String(this._pageSize))
|
||||
url.searchParams.set('page', '1')
|
||||
url.searchParams.set('sortBy', 'publishedAt')
|
||||
|
||||
const response = await fetch(url, {
|
||||
headers: {
|
||||
'X-Api-Key': this._apiKey
|
||||
}
|
||||
})
|
||||
const listUrl = this._createListUrl(category)
|
||||
const response = await this._fetch(listUrl)
|
||||
|
||||
if (!response.ok) {
|
||||
const failure = await response.json().catch(() => ({}))
|
||||
throw new Error(failure.message || `NewsAPI request failed with status ${response.status}`)
|
||||
throw new Error(`Naver news request failed with status ${response.status}`)
|
||||
}
|
||||
|
||||
const payload = await response.json()
|
||||
if (payload.status !== 'ok') {
|
||||
throw new Error(payload.message || 'NewsAPI returned a non-ok payload')
|
||||
}
|
||||
const items = this._extractNewsItems(payload)
|
||||
|
||||
return (payload.articles || []).map((article) => this._normalizeArticle(article))
|
||||
const articles = []
|
||||
for (const item of items) {
|
||||
const article = await this._normalizeArticle(item, category)
|
||||
if (article) {
|
||||
articles.push(article)
|
||||
}
|
||||
}
|
||||
return articles
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {object} article
|
||||
* @returns {object}
|
||||
* @param {import('../config/CategoryCatalog').NewsCategory} category
|
||||
* @returns {string}
|
||||
*/
|
||||
_normalizeArticle(article) {
|
||||
return {
|
||||
source: article.source || null,
|
||||
author: article.author || null,
|
||||
title: article.title || '',
|
||||
description: article.description || '',
|
||||
url: article.url || '',
|
||||
urlToImage: article.urlToImage || '',
|
||||
publishedAt: article.publishedAt || null,
|
||||
content: article.content || ''
|
||||
_createListUrl(category) {
|
||||
const url = new URL(category.apiUrl)
|
||||
url.searchParams.set('pageSize', String(this._pageSize))
|
||||
url.searchParams.set('page', '1')
|
||||
return url.toString()
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {object} payload
|
||||
* @returns {object[]}
|
||||
*/
|
||||
_extractNewsItems(payload) {
|
||||
if (Array.isArray(payload)) {
|
||||
return payload
|
||||
}
|
||||
|
||||
const directCandidates = [
|
||||
payload?.result?.newsList,
|
||||
payload?.result?.items,
|
||||
payload?.result?.list,
|
||||
payload?.result,
|
||||
payload?.items,
|
||||
payload?.newsList,
|
||||
payload?.list
|
||||
]
|
||||
const direct = directCandidates.find((candidate) => Array.isArray(candidate))
|
||||
if (direct) {
|
||||
return direct
|
||||
}
|
||||
|
||||
return this._findFirstNewsArray(payload) || []
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {unknown} value
|
||||
* @returns {object[] | null}
|
||||
*/
|
||||
_findFirstNewsArray(value) {
|
||||
if (!value || typeof value !== 'object') {
|
||||
return null
|
||||
}
|
||||
if (Array.isArray(value)) {
|
||||
return value.some((item) => item && typeof item === 'object' && this._getSourceId(item))
|
||||
? value
|
||||
: null
|
||||
}
|
||||
for (const child of Object.values(value)) {
|
||||
const found = this._findFirstNewsArray(child)
|
||||
if (found) {
|
||||
return found
|
||||
}
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {object} item
|
||||
* @param {import('../config/CategoryCatalog').NewsCategory} category
|
||||
* @returns {Promise<object | null>}
|
||||
*/
|
||||
async _normalizeArticle(item, category) {
|
||||
const sourceId = this._getSourceId(item)
|
||||
if (!sourceId) {
|
||||
return null
|
||||
}
|
||||
|
||||
const detailUrl = this._createDetailUrl(category, sourceId)
|
||||
const content = await this._fetchArticleContent(detailUrl, category.worldNews)
|
||||
const title = this._firstString(item.titleFull, item.title, item.newsTitle, item.articleTitle, item.headline)
|
||||
const sourceName = this._firstString(item.officeName, item.pressName, item.providerName, item.sourceName)
|
||||
|
||||
return {
|
||||
source: {
|
||||
id: sourceId.split('/')[0] || null,
|
||||
name: sourceName || null
|
||||
},
|
||||
author: this._firstString(item.author, item.writerName, item.reporter) || null,
|
||||
title,
|
||||
description: this._firstString(item.summary, item.description, item.body, item.subTitle) || '',
|
||||
url: detailUrl,
|
||||
urlToImage: this._firstString(item.imageUrl, item.thumbnailUrl, item.thumbnail, item.imageOriginLink) || '',
|
||||
publishedAt: normalizePublishedAt(this._firstString(item.datetime, item.dateTime, item.publishedAt, item.officeDateTime, item.regDate)) || null,
|
||||
content
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {object} item
|
||||
* @returns {string}
|
||||
*/
|
||||
_getSourceId(item) {
|
||||
const sourceId = this._firstString(item.sourceId, item.articleSourceId)
|
||||
if (sourceId) {
|
||||
return sourceId.replace(/^\/+/, '')
|
||||
}
|
||||
|
||||
const officeId = this._firstString(item.officeId, item.pressId)
|
||||
const articleId = this._firstString(item.articleId, item.newsId)
|
||||
if (officeId && articleId) {
|
||||
return `${officeId}/${articleId}`
|
||||
}
|
||||
|
||||
return ''
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {import('../config/CategoryCatalog').NewsCategory} category
|
||||
* @param {string} sourceId
|
||||
* @returns {string}
|
||||
*/
|
||||
_createDetailUrl(category, sourceId) {
|
||||
const baseUrl = category.worldNews ? NAVER_WORLD_NEWS_URL : NAVER_ARTICLE_URL
|
||||
return `${baseUrl}${sourceId}`
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} detailUrl
|
||||
* @param {boolean} worldNews
|
||||
* @returns {Promise<string>}
|
||||
*/
|
||||
async _fetchArticleContent(detailUrl, worldNews) {
|
||||
let response
|
||||
try {
|
||||
response = await this._fetch(detailUrl)
|
||||
} catch (_error) {
|
||||
return ''
|
||||
}
|
||||
if (!response.ok) {
|
||||
return ''
|
||||
}
|
||||
const html = await response.text()
|
||||
const content = worldNews
|
||||
? this._extractElementHtmlById(html, 'content')
|
||||
: this._extractTagHtml(html, 'article')
|
||||
|
||||
return this._replaceContentImages(content || '', detailUrl)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} html
|
||||
* @param {string} tagName
|
||||
* @returns {string}
|
||||
*/
|
||||
_extractTagHtml(html, tagName) {
|
||||
const expression = new RegExp(`<${tagName}\\b[^>]*>[\\s\\S]*?<\\/${tagName}>`, 'i')
|
||||
const match = html.match(expression)
|
||||
return match ? match[0] : ''
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} html
|
||||
* @param {string} id
|
||||
* @returns {string}
|
||||
*/
|
||||
_extractElementHtmlById(html, id) {
|
||||
const startExpression = new RegExp(`<([a-z][\\w:-]*)\\b(?=[^>]*\\bid=["']${escapeRegExp(id)}["'])[^>]*>`, 'i')
|
||||
const startMatch = startExpression.exec(html)
|
||||
if (!startMatch) {
|
||||
return ''
|
||||
}
|
||||
|
||||
const tagName = startMatch[1]
|
||||
const startIndex = startMatch.index
|
||||
const openTagEnd = startIndex + startMatch[0].length
|
||||
const tokenExpression = new RegExp(`<\\/?${escapeRegExp(tagName)}\\b[^>]*>`, 'ig')
|
||||
tokenExpression.lastIndex = openTagEnd
|
||||
let depth = 1
|
||||
let tokenMatch
|
||||
|
||||
while ((tokenMatch = tokenExpression.exec(html)) !== null) {
|
||||
if (tokenMatch[0][1] === '/') {
|
||||
depth -= 1
|
||||
if (depth === 0) {
|
||||
return html.slice(startIndex, tokenExpression.lastIndex)
|
||||
}
|
||||
} else {
|
||||
depth += 1
|
||||
}
|
||||
}
|
||||
|
||||
return ''
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} html
|
||||
* @returns {Promise<string>}
|
||||
*/
|
||||
async _replaceContentImages(html, baseUrl) {
|
||||
const imageExpression = /<img\b[^>]*\bsrc=["']([^"']+)["'][^>]*>/ig
|
||||
const replacements = []
|
||||
let match
|
||||
while ((match = imageExpression.exec(html)) !== null) {
|
||||
replacements.push({
|
||||
originalTag: match[0],
|
||||
originalUrl: match[1]
|
||||
})
|
||||
}
|
||||
|
||||
let updatedHtml = html
|
||||
for (const replacement of replacements) {
|
||||
const localUrl = await this._downloadImage(replacement.originalUrl, baseUrl)
|
||||
if (localUrl) {
|
||||
updatedHtml = updatedHtml.replace(
|
||||
replacement.originalTag,
|
||||
replacement.originalTag.replace(replacement.originalUrl, localUrl)
|
||||
)
|
||||
}
|
||||
}
|
||||
return updatedHtml
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} imageUrl
|
||||
* @returns {Promise<string>}
|
||||
*/
|
||||
async _downloadImage(imageUrl, baseUrl) {
|
||||
const absoluteImageUrl = toAbsoluteHttpUrl(imageUrl, baseUrl)
|
||||
if (!absoluteImageUrl) {
|
||||
return ''
|
||||
}
|
||||
|
||||
let response
|
||||
try {
|
||||
response = await this._fetch(absoluteImageUrl)
|
||||
} catch (_error) {
|
||||
return ''
|
||||
}
|
||||
if (!response.ok) {
|
||||
return ''
|
||||
}
|
||||
const contentType = response.headers?.get?.('content-type') || 'application/octet-stream'
|
||||
const buffer = Buffer.from(await response.arrayBuffer())
|
||||
return this._imageStorage.saveImage(absoluteImageUrl, buffer, contentType)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {...unknown} values
|
||||
* @returns {string}
|
||||
*/
|
||||
_firstString(...values) {
|
||||
const value = values.find((item) => typeof item === 'string' && item.trim())
|
||||
return value ? value.trim() : ''
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
NewsApiClient
|
||||
/**
 * NewsImageStorage
 *
 * Writes downloaded article images into an "images" folder under the data
 * directory and returns the HTTP path used to reference each stored file.
 */
class NewsImageStorage {
  /**
   * @param {string} [dataDirectory] Root data directory; "./data" is used when omitted.
   */
  constructor(dataDirectory) {
    const root = dataDirectory || './data'
    this._imageDirectory = path.resolve(root, 'images')
  }

  /**
   * Stores one image under a name derived from its URL.
   *
   * @param {string} imageUrl Absolute URL the image was downloaded from
   * @param {Buffer} buffer Image bytes
   * @param {string} contentType Content-Type reported for the download
   * @returns {Promise<string>} Path of the stored image below /api/news/images/
   */
  async saveImage(imageUrl, buffer, contentType) {
    await fs.mkdir(this._imageDirectory, { recursive: true })

    // Extension preference: content type, then the URL path, then ".bin".
    const suffix = extensionFromContentType(contentType) || extensionFromUrl(imageUrl) || '.bin'

    // The name is the first 24 hex characters of the SHA-256 of the URL, so
    // the same URL always maps to the same file.
    const urlHash = crypto.createHash('sha256').update(imageUrl).digest('hex')
    const fileName = `${urlHash.slice(0, 24)}${suffix}`

    const target = path.join(this._imageDirectory, fileName)
    await fs.writeFile(target, buffer)
    return `/api/news/images/${fileName}`
  }
}
|
||||
|
||||
/**
 * Maps an image Content-Type header value to a file extension.
 *
 * Parameters after ";" are ignored and matching is case-insensitive.
 *
 * @param {unknown} contentType Raw Content-Type header value
 * @returns {string} Extension including the leading dot, or '' when the type is not a known image type
 */
function extensionFromContentType(contentType) {
  const mimeType = String(contentType || '').split(';')[0].trim().toLowerCase()

  // A Map is used instead of an object literal: the header comes from a
  // remote server, and a plain-object lookup would resolve values such as
  // "constructor" or "__proto__" through the prototype chain.
  // NOTE(review): SVG files can carry script; confirm that serving stored
  // .svg files from the application's own origin is acceptable.
  const byContentType = new Map([
    ['image/jpeg', '.jpg'],
    ['image/jpg', '.jpg'],
    ['image/png', '.png'],
    ['image/gif', '.gif'],
    ['image/webp', '.webp'],
    ['image/svg+xml', '.svg']
  ])

  return byContentType.get(mimeType) || ''
}
|
||||
|
||||
/**
 * Derives a lower-case file extension from the path of an image URL.
 *
 * @param {string} imageUrl Absolute image URL
 * @returns {string} Extension including the leading dot, or '' when the URL
 *   cannot be parsed or its path has no short alphanumeric extension
 */
function extensionFromUrl(imageUrl) {
  try {
    const extension = path.extname(new URL(imageUrl).pathname).toLowerCase()
    // Accept only a dot followed by 1-5 letters or digits (at most six
    // characters in total, as before). The value becomes part of a file name
    // that is also served as a URL path segment, so percent signs, a bare
    // "." and other odd characters are rejected.
    return /^\.[a-z0-9]{1,5}$/.test(extension) ? extension : ''
  } catch (_error) {
    // new URL() throws on unparsable input; treat that as "no extension".
    return ''
  }
}
|
||||
|
||||
/**
 * Resolves a possibly relative URL against a base and keeps it only when the
 * result uses the http or https scheme.
 *
 * @param {string} value URL or relative reference
 * @param {string} [baseUrl] Base URL used to resolve relative references
 * @returns {string} Absolute URL string, or '' when it cannot be parsed or is not http(s)
 */
function toAbsoluteHttpUrl(value, baseUrl) {
  let resolved
  try {
    resolved = new URL(value, baseUrl)
  } catch (_error) {
    return ''
  }

  const isHttp = resolved.protocol === 'http:' || resolved.protocol === 'https:'
  return isHttp ? resolved.toString() : ''
}
|
||||
|
||||
/**
 * Converts a compact numeric timestamp into an ISO-8601 string.
 *
 * Both "yyyyMMddHHmmss" (14 digits) and "yyyyMMddHHmm" (12 digits, seconds
 * taken as 00) are converted, with a fixed +09:00 offset appended. Any other
 * non-empty value is returned unchanged.
 *
 * @param {string} value Raw timestamp string
 * @returns {string} ISO-8601 timestamp, the unchanged input, or '' for empty input
 */
function normalizePublishedAt(value) {
  if (!value) {
    return ''
  }

  // The seconds group is optional so minute-precision timestamps are not
  // passed through as unparsable digit strings.
  const compact = /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})?$/.exec(value)
  if (compact === null) {
    return value
  }

  const [, year, month, day, hour, minute, second = '00'] = compact
  return `${year}-${month}-${day}T${hour}:${minute}:${second}+09:00`
}
|
||||
|
||||
/**
 * Escapes regular-expression metacharacters so the text can be embedded
 * literally inside a RegExp source string.
 *
 * @param {unknown} value Value to escape; it is converted to a string first
 * @returns {string} Escaped text
 */
function escapeRegExp(value) {
  const text = String(value)
  return text.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`)
}
|
||||
|
||||
module.exports = {
|
||||
NewsApiClient,
|
||||
NewsImageStorage
|
||||
}
|
||||
@@ -86,7 +86,8 @@ class NewsStorageService {
|
||||
key: category.key,
|
||||
label: category.label,
|
||||
query: category.query,
|
||||
fileName: category.fileName
|
||||
fileName: category.fileName,
|
||||
apiUrl: category.apiUrl
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||