feat: update news service to fetch Naver Stock news and enhance category handling
@@ -1,7 +1,4 @@
|
|||||||
PORT=3100
|
PORT=3100
|
||||||
NEWS_API_KEY=03f614876f0645948cb9bbce1661f4b2
|
|
||||||
NEWS_API_BASE_URL=https://newsapi.org/v2/everything
|
|
||||||
NEWS_API_LANGUAGE=ko
|
|
||||||
NEWS_PAGE_SIZE=20
|
NEWS_PAGE_SIZE=20
|
||||||
NEWS_REFRESH_CRON=0 * * * *
|
NEWS_REFRESH_CRON=0 * * * *
|
||||||
DATA_DIR=./data
|
DATA_DIR=./data
|
||||||
8
.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# 默认忽略的文件
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# 基于编辑器的 HTTP 客户端请求
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
6
.idea/copilot.data.migration.ask2agent.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Ask2AgentMigrationStateService">
|
||||||
|
<option name="migrationStatus" value="COMPLETED" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
30
README.md
@@ -1,6 +1,6 @@
|
|||||||
# News Service
|
# News Service
|
||||||
|
|
||||||
独立新闻微服务,负责周期性从 NewsAPI 拉取新闻并落盘为 JSON 文件,再通过 HTTP 接口提供给前端。
|
独立新闻微服务,负责周期性从 Naver Stock 新闻接口拉取新闻并落盘为 JSON 文件,再通过 HTTP 接口提供给前端。
|
||||||
|
|
||||||
## 功能
|
## 功能
|
||||||
|
|
||||||
@@ -12,20 +12,20 @@
|
|||||||
|
|
||||||
## 分类
|
## 分类
|
||||||
|
|
||||||
- finance
|
- flashnews
|
||||||
- business
|
- mainnews
|
||||||
- technology
|
- ranknews
|
||||||
- market
|
- worldnews
|
||||||
|
|
||||||
## 目录
|
## 目录
|
||||||
|
|
||||||
```text
|
```text
|
||||||
news_service/
|
news_service/
|
||||||
data/
|
data/
|
||||||
finance.json
|
flashnews.json
|
||||||
business.json
|
mainnews.json
|
||||||
technology.json
|
ranknews.json
|
||||||
market.json
|
worldnews.json
|
||||||
src/
|
src/
|
||||||
app/
|
app/
|
||||||
config/
|
config/
|
||||||
@@ -76,15 +76,15 @@ GET /api/news/categories
|
|||||||
### 获取指定分类新闻
|
### 获取指定分类新闻
|
||||||
|
|
||||||
```http
|
```http
|
||||||
GET /api/news?category=finance&limit=10
|
GET /api/news?category=flashnews&limit=10
|
||||||
GET /api/news/finance?limit=10
|
GET /api/news/flashnews?limit=10
|
||||||
```
|
```
|
||||||
|
|
||||||
### 兼容前端现有 NewsAPI 调用
|
### 兼容前端现有 NewsAPI 调用
|
||||||
|
|
||||||
```http
|
```http
|
||||||
GET /v2/everything?q=finance&language=ko&pageSize=10&page=1
|
GET /v2/everything?q=flashnews&language=ko&pageSize=10&page=1
|
||||||
GET /v2/top-headlines?category=business&country=ko&pageSize=10
|
GET /v2/top-headlines?category=mainnews&country=ko&pageSize=10
|
||||||
```
|
```
|
||||||
|
|
||||||
返回结构与前端当前使用的 NewsAPI 结构保持一致:
|
返回结构与前端当前使用的 NewsAPI 结构保持一致:
|
||||||
@@ -107,7 +107,7 @@ GET /api/news/all?limit=10
|
|||||||
|
|
||||||
```http
|
```http
|
||||||
POST /api/news/refresh
|
POST /api/news/refresh
|
||||||
POST /api/news/refresh?category=finance
|
POST /api/news/refresh?category=flashnews
|
||||||
```
|
```
|
||||||
|
|
||||||
## Docker
|
## Docker
|
||||||
@@ -123,7 +123,7 @@ cd /Users/wjp/Projects/juYou
|
|||||||
docker compose -f docker-compose.news-stack.yml up -d --build
|
docker compose -f docker-compose.news-stack.yml up -d --build
|
||||||
```
|
```
|
||||||
|
|
||||||
这样前端容器中的 Nginx 会把同源路径 `/newsapi/*` 代理到容器网络中的 `news-service:3100`,浏览器不会直接访问 NewsAPI,因此不会触发跨域限制。
|
这样前端容器中的 Nginx 会把同源路径 `/newsapi/*` 代理到容器网络中的 `news-service:3100`,浏览器不会直接访问外部新闻接口,因此不会触发跨域限制。
|
||||||
|
|
||||||
## 前端接入建议
|
## 前端接入建议
|
||||||
|
|
||||||
|
|||||||
BIN
data/images/00423321d15041b5393762d8.jpg
Normal file
|
After Width: | Height: | Size: 89 KiB |
BIN
data/images/03af58329dd7bd7fcd08caad.jpg
Normal file
|
After Width: | Height: | Size: 97 KiB |
BIN
data/images/0445b97587e4c18101545440.jpg
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
data/images/0f96cb3f70a4acd146221745.jpg
Normal file
|
After Width: | Height: | Size: 101 KiB |
BIN
data/images/11778425c7e41f364095c173.jpg
Normal file
|
After Width: | Height: | Size: 163 KiB |
BIN
data/images/1bd919fe25561f244ba4bcde.jpg
Normal file
|
After Width: | Height: | Size: 183 KiB |
BIN
data/images/1e692bb0a695d254ec4c5b52.png
Normal file
|
After Width: | Height: | Size: 707 KiB |
BIN
data/images/21a2fb69ddc27788a8f90a2d.png
Normal file
|
After Width: | Height: | Size: 210 KiB |
BIN
data/images/29a20d669f90c51ca45028eb.jpg
Normal file
|
After Width: | Height: | Size: 40 KiB |
BIN
data/images/2c1f2057e6af366bf3015f4e.jpg
Normal file
|
After Width: | Height: | Size: 91 KiB |
BIN
data/images/2cf8458e423bfdbf34c2acee.png
Normal file
|
After Width: | Height: | Size: 781 KiB |
BIN
data/images/2d3065fcd29569d3e701f490.jpg
Normal file
|
After Width: | Height: | Size: 152 KiB |
BIN
data/images/2d963b28def0c168195ad9d8.jpg
Normal file
|
After Width: | Height: | Size: 209 KiB |
BIN
data/images/34197e5f6ef10ba5374ba06c.jpg
Normal file
|
After Width: | Height: | Size: 226 KiB |
BIN
data/images/343eccb6d55919e039fc623a.jpg
Normal file
|
After Width: | Height: | Size: 165 KiB |
BIN
data/images/348dc9cbc9f3bfcfb69faaf3.jpg
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
data/images/36fd3a527a390add0e94e45a.png
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
data/images/39d36e714c455c22a8ae33e8.jpg
Normal file
|
After Width: | Height: | Size: 32 KiB |
BIN
data/images/3b031723f9b8f70495cbe385.jpg
Normal file
|
After Width: | Height: | Size: 146 KiB |
BIN
data/images/3e05220de047caa3995223f7.jpg
Normal file
|
After Width: | Height: | Size: 194 KiB |
BIN
data/images/40f0e5b20b96a98d559b2393.jpg
Normal file
|
After Width: | Height: | Size: 479 KiB |
BIN
data/images/4e61d48bc97636e335c58571.jpg
Normal file
|
After Width: | Height: | Size: 512 KiB |
BIN
data/images/593d13ceade737dab3554b79.jpg
Normal file
|
After Width: | Height: | Size: 635 KiB |
BIN
data/images/594e056c755a3d8896657cb5.jpg
Normal file
|
After Width: | Height: | Size: 170 KiB |
BIN
data/images/6747eff8ce40e840a4bcc543.jpg
Normal file
|
After Width: | Height: | Size: 44 KiB |
BIN
data/images/6e1153f65f086156ab4507ff.jpg
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
data/images/774506d32ba3ebee62d98b73.jpg
Normal file
|
After Width: | Height: | Size: 146 KiB |
BIN
data/images/8042891de0f32ce59c98bd0c.jpg
Normal file
|
After Width: | Height: | Size: 68 KiB |
BIN
data/images/8091fc1d13183104e622930b.png
Normal file
|
After Width: | Height: | Size: 331 KiB |
BIN
data/images/8425db5d40cdea4ebc5e48f3.png
Normal file
|
After Width: | Height: | Size: 262 KiB |
BIN
data/images/878e7cffa8e291d9522789eb.jpg
Normal file
|
After Width: | Height: | Size: 121 KiB |
BIN
data/images/8b832a8f758e51991a9d806b.jpg
Normal file
|
After Width: | Height: | Size: 13 KiB |
BIN
data/images/8c36e3ba8584a98cf35d5cc1.jpg
Normal file
|
After Width: | Height: | Size: 27 KiB |
BIN
data/images/91beb422e27fa644a3ded9d8.jpg
Normal file
|
After Width: | Height: | Size: 162 KiB |
BIN
data/images/a3b1c1c137445c6a9f013faa.png
Normal file
|
After Width: | Height: | Size: 879 KiB |
BIN
data/images/a65420883cb64d52d995bf05.jpg
Normal file
|
After Width: | Height: | Size: 36 KiB |
BIN
data/images/a6c2b277c006cae54ecf9693.jpg
Normal file
|
After Width: | Height: | Size: 123 KiB |
BIN
data/images/a9ea4d406363057edacb7374.jpg
Normal file
|
After Width: | Height: | Size: 100 KiB |
BIN
data/images/aa72b4d241ff62d6db375671.jpg
Normal file
|
After Width: | Height: | Size: 367 KiB |
BIN
data/images/ad8ece98c5c1b89ba27a8d25.jpg
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
data/images/adea914ec8333f341be544a9.jpg
Normal file
|
After Width: | Height: | Size: 59 KiB |
BIN
data/images/b3d584f58bafaab635d7f163.png
Normal file
|
After Width: | Height: | Size: 405 KiB |
BIN
data/images/b8c0278d1658dbac496e0ec9.jpg
Normal file
|
After Width: | Height: | Size: 614 KiB |
BIN
data/images/be51644c19d367d060b5d000.jpg
Normal file
|
After Width: | Height: | Size: 12 KiB |
BIN
data/images/c13b05772b18a142f0336ec7.jpg
Normal file
|
After Width: | Height: | Size: 323 KiB |
BIN
data/images/c14aad170cd82cb71f8be5bd.jpg
Normal file
|
After Width: | Height: | Size: 330 KiB |
BIN
data/images/c1ae0a2f3988bd10c1cdde1a.jpg
Normal file
|
After Width: | Height: | Size: 849 KiB |
BIN
data/images/c2821ab7c799d75c5500802b.jpg
Normal file
|
After Width: | Height: | Size: 584 KiB |
BIN
data/images/ce3466877b5e2c28cade8501.jpg
Normal file
|
After Width: | Height: | Size: 46 KiB |
BIN
data/images/d55f68fc3dbf94434e03cff2.jpg
Normal file
|
After Width: | Height: | Size: 136 KiB |
BIN
data/images/d57b617286902ce0da1bb251.jpg
Normal file
|
After Width: | Height: | Size: 99 KiB |
BIN
data/images/d624bea44fe62b165261612a.jpg
Normal file
|
After Width: | Height: | Size: 998 KiB |
BIN
data/images/da3e038ca74a61db5394bc61.jpg
Normal file
|
After Width: | Height: | Size: 47 KiB |
BIN
data/images/da744f3531167c5d7001261e.jpg
Normal file
|
After Width: | Height: | Size: 133 KiB |
BIN
data/images/dee3d95f4cbedbc4994f979c.jpg
Normal file
|
After Width: | Height: | Size: 63 KiB |
BIN
data/images/e19e8ee8bb7402e20565e131.jpg
Normal file
|
After Width: | Height: | Size: 1.1 MiB |
BIN
data/images/e57fdc4ec3fe336171017972.jpg
Normal file
|
After Width: | Height: | Size: 108 KiB |
BIN
data/images/e5bf9b4dd404f89b7d348c12.jpg
Normal file
|
After Width: | Height: | Size: 660 KiB |
BIN
data/images/e6678a79c0572cde599ee828.jpg
Normal file
|
After Width: | Height: | Size: 103 KiB |
BIN
data/images/e6702a617db5c421a3510f48.jpg
Normal file
|
After Width: | Height: | Size: 428 KiB |
BIN
data/images/e74737997b7c3f4c8ffa7ea7.jpg
Normal file
|
After Width: | Height: | Size: 155 KiB |
BIN
data/images/e7b005f7b80c0ba4e53e97b3.jpg
Normal file
|
After Width: | Height: | Size: 196 KiB |
BIN
data/images/e7bfe5e2f987a7673da6471a.png
Normal file
|
After Width: | Height: | Size: 29 KiB |
BIN
data/images/ee9eab40f9d168aa9c549a84.png
Normal file
|
After Width: | Height: | Size: 12 KiB |
BIN
data/images/eedd7a47590307fe54239b5e.jpg
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
data/images/efa91e02e3651fb87da539ea.jpg
Normal file
|
After Width: | Height: | Size: 33 KiB |
BIN
data/images/f1c24f189a00d53ffee40fdc.jpg
Normal file
|
After Width: | Height: | Size: 69 KiB |
BIN
data/images/f37e986168453dc4ca027ad0.png
Normal file
|
After Width: | Height: | Size: 5.4 KiB |
BIN
data/images/f6af6c96c9b014194de9d35b.jpg
Normal file
|
After Width: | Height: | Size: 80 KiB |
BIN
data/images/fd277d3e4a2f19326a5eb13f.png
Normal file
|
After Width: | Height: | Size: 332 KiB |
BIN
data/images/fe45b775cea5b0a60432da73.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
data/images/ff314f9050050192460333b5.png
Normal file
|
After Width: | Height: | Size: 962 KiB |
@@ -10,9 +10,6 @@ services:
|
|||||||
- "3100:3100"
|
- "3100:3100"
|
||||||
environment:
|
environment:
|
||||||
PORT: 3100
|
PORT: 3100
|
||||||
NEWS_API_KEY: ${NEWS_API_KEY}
|
|
||||||
NEWS_API_BASE_URL: ${NEWS_API_BASE_URL:-https://newsapi.org/v2/everything}
|
|
||||||
NEWS_API_LANGUAGE: ${NEWS_API_LANGUAGE:-en}
|
|
||||||
NEWS_PAGE_SIZE: ${NEWS_PAGE_SIZE:-20}
|
NEWS_PAGE_SIZE: ${NEWS_PAGE_SIZE:-20}
|
||||||
NEWS_REFRESH_CRON: ${NEWS_REFRESH_CRON:-0 * * * *}
|
NEWS_REFRESH_CRON: ${NEWS_REFRESH_CRON:-0 * * * *}
|
||||||
DATA_DIR: ${DATA_DIR:-/app/data}
|
DATA_DIR: ${DATA_DIR:-/app/data}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
const express = require('express')
|
const express = require('express')
|
||||||
|
const path = require('path')
|
||||||
const { CategoryNewsRepository } = require('../repositories/CategoryNewsRepository')
|
const { CategoryNewsRepository } = require('../repositories/CategoryNewsRepository')
|
||||||
const { NewsApiClient } = require('../services/NewsApiClient')
|
const { NewsApiClient } = require('../services/NewsApiClient')
|
||||||
const { NewsStorageService } = require('../services/NewsStorageService')
|
const { NewsStorageService } = require('../services/NewsStorageService')
|
||||||
@@ -19,12 +20,10 @@ class NewsApplication {
|
|||||||
this._logger = new ConsoleLogger()
|
this._logger = new ConsoleLogger()
|
||||||
this._categoryCatalog = new NewsCategoryCatalog()
|
this._categoryCatalog = new NewsCategoryCatalog()
|
||||||
this._repository = new CategoryNewsRepository(runtimeConfig.dataDirectory)
|
this._repository = new CategoryNewsRepository(runtimeConfig.dataDirectory)
|
||||||
this._apiClient = new NewsApiClient(
|
this._apiClient = new NewsApiClient({
|
||||||
runtimeConfig.apiBaseUrl,
|
pageSize: runtimeConfig.pageSize,
|
||||||
runtimeConfig.apiKey,
|
dataDirectory: runtimeConfig.dataDirectory
|
||||||
runtimeConfig.language,
|
})
|
||||||
runtimeConfig.pageSize
|
|
||||||
)
|
|
||||||
this._storageService = new NewsStorageService(this._categoryCatalog, this._repository, this._apiClient)
|
this._storageService = new NewsStorageService(this._categoryCatalog, this._repository, this._apiClient)
|
||||||
this._scheduler = new NewsRefreshScheduler(this._storageService, runtimeConfig.refreshCron, this._logger)
|
this._scheduler = new NewsRefreshScheduler(this._storageService, runtimeConfig.refreshCron, this._logger)
|
||||||
this._express = express()
|
this._express = express()
|
||||||
@@ -69,6 +68,7 @@ class NewsApplication {
|
|||||||
*/
|
*/
|
||||||
_configureHttpPipeline() {
|
_configureHttpPipeline() {
|
||||||
this._express.use(express.json())
|
this._express.use(express.json())
|
||||||
|
this._express.use('/api/news/images', express.static(path.join(this._runtimeConfig.dataDirectory, 'images')))
|
||||||
|
|
||||||
this._express.get('/health', (_request, response) => {
|
this._express.get('/health', (_request, response) => {
|
||||||
response.json({
|
response.json({
|
||||||
|
|||||||
@@ -9,12 +9,16 @@ class NewsCategory {
|
|||||||
* @param {string} query
|
* @param {string} query
|
||||||
* @param {string} fileName
|
* @param {string} fileName
|
||||||
* @param {string} label
|
* @param {string} label
|
||||||
|
* @param {string} apiUrl
|
||||||
|
* @param {boolean} worldNews
|
||||||
*/
|
*/
|
||||||
constructor(key, query, fileName, label) {
|
constructor(key, query, fileName, label, apiUrl, worldNews = false) {
|
||||||
this.key = key
|
this.key = key
|
||||||
this.query = query
|
this.query = query
|
||||||
this.fileName = fileName
|
this.fileName = fileName
|
||||||
this.label = label
|
this.label = label
|
||||||
|
this.apiUrl = apiUrl
|
||||||
|
this.worldNews = worldNews
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -26,11 +30,22 @@ class NewsCategory {
|
|||||||
class NewsCategoryCatalog {
|
class NewsCategoryCatalog {
|
||||||
constructor() {
|
constructor() {
|
||||||
this._categories = [
|
this._categories = [
|
||||||
new NewsCategory('finance', 'finance', 'finance.json', 'Finance'),
|
new NewsCategory('flashnews', 'flashnews', 'flashnews.json', 'Realtime News', 'https://m.stock.naver.com/front-api/news/category?category=flashnews&pageSize=20&page=1'),
|
||||||
new NewsCategory('business', 'business', 'business.json', 'Business'),
|
new NewsCategory('mainnews', 'mainnews', 'mainnews.json', 'Main News', 'https://m.stock.naver.com/front-api/news/category?category=mainnews&pageSize=20&page=1'),
|
||||||
new NewsCategory('technology', 'technology', 'technology.json', 'Technology'),
|
new NewsCategory('ranknews', 'ranknews', 'ranknews.json', 'Rank News', 'https://m.stock.naver.com/front-api/news/category?category=ranknews&pageSize=20&page=1'),
|
||||||
new NewsCategory('market', 'market', 'market.json', 'Market')
|
new NewsCategory('worldnews', 'worldnews', 'worldnews.json', 'World News', 'https://m.stock.naver.com/front-api/news/worldnews?pageSize=20&page=1', true)
|
||||||
]
|
]
|
||||||
|
this._aliases = new Map([
|
||||||
|
['finance', 'flashnews'],
|
||||||
|
['business', 'mainnews'],
|
||||||
|
['technology', 'ranknews'],
|
||||||
|
['market', 'worldnews'],
|
||||||
|
['flash', 'flashnews'],
|
||||||
|
['main', 'mainnews'],
|
||||||
|
['rank', 'ranknews'],
|
||||||
|
['ranks', 'ranknews'],
|
||||||
|
['world', 'worldnews']
|
||||||
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -45,7 +60,8 @@ class NewsCategoryCatalog {
|
|||||||
* @returns {NewsCategory | null}
|
* @returns {NewsCategory | null}
|
||||||
*/
|
*/
|
||||||
getByKey(key) {
|
getByKey(key) {
|
||||||
return this._categories.find((item) => item.key === key) || null
|
const normalizedKey = this._aliases.get(key) || key
|
||||||
|
return this._categories.find((item) => item.key === normalizedKey) || null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -11,14 +11,11 @@ class ServiceConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @returns {{port: number, apiKey: string, apiBaseUrl: string, language: string, pageSize: number, refreshCron: string, dataDirectory: string}}
|
* @returns {{port: number, pageSize: number, refreshCron: string, dataDirectory: string}}
|
||||||
*/
|
*/
|
||||||
toRuntimeConfig() {
|
toRuntimeConfig() {
|
||||||
return {
|
return {
|
||||||
port: Number(this._env.PORT || 3100),
|
port: Number(this._env.PORT || 3100),
|
||||||
apiKey: this._env.NEWS_API_KEY || '',
|
|
||||||
apiBaseUrl: this._env.NEWS_API_BASE_URL || 'https://newsapi.org/v2/everything',
|
|
||||||
language: this._env.NEWS_API_LANGUAGE || 'en',
|
|
||||||
pageSize: Number(this._env.NEWS_PAGE_SIZE || 20),
|
pageSize: Number(this._env.NEWS_PAGE_SIZE || 20),
|
||||||
refreshCron: this._env.NEWS_REFRESH_CRON || '0 * * * *',
|
refreshCron: this._env.NEWS_REFRESH_CRON || '0 * * * *',
|
||||||
dataDirectory: path.resolve(process.cwd(), this._env.DATA_DIR || './data')
|
dataDirectory: path.resolve(process.cwd(), this._env.DATA_DIR || './data')
|
||||||
|
|||||||
@@ -1,14 +1,25 @@
|
|||||||
|
const crypto = require('crypto')
|
||||||
|
const fs = require('fs/promises')
|
||||||
|
const path = require('path')
|
||||||
|
|
||||||
|
const NAVER_ARTICLE_URL = 'https://n.news.naver.com/article/'
|
||||||
|
const NAVER_WORLD_NEWS_URL = 'https://m.stock.naver.com/investment/news/worldnews/'
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* NewsApiClient
|
* NewsApiClient
|
||||||
*
|
*
|
||||||
* Wraps outbound requests to NewsAPI.
|
* Fetches Naver Stock news and normalizes it to the existing NewsAPI-shaped
|
||||||
|
* article contract consumed by the front-end compatibility routes.
|
||||||
*/
|
*/
|
||||||
class NewsApiClient {
|
class NewsApiClient {
|
||||||
constructor(apiBaseUrl, apiKey, language, pageSize) {
|
constructor(apiBaseUrlOrOptions, _apiKey, _language, pageSize) {
|
||||||
this._apiBaseUrl = apiBaseUrl
|
const options = typeof apiBaseUrlOrOptions === 'object'
|
||||||
this._apiKey = apiKey
|
? apiBaseUrlOrOptions
|
||||||
this._language = language
|
: { pageSize }
|
||||||
this._pageSize = pageSize
|
|
||||||
|
this._pageSize = options.pageSize || 20
|
||||||
|
this._fetch = options.fetchImplementation || fetch
|
||||||
|
this._imageStorage = options.imageStorage || new NewsImageStorage(options.dataDirectory)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -16,54 +27,348 @@ class NewsApiClient {
|
|||||||
* @returns {Promise<object[]>}
|
* @returns {Promise<object[]>}
|
||||||
*/
|
*/
|
||||||
async fetchArticlesByCategory(category) {
|
async fetchArticlesByCategory(category) {
|
||||||
if (!this._apiKey) {
|
const listUrl = this._createListUrl(category)
|
||||||
throw new Error('NEWS_API_KEY is required')
|
const response = await this._fetch(listUrl)
|
||||||
}
|
|
||||||
|
|
||||||
const url = new URL(this._apiBaseUrl)
|
|
||||||
url.searchParams.set('q', category.query)
|
|
||||||
url.searchParams.set('language', this._language)
|
|
||||||
url.searchParams.set('pageSize', String(this._pageSize))
|
|
||||||
url.searchParams.set('page', '1')
|
|
||||||
url.searchParams.set('sortBy', 'publishedAt')
|
|
||||||
|
|
||||||
const response = await fetch(url, {
|
|
||||||
headers: {
|
|
||||||
'X-Api-Key': this._apiKey
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
const failure = await response.json().catch(() => ({}))
|
throw new Error(`Naver news request failed with status ${response.status}`)
|
||||||
throw new Error(failure.message || `NewsAPI request failed with status ${response.status}`)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const payload = await response.json()
|
const payload = await response.json()
|
||||||
if (payload.status !== 'ok') {
|
const items = this._extractNewsItems(payload)
|
||||||
throw new Error(payload.message || 'NewsAPI returned a non-ok payload')
|
|
||||||
}
|
|
||||||
|
|
||||||
return (payload.articles || []).map((article) => this._normalizeArticle(article))
|
const articles = []
|
||||||
|
for (const item of items) {
|
||||||
|
const article = await this._normalizeArticle(item, category)
|
||||||
|
if (article) {
|
||||||
|
articles.push(article)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return articles
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {object} article
|
* @param {import('../config/CategoryCatalog').NewsCategory} category
|
||||||
* @returns {object}
|
* @returns {string}
|
||||||
*/
|
*/
|
||||||
_normalizeArticle(article) {
|
_createListUrl(category) {
|
||||||
return {
|
const url = new URL(category.apiUrl)
|
||||||
source: article.source || null,
|
url.searchParams.set('pageSize', String(this._pageSize))
|
||||||
author: article.author || null,
|
url.searchParams.set('page', '1')
|
||||||
title: article.title || '',
|
return url.toString()
|
||||||
description: article.description || '',
|
|
||||||
url: article.url || '',
|
|
||||||
urlToImage: article.urlToImage || '',
|
|
||||||
publishedAt: article.publishedAt || null,
|
|
||||||
content: article.content || ''
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {object} payload
|
||||||
|
* @returns {object[]}
|
||||||
|
*/
|
||||||
|
_extractNewsItems(payload) {
|
||||||
|
if (Array.isArray(payload)) {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
|
||||||
|
const directCandidates = [
|
||||||
|
payload?.result?.newsList,
|
||||||
|
payload?.result?.items,
|
||||||
|
payload?.result?.list,
|
||||||
|
payload?.result,
|
||||||
|
payload?.items,
|
||||||
|
payload?.newsList,
|
||||||
|
payload?.list
|
||||||
|
]
|
||||||
|
const direct = directCandidates.find((candidate) => Array.isArray(candidate))
|
||||||
|
if (direct) {
|
||||||
|
return direct
|
||||||
|
}
|
||||||
|
|
||||||
|
return this._findFirstNewsArray(payload) || []
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {unknown} value
|
||||||
|
* @returns {object[] | null}
|
||||||
|
*/
|
||||||
|
_findFirstNewsArray(value) {
|
||||||
|
if (!value || typeof value !== 'object') {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
if (Array.isArray(value)) {
|
||||||
|
return value.some((item) => item && typeof item === 'object' && this._getSourceId(item))
|
||||||
|
? value
|
||||||
|
: null
|
||||||
|
}
|
||||||
|
for (const child of Object.values(value)) {
|
||||||
|
const found = this._findFirstNewsArray(child)
|
||||||
|
if (found) {
|
||||||
|
return found
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {object} item
|
||||||
|
* @param {import('../config/CategoryCatalog').NewsCategory} category
|
||||||
|
* @returns {Promise<object | null>}
|
||||||
|
*/
|
||||||
|
async _normalizeArticle(item, category) {
|
||||||
|
const sourceId = this._getSourceId(item)
|
||||||
|
if (!sourceId) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
const detailUrl = this._createDetailUrl(category, sourceId)
|
||||||
|
const content = await this._fetchArticleContent(detailUrl, category.worldNews)
|
||||||
|
const title = this._firstString(item.titleFull, item.title, item.newsTitle, item.articleTitle, item.headline)
|
||||||
|
const sourceName = this._firstString(item.officeName, item.pressName, item.providerName, item.sourceName)
|
||||||
|
|
||||||
|
return {
|
||||||
|
source: {
|
||||||
|
id: sourceId.split('/')[0] || null,
|
||||||
|
name: sourceName || null
|
||||||
|
},
|
||||||
|
author: this._firstString(item.author, item.writerName, item.reporter) || null,
|
||||||
|
title,
|
||||||
|
description: this._firstString(item.summary, item.description, item.body, item.subTitle) || '',
|
||||||
|
url: detailUrl,
|
||||||
|
urlToImage: this._firstString(item.imageUrl, item.thumbnailUrl, item.thumbnail, item.imageOriginLink) || '',
|
||||||
|
publishedAt: normalizePublishedAt(this._firstString(item.datetime, item.dateTime, item.publishedAt, item.officeDateTime, item.regDate)) || null,
|
||||||
|
content
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {object} item
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
_getSourceId(item) {
|
||||||
|
const sourceId = this._firstString(item.sourceId, item.articleSourceId)
|
||||||
|
if (sourceId) {
|
||||||
|
return sourceId.replace(/^\/+/, '')
|
||||||
|
}
|
||||||
|
|
||||||
|
const officeId = this._firstString(item.officeId, item.pressId)
|
||||||
|
const articleId = this._firstString(item.articleId, item.newsId)
|
||||||
|
if (officeId && articleId) {
|
||||||
|
return `${officeId}/${articleId}`
|
||||||
|
}
|
||||||
|
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {import('../config/CategoryCatalog').NewsCategory} category
|
||||||
|
* @param {string} sourceId
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
_createDetailUrl(category, sourceId) {
|
||||||
|
const baseUrl = category.worldNews ? NAVER_WORLD_NEWS_URL : NAVER_ARTICLE_URL
|
||||||
|
return `${baseUrl}${sourceId}`
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} detailUrl
|
||||||
|
* @param {boolean} worldNews
|
||||||
|
* @returns {Promise<string>}
|
||||||
|
*/
|
||||||
|
async _fetchArticleContent(detailUrl, worldNews) {
|
||||||
|
let response
|
||||||
|
try {
|
||||||
|
response = await this._fetch(detailUrl)
|
||||||
|
} catch (_error) {
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
if (!response.ok) {
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
const html = await response.text()
|
||||||
|
const content = worldNews
|
||||||
|
? this._extractElementHtmlById(html, 'content')
|
||||||
|
: this._extractTagHtml(html, 'article')
|
||||||
|
|
||||||
|
return this._replaceContentImages(content || '', detailUrl)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} html
|
||||||
|
* @param {string} tagName
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
_extractTagHtml(html, tagName) {
|
||||||
|
const expression = new RegExp(`<${tagName}\\b[^>]*>[\\s\\S]*?<\\/${tagName}>`, 'i')
|
||||||
|
const match = html.match(expression)
|
||||||
|
return match ? match[0] : ''
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} html
|
||||||
|
* @param {string} id
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
_extractElementHtmlById(html, id) {
|
||||||
|
const startExpression = new RegExp(`<([a-z][\\w:-]*)\\b(?=[^>]*\\bid=["']${escapeRegExp(id)}["'])[^>]*>`, 'i')
|
||||||
|
const startMatch = startExpression.exec(html)
|
||||||
|
if (!startMatch) {
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
const tagName = startMatch[1]
|
||||||
|
const startIndex = startMatch.index
|
||||||
|
const openTagEnd = startIndex + startMatch[0].length
|
||||||
|
const tokenExpression = new RegExp(`<\\/?${escapeRegExp(tagName)}\\b[^>]*>`, 'ig')
|
||||||
|
tokenExpression.lastIndex = openTagEnd
|
||||||
|
let depth = 1
|
||||||
|
let tokenMatch
|
||||||
|
|
||||||
|
while ((tokenMatch = tokenExpression.exec(html)) !== null) {
|
||||||
|
if (tokenMatch[0][1] === '/') {
|
||||||
|
depth -= 1
|
||||||
|
if (depth === 0) {
|
||||||
|
return html.slice(startIndex, tokenExpression.lastIndex)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
depth += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} html
|
||||||
|
* @returns {Promise<string>}
|
||||||
|
*/
|
||||||
|
async _replaceContentImages(html, baseUrl) {
|
||||||
|
const imageExpression = /<img\b[^>]*\bsrc=["']([^"']+)["'][^>]*>/ig
|
||||||
|
const replacements = []
|
||||||
|
let match
|
||||||
|
while ((match = imageExpression.exec(html)) !== null) {
|
||||||
|
replacements.push({
|
||||||
|
originalTag: match[0],
|
||||||
|
originalUrl: match[1]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
let updatedHtml = html
|
||||||
|
for (const replacement of replacements) {
|
||||||
|
const localUrl = await this._downloadImage(replacement.originalUrl, baseUrl)
|
||||||
|
if (localUrl) {
|
||||||
|
updatedHtml = updatedHtml.replace(
|
||||||
|
replacement.originalTag,
|
||||||
|
replacement.originalTag.replace(replacement.originalUrl, localUrl)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return updatedHtml
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} imageUrl
|
||||||
|
* @returns {Promise<string>}
|
||||||
|
*/
|
||||||
|
async _downloadImage(imageUrl, baseUrl) {
|
||||||
|
const absoluteImageUrl = toAbsoluteHttpUrl(imageUrl, baseUrl)
|
||||||
|
if (!absoluteImageUrl) {
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
let response
|
||||||
|
try {
|
||||||
|
response = await this._fetch(absoluteImageUrl)
|
||||||
|
} catch (_error) {
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
if (!response.ok) {
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
const contentType = response.headers?.get?.('content-type') || 'application/octet-stream'
|
||||||
|
const buffer = Buffer.from(await response.arrayBuffer())
|
||||||
|
return this._imageStorage.saveImage(absoluteImageUrl, buffer, contentType)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {...unknown} values
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
_firstString(...values) {
|
||||||
|
const value = values.find((item) => typeof item === 'string' && item.trim())
|
||||||
|
return value ? value.trim() : ''
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
class NewsImageStorage {
|
||||||
NewsApiClient
|
constructor(dataDirectory) {
|
||||||
|
this._imageDirectory = path.resolve(dataDirectory || './data', 'images')
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} imageUrl
|
||||||
|
* @param {Buffer} buffer
|
||||||
|
* @param {string} contentType
|
||||||
|
* @returns {Promise<string>}
|
||||||
|
*/
|
||||||
|
async saveImage(imageUrl, buffer, contentType) {
|
||||||
|
await fs.mkdir(this._imageDirectory, { recursive: true })
|
||||||
|
const extension = extensionFromContentType(contentType) || extensionFromUrl(imageUrl) || '.bin'
|
||||||
|
const digest = crypto.createHash('sha256').update(imageUrl).digest('hex').slice(0, 24)
|
||||||
|
const fileName = `${digest}${extension}`
|
||||||
|
await fs.writeFile(path.join(this._imageDirectory, fileName), buffer)
|
||||||
|
return `/api/news/images/${fileName}`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A Map is used instead of an object literal so that header values such as
// "constructor" or "__proto__" cannot resolve to inherited members.
const EXTENSION_BY_CONTENT_TYPE = new Map([
  ['image/jpeg', '.jpg'],
  ['image/jpg', '.jpg'],
  ['image/png', '.png'],
  ['image/gif', '.gif'],
  ['image/webp', '.webp'],
  ['image/svg+xml', '.svg']
])

/**
 * Maps a Content-Type header value to a file extension.
 *
 * Parameters after ";" are ignored and the comparison is case-insensitive.
 *
 * @param {string} contentType Raw Content-Type header value.
 * @returns {string} Extension including the leading dot, or '' when the
 *   type is not a recognised image type.
 */
function extensionFromContentType(contentType) {
  const normalized = String(contentType || '').split(';')[0].trim().toLowerCase()
  return EXTENSION_BY_CONTENT_TYPE.get(normalized) || ''
}
|
||||||
|
|
||||||
|
// Only image extensions are accepted: the result becomes part of a file name
// inside a statically served directory, so an extension such as ".html" or
// ".js" taken from a remote URL must never be used.
const IMAGE_URL_EXTENSIONS = new Set([
  '.jpg',
  '.jpeg',
  '.png',
  '.gif',
  '.webp',
  '.svg',
  '.bmp',
  '.avif',
  '.ico'
])

/**
 * Derives a file extension from the path component of an image URL.
 *
 * @param {string} imageUrl Absolute URL of the image.
 * @returns {string} Lower-cased extension including the leading dot, or ''
 *   when the URL is invalid or its extension is not a known image extension.
 */
function extensionFromUrl(imageUrl) {
  let pathname
  try {
    pathname = new URL(imageUrl).pathname
  } catch (_error) {
    return ''
  }

  const extension = path.extname(pathname).toLowerCase()
  return IMAGE_URL_EXTENSIONS.has(extension) ? extension : ''
}
|
||||||
|
|
||||||
|
/**
 * Resolves a possibly relative reference against a base URL and accepts the
 * result only when it uses http or https.
 *
 * @param {string} value URL reference, absolute or relative.
 * @param {string} [baseUrl] Base used to resolve relative references.
 * @returns {string} The absolute URL, or '' when the reference is missing,
 *   blank, unparsable, or uses another scheme (e.g. `data:`).
 */
function toAbsoluteHttpUrl(value, baseUrl) {
  // A blank reference would otherwise resolve to the base URL itself, and
  // `undefined` to "<base>/undefined"; neither is a usable target.
  const reference = String(value ?? '').trim()
  if (reference === '') {
    return ''
  }

  try {
    const url = new URL(reference, baseUrl)
    return ['http:', 'https:'].includes(url.protocol) ? url.toString() : ''
  } catch (_error) {
    return ''
  }
}
|
||||||
|
|
||||||
|
/**
 * Converts a compact numeric timestamp into an ISO-8601 string with a fixed
 * +09:00 offset.
 *
 * Accepts `YYYYMMDDHHmmss` (14 digits) and `YYYYMMDDHHmm` (12 digits, with
 * seconds set to "00"). Any other non-empty value is returned unchanged.
 * NOTE(review): the 12-digit form is presumed to be what the list endpoint
 * sends in `datetime`; confirm against a live response.
 *
 * @param {string} value Raw timestamp from the list payload.
 * @returns {string} Normalized timestamp, the input itself when it is not
 *   in a compact form, or '' for a missing value.
 */
function normalizePublishedAt(value) {
  if (!value) {
    return ''
  }

  const compact = /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})?$/.exec(value)
  if (compact === null) {
    return value
  }

  const [, year, month, day, hour, minute, second = '00'] = compact
  return `${year}-${month}-${day}T${hour}:${minute}:${second}+09:00`
}
|
||||||
|
|
||||||
|
/**
 * Escapes regular-expression metacharacters so the text can be embedded in
 * a RegExp source and matched literally.
 *
 * @param {unknown} value Value to escape; converted with `String()`.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value) {
  const text = String(value)
  return text.replace(/[.*+?^${}()|[\]\\]/g, (special) => `\\${special}`)
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
NewsApiClient,
|
||||||
|
NewsImageStorage
|
||||||
}
|
}
|
||||||
@@ -86,7 +86,8 @@ class NewsStorageService {
|
|||||||
key: category.key,
|
key: category.key,
|
||||||
label: category.label,
|
label: category.label,
|
||||||
query: category.query,
|
query: category.query,
|
||||||
fileName: category.fileName
|
fileName: category.fileName,
|
||||||
|
apiUrl: category.apiUrl
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||