diff --git a/.env.example b/.env.example index eef1f81..ce11921 100644 --- a/.env.example +++ b/.env.example @@ -1,7 +1,4 @@ PORT=3100 -NEWS_API_KEY=03f614876f0645948cb9bbce1661f4b2 -NEWS_API_BASE_URL=https://newsapi.org/v2/everything -NEWS_API_LANGUAGE=ko NEWS_PAGE_SIZE=20 NEWS_REFRESH_CRON=0 * * * * -DATA_DIR=./data \ No newline at end of file +DATA_DIR=./data diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..35410ca --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/copilot.data.migration.ask2agent.xml b/.idea/copilot.data.migration.ask2agent.xml new file mode 100644 index 0000000..1f2ea11 --- /dev/null +++ b/.idea/copilot.data.migration.ask2agent.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 70d5f2a..29a2b68 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # News Service -独立新闻微服务,负责周期性从 NewsAPI 拉取新闻并落盘为 JSON 文件,再通过 HTTP 接口提供给前端。 +独立新闻微服务,负责周期性从 Naver Stock 新闻接口拉取新闻并落盘为 JSON 文件,再通过 HTTP 接口提供给前端。 ## 功能 @@ -12,20 +12,20 @@ ## 分类 -- finance -- business -- technology -- market +- flashnews +- mainnews +- ranknews +- worldnews ## 目录 ```text news_service/ data/ - finance.json - business.json - technology.json - market.json + flashnews.json + mainnews.json + ranknews.json + worldnews.json src/ app/ config/ @@ -76,15 +76,15 @@ GET /api/news/categories ### 获取指定分类新闻 ```http -GET /api/news?category=finance&limit=10 -GET /api/news/finance?limit=10 +GET /api/news?category=flashnews&limit=10 +GET /api/news/flashnews?limit=10 ``` ### 兼容前端现有 NewsAPI 调用 ```http -GET /v2/everything?q=finance&language=ko&pageSize=10&page=1 -GET /v2/top-headlines?category=business&country=ko&pageSize=10 +GET /v2/everything?q=flashnews&language=ko&pageSize=10&page=1 +GET /v2/top-headlines?category=mainnews&country=ko&pageSize=10 ``` 返回结构与前端当前使用的 NewsAPI 结构保持一致: @@ -107,7 +107,7 @@ GET /api/news/all?limit=10 ```http POST /api/news/refresh -POST /api/news/refresh?category=finance +POST /api/news/refresh?category=flashnews ``` ## Docker @@ -123,8 +123,8 @@ cd /Users/wjp/Projects/juYou docker compose -f docker-compose.news-stack.yml up -d --build ``` -这样前端容器中的 Nginx 会把同源路径 `/newsapi/*` 代理到容器网络中的 `news-service:3100`,浏览器不会直接访问 NewsAPI,因此不会触发跨域限制。 +这样前端容器中的 Nginx 会把同源路径 `/newsapi/*` 代理到容器网络中的 `news-service:3100`,浏览器不会直接访问外部新闻接口,因此不会触发跨域限制。 ## 前端接入建议 -前端如果继续使用原来的 `/newsapi/v2/*` 请求方式,只需要把代理目标指向本服务即可,不需要修改新闻请求代码。 \ No newline at end of file +前端如果继续使用原来的 `/newsapi/v2/*` 请求方式,只需要把代理目标指向本服务即可,不需要修改新闻请求代码。 diff --git a/data/images/00423321d15041b5393762d8.jpg b/data/images/00423321d15041b5393762d8.jpg new file mode 100644 index 0000000..e7e4ec3 Binary files /dev/null and b/data/images/00423321d15041b5393762d8.jpg differ diff --git a/data/images/03af58329dd7bd7fcd08caad.jpg b/data/images/03af58329dd7bd7fcd08caad.jpg new file mode 100644 index 0000000..8551f46 Binary files /dev/null and b/data/images/03af58329dd7bd7fcd08caad.jpg differ diff --git a/data/images/0445b97587e4c18101545440.jpg b/data/images/0445b97587e4c18101545440.jpg new file mode 100644 index 0000000..c819662 Binary files /dev/null and b/data/images/0445b97587e4c18101545440.jpg differ diff --git a/data/images/0f96cb3f70a4acd146221745.jpg b/data/images/0f96cb3f70a4acd146221745.jpg new file mode 100644 index 0000000..0e2c896 Binary files /dev/null and b/data/images/0f96cb3f70a4acd146221745.jpg differ diff --git a/data/images/11778425c7e41f364095c173.jpg b/data/images/11778425c7e41f364095c173.jpg new file mode 100644 index 0000000..e1ef8b9 Binary files /dev/null and b/data/images/11778425c7e41f364095c173.jpg differ diff --git a/data/images/1bd919fe25561f244ba4bcde.jpg b/data/images/1bd919fe25561f244ba4bcde.jpg new file mode 100644 index 0000000..d9f41f1 Binary files /dev/null and b/data/images/1bd919fe25561f244ba4bcde.jpg differ diff --git a/data/images/1e692bb0a695d254ec4c5b52.png b/data/images/1e692bb0a695d254ec4c5b52.png new file mode 100644 index 0000000..b33d3b8 Binary files /dev/null and b/data/images/1e692bb0a695d254ec4c5b52.png differ diff --git a/data/images/21a2fb69ddc27788a8f90a2d.png b/data/images/21a2fb69ddc27788a8f90a2d.png new file mode 100644 index 0000000..ee9a7aa Binary files /dev/null and b/data/images/21a2fb69ddc27788a8f90a2d.png differ diff --git a/data/images/29a20d669f90c51ca45028eb.jpg b/data/images/29a20d669f90c51ca45028eb.jpg new file mode 100644 index 0000000..a572129 Binary files /dev/null and b/data/images/29a20d669f90c51ca45028eb.jpg differ diff --git a/data/images/2c1f2057e6af366bf3015f4e.jpg b/data/images/2c1f2057e6af366bf3015f4e.jpg new file mode 100644 index 0000000..4c90571 Binary files /dev/null and b/data/images/2c1f2057e6af366bf3015f4e.jpg differ diff --git a/data/images/2cf8458e423bfdbf34c2acee.png b/data/images/2cf8458e423bfdbf34c2acee.png new file mode 100644 index 0000000..f8e7a01 Binary files /dev/null and b/data/images/2cf8458e423bfdbf34c2acee.png differ diff --git a/data/images/2d3065fcd29569d3e701f490.jpg b/data/images/2d3065fcd29569d3e701f490.jpg new file mode 100644 index 0000000..0017c2d Binary files /dev/null and b/data/images/2d3065fcd29569d3e701f490.jpg differ diff --git a/data/images/2d963b28def0c168195ad9d8.jpg b/data/images/2d963b28def0c168195ad9d8.jpg new file mode 100644 index 0000000..33eef6b Binary files /dev/null and b/data/images/2d963b28def0c168195ad9d8.jpg differ diff --git a/data/images/34197e5f6ef10ba5374ba06c.jpg b/data/images/34197e5f6ef10ba5374ba06c.jpg new file mode 100644 index 0000000..0b808ea Binary files /dev/null and b/data/images/34197e5f6ef10ba5374ba06c.jpg differ diff --git a/data/images/343eccb6d55919e039fc623a.jpg b/data/images/343eccb6d55919e039fc623a.jpg new file mode 100644 index 0000000..7113d9a Binary files /dev/null and b/data/images/343eccb6d55919e039fc623a.jpg differ diff --git a/data/images/348dc9cbc9f3bfcfb69faaf3.jpg b/data/images/348dc9cbc9f3bfcfb69faaf3.jpg new file mode 100644 index 0000000..dc9e145 Binary files /dev/null and b/data/images/348dc9cbc9f3bfcfb69faaf3.jpg differ diff --git a/data/images/36fd3a527a390add0e94e45a.png b/data/images/36fd3a527a390add0e94e45a.png new file mode 100644 index 0000000..857729c Binary files /dev/null and b/data/images/36fd3a527a390add0e94e45a.png differ diff --git a/data/images/39d36e714c455c22a8ae33e8.jpg b/data/images/39d36e714c455c22a8ae33e8.jpg new file mode 100644 index 0000000..80bfd13 Binary files /dev/null and b/data/images/39d36e714c455c22a8ae33e8.jpg differ diff --git a/data/images/3b031723f9b8f70495cbe385.jpg b/data/images/3b031723f9b8f70495cbe385.jpg new file mode 100644 index 0000000..8f439c2 Binary files /dev/null and b/data/images/3b031723f9b8f70495cbe385.jpg differ diff --git a/data/images/3e05220de047caa3995223f7.jpg b/data/images/3e05220de047caa3995223f7.jpg new file mode 100644 index 0000000..1fab711 Binary files /dev/null and b/data/images/3e05220de047caa3995223f7.jpg differ diff --git a/data/images/40f0e5b20b96a98d559b2393.jpg b/data/images/40f0e5b20b96a98d559b2393.jpg new file mode 100644 index 0000000..0d371ff Binary files /dev/null and b/data/images/40f0e5b20b96a98d559b2393.jpg differ diff --git a/data/images/4e61d48bc97636e335c58571.jpg b/data/images/4e61d48bc97636e335c58571.jpg new file mode 100644 index 0000000..f1e3da3 Binary files /dev/null and b/data/images/4e61d48bc97636e335c58571.jpg differ diff --git a/data/images/593d13ceade737dab3554b79.jpg b/data/images/593d13ceade737dab3554b79.jpg new file mode 100644 index 0000000..e32350f Binary files /dev/null and b/data/images/593d13ceade737dab3554b79.jpg differ diff --git a/data/images/594e056c755a3d8896657cb5.jpg b/data/images/594e056c755a3d8896657cb5.jpg new file mode 100644 index 0000000..3dfb90b Binary files /dev/null and b/data/images/594e056c755a3d8896657cb5.jpg differ diff --git a/data/images/6747eff8ce40e840a4bcc543.jpg b/data/images/6747eff8ce40e840a4bcc543.jpg new file mode 100644 index 0000000..7c875c8 Binary files /dev/null and b/data/images/6747eff8ce40e840a4bcc543.jpg differ diff --git a/data/images/6e1153f65f086156ab4507ff.jpg b/data/images/6e1153f65f086156ab4507ff.jpg new file mode 100644 index 0000000..6cad6bf Binary files /dev/null and b/data/images/6e1153f65f086156ab4507ff.jpg differ diff --git a/data/images/774506d32ba3ebee62d98b73.jpg b/data/images/774506d32ba3ebee62d98b73.jpg new file mode 100644 index 0000000..92a6760 Binary files /dev/null and b/data/images/774506d32ba3ebee62d98b73.jpg differ diff --git a/data/images/8042891de0f32ce59c98bd0c.jpg b/data/images/8042891de0f32ce59c98bd0c.jpg new file mode 100644 index 0000000..36429bf Binary files /dev/null and b/data/images/8042891de0f32ce59c98bd0c.jpg differ diff --git a/data/images/8091fc1d13183104e622930b.png b/data/images/8091fc1d13183104e622930b.png new file mode 100644 index 0000000..1cfc4e4 Binary files /dev/null and b/data/images/8091fc1d13183104e622930b.png differ diff --git a/data/images/8425db5d40cdea4ebc5e48f3.png b/data/images/8425db5d40cdea4ebc5e48f3.png new file mode 100644 index 0000000..dcab1df Binary files /dev/null and b/data/images/8425db5d40cdea4ebc5e48f3.png differ diff --git a/data/images/878e7cffa8e291d9522789eb.jpg b/data/images/878e7cffa8e291d9522789eb.jpg new file mode 100644 index 0000000..2718940 Binary files /dev/null and b/data/images/878e7cffa8e291d9522789eb.jpg differ diff --git a/data/images/8b832a8f758e51991a9d806b.jpg b/data/images/8b832a8f758e51991a9d806b.jpg new file mode 100644 index 0000000..7631945 Binary files /dev/null and b/data/images/8b832a8f758e51991a9d806b.jpg differ diff --git a/data/images/8c36e3ba8584a98cf35d5cc1.jpg b/data/images/8c36e3ba8584a98cf35d5cc1.jpg new file mode 100644 index 0000000..25d3030 Binary files /dev/null and b/data/images/8c36e3ba8584a98cf35d5cc1.jpg differ diff --git a/data/images/91beb422e27fa644a3ded9d8.jpg b/data/images/91beb422e27fa644a3ded9d8.jpg new file mode 100644 index 0000000..08c50c4 Binary files /dev/null and b/data/images/91beb422e27fa644a3ded9d8.jpg differ diff --git a/data/images/a3b1c1c137445c6a9f013faa.png b/data/images/a3b1c1c137445c6a9f013faa.png new file mode 100644 index 0000000..6384133 Binary files /dev/null and b/data/images/a3b1c1c137445c6a9f013faa.png differ diff --git a/data/images/a65420883cb64d52d995bf05.jpg b/data/images/a65420883cb64d52d995bf05.jpg new file mode 100644 index 0000000..fa69674 Binary files /dev/null and b/data/images/a65420883cb64d52d995bf05.jpg differ diff --git a/data/images/a6c2b277c006cae54ecf9693.jpg b/data/images/a6c2b277c006cae54ecf9693.jpg new file mode 100644 index 0000000..49568d5 Binary files /dev/null and b/data/images/a6c2b277c006cae54ecf9693.jpg differ diff --git a/data/images/a9ea4d406363057edacb7374.jpg b/data/images/a9ea4d406363057edacb7374.jpg new file mode 100644 index 0000000..f5a0db1 Binary files /dev/null and b/data/images/a9ea4d406363057edacb7374.jpg differ diff --git a/data/images/aa72b4d241ff62d6db375671.jpg b/data/images/aa72b4d241ff62d6db375671.jpg new file mode 100644 index 0000000..8723e38 Binary files /dev/null and b/data/images/aa72b4d241ff62d6db375671.jpg differ diff --git a/data/images/ad8ece98c5c1b89ba27a8d25.jpg b/data/images/ad8ece98c5c1b89ba27a8d25.jpg new file mode 100644 index 0000000..2f936eb Binary files /dev/null and b/data/images/ad8ece98c5c1b89ba27a8d25.jpg differ diff --git a/data/images/adea914ec8333f341be544a9.jpg b/data/images/adea914ec8333f341be544a9.jpg new file mode 100644 index 0000000..b61e860 Binary files /dev/null and b/data/images/adea914ec8333f341be544a9.jpg differ diff --git a/data/images/b3d584f58bafaab635d7f163.png b/data/images/b3d584f58bafaab635d7f163.png new file mode 100644 index 0000000..b2d9d67 Binary files /dev/null and b/data/images/b3d584f58bafaab635d7f163.png differ diff --git a/data/images/b8c0278d1658dbac496e0ec9.jpg b/data/images/b8c0278d1658dbac496e0ec9.jpg new file mode 100644 index 0000000..05a9be0 Binary files /dev/null and b/data/images/b8c0278d1658dbac496e0ec9.jpg differ diff --git a/data/images/be51644c19d367d060b5d000.jpg b/data/images/be51644c19d367d060b5d000.jpg new file mode 100644 index 0000000..39b116e Binary files /dev/null and b/data/images/be51644c19d367d060b5d000.jpg differ diff --git a/data/images/c13b05772b18a142f0336ec7.jpg b/data/images/c13b05772b18a142f0336ec7.jpg new file mode 100644 index 0000000..c602798 Binary files /dev/null and b/data/images/c13b05772b18a142f0336ec7.jpg differ diff --git a/data/images/c14aad170cd82cb71f8be5bd.jpg b/data/images/c14aad170cd82cb71f8be5bd.jpg new file mode 100644 index 0000000..9687d74 Binary files /dev/null and b/data/images/c14aad170cd82cb71f8be5bd.jpg differ diff --git a/data/images/c1ae0a2f3988bd10c1cdde1a.jpg b/data/images/c1ae0a2f3988bd10c1cdde1a.jpg new file mode 100644 index 0000000..450aee9 Binary files /dev/null and b/data/images/c1ae0a2f3988bd10c1cdde1a.jpg differ diff --git a/data/images/c2821ab7c799d75c5500802b.jpg b/data/images/c2821ab7c799d75c5500802b.jpg new file mode 100644 index 0000000..44744dd Binary files /dev/null and b/data/images/c2821ab7c799d75c5500802b.jpg differ diff --git a/data/images/ce3466877b5e2c28cade8501.jpg b/data/images/ce3466877b5e2c28cade8501.jpg new file mode 100644 index 0000000..3080b54 Binary files /dev/null and b/data/images/ce3466877b5e2c28cade8501.jpg differ diff --git a/data/images/d55f68fc3dbf94434e03cff2.jpg b/data/images/d55f68fc3dbf94434e03cff2.jpg new file mode 100644 index 0000000..e1c1fdf Binary files /dev/null and b/data/images/d55f68fc3dbf94434e03cff2.jpg differ diff --git a/data/images/d57b617286902ce0da1bb251.jpg b/data/images/d57b617286902ce0da1bb251.jpg new file mode 100644 index 0000000..cb43e49 Binary files /dev/null and b/data/images/d57b617286902ce0da1bb251.jpg differ diff --git a/data/images/d624bea44fe62b165261612a.jpg b/data/images/d624bea44fe62b165261612a.jpg new file mode 100644 index 0000000..17de89c Binary files /dev/null and b/data/images/d624bea44fe62b165261612a.jpg differ diff --git a/data/images/da3e038ca74a61db5394bc61.jpg b/data/images/da3e038ca74a61db5394bc61.jpg new file mode 100644 index 0000000..fca4ab1 Binary files /dev/null and b/data/images/da3e038ca74a61db5394bc61.jpg differ diff --git a/data/images/da744f3531167c5d7001261e.jpg b/data/images/da744f3531167c5d7001261e.jpg new file mode 100644 index 0000000..d5bc2bc Binary files /dev/null and b/data/images/da744f3531167c5d7001261e.jpg differ diff --git a/data/images/dee3d95f4cbedbc4994f979c.jpg b/data/images/dee3d95f4cbedbc4994f979c.jpg new file mode 100644 index 0000000..55c710b Binary files /dev/null and b/data/images/dee3d95f4cbedbc4994f979c.jpg differ diff --git a/data/images/e19e8ee8bb7402e20565e131.jpg b/data/images/e19e8ee8bb7402e20565e131.jpg new file mode 100644 index 0000000..f0a4668 Binary files /dev/null and b/data/images/e19e8ee8bb7402e20565e131.jpg differ diff --git a/data/images/e57fdc4ec3fe336171017972.jpg b/data/images/e57fdc4ec3fe336171017972.jpg new file mode 100644 index 0000000..ae8cc64 Binary files /dev/null and b/data/images/e57fdc4ec3fe336171017972.jpg differ diff --git a/data/images/e5bf9b4dd404f89b7d348c12.jpg b/data/images/e5bf9b4dd404f89b7d348c12.jpg new file mode 100644 index 0000000..b425b2f Binary files /dev/null and b/data/images/e5bf9b4dd404f89b7d348c12.jpg differ diff --git a/data/images/e6678a79c0572cde599ee828.jpg b/data/images/e6678a79c0572cde599ee828.jpg new file mode 100644 index 0000000..ddd74c9 Binary files /dev/null and b/data/images/e6678a79c0572cde599ee828.jpg differ diff --git a/data/images/e6702a617db5c421a3510f48.jpg b/data/images/e6702a617db5c421a3510f48.jpg new file mode 100644 index 0000000..794a508 Binary files /dev/null and b/data/images/e6702a617db5c421a3510f48.jpg differ diff --git a/data/images/e74737997b7c3f4c8ffa7ea7.jpg b/data/images/e74737997b7c3f4c8ffa7ea7.jpg new file mode 100644 index 0000000..818f652 Binary files /dev/null and b/data/images/e74737997b7c3f4c8ffa7ea7.jpg differ diff --git a/data/images/e7b005f7b80c0ba4e53e97b3.jpg b/data/images/e7b005f7b80c0ba4e53e97b3.jpg new file mode 100644 index 0000000..8101558 Binary files /dev/null and b/data/images/e7b005f7b80c0ba4e53e97b3.jpg differ diff --git a/data/images/e7bfe5e2f987a7673da6471a.png b/data/images/e7bfe5e2f987a7673da6471a.png new file mode 100644 index 0000000..94db635 Binary files /dev/null and b/data/images/e7bfe5e2f987a7673da6471a.png differ diff --git a/data/images/ee9eab40f9d168aa9c549a84.png b/data/images/ee9eab40f9d168aa9c549a84.png new file mode 100644 index 0000000..c3b8c42 Binary files /dev/null and b/data/images/ee9eab40f9d168aa9c549a84.png differ diff --git a/data/images/eedd7a47590307fe54239b5e.jpg b/data/images/eedd7a47590307fe54239b5e.jpg new file mode 100644 index 0000000..403fe14 Binary files /dev/null and b/data/images/eedd7a47590307fe54239b5e.jpg differ diff --git a/data/images/efa91e02e3651fb87da539ea.jpg b/data/images/efa91e02e3651fb87da539ea.jpg new file mode 100644 index 0000000..333635c Binary files /dev/null and b/data/images/efa91e02e3651fb87da539ea.jpg differ diff --git a/data/images/f1c24f189a00d53ffee40fdc.jpg b/data/images/f1c24f189a00d53ffee40fdc.jpg new file mode 100644 index 0000000..d808328 Binary files /dev/null and b/data/images/f1c24f189a00d53ffee40fdc.jpg differ diff --git a/data/images/f37e986168453dc4ca027ad0.png b/data/images/f37e986168453dc4ca027ad0.png new file mode 100644 index 0000000..65f29e7 Binary files /dev/null and b/data/images/f37e986168453dc4ca027ad0.png differ diff --git a/data/images/f6af6c96c9b014194de9d35b.jpg b/data/images/f6af6c96c9b014194de9d35b.jpg new file mode 100644 index 0000000..e5a82ec Binary files /dev/null and b/data/images/f6af6c96c9b014194de9d35b.jpg differ diff --git a/data/images/fd277d3e4a2f19326a5eb13f.png b/data/images/fd277d3e4a2f19326a5eb13f.png new file mode 100644 index 0000000..e70f83d Binary files /dev/null and b/data/images/fd277d3e4a2f19326a5eb13f.png differ diff --git a/data/images/fe45b775cea5b0a60432da73.png b/data/images/fe45b775cea5b0a60432da73.png new file mode 100644 index 0000000..2b93486 Binary files /dev/null and b/data/images/fe45b775cea5b0a60432da73.png differ diff --git a/data/images/ff314f9050050192460333b5.png b/data/images/ff314f9050050192460333b5.png new file mode 100644 index 0000000..5fe15ee Binary files /dev/null and b/data/images/ff314f9050050192460333b5.png differ diff --git a/docker-compose.yml b/docker-compose.yml index a7bafb5..cfd4a4e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,12 +10,9 @@ services: - "3100:3100" environment: PORT: 3100 - NEWS_API_KEY: ${NEWS_API_KEY} - NEWS_API_BASE_URL: ${NEWS_API_BASE_URL:-https://newsapi.org/v2/everything} - NEWS_API_LANGUAGE: ${NEWS_API_LANGUAGE:-en} NEWS_PAGE_SIZE: ${NEWS_PAGE_SIZE:-20} NEWS_REFRESH_CRON: ${NEWS_REFRESH_CRON:-0 * * * *} DATA_DIR: ${DATA_DIR:-/app/data} volumes: - ./data:/app/data - restart: unless-stopped \ No newline at end of file + restart: unless-stopped diff --git a/src/app/NewsApplication.js b/src/app/NewsApplication.js index d27dd20..7b4a611 100644 --- a/src/app/NewsApplication.js +++ b/src/app/NewsApplication.js @@ -1,4 +1,5 @@ const express = require('express') +const path = require('path') const { CategoryNewsRepository } = require('../repositories/CategoryNewsRepository') const { NewsApiClient } = require('../services/NewsApiClient') const { NewsStorageService } = require('../services/NewsStorageService') @@ -19,12 +20,10 @@ class NewsApplication { this._logger = new ConsoleLogger() this._categoryCatalog = new NewsCategoryCatalog() this._repository = new CategoryNewsRepository(runtimeConfig.dataDirectory) - this._apiClient = new NewsApiClient( - runtimeConfig.apiBaseUrl, - runtimeConfig.apiKey, - runtimeConfig.language, - runtimeConfig.pageSize - ) + this._apiClient = new NewsApiClient({ + pageSize: runtimeConfig.pageSize, + dataDirectory: runtimeConfig.dataDirectory + }) this._storageService = new NewsStorageService(this._categoryCatalog, this._repository, this._apiClient) this._scheduler = new NewsRefreshScheduler(this._storageService, runtimeConfig.refreshCron, this._logger) this._express = express() @@ -69,6 +68,7 @@ class NewsApplication { */ _configureHttpPipeline() { this._express.use(express.json()) + this._express.use('/api/news/images', express.static(path.join(this._runtimeConfig.dataDirectory, 'images'))) this._express.get('/health', (_request, response) => { response.json({ @@ -95,4 +95,4 @@ class NewsApplication { module.exports = { NewsApplication -} \ No newline at end of file +} diff --git a/src/config/CategoryCatalog.js b/src/config/CategoryCatalog.js index 2854306..7b3e9d8 100644 --- a/src/config/CategoryCatalog.js +++ b/src/config/CategoryCatalog.js @@ -9,12 +9,16 @@ class NewsCategory { * @param {string} query * @param {string} fileName * @param {string} label + * @param {string} apiUrl + * @param {boolean} worldNews */ - constructor(key, query, fileName, label) { + constructor(key, query, fileName, label, apiUrl, worldNews = false) { this.key = key this.query = query this.fileName = fileName this.label = label + this.apiUrl = apiUrl + this.worldNews = worldNews } } @@ -26,11 +30,22 @@ class NewsCategory { class NewsCategoryCatalog { constructor() { this._categories = [ - new NewsCategory('finance', 'finance', 'finance.json', 'Finance'), - new NewsCategory('business', 'business', 'business.json', 'Business'), - new NewsCategory('technology', 'technology', 'technology.json', 'Technology'), - new NewsCategory('market', 'market', 'market.json', 'Market') + new NewsCategory('flashnews', 'flashnews', 'flashnews.json', 'Realtime News', 'https://m.stock.naver.com/front-api/news/category?category=flashnews&pageSize=20&page=1'), + new NewsCategory('mainnews', 'mainnews', 'mainnews.json', 'Main News', 'https://m.stock.naver.com/front-api/news/category?category=mainnews&pageSize=20&page=1'), + new NewsCategory('ranknews', 'ranknews', 'ranknews.json', 'Rank News', 'https://m.stock.naver.com/front-api/news/category?category=ranknews&pageSize=20&page=1'), + new NewsCategory('worldnews', 'worldnews', 'worldnews.json', 'World News', 'https://m.stock.naver.com/front-api/news/worldnews?pageSize=20&page=1', true) ] + this._aliases = new Map([ + ['finance', 'flashnews'], + ['business', 'mainnews'], + ['technology', 'ranknews'], + ['market', 'worldnews'], + ['flash', 'flashnews'], + ['main', 'mainnews'], + ['rank', 'ranknews'], + ['ranks', 'ranknews'], + ['world', 'worldnews'] + ]) } /** @@ -45,11 +60,12 @@ class NewsCategoryCatalog { * @returns {NewsCategory | null} */ getByKey(key) { - return this._categories.find((item) => item.key === key) || null + const normalizedKey = this._aliases.get(key) || key + return this._categories.find((item) => item.key === normalizedKey) || null } } module.exports = { NewsCategory, NewsCategoryCatalog -} \ No newline at end of file +} diff --git a/src/config/ServiceConfig.js b/src/config/ServiceConfig.js index 7848bf9..e9f33e4 100644 --- a/src/config/ServiceConfig.js +++ b/src/config/ServiceConfig.js @@ -11,14 +11,11 @@ class ServiceConfig { } /** - * @returns {{port: number, apiKey: string, apiBaseUrl: string, language: string, pageSize: number, refreshCron: string, dataDirectory: string}} + * @returns {{port: number, pageSize: number, refreshCron: string, dataDirectory: string}} */ toRuntimeConfig() { return { port: Number(this._env.PORT || 3100), - apiKey: this._env.NEWS_API_KEY || '', - apiBaseUrl: this._env.NEWS_API_BASE_URL || 'https://newsapi.org/v2/everything', - language: this._env.NEWS_API_LANGUAGE || 'en', pageSize: Number(this._env.NEWS_PAGE_SIZE || 20), refreshCron: this._env.NEWS_REFRESH_CRON || '0 * * * *', dataDirectory: path.resolve(process.cwd(), this._env.DATA_DIR || './data') @@ -28,4 +25,4 @@ class ServiceConfig { module.exports = { ServiceConfig -} \ No newline at end of file +} diff --git a/src/services/NewsApiClient.js b/src/services/NewsApiClient.js index 230860c..f447c97 100644 --- a/src/services/NewsApiClient.js +++ b/src/services/NewsApiClient.js @@ -1,14 +1,25 @@ +const crypto = require('crypto') +const fs = require('fs/promises') +const path = require('path') + +const NAVER_ARTICLE_URL = 'https://n.news.naver.com/article/' +const NAVER_WORLD_NEWS_URL = 'https://m.stock.naver.com/investment/news/worldnews/' + /** * NewsApiClient * - * Wraps outbound requests to NewsAPI. + * Fetches Naver Stock news and normalizes it to the existing NewsAPI-shaped + * article contract consumed by the front-end compatibility routes. */ class NewsApiClient { - constructor(apiBaseUrl, apiKey, language, pageSize) { - this._apiBaseUrl = apiBaseUrl - this._apiKey = apiKey - this._language = language - this._pageSize = pageSize + constructor(apiBaseUrlOrOptions, _apiKey, _language, pageSize) { + const options = typeof apiBaseUrlOrOptions === 'object' + ? apiBaseUrlOrOptions + : { pageSize } + + this._pageSize = options.pageSize || 20 + this._fetch = options.fetchImplementation || fetch + this._imageStorage = options.imageStorage || new NewsImageStorage(options.dataDirectory) } /** @@ -16,54 +27,348 @@ class NewsApiClient { * @returns {Promise} */ async fetchArticlesByCategory(category) { - if (!this._apiKey) { - throw new Error('NEWS_API_KEY is required') - } - - const url = new URL(this._apiBaseUrl) - url.searchParams.set('q', category.query) - url.searchParams.set('language', this._language) - url.searchParams.set('pageSize', String(this._pageSize)) - url.searchParams.set('page', '1') - url.searchParams.set('sortBy', 'publishedAt') - - const response = await fetch(url, { - headers: { - 'X-Api-Key': this._apiKey - } - }) + const listUrl = this._createListUrl(category) + const response = await this._fetch(listUrl) if (!response.ok) { - const failure = await response.json().catch(() => ({})) - throw new Error(failure.message || `NewsAPI request failed with status ${response.status}`) + throw new Error(`Naver news request failed with status ${response.status}`) } const payload = await response.json() - if (payload.status !== 'ok') { - throw new Error(payload.message || 'NewsAPI returned a non-ok payload') - } + const items = this._extractNewsItems(payload) - return (payload.articles || []).map((article) => this._normalizeArticle(article)) + const articles = [] + for (const item of items) { + const article = await this._normalizeArticle(item, category) + if (article) { + articles.push(article) + } + } + return articles } /** - * @param {object} article - * @returns {object} + * @param {import('../config/CategoryCatalog').NewsCategory} category + * @returns {string} */ - _normalizeArticle(article) { - return { - source: article.source || null, - author: article.author || null, - title: article.title || '', - description: article.description || '', - url: article.url || '', - urlToImage: article.urlToImage || '', - publishedAt: article.publishedAt || null, - content: article.content || '' + _createListUrl(category) { + const url = new URL(category.apiUrl) + url.searchParams.set('pageSize', String(this._pageSize)) + url.searchParams.set('page', '1') + return url.toString() + } + + /** + * @param {object} payload + * @returns {object[]} + */ + _extractNewsItems(payload) { + if (Array.isArray(payload)) { + return payload } + + const directCandidates = [ + payload?.result?.newsList, + payload?.result?.items, + payload?.result?.list, + payload?.result, + payload?.items, + payload?.newsList, + payload?.list + ] + const direct = directCandidates.find((candidate) => Array.isArray(candidate)) + if (direct) { + return direct + } + + return this._findFirstNewsArray(payload) || [] + } + + /** + * @param {unknown} value + * @returns {object[] | null} + */ + _findFirstNewsArray(value) { + if (!value || typeof value !== 'object') { + return null + } + if (Array.isArray(value)) { + return value.some((item) => item && typeof item === 'object' && this._getSourceId(item)) + ? value + : null + } + for (const child of Object.values(value)) { + const found = this._findFirstNewsArray(child) + if (found) { + return found + } + } + return null + } + + /** + * @param {object} item + * @param {import('../config/CategoryCatalog').NewsCategory} category + * @returns {Promise} + */ + async _normalizeArticle(item, category) { + const sourceId = this._getSourceId(item) + if (!sourceId) { + return null + } + + const detailUrl = this._createDetailUrl(category, sourceId) + const content = await this._fetchArticleContent(detailUrl, category.worldNews) + const title = this._firstString(item.titleFull, item.title, item.newsTitle, item.articleTitle, item.headline) + const sourceName = this._firstString(item.officeName, item.pressName, item.providerName, item.sourceName) + + return { + source: { + id: sourceId.split('/')[0] || null, + name: sourceName || null + }, + author: this._firstString(item.author, item.writerName, item.reporter) || null, + title, + description: this._firstString(item.summary, item.description, item.body, item.subTitle) || '', + url: detailUrl, + urlToImage: this._firstString(item.imageUrl, item.thumbnailUrl, item.thumbnail, item.imageOriginLink) || '', + publishedAt: normalizePublishedAt(this._firstString(item.datetime, item.dateTime, item.publishedAt, item.officeDateTime, item.regDate)) || null, + content + } + } + + /** + * @param {object} item + * @returns {string} + */ + _getSourceId(item) { + const sourceId = this._firstString(item.sourceId, item.articleSourceId) + if (sourceId) { + return sourceId.replace(/^\/+/, '') + } + + const officeId = this._firstString(item.officeId, item.pressId) + const articleId = this._firstString(item.articleId, item.newsId) + if (officeId && articleId) { + return `${officeId}/${articleId}` + } + + return '' + } + + /** + * @param {import('../config/CategoryCatalog').NewsCategory} category + * @param {string} sourceId + * @returns {string} + */ + _createDetailUrl(category, sourceId) { + const baseUrl = category.worldNews ? NAVER_WORLD_NEWS_URL : NAVER_ARTICLE_URL + return `${baseUrl}${sourceId}` + } + + /** + * @param {string} detailUrl + * @param {boolean} worldNews + * @returns {Promise} + */ + async _fetchArticleContent(detailUrl, worldNews) { + let response + try { + response = await this._fetch(detailUrl) + } catch (_error) { + return '' + } + if (!response.ok) { + return '' + } + const html = await response.text() + const content = worldNews + ? this._extractElementHtmlById(html, 'content') + : this._extractTagHtml(html, 'article') + + return this._replaceContentImages(content || '', detailUrl) + } + + /** + * @param {string} html + * @param {string} tagName + * @returns {string} + */ + _extractTagHtml(html, tagName) { + const expression = new RegExp(`<${tagName}\\b[^>]*>[\\s\\S]*?<\\/${tagName}>`, 'i') + const match = html.match(expression) + return match ? match[0] : '' + } + + /** + * @param {string} html + * @param {string} id + * @returns {string} + */ + _extractElementHtmlById(html, id) { + const startExpression = new RegExp(`<([a-z][\\w:-]*)\\b(?=[^>]*\\bid=["']${escapeRegExp(id)}["'])[^>]*>`, 'i') + const startMatch = startExpression.exec(html) + if (!startMatch) { + return '' + } + + const tagName = startMatch[1] + const startIndex = startMatch.index + const openTagEnd = startIndex + startMatch[0].length + const tokenExpression = new RegExp(`<\\/?${escapeRegExp(tagName)}\\b[^>]*>`, 'ig') + tokenExpression.lastIndex = openTagEnd + let depth = 1 + let tokenMatch + + while ((tokenMatch = tokenExpression.exec(html)) !== null) { + if (tokenMatch[0][1] === '/') { + depth -= 1 + if (depth === 0) { + return html.slice(startIndex, tokenExpression.lastIndex) + } + } else { + depth += 1 + } + } + + return '' + } + + /** + * @param {string} html + * @returns {Promise} + */ + async _replaceContentImages(html, baseUrl) { + const imageExpression = /]*\bsrc=["']([^"']+)["'][^>]*>/ig + const replacements = [] + let match + while ((match = imageExpression.exec(html)) !== null) { + replacements.push({ + originalTag: match[0], + originalUrl: match[1] + }) + } + + let updatedHtml = html + for (const replacement of replacements) { + const localUrl = await this._downloadImage(replacement.originalUrl, baseUrl) + if (localUrl) { + updatedHtml = updatedHtml.replace( + replacement.originalTag, + replacement.originalTag.replace(replacement.originalUrl, localUrl) + ) + } + } + return updatedHtml + } + + /** + * @param {string} imageUrl + * @returns {Promise} + */ + async _downloadImage(imageUrl, baseUrl) { + const absoluteImageUrl = toAbsoluteHttpUrl(imageUrl, baseUrl) + if (!absoluteImageUrl) { + return '' + } + + let response + try { + response = await this._fetch(absoluteImageUrl) + } catch (_error) { + return '' + } + if (!response.ok) { + return '' + } + const contentType = response.headers?.get?.('content-type') || 'application/octet-stream' + const buffer = Buffer.from(await response.arrayBuffer()) + return this._imageStorage.saveImage(absoluteImageUrl, buffer, contentType) + } + + /** + * @param {...unknown} values + * @returns {string} + */ + _firstString(...values) { + const value = values.find((item) => typeof item === 'string' && item.trim()) + return value ? value.trim() : '' } } +class NewsImageStorage { + constructor(dataDirectory) { + this._imageDirectory = path.resolve(dataDirectory || './data', 'images') + } + + /** + * @param {string} imageUrl + * @param {Buffer} buffer + * @param {string} contentType + * @returns {Promise} + */ + async saveImage(imageUrl, buffer, contentType) { + await fs.mkdir(this._imageDirectory, { recursive: true }) + const extension = extensionFromContentType(contentType) || extensionFromUrl(imageUrl) || '.bin' + const digest = crypto.createHash('sha256').update(imageUrl).digest('hex').slice(0, 24) + const fileName = `${digest}${extension}` + await fs.writeFile(path.join(this._imageDirectory, fileName), buffer) + return `/api/news/images/${fileName}` + } +} + +function extensionFromContentType(contentType) { + const normalized = String(contentType || '').split(';')[0].trim().toLowerCase() + const byContentType = { + 'image/jpeg': '.jpg', + 'image/jpg': '.jpg', + 'image/png': '.png', + 'image/gif': '.gif', + 'image/webp': '.webp', + 'image/svg+xml': '.svg' + } + return byContentType[normalized] || '' +} + +function extensionFromUrl(imageUrl) { + try { + const extension = path.extname(new URL(imageUrl).pathname).toLowerCase() + return extension.length <= 6 ? extension : '' + } catch (_error) { + return '' + } +} + +function toAbsoluteHttpUrl(value, baseUrl) { + try { + const url = new URL(value, baseUrl) + return ['http:', 'https:'].includes(url.protocol) ? url.toString() : '' + } catch (_error) { + return '' + } +} + +function normalizePublishedAt(value) { + if (!value) { + return '' + } + if (/^\d{14}$/.test(value)) { + const year = value.slice(0, 4) + const month = value.slice(4, 6) + const day = value.slice(6, 8) + const hour = value.slice(8, 10) + const minute = value.slice(10, 12) + const second = value.slice(12, 14) + return `${year}-${month}-${day}T${hour}:${minute}:${second}+09:00` + } + return value +} + +function escapeRegExp(value) { + return String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&') +} + module.exports = { - NewsApiClient -} \ No newline at end of file + NewsApiClient, + NewsImageStorage +} diff --git a/src/services/NewsStorageService.js b/src/services/NewsStorageService.js index 6174ee8..ca82b73 100644 --- a/src/services/NewsStorageService.js +++ b/src/services/NewsStorageService.js @@ -86,7 +86,8 @@ class NewsStorageService { key: category.key, label: category.label, query: category.query, - fileName: category.fileName + fileName: category.fileName, + apiUrl: category.apiUrl })) } @@ -121,4 +122,4 @@ class NewsStorageService { module.exports = { NewsStorageService -} \ No newline at end of file +}