diff --git a/app/services/zhihu/__init__.py b/app/services/zhihu/__init__.py index b04c71d..133371e 100644 --- a/app/services/zhihu/__init__.py +++ b/app/services/zhihu/__init__.py @@ -18,7 +18,7 @@ from app.utils.network import get_selector, get_redirect_url, get_response_json, get_random_user_agent, \ get_content_async from app.models.metadata_item import MetadataItem, MediaFile, MessageType -from app.config import JINJA2_ENV +from app.config import JINJA2_ENV,FXZHIHU_HOST from .config import ( SHORT_LIMIT, ZHIHU_COLUMNS_API_HOST, @@ -86,8 +86,11 @@ def __init__(self, url: str, data: Optional[Any] = None, **kwargs): # reqeust fields self.httpx_client = zhihu_client self.headers = {"User-Agent": get_random_user_agent(), - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "Referer": self.url} + "Accept": "*/*", + "Accept-Encoding": "gzip, deflate, br", + "Referer": self.url, + "Connection": "keep-alive", + } if kwargs.get("cookie"): self.headers["Cookie"] = kwargs.get("cookie") if ZHIHU_COOKIES: @@ -190,6 +193,17 @@ async def _get_request_url(self) -> None: host = self.urlparser.netloc path = self.urlparser.path request_url_path = path + if self.method == "fxzhihu": + if self.zhihu_type == "answer": + self.request_url = ( + "https://" + FXZHIHU_HOST + '/answer/' + self.answer_id + ) + return + elif self.zhihu_type == "article": + self.request_url = ( + "https://" + FXZHIHU_HOST + '/p/' + self.article_id + ) + return if self.zhihu_type == "answer": if self.method == "api": self.request_url = ( @@ -227,9 +241,9 @@ async def _get_zhihu_answer(self) -> None: parse the zhihu answer page and get the metadata. support methods: html, json. Recommend: json """ - if self.method == "api" or self.method == "json": + if self.method in ["api", "json", "fxzhihu"]: answer_data = {} - if self.method == "api": + if self.method in ["api", "fxzhihu"]: try: json_data = await get_response_json(self.request_url, headers=self.headers, client=self.httpx_client) @@ -463,13 +477,13 @@ def _process_picture(pictures, content_attr): async def _get_zhihu_article(self): self.zhihu_type = "article" - if self.method == "api": + if self.method in ["api", "fxzhihu"]: try: json_data = await get_response_json(self.request_url, headers=self.headers, client=self.httpx_client) self.title = json_data["title"] self.raw_content = json_data["content"] self.author = json_data["author"]["name"] - self.author_url = ZHIHU_HOST + "/people/" + json_data["author"]["url_token"] + self.author_url = json_data["author"]["url"] self.upvote = json_data["voteup_count"] except Exception as e: raise Exception("zhihu request failed") diff --git a/app/services/zhihu/config.py b/app/services/zhihu/config.py index 134a556..3b6b2bc 100644 --- a/app/services/zhihu/config.py +++ b/app/services/zhihu/config.py @@ -7,7 +7,7 @@ ZHIHU_API_ANSWER_PARAMS = ("include=content%2Cexcerpt%2Cauthor%2Cvoteup_count%2Ccomment_count%2Cquestion%2Ccreated_time" "%2Cquestion.detail") ZHIHU_HOST = "https://www.zhihu.com" -ALL_METHODS = ["api", "json", "html"] +ALL_METHODS = ["fxzhihu", "api", "json", "html"] """ There are three methods to get zhihu item: from zhihu v4 api(api), a json object in the html script(json), or parsing the html page content directly. diff --git a/app/utils/network.py b/app/utils/network.py index 4d52d54..ff7ec1f 100644 --- a/app/utils/network.py +++ b/app/utils/network.py @@ -1,5 +1,6 @@ import asyncio import datetime +import json import os import uuid from typing import Optional @@ -38,7 +39,7 @@ async def get_response( async def get_response_json(url: str, headers=None, client: httpx.AsyncClient = None) -> dict: try: - response = await get_response(url, headers) + response = await get_response(url, headers=headers, client=client) json_result = response.json() except Exception as e: print(e, traceback.format_exc())