-import functools
+import asyncio
+import datetime
 import heapq
 import itertools
-
-from datetime import date
-
-from threading import Lock
-from typing import List, Tuple, Dict, Callable, Any, Optional, Iterable
+from io import BytesIO
+from typing import Any, Dict, Iterable, List, Optional, Tuple, cast
 from urllib.parse import urljoin
-from urllib.request import urlopen, Request

+import httpx
+from async_lru import alru_cache
 from bs4 import BeautifulSoup
 from sphinx.util.inventory import InventoryFile
+from telegram.ext import Application, ContextTypes, Job, JobQueue

 from .const import (
-    USER_AGENT,
+    DEFAULT_HEADERS,
+    DEFAULT_REPO_NAME,
+    DEFAULT_REPO_OWNER,
     DOCS_URL,
+    EXAMPLES_URL,
+    GITHUB_PATTERN,
     OFFICIAL_URL,
-    WIKI_URL,
+    USER_AGENT,
     WIKI_CODE_SNIPPETS_URL,
     WIKI_FAQ_URL,
-    EXAMPLES_URL,
-    GITHUB_PATTERN,
     WIKI_FRDP_URL,
+    WIKI_URL,
 )
 from .entrytypes import (
-    WikiPage,
-    Example,
+    BaseEntry,
     CodeSnippet,
-    FAQEntry,
     DocEntry,
-    BaseEntry,
+    FAQEntry,
     FRDPEntry,
     ParamDocEntry,
+    WikiPage,
 )
-from .github import github_issues
+from .github import GitHub
 from .taghints import TAG_HINTS


-def cached_parsing(func: Callable[..., Any]) -> Callable[..., Any]:
-    @functools.wraps(func)
-    def checking_cache_time(self: "Search", *args: Any, **kwargs: Any) -> Any:
-        if date.today() > self.last_cache_date:
-            self.fetch_entries()
-            self.last_cache_date = date.today()
-        return func(self, *args, **kwargs)
-
-    return checking_cache_time
-
-
 class Search:
-    def __init__(self) -> None:
-        self.__lock = Lock()
+    def __init__(self, github_auth: str, github_user_agent: str = USER_AGENT) -> None:
+        self.__lock = asyncio.Lock()
         self._docs: List[DocEntry] = []
         self._official: Dict[str, str] = {}
         self._wiki: List[WikiPage] = []
-        self._examples: List[Example] = []
         self._snippets: List[CodeSnippet] = []
         self._faq: List[FAQEntry] = []
         self._design_patterns: List[FRDPEntry] = []
-        self.last_cache_date = date.today()
-        self.github_session = github_issues
-        self.fetch_entries()
-
-    def fetch_entries(self) -> None:
-        with self.__lock:
-            self.fetch_docs()
-            self.fetch_wiki()
-            self.fetch_examples()
-            self.fetch_wiki_code_snippets()
-            self.fetch_wiki_faq()
-            self.fetch_wiki_design_patterns()
-
-        # This is important: If the docs have changed the cache is useless
-        self.search.cache_clear()
-        self.multi_search_combinations.cache_clear()
-
-    def fetch_official_docs(self) -> None:
-        request = Request(OFFICIAL_URL, headers={"User-Agent": USER_AGENT})
-        with urlopen(request) as file:
-            official_soup = BeautifulSoup(file, "html.parser")
+        self.github = GitHub(auth=github_auth, user_agent=github_user_agent)
+        self._httpx_client = httpx.AsyncClient()
+
+    async def initialize(
+        self, application: Application[Any, Any, Any, Any, Any, JobQueue]
+    ) -> None:
+        await self.github.initialize()
+        application.job_queue.run_once(callback=self.update_job, when=1, data=(None, None, None))
+
+    async def shutdown(self) -> None:
+        await self.github.shutdown()
+        await self._httpx_client.aclose()
+        await self.search.close()  # pylint:disable=no-member
+        await self.multi_search_combinations.close()  # pylint:disable=no-member
+
+    async def update_job(self, context: ContextTypes.DEFAULT_TYPE) -> None:
+        job = cast(Job, context.job)
+        cursors = cast(Tuple[Optional[str], Optional[str], Optional[str]], job.data)
+        restart = not any(cursors)
+
+        if restart:
+            await asyncio.gather(
+                context.application.create_task(self.github.update_examples()),
+                context.application.create_task(self.github.update_ptb_contribs()),
+            )
+            async with self.__lock:
+                await asyncio.gather(
+                    context.application.create_task(self.update_docs()),
+                    context.application.create_task(self.update_wiki()),
+                    context.application.create_task(self.update_wiki_code_snippets()),
+                    context.application.create_task(self.update_wiki_faq()),
+                    context.application.create_task(self.update_wiki_design_patterns()),
+                )
+
+        issue_cursor = (
+            await self.github.update_issues(cursor=cursors[0]) if restart or cursors[0] else None
+        )
+        pr_cursor = (
+            await self.github.update_pull_requests(cursor=cursors[1])
+            if restart or cursors[1]
+            else None
+        )
+        discussion_cursor = (
+            await self.github.update_discussions(cursor=cursors[2])
+            if restart or cursors[2]
+            else None
+        )
+
+        new_cursors = (issue_cursor, pr_cursor, discussion_cursor)
+        when = datetime.timedelta(seconds=30) if any(new_cursors) else datetime.timedelta(hours=12)
+        cast(JobQueue, context.job_queue).run_once(
+            callback=self.update_job, when=when, data=new_cursors
+        )
+
+        # This is important: If the docs have changed the cache is useless
+        self.search.cache_clear()  # pylint:disable=no-member
+        self.multi_search_combinations.cache_clear()  # pylint:disable=no-member
+
+    async def _update_official_docs(self) -> None:
+        response = await self._httpx_client.get(url=OFFICIAL_URL, headers=DEFAULT_HEADERS)
+        official_soup = BeautifulSoup(response.content, "html.parser")
         for anchor in official_soup.select("a.anchor"):
             if "-" not in anchor["href"]:
                 self._official[anchor["href"][1:]] = anchor.next_sibling

-    def fetch_docs(self) -> None:
-        self.fetch_official_docs()
-        request = Request(urljoin(DOCS_URL, "objects.inv"), headers={"User-Agent": USER_AGENT})
-        with urlopen(request) as docs_data:
-            data = InventoryFile.load(docs_data, DOCS_URL, urljoin)
+    async def update_docs(self) -> None:
+        await self._update_official_docs()
+        response = await self._httpx_client.get(
+            url=urljoin(DOCS_URL, "objects.inv"),
+            headers=DEFAULT_HEADERS,
+            follow_redirects=True,
+        )
+        data = InventoryFile.load(BytesIO(response.content), DOCS_URL, urljoin)
         self._docs = []
         for entry_type, items in data.items():
             for name, (_, _, url, display_name) in items.items():
@@ -99,11 +132,12 @@ def fetch_docs(self) -> None:
                 tg_url, tg_test, tg_name = "", "", ""
                 name_bits = name.split(".")

-                if entry_type in ["py:method", "py:attribute"]:
-                    if "telegram.Bot" in name or "telegram.ext.ExtBot" in name:
-                        tg_test = name_bits[-1]
-                    else:
-                        tg_test = name_bits[-2]
+                if entry_type == "py:method" and (
+                    "telegram.Bot" in name or "telegram.ext.ExtBot" in name
+                ):
+                    tg_test = name_bits[-1]
+                if entry_type == "py:attribute":
+                    tg_test = name_bits[-2]
                 if entry_type == "py:class":
                     tg_test = name_bits[-1]
                 elif entry_type == "py:parameter":
@@ -138,10 +172,9 @@ def fetch_docs(self) -> None:
                     )
                 )

-    def fetch_wiki(self) -> None:
-        request = Request(WIKI_URL, headers={"User-Agent": USER_AGENT})
-        with urlopen(request) as file:
-            wiki_soup = BeautifulSoup(file, "html.parser")
+    async def update_wiki(self) -> None:
+        response = await self._httpx_client.get(url=WIKI_URL, headers=DEFAULT_HEADERS)
+        wiki_soup = BeautifulSoup(response.content, "html.parser")
         self._wiki = []

         # Parse main pages from custom sidebar
@@ -160,10 +193,11 @@ def fetch_wiki(self) -> None:

         self._wiki.append(WikiPage(category="Code Resources", name="Examples", url=EXAMPLES_URL))

-    def fetch_wiki_code_snippets(self) -> None:
-        request = Request(WIKI_CODE_SNIPPETS_URL, headers={"User-Agent": USER_AGENT})
-        with urlopen(request) as file:
-            code_snippet_soup = BeautifulSoup(file, "html.parser")
+    async def update_wiki_code_snippets(self) -> None:
+        response = await self._httpx_client.get(
+            url=WIKI_CODE_SNIPPETS_URL, headers=DEFAULT_HEADERS
+        )
+        code_snippet_soup = BeautifulSoup(response.content, "html.parser")
         self._snippets = []
         for headline in code_snippet_soup.select(
             "div#wiki-body h4,div#wiki-body h3,div#wiki-body h2"
@@ -175,20 +209,18 @@ def fetch_wiki_code_snippets(self) -> None:
                 )
             )

-    def fetch_wiki_faq(self) -> None:
-        request = Request(WIKI_FAQ_URL, headers={"User-Agent": USER_AGENT})
-        with urlopen(request) as file:
-            faq_soup = BeautifulSoup(file, "html.parser")
+    async def update_wiki_faq(self) -> None:
+        response = await self._httpx_client.get(url=WIKI_FAQ_URL, headers=DEFAULT_HEADERS)
+        faq_soup = BeautifulSoup(response.content, "html.parser")
         self._faq = []
         for headline in faq_soup.select("div#wiki-body h3"):
             self._faq.append(
                 FAQEntry(name=headline.text.strip(), url=urljoin(WIKI_FAQ_URL, headline.a["href"]))
             )

-    def fetch_wiki_design_patterns(self) -> None:
-        request = Request(WIKI_FRDP_URL, headers={"User-Agent": USER_AGENT})
-        with urlopen(request) as file:
-            frdp_soup = BeautifulSoup(file, "html.parser")
+    async def update_wiki_design_patterns(self) -> None:
+        response = await self._httpx_client.get(url=WIKI_FRDP_URL, headers=DEFAULT_HEADERS)
+        frdp_soup = BeautifulSoup(response.content, "html.parser")
         self._design_patterns = []
         for headline in frdp_soup.select("div#wiki-body h3,div#wiki-body h2"):
             self._design_patterns.append(
@@ -197,18 +229,14 @@ def fetch_wiki_design_patterns(self) -> None:
                 )
             )

-    def fetch_examples(self) -> None:
-        self._examples = []
-        for name, _ in self.github_session.get_examples_directory(r"^.*\.py"):
-            self._examples.append(Example(name=name))
-
     @staticmethod
     def _sort_key(entry: BaseEntry, search_query: str) -> float:
         return entry.compare_to_query(search_query)

-    @functools.lru_cache(maxsize=64)
-    @cached_parsing
-    def search(self, search_query: Optional[str], amount: int = None) -> Optional[List[BaseEntry]]:
+    @alru_cache(maxsize=64)  # type: ignore[misc]
+    async def search(
+        self, search_query: Optional[str], amount: int = None
+    ) -> Optional[List[BaseEntry]]:
         """Searches all available entries for appropriate results. This includes:

         * wiki pages
@@ -251,34 +279,40 @@ def search(self, search_query: Optional[str], amount: int = None) -> Optional[Li
                 match.groupdict()[x]
                 for x in ("owner", "repo", "number", "sha", "query", "ptbcontrib")
             )
+            owner = owner or DEFAULT_REPO_OWNER
+            repo = repo or DEFAULT_REPO_NAME

             # If it's an issue
             if number:
-                issue = github_issues.get_issue(int(number), owner, repo)
+                issue = await self.github.get_thread(int(number), owner, repo)
                 return [issue] if issue else None
             # If it's a commit
             if sha:
-                commit = github_issues.get_commit(sha, owner, repo)
+                commit = await self.github.get_commit(sha, owner, repo)
                 return [commit] if commit else None
             # If it's a search
             if gh_search_query:
                 search_query = gh_search_query
-                search_entries = github_issues.all_issues
+                search_entries = itertools.chain(
+                    self.github.all_issues,
+                    self.github.all_pull_requests,
+                    self.github.all_discussions,
+                )
             elif ptbcontrib:
-                search_entries = github_issues.all_ptbcontribs
+                search_entries = self.github.all_ptbcontribs

         if search_query and search_query.startswith("/"):
             search_entries = TAG_HINTS.values()

-        with self.__lock:
+        async with self.__lock:
             if not search_entries:
                 search_entries = itertools.chain(
                     self._wiki,
-                    self._examples,
+                    self.github.all_examples,
                     self._faq,
                     self._design_patterns,
                     self._snippets,
-                    github_issues.all_ptbcontribs,
+                    self.github.all_ptbcontribs,
                     self._docs,
                     TAG_HINTS.values(),
                 )
@@ -298,9 +332,8 @@ def search(self, search_query: Optional[str], amount: int = None) -> Optional[Li
             key=lambda entry: self._sort_key(entry, search_query),  # type: ignore[arg-type]
         )

-    @functools.lru_cache(64)
-    @cached_parsing
-    def multi_search_combinations(
+    @alru_cache(maxsize=64)  # type: ignore[misc]
+    async def multi_search_combinations(
         self, search_queries: Tuple[str], results_per_query: int = 3
     ) -> List[Dict[str, BaseEntry]]:
         """For each query, runs :meth:`search` and fetches the ``results_per_query`` most likely
@@ -322,13 +355,10 @@ def multi_search_combinations(
         # Remove duplicates while maintaining the order
         effective_queries = list(dict.fromkeys(search_queries))
         for query in effective_queries:
-            if res := self.search(search_query=query, amount=results_per_query):
+            if res := await self.search(search_query=query, amount=results_per_query):
                 results[query] = res

         return [
             dict(zip(effective_queries, query_results))
             for query_results in itertools.product(*results.values())
         ]
-
-
-search = Search()
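
Since the module-level `search = Search()` singleton is removed, callers now have to construct the instance themselves and tie its lifecycle to the bot. A minimal sketch of how that wiring might look, assuming this module is importable as `.search` and using placeholder credentials (`GITHUB_TOKEN` and `BOT_TOKEN` are illustrative names, not part of this PR):

from telegram.ext import Application

from .search import Search  # the class reworked in this diff

search = Search(github_auth="GITHUB_TOKEN")  # placeholder credential


async def post_init(application: Application) -> None:
    # Starts the GitHub client and schedules the first update_job run.
    await search.initialize(application)


async def post_shutdown(application: Application) -> None:
    # Closes the GitHub client, the httpx client and the alru caches.
    await search.shutdown()


def main() -> None:
    application = (
        Application.builder()
        .token("BOT_TOKEN")  # placeholder token
        .post_init(post_init)
        .post_shutdown(post_shutdown)
        .build()
    )
    application.run_polling()


if __name__ == "__main__":
    main()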