-
-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathstackoverflow.py
75 lines (73 loc) · 2.77 KB
/
stackoverflow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
'''
Function:
Search code from StackOverflow
Author:
Zhenchao Jin
WeChat public account:
Charles_pikachu
'''
import html
import os
import re
import subprocess
import sys

import pyflakes

from .baseengine import BaseEngine
'''StackOverflow'''
class StackOverflow(BaseEngine):
    '''Engine that searches StackOverflow for a Python snippet matching a keyword.

    The methods below read self.keyword, self.session, self.api_url,
    self.headers and self.rootdir; none of them is assigned in this class
    (presumably BaseEngine sets them up -- confirm against baseengine).
    '''

    def __init__(self, keyword, proxies={}, **kwargs):
        '''Forward all arguments unchanged to the base engine.'''
        # NOTE: the shared default dict is only forwarded here, never mutated
        # by this class; the signature is kept as-is for existing callers.
        super().__init__(keyword=keyword, proxies=proxies, **kwargs)

    def autocoding(self, **kwargs):
        '''Find a snippet that passes the checker, then return or execute it.

        Keyword Args:
            return_code: when truthy (the default) the snippet source is
                returned instead of being executed.
            following_code: extra source appended after the snippet before
                it is executed (default '').
            globals: globals mapping used for execution (default: a new dict).

        Returns:
            With return_code truthy: the snippet source, or False when no
            answer yielded a snippet accepted by checker(). Otherwise the
            result of exec(), i.e. None.

        Raises:
            RuntimeError: when the search returns nothing, or when execution
                was requested but no usable snippet was found.
        '''
        code = False
        for answer in self.search()['items']:
            code = self.parse(answer['link'])
            if code:
                break
        if kwargs.get('return_code', True):
            return code
        if not code:
            # Previously this fell through to `False + '\n'` and surfaced as
            # an opaque TypeError.
            raise RuntimeError('Fail to parse the suitable code from StackOverflow')
        ctx = compile(code + '\n' + kwargs.get('following_code', ''), '', 'exec')
        return exec(ctx, kwargs.get('globals', {}))

    def checker(self, code):
        '''Return True when the snippet runs without raising and pyflakes prints nothing.

        SECURITY: this executes source downloaded from the internet inside
        the current process. Only use it in an environment where that is
        acceptable.
        '''
        # Step 1: the snippet must compile and run to completion.
        try:
            ctx = compile(code, '', 'exec')
            # Run in a fresh namespace used as both globals and locals.
            # With the implicit namespaces of a bare exec(ctx), top-level
            # functions of the snippet cannot see each other (valid code was
            # rejected with NameError) and the snippet could read or modify
            # this module's globals.
            exec(ctx, {})
        except BaseException:
            # Deliberately as broad as the original bare `except:`: a snippet
            # may call sys.exit() or otherwise raise outside Exception, and
            # that must only disqualify the snippet.
            return False
        # Step 2: pyflakes must not report anything on stdout.
        tmpfilepath = os.path.join(self.rootdir, 'tmp.py')
        try:
            # Explicit UTF-8 so non-ASCII snippets do not depend on the locale.
            with open(tmpfilepath, 'w', encoding='utf-8') as fp:
                fp.write(code)
            # Argument list instead of a shell string: safe for paths with
            # spaces or shell metacharacters. Running the module through the
            # current interpreter does not depend on a `pyflakes` script
            # being on PATH.
            p = subprocess.run(
                [sys.executable, '-m', 'pyflakes', tmpfilepath],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        finally:
            # Always clean up, even when writing or spawning failed.
            if os.path.exists(tmpfilepath):
                os.remove(tmpfilepath)
        return len(p.stdout) == 0

    def search(self):
        '''Query `{api_url}/search` for Python-tagged questions whose title matches the keyword.

        Returns:
            The decoded JSON response; its 'items' entry is non-empty.

        Raises:
            RuntimeError: when the response has no 'items' or the list is empty.
        '''
        keyword = self.keyword.lower().replace('stackoverflow.', '').replace('_', ' ')
        params = {
            'order': 'desc',
            'sort': 'votes',
            'tagged': 'python',
            'site': 'stackoverflow',
            'intitle': keyword,
        }
        answers = self.session.get(f'{self.api_url}/search', params=params).json()
        # .get(): a payload without 'items' (e.g. an API error response) is
        # reported the same way as an empty result instead of a KeyError.
        if not answers.get('items'):
            raise RuntimeError('Fail to search the suitable code from StackOverflow')
        return answers

    def parse(self, url):
        '''Download a question page and return the first snippet accepted by checker().

        Answers are tried in descending order of votecount(); within an
        answer, code blocks are tried longest first.

        Returns:
            The snippet source, or False when no code block passes the checker.
        '''
        response = self.session.get(url, headers=self.headers)
        answers = re.findall(r'<div id="answer-.*?</table', response.text, re.DOTALL)

        def votecount(x):
            # The first isolated run of 1-5 digits in the answer markup is
            # taken as its score (assumed to be the vote counter -- confirm
            # against the current page layout). No match ranks as 0 instead
            # of raising AttributeError.
            matched = re.search(r'\D(\d{1,5})\D', x)
            return int(matched.group(1)) if matched else 0

        for answer in sorted(answers, key=votecount, reverse=True):
            blocks = re.finditer(r'<pre[^>]*>[^<]*<code[^>]*>((?:\s|[^<]|<span[^>]*>[^<]+</span>)*)</code></pre>', answer)
            codez = (block.group(1) for block in blocks)
            for code in sorted(codez, key=len, reverse=True):
                # The capture group only admits <span> markup, so removing
                # span tags leaves the bare (still escaped) source. This must
                # happen before unescaping, otherwise real `<` / `>`
                # operators in the code could be mistaken for tags. The old
                # substitution also used the non-raw replacement '\1', which
                # is the control character \x01 rather than a backreference.
                code = re.sub(r'</?span[^>]*>', '', code)
                code = html.unescape(code)
                if self.checker(code):
                    return code
        return False