#!/usr/bin/env python
# coding=utf-8
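"""Sync RSS/Atom feeds into a Wallabag instance.

Reads Wallabag credentials from config.yaml and the feed list from
sites.yaml, then saves every entry that is newer than each site's
recorded latest_article. Forked from Findus23/rss2wallabag.
"""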
import asyncio
import logging
import sys
from time import mktime
from urllib.parse import urljoin

import aiohttp
import feedparser
import sentry_sdk
import yaml
from wallabag_api.wallabag import Wallabag

import github_stars
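
# Reset the root logger so this script fully controls handlers and levels.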
logger = logging.getLogger()
logger.handlers = []
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
logger.setLevel(logging.DEBUG)
try:
    with open("config.yaml", 'r') as stream:
        config = yaml.safe_load(stream)
except (yaml.YAMLError, FileNotFoundError) as exception:
    logger.error(exception)
    sys.exit(1)
# "debug: true" in config.yaml disables posting and the sites.yaml write-back
production = not config.get("debug")
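# Two log sinks: stdout (INFO in production, DEBUG otherwise) and
# debug.log on disk (WARNING in production, DEBUG otherwise).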
ch = logging.StreamHandler(stream=sys.stdout)
ch.setLevel(logging.INFO if production else logging.DEBUG)
ch.setFormatter(formatter)
logger.addHandler(ch)
fh = logging.FileHandler('debug.log')
fh.setFormatter(formatter)
fh.setLevel(logging.WARNING if production else logging.DEBUG)
logger.addHandler(fh)
if "sentry_url" in config and (production):
sentry_sdk.init(dsn=config["sentry_url"])
try:
    with open("sites.yaml", 'r') as stream:
        sites = yaml.safe_load(stream)
except (yaml.YAMLError, FileNotFoundError) as exception:
    logger.error(exception)
    sys.exit(1)
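# Expected file layout (a sketch inferred from the keys used below; all
# values are placeholders. The wallabag block is passed straight to
# Wallabag.get_token, so it must carry the full set of credentials the
# wallabag_api client expects, including username/password):
#
# config.yaml:
#   wallabag:
#     host: https://wallabag.example.com
#     client_id: "..."
#     client_secret: "..."
#     username: "..."
#     password: "..."
#   debug: false            # optional; true = dry run with verbose logs
#   sentry_url: "..."       # optional Sentry DSN
#   github_username: "..."  # optional; pulls in starred repos
#
# sites.yaml:
#   Some Blog:
#     url: https://example.com/feed.xml
#     tags: [blog]
#     latest_article: "Title of the newest already-saved entry"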
async def fetch(session, url):
    """Return the response body decoded as UTF-8, or None on failure."""
    try:
        async with session.get(url) as response:
            return await response.text('utf-8')
    except Exception:
        logger.exception("failed to fetch {url}".format(url=url))
async def main(loop, sites):
    token = await Wallabag.get_token(**config["wallabag"])
    async with aiohttp.ClientSession(loop=loop) as session:
        wall = Wallabag(host=config["wallabag"]["host"],
                        client_secret=config["wallabag"]["client_secret"],
                        client_id=config["wallabag"]["client_id"],
                        token=token, aio_sess=session)
        if "github_username" in config:
            # merge the user's starred GitHub repos into the site list
            sites = github_stars.get_starred_repos(config["github_username"], sites)
        # process all feeds concurrently
        await asyncio.gather(*[handle_feed(session, wall, sitetitle, site)
                               for sitetitle, site in sites.items()])
async def handle_feed(session, wall, sitetitle, site):
    logger.info("Downloading feed: " + sitetitle)
    rss = await fetch(session, site["url"])
    if rss is None:
        # fetch() already logged the failure; skip this site
        return
    logger.info("Parsing feed: " + sitetitle)
    f = feedparser.parse(rss)
    logger.debug("finished parsing: " + sitetitle)
    if "latest_article" in site:
        for article in f.entries:
            if article.title == site["latest_article"]:
                # everything from here on was added in an earlier run
                logger.debug("already added: " + article.title)
                break
            logger.info("article found: " + article.title)
            taglist = [sitetitle]
            if site.get("tags"):
                taglist.extend(site["tags"])
            tags = ",".join(taglist)
            if "published_parsed" in article:
                published = mktime(article.published_parsed)
            elif "updated_parsed" in article:
                published = mktime(article.updated_parsed)
            else:
                published = None  # currently unused; not sent to Wallabag
            logger.info("add to wallabag: " + article.title)
            if site.get("github"):
                title = sitetitle + ": " + article.title
            else:
                title = article.title
            if not hasattr(article, 'link'):
                logger.info("no link, skipping!")
                continue
            url = urljoin(site["url"], article.link)
            exists = await wall.entries_exists(url)
            if exists["exists"]:
                logger.info("already found in wallabag: " + article.title)
                continue  # don't add the same URL twice
            if production:
                await wall.post_entries(url=url, title=title, tags=tags)
            else:
                logger.info("warning: running in debug mode - not adding links to wallabag")
    else:
        logger.debug("no latest_article: " + sitetitle)
    if f.entries:
        # remember the newest entry so the next run stops there
        sites[sitetitle]["latest_article"] = f.entries[0].title
if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main(loop, sites))
    if production:
        # write back the updated latest_article markers for the next run
        with open("sites.yaml", 'w') as stream:
            yaml.dump(sites, stream, default_flow_style=False)
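# Usage sketch (assuming config.yaml and sites.yaml sit next to this
# script): run ./main.py. With "debug: true" in config.yaml the run is a
# dry run: nothing is posted to Wallabag and sites.yaml is not rewritten.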