diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..dd7681c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +# .dockerignore +__pycache__ +*.pyc +*.pyo +*.pyd diff --git a/Dockerfile b/Dockerfile index ff409ad..976825f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,14 @@ -FROM python:3.10.6 +FROM python:3.11-alpine -ADD rss-ntfy.py . +RUN mkdir -p /rss-ntfy /etc/rss-ntfy +WORKDIR /rss-ntfy -COPY requirements.txt ./ +ADD ./rss-ntfy/* /rss-ntfy/ +COPY requirements.txt /rss-ntfy/ +RUN pip install --no-cache-dir --upgrade pip RUN pip install --no-cache-dir -r requirements.txt -COPY . . +ENV PYTHONUNBUFFERED=1 -CMD [ "python", "./rss-ntfy.py" ] +CMD ["python", "-u", "/rss-ntfy/rss-ntfy.py"] diff --git a/README.md b/README.md index efb970c..ba5a21f 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,23 @@ # RSS ntfy -Very small RSS notifier using [ntfy](https://ntfy.sh/). I would *highly* recommend using a self hosted ntfy instance, so that you can use whatever ntfy names you want. +Very small RSS notifier using [ntfy](https://ntfy.sh/). +Forked from [julianorchard/rss-ntfy](https://github.com/julianorchard/rss-ntfy) and mostly rewritten to add further functionality. -It's designed for use alongside certain 'alternative frontend services'. I use it for: +I would *highly* recommend using a self hosted ntfy instance, so that you can use whatever ntfy names you want. -- [Nitter](https://github.com/zedeus/nitter), Twitter alternative -- [Proxitok](https://github.com/pablouser1/ProxiTok), a TikTok frontend - -Both of these provide RSS feeds, which are on basically every page plus `/rss`: very handy. +Each post from a feed gets sent as Markdown text. +In order to avoid duplicate posts, the "link" extracted from the RSS items is stored in a `_hist` file and +subsequently deleted once they are no longer present in the feed. +Some feeds might cause reposts because they re-include "old" listings. 
## Usage -### Dependencies - -- Python 3 -- [BeautifulSoup4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/), `pip install beautifulsoup4` -- [requests](https://requests.readthedocs.io/en/latest/) `python -m pip install requests` -- lxml-xml parser for BeautifulSoup +### Installation ```sh -pip install -i requirements.txt +pip install -r requirements.txt +mkdir /etc/rss-ntfy/ +cp ./config/config.yml /etc/rss-ntfy/config.yml ``` Alternatively, use Docker compose: @@ -28,36 +26,84 @@ Alternatively, use Docker compose: docker compose up ``` -This will create a persistent volume for the storage of the _hist files, too (if -run without Docker, these are put in `$XDG_CACHE_HOME`). +This will create a persistent volume for the storage of the `_hist` files, too (at least per default). ### Configuration -Edit the `config.yaml` file: +The script includes a set of default service definition and configuration which you might want to change. + +[default.yml](rss-ntfy/default.yml) defines a set of `feeds`, `services`, `global` settings and `config`. **[DONT EDIT]** +Snippet: +```yaml +--- + +global: # settings to use as defaults + ntfy_server: https://ntfy.sh # server to use if no other is defined on service level + [...] 
+ +services: # service definition + github_release: # service name - referenced within the 'feeds' definition + service_feed: https://github.com/{{ name }}/releases.atom # where the rss feed is located + service_url: https://github.com/{{ name }} # used in the sub-title to link to the feed + ntfy_icon: https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png + ntfy_subtitle_prefix: 🤖 GitHub Release + ntfy_subtitle_seperator: by + ntfy_tags: ['robot'] + + github_commit: + service_feed: https://github.com/{{ name }}/commits.atom + service_url: https://github.com/{{ name }} + ntfy_icon: https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png + ntfy_subtitle_prefix: GitHub Commit on + ntfy_subtitle_seperator: by + ntfy_tags: ['robot'] + [...] + +feeds: # feeds to monitor from those services + github_release: [] + github_commit: [] + [...] + +config: + cache_location: ~/.cache + run_on_startup: false + log_level: info + schedule: '*/30 * * * *' # crontab style expression - concurrent jobs are not possible + service_wait: 60 # time to wait between services + feed_wait: 5 # time to wait between posts of one feed + max_attempts: 3 # retry to send message; consider failed after + retry_wait: 30 # time to wait between retrys +``` + +To personalize your rss ntfycations you can override / extend those defaults with a [config.yml](config/config.yml): +With Docker: Don't mount the file directly and instead mount the whole config directory, otherwise automatic reload does not work. 
+Without Docker: The script expects the config to be available at `/etc/rss-ntfy/config.yml` ```yaml -# Example configuration --- -proxitok: - service: proxitok - rss-url: https://proxitok.pabloferreiro.es/@{{ user }}/rss - descriptor: 🎶 TikTok +global: + ntfy_topic: some_topic # topic to use if no other is defined on service level -teddit: - service: teddit - rss-url: https://teddit.net/r/{{ sub }}?api&type=rss - descriptor: 🎩 Reddit post +feeds: + reddit_subreddit: + - SysadminHumor+Programmerhumor + youtube: + - name: UCXuqSBlHAE6Xw-yeJA0Tunw + feed_display_name: LTT # the subtitle takes the feed name - which in some cases, like YouTube, is an ID -``` +services: + reddit_subreddit: + ntfy_topic: one_topic -At this point the contents of the handlebar type substitutions (`{{ }}`) don't -matter; this will be replaced with the users/subreddits/thing-you-want-to-follow -in the files in the `rss-ntfy/` folder. + youtube: + ntfy_topic: another_topic +``` -*TODO: this is a not-nice way of doing this, possibly use more yaml* +At this point the contents of the handlebar type substitutions (`{{ }}` in `services`) don't matter; +this will be replaced with the users/thing-you-want-to-follow. -You can then use a [crontab](https://man7.org/linux/man-pages/man5/crontab.5.html) or a [systemd service](https://www.freedesktop.org/software/systemd/man/systemd.service.html) (or, on Windows, a [Task Scheduler](https://learn.microsoft.com/en-us/windows/win32/taskschd/task-scheduler-start-page) task) to run the command periodically. +Changes are validated and applied dynamically. A reload / restart is not necessary. 
## License diff --git a/config.yaml b/config.yaml deleted file mode 100644 index 510e460..0000000 --- a/config.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- - -proxitok: - service: proxitok - rss-url: https://proxitok.pabloferreiro.es/@{{ custom }}/rss - descriptor: 🎶 TikTok - ntfy-topic: proxitok-rss-ntfy-test - -invidious: - service: invidious - rss-url: https://invidious.snopyta.org/feed/channel/{{ custom }} - descriptor: 📽 YouTube video - ntfy-topic: invidious-rss-ntfy-test - -teddit: - service: teddit - rss-url: https://teddit.net/r/{{ custom }}?api&type=rss - descriptor: 🎩 Reddit post - ntfy-topic: teddit-rss-ntfy-test - diff --git a/config/config.yml b/config/config.yml new file mode 100644 index 0000000..da4ddff --- /dev/null +++ b/config/config.yml @@ -0,0 +1,52 @@ +--- + +feeds: + github_user: + - binwiederhier + github_release: + - binwiederhier/ntfy + github_commit: + - binwiederhier/ntfy + - Kariton/rss-ntfy + pypi: + - ntfy + discuss_ntfy: + - c/ntfy + - u/binwiederhier + reddit_subreddit: + - SysadminHumor+Programmerhumor + - SelfHosted + - HomeLab + reddit_user: + - u/binwiederhier + youtube: + - name: UCXuqSBlHAE6Xw-yeJA0Tunw + feed_display_name: LTT + +services: + github_user: + ntfy_topic: topic1 + + github_release: + ntfy_topic: topic2 + + github_commit: + ntfy_topic: topic3 + + pypi: + ntfy_topic: topic4 + + discuss_ntfy: + ntfy_topic: topic5 + + reddit_subreddit: + ntfy_topic: topic6 + + reddit_user: + ntfy_topic: topic7 + + youtube: + ntfy_topic: topic8 + +config: + run_on_startup: true diff --git a/docker-compose.yaml b/docker-compose.yml similarity index 78% rename from docker-compose.yaml rename to docker-compose.yml index d0c17dd..3c86e27 100644 --- a/docker-compose.yaml +++ b/docker-compose.yml @@ -1,3 +1,4 @@ +--- version: '3.8' services: @@ -5,6 +6,7 @@ services: build: context: . 
volumes: + - ./config:/etc/rss-ntfy - histfiles:/root/:rw volumes: diff --git a/requirements.txt b/requirements.txt index 76c516a..03b1bb3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,7 @@ -bs4 +feedparser +markdownify +jsonschema +watchdog +croniter requests -lxml pyyaml diff --git a/rss-ntfy.py b/rss-ntfy.py deleted file mode 100755 index afb5074..0000000 --- a/rss-ntfy.py +++ /dev/null @@ -1,174 +0,0 @@ -#!/usr/bin/env python3 - -## rss-ntfy.py --- Scrape RSS feeds and ntfy yourself. - -# Copyright (c) 2023 Julian Orchard - -## Description: - -# Really simple RSS feed scraper that looks at a list of URL's -# and sends a little ntfy about it. - -# Mainly designed for Proxitok and Nitter. - -## License: - -# See /LICENSE file in the root of this repository. - -## Code: - -from bs4 import BeautifulSoup -from pathlib import Path -import os -import re -import requests -import yaml - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + "/rss-ntfy/" -with open("config.yaml", "r") as config_file_contents: - CONFIG = yaml.safe_load(config_file_contents) -# TODO: This could also be abstracted to the config file, with a default value -NTFY_INSTANCE = "https://ntfy.sh/" - - -def ntfyr(message, ntfy_topic): - ''' - This just uses the simple example on docs.ntfy.sh to - send a message via ntfy. - ''' - requests.post(f'{NTFY_INSTANCE}{ntfy_topic}', data=f"{message}".encode(encoding="UTF-8")) - -def ntfyr_complex(ntfy_topic, username, title, link, published, description): - ''' - This sends a more complicated notification via ntfy. - - It's more based around the example of 'using a JSON - array', below: - https://docs.ntfy.sh/publish/ - ''' - message_text = f"{description} from {username}" - if title != "": - message_text = f"{message_text}:\n\n{title}!" - else: - message_text = f"{message_text}!" 
- requests.post(f"{NTFY_INSTANCE}", - json = { - "topic": f"{ntfy_topic}", - "message": f"{message_text}", - "actions": [{ - "action": "view", - "label": "View!", - "url": f"{link}" - }] - } - ) - -def get_user_list(user_list_file): - ''' - Get the users list into a list to iterate. - ''' - with open(user_list_file, encoding="UTF-8") as f: - user_list = [l.rstrip() for l in f] - return user_list - -def handlebar_replace(input, replacement): - ''' - Very simple Handlebar style replace: - https://handlebarsjs.com - - Takes the input URL and replaces the {{ custom }} - part, which will be the current user part. - ''' - return re.sub('{{.*}}', replacement, input) - -def check_file_list_exists(file_list): - ''' - Takes a list o files, checks if they exist, - creates them if they do not! - - I'm using this function instead of just relying on - 'w+', because we 'r+' the History file, at one point, - and 'r+' doesn't create the file if it doesn't exist - (unlike 'w+' and 'a+'). - ''' - for file in file_list: - Path(file).touch(exist_ok=True) - -def mkdirp(folder): - 'mkdir -p folder' - Path(folder).mkdir(parents=True, exist_ok=True) - -def main(): - ''' - This article by Matthew Wimberly got me along the right lines with things: - https://codeburst.io/building-an-rss-feed-scraper-with-python-73715ca06e1f - ''' - - # Logfile Save Location - if "XDG_CACHE_HOME" in os.environ: - log_location = os.environ['XDG_CACHE_HOME'] - else: - log_location = f"{Path.home()}/.cache" - - mkdirp(f"{log_location}/rss-ntfy/") - - for service_name in CONFIG: - # Follow File and History File - user_list_file = f"{SCRIPT_DIR}{CONFIG[service_name]['service']}-follow-list.txt" - service_hist = f"{log_location}{CONFIG[service_name]['service']}_hist" - check_file_list_exists([user_list_file, service_hist]) - - # Instance, Topic, Descriptor - instance = f"{CONFIG[service_name]['rss-url']}" - ntfy_topic = f"{CONFIG[service_name]['ntfy-topic']}" - descriptor = CONFIG[service_name]['descriptor'] - - # 
Alternative Tags Input - item_tag = CONFIG[service_name].get("item-alt", "item") - title_tag = CONFIG[service_name].get("title-alt", "title") - link_tag = CONFIG[service_name].get("link-alt", "link") - date_tag = CONFIG[service_name].get("pubdate-alt", "pubDate") - - # TODO: Rename everything with 'user', as it's more generally an - # account? Not sure if account is the best name, either. - user_list = get_user_list(user_list_file) - - for username in user_list: - current_instance = handlebar_replace(instance, username) - name_tag = CONFIG[service_name].get("name-alt", username) - try: - req = requests.get(f"{current_instance}") - rss_content = BeautifulSoup(req.content, "lxml-xml") - articles = rss_content.findAll(item_tag) - for a in articles: - title = a.find(title_tag).text - link = a.find(link_tag).text - published = a.find(date_tag).text - - # If we need a different name from the username, - # handle that here. - if name_tag != username: - name = a.find(name_tag).text - else: - name = username - - with open(service_hist, "r+") as hist_file: - data = hist_file.read() - # If the link isn't in data, not only - # do we want to add it to the Hist file, - # we also want to, of course, ntfy: - if not link in data: - ntfyr_complex(ntfy_topic, - name, - title, - link, - published, - descriptor) - hist_file.write(f"{link}\n") - - except Exception as e: - # TODO: Just use the ntfy JSON request format - ntfyr(f"Error with scraping {name_tag}, '{e}'.", ntfy_topic) - -if __name__ == '__main__': - main() diff --git a/rss-ntfy/default.yml b/rss-ntfy/default.yml new file mode 100644 index 0000000..59dd5eb --- /dev/null +++ b/rss-ntfy/default.yml @@ -0,0 +1,138 @@ +--- + +feeds: + github: [] + github_user: [] + github_release: [] + github_commit: [] + pypi: [] + discuss_ntfy: [] + reddit: [] + reddit_subreddit: [] + reddit_user: [] + youtube: [] + youtube_playlist: [] + twitch: [] + tiktok: [] + +global: + ntfy_server: https://ntfy.sh +# ntfy_topic: null +# ntfy_auth: +# 
username: null +# password: null +# token: null +# ntfy_subtitle_prefix: null +# ntfy_subtitle_seperator: null +# ntfy_icon: null +# ntfy_tags: null +# ntfy_priority: null +# ntfy_cache: null +# ntfy_email: null +# ntfy_call: null +# ntfy_delay: null + +services: + github: + service_feed: https://github.com/{{ name }}.atom + service_url: https://github.com/{{ name }} + ntfy_icon: https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png + ntfy_tags: ['robot'] + + github_user: + service_feed: https://github.com/{{ name }}.atom + service_url: https://github.com/{{ name }} + ntfy_icon: https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png + ntfy_tags: ['robot'] + + github_release: + service_feed: https://github.com/{{ name }}/releases.atom + service_url: https://github.com/{{ name }} + ntfy_icon: https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png + ntfy_subtitle_prefix: 🤖 GitHub Release + ntfy_subtitle_seperator: by + ntfy_tags: ['robot'] + + github_commit: + service_feed: https://github.com/{{ name }}/commits.atom + service_url: https://github.com/{{ name }} + ntfy_icon: https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png + ntfy_subtitle_prefix: GitHub Commit on + ntfy_subtitle_seperator: by + ntfy_tags: ['robot'] + + pypi: + service_feed: https://pypi.org/rss/project/{{ name }}/releases.xml + service_url: https://pypi.org/project/{{ name }} + ntfy_icon: https://s3.dualstack.us-east-2.amazonaws.com/pythondotorg-assets/media/community/logos/python-logo-only.png + ntfy_subtitle_prefix: 🐍 PyPI Release + ntfy_subtitle_seperator: by + ntfy_tags: ['snake'] + + discuss_ntfy: + service_feed: https://discuss.ntfy.sh/feeds/{{ name }}.xml + service_url: https://discuss.ntfy.sh/{{ name }} + ntfy_icon: https://raw.githubusercontent.com/LemmyNet/lemmy-ui/main/src/assets/icons/icon-144x144.png + ntfy_subtitle_seperator: by + ntfy_tags: ['mouse'] + + reddit: + service_feed: https://www.reddit.com/{{ 
name }}/.rss + service_url: https://www.reddit.com/{{ name }} + ntfy_icon: https://www.redditstatic.com/desktop2x/img/favicon/android-icon-144x144.png + ntfy_subtitle_seperator: by + ntfy_tags: ['tophat'] + + reddit_subreddit: + service_feed: https://www.reddit.com/r/{{ name }}/.rss + service_url: https://www.reddit.com/r/{{ name }} + ntfy_icon: https://www.redditstatic.com/desktop2x/img/favicon/android-icon-144x144.png + ntfy_subtitle_prefix: Posted on + ntfy_subtitle_seperator: by + ntfy_tags: ['tophat'] + + reddit_user: + service_feed: https://www.reddit.com/u/{{ name }}/.rss + service_url: https://www.reddit.com/u/{{ name }} + ntfy_icon: https://www.redditstatic.com/desktop2x/img/favicon/android-icon-144x144.png + ntfy_tags: ['tophat'] + + youtube: + service_feed: https://www.youtube.com/feeds/videos.xml?channel_id={{ name }} + service_url: https://www.youtube.com/channel/{{ name }} + ntfy_icon: https://www.gstatic.com/youtube/img/branding/favicon/favicon_144x144.png + ntfy_subtitle_prefix: YouTube Video + ntfy_subtitle_seperator: by + ntfy_tags: ['video_camera'] + + youtube_playlist: + service_feed: https://www.youtube.com/feeds/videos.xml?playlist_id={{ name }} + service_url: https://www.youtube.com/playlist?list={{ name }} + ntfy_icon: https://www.gstatic.com/youtube/img/branding/favicon/favicon_144x144.png + ntfy_subtitle_prefix: YouTube Playlist + ntfy_subtitle_seperator: by + ntfy_tags: ['video_camera'] + + twitch: + service_feed: https://twitchrss.appspot.com/vod/{{ name }} + service_url: https://www.twitch.tv/{{ name }} + ntfy_icon: https://static.twitchcdn.net/assets/favicon-32-e29e246c157142c94346.png + ntfy_subtitle_prefix: Twitch Stream + ntfy_subtitle_seperator: by + ntfy_tags: ['tv'] + + tiktok: + service_feed: https://proxitok.pabloferreiro.es/@{{ name }}/rss + service_url: https://proxitok.pabloferreiro.es/@{{ name }} + ntfy_subtitle_prefix: 🎶 TikTok by + ntfy_tags: ['notes'] + +config: + cache_location: ~/.cache + run_on_startup: false + 
log_level: info + schedule: '*/30 * * * *' + service_wait: 60 + feed_wait: 5 + max_attempts: 3 + retry_wait: 30 diff --git a/rss-ntfy/invidious-follow-list.txt b/rss-ntfy/invidious-follow-list.txt deleted file mode 100644 index 60eb002..0000000 --- a/rss-ntfy/invidious-follow-list.txt +++ /dev/null @@ -1 +0,0 @@ -UCPdzbBejsLqZRl4vDclPCnw diff --git a/rss-ntfy/nitter-follow-list.txt b/rss-ntfy/nitter-follow-list.txt deleted file mode 100644 index ac5c841..0000000 --- a/rss-ntfy/nitter-follow-list.txt +++ /dev/null @@ -1 +0,0 @@ -jdorchard diff --git a/rss-ntfy/proxitok-follow-list.txt b/rss-ntfy/proxitok-follow-list.txt deleted file mode 100644 index 8b269b1..0000000 --- a/rss-ntfy/proxitok-follow-list.txt +++ /dev/null @@ -1 +0,0 @@ -0x6a75 diff --git a/rss-ntfy/rss-ntfy.py b/rss-ntfy/rss-ntfy.py new file mode 100755 index 0000000..e828b95 --- /dev/null +++ b/rss-ntfy/rss-ntfy.py @@ -0,0 +1,632 @@ +#!/usr/bin/env python3 + +import os +import re +import yaml +import requests +import feedparser +import base64 +import time +import random +import logging +import json +import threading +import copy +import jsonschema +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler +from croniter import croniter +from markdownify import markdownify as md +from pathlib import Path + + +CONFIG = dict() +DEFAULT_CONFIG = dict() +last_modification_time = 0 + +logger = None + + +def merge_configurations(base_config, custom_config): + for key, value in custom_config.items(): + if isinstance(value, dict): + if key in base_config and isinstance(base_config[key], dict): + base_config[key] = merge_configurations(base_config[key], value) + else: + base_config[key] = value + elif value is None: + base_config.pop(key, None) + else: + base_config[key] = value + return base_config + + +def custom_validation(config): + feeds_keys = config.get("feeds", {}).keys() + services_keys = config.get("services", {}).keys() + + for feed_key in feeds_keys: + if feed_key 
not in services_keys: + raise jsonschema.exceptions.ValidationError( + f"For each 'feeds' entry with key '{feed_key}', there must be a corresponding 'services' entry with the same key." + ) + + if "ntfy_topic" in config["global"]: + return + + for feed_name, feed_config in config["feeds"].items(): + if not "ntfy_topic" in feed_config: + if not "ntfy_topic" in config["services"][feed_name]: + raise jsonschema.ValidationError("The property 'ntfy_topic' must be present in either 'global', 'feeds', or 'services'.") + + +def load_config(file_path): + try: + with open(file_path, 'r') as file: + return yaml.safe_load(file) + except FileNotFoundError: + logger.error("Config file not found.") + return None + except yaml.YAMLError as e: + logger.error(f"Error loading the config file '{file_path}': {e}") + return None + + +def load_schema(schema_path): + with open(schema_path, 'r') as schema_file: + return json.load(schema_file) + + +def update_config(init=False): + global CONFIG, DEFAULT_CONFIG + + config_path = "/etc/rss-ntfy/config.yml" + schema_path = "schema.json" + try: + custom_config = load_config(config_path) + + if custom_config is not None: + if logger is not None: + logger.debug(f'custom_config": {json.dumps(custom_config)}') + logger.debug(f'custom_config": {json.dumps(CONFIG)}') + + merged_config = merge_configurations(copy.deepcopy(DEFAULT_CONFIG), custom_config) + + if logger is not None: + logger.debug(f'custom_config": {json.dumps(merged_config)}') + + json_schema = load_schema(schema_path) + + try: + jsonschema.validate(merged_config, json_schema) + custom_validation(merged_config) + if logger is not None: + logger.info("Config is valid.") + else: + print(f"Config is valid.") + + except jsonschema.exceptions.ValidationError as e: + if logger is not None: + logger.error(f"Config validation failed: {e}") + return + else: + print(f"Config validation failed: {e}") + return + + if not init: + compare_config(CONFIG, merged_config) + + CONFIG = merged_config + + 
if logger is not None: + logger.debug(f'"CONFIG": {json.dumps(CONFIG)}') + + else: + return + + except FileNotFoundError: + logger.error("Config file not found.") + + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + + +def init_config(): + global CONFIG, DEFAULT_CONFIG + + default_config = "default.yml" + + defaults = load_config(default_config) + DEFAULT_CONFIG = defaults + CONFIG = copy.deepcopy(DEFAULT_CONFIG) + + update_config(init=True) + + config_thread = threading.Thread(target=watch_config) + config_thread.start() + + +def compare_config(running, update): + logger.debug(f"\n\n{running}\n\n{update}\n\n") + if running != update: + logger.info("Reloading...") + if update['config']['cache_location'] != running['config']['cache_location']: + logger.warning("Cache location has changed. Please restart for the changes to take effect.") + if update['config']['log_level'] != running['config']['log_level']: + logger.warning("Log level has changed. Please restart for the changes to take effect.") + if update['config']['schedule'] != running['config']['schedule']: + logger.warning("Schedule has changed. 
Please restart for the changes to take effect.") + + +def watch_config(): + global CONFIG + + config_path = "/etc/rss-ntfy/" + + event_handler = ConfigFileHandler() + observer = Observer() + observer.schedule(event_handler, path=config_path, recursive=False) + observer.start() + + try: + while True: + threading.Event().wait(1) + except KeyboardInterrupt: + observer.stop() + + observer.join() + + +class ConfigFileHandler(FileSystemEventHandler): + def on_modified(self, event): + global CONFIG + + config_path = "/etc/rss-ntfy/config.yml" + + global last_modification_time + + if event.src_path.endswith(config_path): + current_modification_time = os.path.getmtime(event.src_path) + logger.info("Configuration changed...") + if current_modification_time != last_modification_time: + last_modification_time = current_modification_time + update_config() + + +def init_logger(): + global logger + + config = CONFIG['config'] + log_level = config.get('log_level').upper() + + log_format = '[%(levelname)s] %(asctime)s - %(name)s - %(message)s' + + logger = logging.getLogger(__name__) + logger.setLevel(log_level) + + console_handler = logging.StreamHandler() + console_handler.setLevel(log_level) + + formatter = logging.Formatter(log_format) + console_handler.setFormatter(formatter) + + logger.addHandler(console_handler) + + +def split_message_into_parts(data_json, max_size): + message = data_json['ntfy_message'] + lines = message.splitlines() + + parts = [] + current_part = "" + current_title = data_json['ntfy_title'] + + for line in lines: + words = line.split() + for word in words: + if len(current_part) + len(word) + 1 <= max_size: + current_part += word + " " + else: + parts.append((current_title, current_part.strip())) + current_part = word + " " + current_title = data_json['ntfy_title'] + + current_part += "\n" + + if current_part: + parts.append((current_title, current_part.strip())) + + return parts + + +def ntfy_headers(ntfy_auth, ntfy_cache=None, ntfy_icon=None, 
ntfy_tags=None, ntfy_priority=None, ntfy_email=None, ntfy_call=None, ntfy_delay=None, ntfy_thumbnail=None): + headers = { + 'Content-Type': 'application/json', + 'charset': 'utf-8', + 'X-Markdown': 'True', + } + + if ntfy_auth is not None: + if ntfy_auth['username'] is not None and ntfy_auth['password'] is not None: + headers['Authorization'] = "Basic " + base64.b64encode((auth['username'] + ":" + ntfy_auth['password']).encode()).decode() + + if ntfy_auth['token'] is not None: + headers['Authorization'] = f"Bearer {auth['token']}" + + if ntfy_icon is not None: + headers['X-Icon'] = ntfy_icon + + if ntfy_tags is not None: + headers['X-Tags'] = ','.join(ntfy_tags) + + if ntfy_thumbnail is not None: + headers['X-Attach'] = ntfy_thumbnail + + if ntfy_priority is not None: + headers['X-Priority'] = ntfy_priority + + if ntfy_cache is not None: + headers['X-Cache'] = ntfy_cache + + if ntfy_email is not None: + headers['X-Email'] = ntfy_email + + if ntfy_call is not None: + headers['X-Call'] = ntfy_call + + if ntfy_delay is not None: + headers['X-Delay'] = ntfy_delay + + return headers + + +def ntfyr(data_json): + config = CONFIG['config'] + global_config = CONFIG['global'] + + max_attempts = int(config.get('max_attempts')) + retry_wait = int(config.get('retry_wait')) + + attempts = 0 + + request_json = { + "topic": data_json['ntfy_topic'], + } + + headers = ntfy_headers(data_json['ntfy_auth'], data_json['ntfy_cache'], data_json['ntfy_icon'], data_json['ntfy_tags'], data_json['ntfy_priority'], data_json['ntfy_email'], data_json['ntfy_call'], data_json['ntfy_delay'], data_json['ntfy_thumbnail']) + + parts = split_message_into_parts(data_json, max_size=4096) + total_parts = len(parts) + + if data_json['item_link'] is not None: + request_json['actions'] = [ + { + "action": "view", + "label": "View!", + "url": data_json['item_link'] + } + ] + + for part_num, (part_title, part_message) in enumerate(reversed(parts), start=1): + if total_parts > 1: + part_title = f"{part_title} 
[{total_parts - part_num + 1}/{total_parts}]" + + request_json["title"] = part_title + request_json["message"] = part_message + + if data_json['item_link'] is not None: + request_json['actions'] = [ + { + "action": "view", + "label": "View!", + "url": data_json['item_link'] + } + ] + + while attempts < max_attempts: + logger.info(f"sending message (part {part_num} of {total_parts})...") + logger.debug(f'"post_data": {json.dumps(request_json)}') + logger.debug(f'"post_header": {json.dumps(headers)}') + try: + response = requests.post(data_json['ntfy_server'], headers=headers, json=request_json) + logger.info(f"successfully sent part {part_num}!") + try: + response_json = response.json() + logger.debug(f'"response_content": {json.dumps(response_json)}') + except json.JSONDecodeError: + logger.debug(f'"response_content": "{response.text}"') + response.raise_for_status() + break + except requests.exceptions.HTTPError as e: + logger.error(e) + logger.error(f'"post_data": {json.dumps(request_json)}') + logger.error(f'"post_header": {json.dumps(headers)}') + try: + response_json = response.json() + logger.error(f'"response_content": {json.dumps(response_json)}') + except json.JSONDecodeError: + logger.error(f'"response_content": "{response.text}"') + + if response.status_code == 429: + logger.warning(f"Retry in {retry_wait} seconds...") + time.sleep(retry_wait) + else: + raise StopIteration + attempts += 1 + else: + logger.error("Failed to send part {part_num} of {total_parts} after {max_attempts} attempts.") + logger.error(f'"post_data": {json.dumps(request_json)}') + logger.error(f'"post_header": {json.dumps(headers)}') + try: + response_json = response.json() + logger.error(f'"response_content": {json.dumps(response_json)}') + except json.JSONDecodeError: + logger.error(f'"response_content": "{response.text}"') + raise StopIteration + + if total_parts > 1: + time.sleep(2) + + +def handlebar_replace(string, replacement): + return re.sub('{{.*}}', replacement, string) + 
+ +def build_message(item): + message = str() + + if 'sub_title' in item: + message += f"{item.sub_title}\n" + elif 'published' in item: + message += f"Published: {item.published}\n" + elif 'updated' in item: + message += f"Updated: {item.updated}\n" + + if 'media_statistics' in item: + message += f"Media statistics:\n" + for k, v in item.media_statistics.items(): + message += f" {k.capitalize()}: {v}\n" + + if 'media_starrating' in item: + message += f"Media starring:\n" + for k, v in item.media_starrating.items(): + message += f" {k.capitalize()}: {v}\n" + + message += "\n" + + if item.description == item.summary: + message += f"{md(item.summary)}\n" + else: + message += f"{md(item.description)}\n\n" + message += f"{md(item.summary)}\n" + + message = re.sub(r'\n{3,}', '\n\n', message) + + return message + + +def build_subtitle(item, feed, url, feed_display_name=None, ntfy_subtitle_prefix=None, ntfy_subtitle_seperator=None): + ntfy_subtitle_text = f"({feed_display_name or feed})[{url}]" + ntfy_subtitle_postfix = str() + all_authors = list() + + if 'authors' in item and 'name' in item["authors"]: + for author in item['authors']: + author_linked = f"[{author['name']}]({author['href']})" if 'href' in author else author_name + all_authors.append(author_linked) + ntfy_subtitle_postfix += f" {', '.join(all_authors)}" + elif 'author_detail' in item and 'name' in item['author_detail'] and item['author_detail']['name'] != feed: + author_name = item["author_detail"]["name"] + author_link = f"[{author_name}]({item['author_detail']['href']})" if 'href' in item["author_detail"] else author_name + ntfy_subtitle_postfix += f" {author_link}" + elif 'author_detail' in item and 'email' in item['author_detail'] and item['author_detail']['email'] != feed: + author_name = item["author_detail"]["email"] + author_link = f"[{author_name}]({item['author_detail']['email']})" if 'href' in item["author_detail"] else author_name + ntfy_subtitle_postfix += f" {author_link}" + elif 'author' in 
item and item['author'] != feed: + ntfy_subtitle_postfix += f" {item['author']}" + + subtitle_parts = [part for part in [ntfy_subtitle_prefix, ntfy_subtitle_text, ntfy_subtitle_seperator, ntfy_subtitle_postfix] if part is not None] + + subtitle = " ".join(subtitle_parts) + subtitle = re.sub(r'^\s+|\s+$|\s+(?=\s)', ' ', subtitle) + + return subtitle + + +def read_file(file_path): + try: + with open(file_path, "r") as file: + data = json.load(file) + except (FileNotFoundError, json.JSONDecodeError): + data = {} + + return data + + +def write_file(file_path, data): + with open(file_path, "w") as file: + json.dump(data, file) + + +def cleanup_file(file_path, feed, feed_data): + feed_links = {item.link for item in feed_data.entries} + data = read_file(file_path) + + remove_links = [link for link in data.get(feed, []) if link not in feed_links] + data[feed] = [link for link in data.get(feed, []) if link not in remove_links] + + write_file(file_path, data) + + +def process_feed(data_json): + config = CONFIG['config'] + + ntfy_server = data_json['ntfy_server'] + ntfy_topic = data_json['ntfy_topic'] + feed = data_json['feed'] + feed_url = data_json['feed_url'] + url = data_json['url'] + + feed_display_name = data_json['feed_display_name'] + ntfy_subtitle_prefix = data_json['ntfy_subtitle_prefix'] + ntfy_subtitle_seperator = data_json['ntfy_subtitle_seperator'] + service_hist = data_json['service_hist'] + service = data_json['service'] + + logger.info(f"feed_url: {feed_url}") + + feed_data = feedparser.parse(data_json['feed_url']) + item_count = 1 + total_items = len(feed_data.entries) + + for item in feed_data.entries: + feed_wait = int(config.get('feed_wait')) + + ntfy_thumbnail = None + + logger.info(f"[{item_count}/{total_items}] {service} - {feed} - {feed_url} - {ntfy_server}/{ntfy_topic}") + + logger.debug(f'"feed_item": {json.dumps(item)}') + + message = build_message(item) + subtitle = build_subtitle(item, feed, url, feed_display_name, ntfy_subtitle_prefix, 
ntfy_subtitle_seperator) + ntfy_title = item.title + ntfy_message = f"{subtitle}\n\n{message}" + + data_json['ntfy_title'] = item.title + data_json['ntfy_message'] = f"{subtitle}\n\n{message}" + data_json['ntfy_thumbnail'] = ntfy_thumbnail + data_json['item_link'] = item.link + + if 'media_thumbnail' in item: + if 'url' in item['media_thumbnail'][0]: + data_json['ntfy_thumbnail'] = item['media_thumbnail'][0]['url'] + + + hist_json = read_file(service_hist) + + if item.link not in hist_json.get(feed, []): + try: + ntfyr(data_json) + time.sleep(feed_wait) + except StopIteration: + break + + hist_json.setdefault(feed, []).append(item.link) + write_file(service_hist, hist_json) + else: + logger.info(f"already sent.") + + item_count += 1 + cleanup_file(service_hist, feed, feed_data) + + +def main(): + config = CONFIG['config'] + global_config = CONFIG['global'] + + service_wait = int(config.get('service_wait')) + + cache_path = config.get('cache_location') + cache_location = os.path.expanduser(cache_path) + + Path(f"{cache_location}/rss-ntfy/").mkdir(parents=True, exist_ok=True) + + for service_name, service_config in CONFIG['services'].items(): + logger.info(f"service: {service_name}") + logger.debug(f'"service_config": {json.dumps(service_config)}') + + service_hist = f"{cache_location}/rss-ntfy/{service_name}_hist" + Path(service_hist).touch(exist_ok=True) + + for feed_config in CONFIG['feeds'][service_name]: + if not isinstance(feed_config, dict): + feed = feed_config + feed_config = {} + else: + feed = feed_config['name'] + logger.info(f"feed: {feed}") + logger.debug(f'"feed_config": {json.dumps(feed_config)}') + + feed_url = handlebar_replace(service_config['service_feed'], feed) + url = handlebar_replace(service_config['service_url'], feed) + + data_json = { + 'service': service_name, + 'feed': feed, + 'feed_url': feed_url, + 'url': url, + 'service_hist': service_hist, + 'feed_display_name': feed_config.get('feed_display_name', 
service_config.get('feed_display_name', global_config.get('feed_display_name'))), + 'ntfy_server': feed_config.get('ntfy_server', service_config.get('ntfy_server', global_config.get('ntfy_server'))), + 'ntfy_topic': feed_config.get('ntfy_topic', service_config.get('ntfy_topic', global_config.get('ntfy_topic'))), + 'ntfy_auth': feed_config.get('ntfy_auth', service_config.get('ntfy_auth', global_config.get('ntfy_auth'))), + 'ntfy_subtitle_prefix': feed_config.get('ntfy_subtitle_prefix', service_config.get('ntfy_subtitle_prefix', global_config.get('ntfy_subtitle_prefix'))), + 'ntfy_subtitle_seperator': feed_config.get('ntfy_subtitle_seperator', service_config.get('ntfy_subtitle_seperator', global_config.get('ntfy_subtitle_seperator'))), + 'ntfy_icon': feed_config.get('ntfy_icon', service_config.get('ntfy_icon', global_config.get('ntfy_icon'))), + 'ntfy_tags': feed_config.get('ntfy_tags', service_config.get('ntfy_tags', global_config.get('ntfy_tags'))), + 'ntfy_priority': feed_config.get('ntfy_priority', service_config.get('ntfy_priority', global_config.get('ntfy_priority'))), + 'ntfy_cache': feed_config.get('ntfy_cache', service_config.get('ntfy_cache', global_config.get('ntfy_cache'))), + 'ntfy_email': feed_config.get('ntfy_email', service_config.get('ntfy_email', global_config.get('ntfy_email'))), + 'ntfy_call': feed_config.get('ntfy_call', service_config.get('ntfy_call', global_config.get('ntfy_call'))), + 'ntfy_delay': feed_config.get('ntfy_delay', service_config.get('ntfy_delay', global_config.get('ntfy_delay'))) + } + + logger.debug(f"data_json: {json.dumps(data_json)}") + + process_feed(data_json) + time.sleep(service_wait) + + +def parse_cron_definition(cron_definition): + minute, hour, day, month, day_of_week = cron_definition.split() + return f"{minute} {hour} {day} {month} {day_of_week}" + + +def init_schedule(): + global CONFIG + + config = CONFIG['config'] + + schedule = config.get('schedule') + + cron_expression = parse_cron_definition(schedule) + cron = 
croniter(cron_expression) + + while True: + next_run_time = cron.get_next(float) + next_run_readable = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_run_time)) + sleep_time = next_run_time - time.time() + + logger.info(f"next run: {next_run_readable}") + if sleep_time > 0: + time.sleep(sleep_time) + main() + + +if __name__ == '__main__': + init_config() + print(f'"CONFIG": {json.dumps(CONFIG, indent=4)}') + + init_logger() + + config = CONFIG['config'] + + log_level = config.get('log_level').upper() + service_wait = int(config.get('service_wait')) + feed_wait = int(config.get('feed_wait')) + schedule = config.get('schedule') + + logger.info(f"log level: {log_level}") + logger.info(f"service wait: {service_wait}") + logger.info(f"feed wait: {feed_wait}") + logger.info(f"schedule: {schedule}") + logger.info("started rss-ntfy!") + + if bool(CONFIG['config']['run_on_startup']): + logger.info("running on startup.") + main() + + init_schedule() + diff --git a/rss-ntfy/schema.json b/rss-ntfy/schema.json new file mode 100644 index 0000000..b0a388b --- /dev/null +++ b/rss-ntfy/schema.json @@ -0,0 +1,130 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "config": { + "type": "object", + "properties": { + "cache_location": { "type": "string" }, + "run_on_startup": { "type": "boolean" }, + "log_level": { + "type": "string", + "enum": ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "debug", "info", "warning", "error", "critical"] + }, + "schedule": { "type": "string", "format": "cron" }, + "service_wait": { "type": "integer" }, + "feed_wait": { "type": "integer" }, + "max_attempts": { "type": "integer" }, + "retry_wait": { "type": "integer" } + }, + "required": [ + "cache_location", + "run_on_startup", + "log_level", + "schedule", + "service_wait", + "feed_wait", + "max_attempts", + "retry_wait" + ] + }, + "global": { + "type": "object", + "properties": { + "ntfy_server": { "type": "string", "format": "uri" }, + 
"ntfy_topic": { "type": "string" }, + "ntfy_auth": { + "type": "object", + "properties": { + "username": { "type": "string" }, + "password": { "type": "string" }, + "token": { "type": "string" } + } + }, + "ntfy_subtitle_prefix": { "type": "string" }, + "ntfy_subtitle_seperator": { "type": "string" }, + "ntfy_icon": { "type": "string", "format": "uri" }, + "ntfy_tags": { "type": "array", "items": { "type": "string" } }, + "ntfy_priority": { + "oneOf": [ + { "type": "integer", + "enum": ["1", "2", "3", "4", "5"] + }, + { + "type": "string", + "enum": ["min", "low", "default", "high", "max", "urgent"] + } + ] + }, + "ntfy_cache": { "type": "boolean" }, + "ntfy_email": { "type": "string", "format": "email" }, + "ntfy_call": { "type": "string", "format": "phone"}, + "ntfy_delay": { + "anyOf": [ + { "type": "string" }, + { "type": "integer" } + ] + }, + "feed_display_name": { "type": "string" } + } + }, + "services": { + "type": "object", + "minItems": 1, + "patternProperties": { + "^.+$": { + "allOf": [ + { + "type": "object", + "properties": { + "service_feed": { "type": "string" }, + "service_url": { "type": "string" } + }, + "required": ["service_feed", "service_url"] + }, + { + "$ref": "#/properties/global" + } + ] + } + } + }, + "feeds": { + "type": "object", + "minItems": 1, + "patternProperties": { + "^.+$": { + "oneOf": [ + { + "type": "array", + "items": { "type": "string" } + }, + { + "type": "array", + "items": { + "allOf": [ + { + "type": "object", + "properties": { + "name": { "type": "string" } + }, + "required": ["name"] + }, + { + "$ref": "#/properties/global" + } + ] + }, + "minItems": 1 + } + ] + } + } + } + }, + "formats": { + "cron": "^(?:[\\d*,\\/\\-]+\\s){4}[\\d*,\\/\\-]+$", + "phone": "^[+]?[(]?[0-9]{3}[)]?[-\\s\\.]?[0-9]{3}[-\\s\\.]?[0-9]{4,6}$" + }, + "required": ["config", "global", "services", "feeds"] +} diff --git a/rss-ntfy/teddit-follow-list.txt b/rss-ntfy/teddit-follow-list.txt deleted file mode 100644 index 23e9f69..0000000 --- 
a/rss-ntfy/teddit-follow-list.txt +++ /dev/null @@ -1 +0,0 @@ -funny