Skip to content

Commit

Permalink
Initial code
Browse files Browse the repository at this point in the history
  • Loading branch information
thevops committed Aug 21, 2023
0 parents commit 603af54
Show file tree
Hide file tree
Showing 15 changed files with 487 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Python virtual environments (both common directory names)
.venv
# Python bytecode caches
__pycache__
# JetBrains IDE project files
.idea
venv
# Per-feed pickleDB database files, generated at runtime (see src/RSSFeed.py)
db/*.db.json
# Local runtime configuration (mounted into the container by docker-compose)
config.yaml
11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
repos:
  # For Python files: black is the canonical formatter
  - repo: https://github.com/psf/black
    rev: 23.3.0
    hooks:
      - id: black
  # isort with the "black" profile keeps import ordering compatible with black
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        args: ["--profile", "black"]
41 changes: 41 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#
# Stage 1: build a self-contained virtualenv with all Python dependencies
#
FROM debian:11-slim AS build

# Install system requirements for Python and create the virtual environment.
# Putting the venv's bin dir first on PATH makes the pip/python calls below
# use the venv instead of the system interpreter.
ENV VIRTUAL_ENV=/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN apt-get update \
    && apt-get install --no-install-suggests --no-install-recommends --yes \
        python3-venv \
        gcc \
        libpython3-dev \
        tzdata \
    && python3 -m venv $VIRTUAL_ENV \
    && pip install --upgrade pip setuptools wheel \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Install Python requirements into the virtualenv
COPY requirements.txt /requirements.txt
RUN pip install --disable-pip-version-check -r /requirements.txt

#
# Stage 2: minimal runtime image — only the prebuilt venv plus application code
#
FROM gcr.io/distroless/python3-debian11 AS production

COPY --from=build /venv /venv
ENV VIRTUAL_ENV=/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

WORKDIR /app
# NOTE(review): main.py appears at the repository root in this commit, but
# only ./src/ is copied here — confirm /app/main.py actually exists at runtime.
COPY ./src/ /app

# NOTE(review): src/config.py reads TIMEZONE (not TZ) — both default to
# Europe/Warsaw so no bug manifests, but confirm which variable is intended.
ENV TZ=Europe/Warsaw
# Unbuffered stdout so log lines appear immediately in `docker logs`
ENV PYTHONUNBUFFERED=1
# (duplicate ENV VIRTUAL_ENV / ENV PATH declarations removed — they were
# already set right after the venv COPY above)

ENTRYPOINT ["python", "main.py"]
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# RSS Assistant
12 changes: 12 additions & 0 deletions Taskfile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# go-task (taskfile.dev) task definitions
version: '3'

tasks:
  docker-build:
    desc: Build Docker image
    cmds:
      - docker build -t rss .

  compose-up:
    desc: Run Docker Compose
    cmds:
      # NOTE(review): uses the legacy standalone `docker-compose` binary —
      # confirm it is available, or switch to the `docker compose` plugin.
      - docker-compose up -d
1 change: 1 addition & 0 deletions db/README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The directory contains database files.
10 changes: 10 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
services:
  rss:
    build: ./
    container_name: rss
    # NOTE(review): pins amd64 — presumably to match the distroless base
    # image; ARM hosts will run this under emulation. Confirm intentional.
    platform: linux/amd64
    environment:
      # Path inside the container where config.yaml is mounted (read by src/config.py)
      - CONFIG_FILE=/config.yaml
      - LOG_LEVEL=DEBUG
    volumes:
      # Mount the local config read-only at the path given in CONFIG_FILE
      - ./config.yaml:/config.yaml:ro
38 changes: 38 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import signal

from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.cron import CronTrigger

from src.config import CONFIG, logger, tz
from src.controller import send_latest_feeds

# Blocking scheduler: SCHEDULER.start() in main() runs the job loop in the
# foreground. Module-level so exit_handler can reach it for shutdown.
SCHEDULER = BlockingScheduler(timezone=tz)


def exit_handler(signal_received, frame):
    """Signal handler: shut the scheduler down and terminate the process.

    Registered for SIGINT/SIGTERM in main(); ``signal_received`` and
    ``frame`` are the standard signal-handler arguments and are unused.
    """
    logger.info("Exiting gracefully")
    # wait=False: do not block on currently-running jobs during shutdown
    SCHEDULER.shutdown(wait=False)
    # raise SystemExit rather than calling the site-provided exit() builtin,
    # which is a convenience for interactive use and not guaranteed to exist
    # when Python runs without the `site` module
    raise SystemExit(0)


def main():
    """Install signal handlers, register all configured feed jobs, run the scheduler."""
    # Setup signal handlers so the process stops promptly on Ctrl-C / docker stop
    signal.signal(signal.SIGINT, exit_handler)
    signal.signal(signal.SIGTERM, exit_handler)

    # Add one cron-style job per entry in the config's `jobs` section.
    # The whole job dict is forwarded to send_latest_feeds as keyword args.
    for job in CONFIG["jobs"]:
        SCHEDULER.add_job(
            send_latest_feeds,
            trigger=CronTrigger.from_crontab(job["schedule"]),
            kwargs=job,
        )
        # f-string instead of eager old-style %-formatting
        logger.info(f"Added RSSFeed job: {job['name']}")

    # Run the scheduler's blocking loop (does not return until shutdown)
    SCHEDULER.start()


if __name__ == "__main__":
    main()
26 changes: 26 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Feed RSS parser
feedparser==6.0.10

# Database as JSON
pickleDB==0.9.2

# Scheduler (cron-like) for Python
APScheduler==3.10.3

# Logging
loguru==0.7.0

# YAML parser
PyYAML==6.0.1

# Notifications
apprise==1.4.5

# JSON transformer
json_dict_transformer==1.1.1

# YouTube API
pytube==15.0.0

# Todoist API
todoist-api-python==2.1.3
86 changes: 86 additions & 0 deletions src/RSSFeed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import hashlib
import os

import feedparser
import pickledb


class RSSFeed:
    """A single RSS/Atom feed with a per-feed "already seen" database.

    Each feed gets its own pickleDB JSON file — named after the MD5 of the
    feed URL — that stores the IDs of entries already returned, so that
    get_latest() only yields entries not seen in previous runs.
    """

    def __init__(self, link, database_dir="db/", history_limit=10):
        """
        Args:
            link: URL of the RSS/Atom feed.
            database_dir: directory holding the per-feed database files.
            history_limit: max number of entry IDs remembered per key.
        """
        self.link = link
        self.history_limit = history_limit
        # File name for storing the database is separate for each link.
        # The name is derived by hashing the link (MD5 is used only as a
        # stable filename hash, not for security).
        # os.path.join works whether or not database_dir has a trailing
        # separator, unlike plain string concatenation.
        self.database_path = os.path.join(
            database_dir,
            hashlib.md5(link.encode("utf-8")).hexdigest() + ".db.json",
        )

        # If the database file already exists this is NOT the first run.
        # On the first run get_latest() suppresses its output so the
        # receiver is not flooded with the feed's entire history.
        self.first_run = not os.path.exists(self.database_path)

    def pull(self):
        """Fetch and parse the feed; returns the feedparser result object."""
        return feedparser.parse(self.link)

    def parse(self):
        """Return the list of feed entries.

        Each entry obligatorily contains id, title and link; everything
        else is optional.
        """
        return list(self.pull().entries)

    def load_database(self):
        """Open this feed's pickleDB file.

        sig=False avoids "signal only works in main thread" errors because
        jobs run in APScheduler worker threads; auto_dump=False leaves the
        caller in control of when dump() persists changes.
        """
        return pickledb.load(self.database_path, auto_dump=False, sig=False)

    def clean_database(self):
        """Trim every stored ID list down to the newest history_limit items."""
        db = self.load_database()
        for key in db.getall():
            ids = db.get(key)
            # drop the oldest IDs (front of the list) beyond the limit
            if len(ids) > self.history_limit:
                ids = ids[-self.history_limit:]
            # overwrite the old list with the trimmed one
            db.set(key, ids)
        db.dump()

    def get_latest(self):
        """Return entries not seen in previous runs and record their IDs.

        On the very first run the current feed content is recorded but an
        empty list is returned (see __init__).
        """
        feed_items = self.parse()
        db = self.load_database()

        # self.link is a key containing the list of already-seen entry IDs
        if db.exists(self.link):
            known_ids = db.get(self.link)
            latest = []
            for item in feed_items:
                if item["id"] not in known_ids:
                    latest.append(item)
                    known_ids.append(item["id"])
        else:
            # first time this link is stored: remember every current ID
            known_ids = [item["id"] for item in feed_items]
            latest = feed_items
        db.set(self.link, known_ids)

        # persist the updated ID lists
        db.dump()

        # prune old IDs so the database does not grow without bound
        self.clean_database()

        # Suppress output on the first run to avoid spamming with history
        return [] if self.first_run else latest
Empty file added src/__init__.py
Empty file.
31 changes: 31 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import os
import sys

import yaml
from loguru import logger
from pytz import timezone


# Load config
def load_config(conf_file):
    """Read the YAML configuration file and return the parsed document.

    Args:
        conf_file: path to the YAML file.

    Returns:
        The parsed document (a dict for this project's config layout).
    """
    # Explicit UTF-8 so parsing does not depend on the container's locale;
    # safe_load refuses to construct arbitrary Python objects from the file.
    with open(conf_file, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return config


# Set timezone for the scheduler.
# NOTE(review): this reads TIMEZONE, but the Dockerfile sets TZ — both
# default to Europe/Warsaw so nothing breaks, yet confirm which variable
# is the intended override.
tz = timezone(os.environ.get("TIMEZONE", "Europe/Warsaw"))

# Remove all logger handlers (drops loguru's default handler so the
# format/level below fully control the output)
logger.remove()

# Setup a new logger handler with INFO as default logging level;
# LOG_LEVEL (e.g. DEBUG from docker-compose) overrides it
logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level}</level> | {module} | {message}",
    level=os.environ.get("LOG_LEVEL", "INFO"),
)

# YAML config file path — CONFIG_FILE is injected by docker-compose
# (/config.yaml); defaults to a local config.yaml for bare-metal runs.
# CONFIG is loaded once at import time and shared by all modules.
CONFIG_FILE = os.environ.get("CONFIG_FILE", "config.yaml")
CONFIG = load_config(CONFIG_FILE)
45 changes: 45 additions & 0 deletions src/controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from time import sleep

from src.config import CONFIG, logger
from src.notifications import Notification
from src.RSSFeed import RSSFeed


def get_receiever_by_name(name):
    """Look up a receiver entry in CONFIG["receivers"] by its name.

    Returns the matching receiver dict, or None when no receiver carries
    that name. (The function name keeps its historical spelling, since
    callers reference it as-is.)
    """
    return next(
        (entry for entry in CONFIG["receivers"] if entry["name"] == name),
        None,
    )


def send_latest_feeds(**kwargs):
    """Scheduler job: fetch one feed's new entries and notify its receiver.

    Expected keyword arguments (one entry of the config's `jobs` list):
        name: human-readable job name, used in log lines.
        url: feed URL.
        receiver: name of a receiver from CONFIG["receivers"].
        history_limit: optional; how many entry IDs to remember (default 10).
    """
    name = kwargs["name"]
    logger.debug(f"{name} | start")
    url = kwargs["url"]
    # dict.get with a default instead of the `x if k in d else ...` dance
    history_limit = kwargs.get("history_limit", 10)
    receiver_name = kwargs["receiver"]
    # get receiver details by its name
    receiver = get_receiever_by_name(receiver_name)
    if receiver is None:
        # A misconfigured receiver name would otherwise crash on
        # receiver['name'] below; fail loudly and skip this run instead.
        logger.error(f"{name} | unknown receiver: {receiver_name}")
        return

    feed = RSSFeed(url, CONFIG["databaseDir"], history_limit)
    feed_items = feed.get_latest()
    logger.debug(f"{name} got items: {len(feed_items)}")

    for item in feed_items:
        logger.debug(
            f"Sending to {receiver['name']} | id={item['id']} title={item['title']} link={item['link']}"
        )
        s = Notification.notify(item, receiver)
        if s:
            logger.debug(
                f"Notification succeed with {receiver['name']} for {item['id']}"
            )
        else:
            logger.error(
                f"Notification failed with {receiver['name']} for {item['id']}"
            )

        # sleep for 0.5 seconds to avoid rate limiting
        sleep(0.5)

    logger.debug(f"{name} | finished")
Loading

0 comments on commit 603af54

Please sign in to comment.