diff --git a/app/admin/__init__.py b/app/admin/__init__.py
index 2ae406103..74ced2059 100644
--- a/app/admin/__init__.py
+++ b/app/admin/__init__.py
@@ -16,6 +16,7 @@
 from app.admin.metrics import DailyMetricAdmin, MetricAdmin
 from app.admin.invalid_mailbox_domain import InvalidMailboxDomainAdmin
 from app.admin.forbidden_mx_ip import ForbiddenMxIpAdmin
+from app.admin.global_sender_blacklist import GlobalSenderBlacklistAdmin
 from app.admin.email_search import (
     EmailSearchResult,
     EmailSearchHelpers,
@@ -53,6 +54,7 @@
     "MetricAdmin",
     "InvalidMailboxDomainAdmin",
     "ForbiddenMxIpAdmin",
+    "GlobalSenderBlacklistAdmin",
     # Search views
     "EmailSearchResult",
     "EmailSearchHelpers",
diff --git a/app/admin/global_sender_blacklist.py b/app/admin/global_sender_blacklist.py
new file mode 100644
index 000000000..19e64e834
--- /dev/null
+++ b/app/admin/global_sender_blacklist.py
@@ -0,0 +1,25 @@
+from flask_admin.form import SecureForm
+
+from app.admin.base import SLModelView
+
+
+class GlobalSenderBlacklistAdmin(SLModelView):
+    """Admin view for sender-blacklist regex entries (uses SecureForm)."""
+
+    form_base_class = SecureForm
+
+    can_create = True
+    can_edit = True
+    can_delete = True
+
+    column_searchable_list = ("pattern", "comment")
+    column_filters = ("enabled", "user_id")
+    column_editable_list = ("enabled", "comment")
+
+    # Help text for admins when adding patterns. Matching is full-string
+    # (fullmatch), so the example has to cover the whole address.
+    form_args = {
+        "pattern": {
+            "description": r"Full-match regex, e.g. `.*@domain\.com$`",
+        }
+    }
diff --git a/app/admin/index.py b/app/admin/index.py
index 255bdcb9c..29e0a9e16 100644
--- a/app/admin/index.py
+++ b/app/admin/index.py
@@ -17,6 +17,7 @@
     Metric2,
     InvalidMailboxDomain,
     ForbiddenMxIp,
+    ForbiddenEnvelopeSender,
 )
 from app.admin.base import SLAdminIndexView
 from app.admin.user import UserAdmin
@@ -31,6 +32,7 @@
 from app.admin.metrics import DailyMetricAdmin, MetricAdmin
 from app.admin.invalid_mailbox_domain import InvalidMailboxDomainAdmin
 from app.admin.forbidden_mx_ip import ForbiddenMxIpAdmin
+from app.admin.global_sender_blacklist import GlobalSenderBlacklistAdmin
 from app.admin.email_search import EmailSearchAdmin
 from app.admin.custom_domain_search import CustomDomainSearchAdmin
 from app.admin.abuser_lookup import AbuserLookupAdmin
@@ -65,3 +67,4 @@ def init_admin(app: Flask):
     admin.add_view(MetricAdmin(Metric2, Session))
     admin.add_view(InvalidMailboxDomainAdmin(InvalidMailboxDomain, Session))
     admin.add_view(ForbiddenMxIpAdmin(ForbiddenMxIp, Session))
+    admin.add_view(GlobalSenderBlacklistAdmin(ForbiddenEnvelopeSender, Session))
diff --git a/app/contact_utils.py b/app/contact_utils.py
index 9d452e41f..529e59e9d 100644
--- a/app/contact_utils.py
+++ b/app/contact_utils.py
@@ -47,6 +47,7 @@ def create_contact(
     mail_from: Optional[str] = None,
     allow_empty_email: bool = False,
     automatic_created: bool = False,
+    block_forward: bool = False,
     from_partner: bool = False,
 ) -> ContactCreateResult:
     LOG.i(
@@ -105,6 +106,7 @@ def create_contact(
             automatic_created=automatic_created,
             flags=flags,
             invalid_email=is_invalid_email,
+            block_forward=block_forward,
             commit=True,
         )
         contact_id = contact.id
diff --git a/app/dashboard/views/setting.py b/app/dashboard/views/setting.py
index 7fa1d3101..b859b2e73 100644
--- a/app/dashboard/views/setting.py
+++ b/app/dashboard/views/setting.py
@@ -39,8 +39,11 @@
     PartnerSubscription,
     UnsubscribeBehaviourEnum,
     UserAliasDeleteAction,
+    ForbiddenEnvelopeSender,
 )
 from app.proton.proton_unlink import
can_unlink_proton_account
+from app.regex_utils import validate_sender_blacklist_pattern
+from app.user_audit_log_utils import emit_user_audit_log, UserAuditLogAction
 from app.utils import (
     random_string,
     CSRFValidationForm,
@@ -285,6 +288,60 @@ def setting():
             Session.commit()
             flash("Your preference has been updated", "success")
 
+        elif request.form.get("form-name") == "user-sender-blacklist-add":
+            pattern = (request.form.get("pattern") or "").strip()
+            comment = (request.form.get("comment") or "").strip() or None
+
+            if len(pattern) > 255:
+                flash("Pattern too long (max 255 characters)", "warning")
+                return redirect(url_for("dashboard.setting") + "#sender-blacklist")
+
+            err = validate_sender_blacklist_pattern(pattern)
+            if err:
+                flash(err, "warning")
+                return redirect(url_for("dashboard.setting") + "#sender-blacklist")
+
+            ForbiddenEnvelopeSender.create(
+                user_id=current_user.id,
+                pattern=pattern,
+                enabled=True,
+                comment=comment,
+                commit=True,
+            )
+
+            emit_user_audit_log(
+                user=current_user,
+                action=UserAuditLogAction.AddSenderBlacklist,
+                message=f"Added sender blacklist pattern: {pattern}",
+                commit=True,
+            )
+            flash("Sender blacklist entry added", "success")
+            return redirect(url_for("dashboard.setting") + "#sender-blacklist")
+
+        elif request.form.get("form-name") == "user-sender-blacklist-delete":
+            try:
+                entry_id = int(request.form.get("entry-id"))
+            except Exception:
+                flash("Invalid request", "warning")
+                return redirect(url_for("dashboard.setting") + "#sender-blacklist")
+
+            entry = ForbiddenEnvelopeSender.get_by(id=entry_id)
+            if entry is None or entry.user_id != current_user.id:
+                flash("Not found", "warning")
+                return redirect(url_for("dashboard.setting") + "#sender-blacklist")
+
+            Session.delete(entry)
+            Session.commit()
+
+            emit_user_audit_log(
+                user=current_user,
+                action=UserAuditLogAction.DeleteSenderBlacklist,
+                message=f"Deleted sender blacklist pattern: {entry.pattern}",
+                commit=True,
+            )
+            flash("Sender blacklist entry deleted", "success")
+            return redirect(url_for("dashboard.setting") + "#sender-blacklist")
+
     manual_sub = ManualSubscription.get_by(user_id=current_user.id)
     apple_sub = AppleSubscription.get_by(user_id=current_user.id)
     coinbase_sub = CoinbaseSubscription.get_by(user_id=current_user.id)
@@ -296,6 +353,23 @@ def setting():
     if partner_sub_name:
         partner_sub, partner_name = partner_sub_name
 
+    user_sender_blacklist_entries = (
+        Session.query(ForbiddenEnvelopeSender)
+        .filter(ForbiddenEnvelopeSender.user_id == current_user.id)
+        .order_by(ForbiddenEnvelopeSender.id.asc())
+        .all()
+    )
+
+    global_sender_blacklist_entries = (
+        Session.query(ForbiddenEnvelopeSender)
+        .filter(
+            ForbiddenEnvelopeSender.enabled.is_(True),
+            ForbiddenEnvelopeSender.user_id.is_(None),
+        )
+        .order_by(ForbiddenEnvelopeSender.id.asc())
+        .all()
+    )
+
     return render_template(
         "dashboard/setting.html",
         csrf_form=csrf_form,
@@ -318,4 +392,6 @@ def setting():
         ALIAS_RAND_SUFFIX_LENGTH=ALIAS_RANDOM_SUFFIX_LENGTH,
         connect_with_proton=CONNECT_WITH_PROTON,
         can_unlink_proton_account=can_unlink_proton_account(current_user),
+        user_sender_blacklist_entries=user_sender_blacklist_entries,
+        global_sender_blacklist_entries=global_sender_blacklist_entries,
     )
diff --git a/app/models.py b/app/models.py
index c0a1a3e83..c9178e5fa 100644
--- a/app/models.py
+++ b/app/models.py
@@ -3679,6 +3679,33 @@ class ForbiddenMxIp(Base, ModelMixin):
     comment = sa.Column(sa.Text, unique=False, nullable=True)
 
 
+class ForbiddenEnvelopeSender(Base, ModelMixin):
+    """Forbidden inbound senders (SMTP envelope MAIL FROM).
+
+    Pattern is a (re2-compatible) regex that must match the *whole* candidate
+    sender address (fullmatch semantics, not search).
+
+    Examples:
+    - ".*@spamdomain\\.com$"
+    - "^no-?reply@.*"
+    """
+
+    __tablename__ = "global_sender_blacklist"
+
+    # NULL user_id => global blacklist entry (admin-managed)
+    # non-NULL user_id => per-user blacklist entry (user-managed)
+    user_id = sa.Column(sa.ForeignKey(User.id, ondelete="cascade"), nullable=True)
+
+    # RFC5321 states that an email address cannot be longer than 254 characters.
+    pattern = sa.Column(sa.String(255), nullable=False)
+    enabled = sa.Column(sa.Boolean, nullable=False, default=True, server_default="1")
+    comment = sa.Column(sa.Text, nullable=True)
+
+    user = orm.relationship(User)
+
+    __table_args__ = (sa.Index("ix_global_sender_blacklist_user_id", "user_id"),)
+
+
 # region Phone
 class PhoneCountry(Base, ModelMixin):
     __tablename__ = "phone_country"
diff --git a/app/regex_utils.py b/app/regex_utils.py
index e32413338..818e252d4 100644
--- a/app/regex_utils.py
+++ b/app/regex_utils.py
@@ -5,7 +5,41 @@
 from app.log import LOG
 
 
-def regex_match(rule_regex: str, local):
+# Keep this permissive enough for practical regexes, but still a strict whitelist.
+# Note: We intentionally do NOT allow whitespace.
+_SENDER_BLACKLIST_ALLOWED_RE = re.compile(
+    r"^[A-Za-z0-9\[\]\{\}\(\)\|\?\^\$@,._\-\+\*\\\.]+$"
+)
+
+
+def validate_sender_blacklist_pattern(pattern: str) -> str | None:
+    """Validate a user-provided sender-blacklist regex pattern.
+
+    The goal is to keep patterns simple and prevent expensive/unsafe constructs.
+    We also validate the regex compiles (re2 preferred).
+
+    Returns:
+        None if valid; otherwise an error message string.
+    """
+    if not pattern:
+        return "Pattern cannot be empty"
+
+    # Keep the allowed character set intentionally small.
+    if not _SENDER_BLACKLIST_ALLOWED_RE.fullmatch(pattern):
+        return (
+            "Invalid characters in pattern. Allowed: letters, digits, and []{}(),._-+*\\.^$@|?"
+        )
+
+    try:
+        re2.compile(pattern)
+    except Exception:
+        return "Invalid regex pattern"
+
+    return None
+
+
+def regex_match(rule_regex: str, local) -> bool:
+    """Return True if *full string* matches rule_regex."""
     regex = re2.compile(rule_regex)
     try:
         if re2.fullmatch(regex, local):
@@ -16,3 +50,20 @@ def regex_match(rule_regex: str, local):
         if re.fullmatch(regex, local):
             return True
     return False
+
+
+def regex_search(rule_regex: str, text: str) -> bool:
+    """Return True if any substring of text matches rule_regex.
+
+    Uses re2 when possible to avoid catastrophic backtracking.
+    """
+    regex = re2.compile(rule_regex)
+    try:
+        if re2.search(regex, text):
+            return True
+    except TypeError:  # re2 bug "Argument 'pattern' has incorrect type (expected bytes, got PythonRePattern)"
+        LOG.w("use re instead of re2 for %s %s", rule_regex, text)
+        regex = re.compile(rule_regex)
+        if re.search(regex, text):
+            return True
+    return False
diff --git a/app/sender_blacklist.py b/app/sender_blacklist.py
new file mode 100644
index 000000000..474513d21
--- /dev/null
+++ b/app/sender_blacklist.py
@@ -0,0 +1,89 @@
+from __future__ import annotations
+
+from cachetools import TTLCache, cached
+
+from app.db import Session
+from app.log import LOG
+from app.models import ForbiddenEnvelopeSender
+from app.regex_utils import regex_match
+
+
+# Cache enabled patterns to avoid a DB query per inbound email.
+#
+# TTL: keep changes reasonably fresh while avoiding hammering the DB.
+# Memory: cachetools.TTLCache is an in-process dict with an upper bound (maxsize).
+_GLOBAL_PATTERNS_CACHE = TTLCache(maxsize=128, ttl=300)
+_USER_PATTERNS_CACHE = TTLCache(maxsize=128, ttl=300)
+
+
+@cached(cache=_GLOBAL_PATTERNS_CACHE)
+def _get_enabled_global_patterns() -> list[str]:
+    return [
+        r.pattern
+        for r in Session.query(ForbiddenEnvelopeSender)
+        .filter(
+            ForbiddenEnvelopeSender.enabled.is_(True),
+            ForbiddenEnvelopeSender.user_id.is_(None),
+        )
+        .order_by(ForbiddenEnvelopeSender.id.asc())
+        .all()
+    ]
+
+
+# Per-user cache: avoid a DB query per email per user, but cap memory via maxsize.
+@cached(cache=_USER_PATTERNS_CACHE)
+def _get_enabled_user_patterns(user_id: int) -> list[str]:
+    return [
+        r.pattern
+        for r in Session.query(ForbiddenEnvelopeSender)
+        .filter(
+            ForbiddenEnvelopeSender.enabled.is_(True),
+            ForbiddenEnvelopeSender.user_id == user_id,
+        )
+        .order_by(ForbiddenEnvelopeSender.id.asc())
+        .all()
+    ]
+
+
+def is_sender_blocked_for_user(user_id: int | None, candidates: list[str]) -> bool:
+    """Return True if any candidate sender string fully matches a pattern from:
+
+    - the global sender blacklist (user_id is NULL), OR
+    - the given user's sender blacklist (user_id matches)
+
+    Typical candidates:
+    - SMTP envelope MAIL FROM
+    - parsed header From address
+    """
+
+    patterns: list[str] = []
+    patterns.extend(_get_enabled_global_patterns())
+    if user_id is not None:
+        patterns.extend(_get_enabled_user_patterns(int(user_id)))
+
+    if not patterns:
+        return False
+
+    for candidate in candidates:
+        if not candidate:
+            continue
+        # Ignore bounce/null reverse-path
+        if candidate == "<>":
+            continue
+
+        for pattern in patterns:
+            try:
+                # Full-string match to avoid false positives (partial hits).
+                if regex_match(pattern, candidate):
+                    return True
+            except Exception:
+                # Never crash the SMTP handler because of a bad regex.
+                # (Global or user entry — both are user-provided.)
+                LOG.exception(
+                    "Sender blacklist regex failed: user_id=%s pattern=%s candidate=%s",
+                    user_id,
+                    pattern,
+                    candidate,
+                )
+
+    return False
diff --git a/app/user_audit_log_utils.py b/app/user_audit_log_utils.py
index b589e8813..ccab7414d 100644
--- a/app/user_audit_log_utils.py
+++ b/app/user_audit_log_utils.py
@@ -29,6 +29,9 @@ class UserAuditLogAction(Enum):
     UpdateDirectory = "update_directory"
     DeleteDirectory = "delete_directory"
 
+    AddSenderBlacklist = "add_sender_blacklist"
+    DeleteSenderBlacklist = "delete_sender_blacklist"
+
     CreateAlias = "create_alias"
     UpdateAlias = "update_alias"
     DeleteAlias = "delete_alias"
diff --git a/email_handler.py b/email_handler.py
index 437b177fe..ca4941833 100644
--- a/email_handler.py
+++ b/email_handler.py
@@ -154,6 +154,7 @@
 from app.handler.unsubscribe_generator import UnsubscribeGenerator
 from app.handler.unsubscribe_handler import UnsubscribeHandler
 from app.log import LOG, set_message_id
+from app.sender_blacklist import is_sender_blocked_for_user
 from app.mail_sender import sl_sendmail
 from app.mailbox_utils import (
     get_mailbox_for_reply_phase,
@@ -214,13 +215,42 @@ def get_or_create_contact(
                 mail_from,
             )
             contact_email = mail_from
+
+    # Normalize sender address to lowercase so blacklist patterns are easy to write.
+    sanitized_contact_email = sanitize_email(contact_email)
+
+    # If a Contact already exists and is NOT blocked, it takes precedence over the blacklist.
+    # This allows users to "whitelist" a specific sender by manually creating/enabling a Contact.
+    existing_contact = Contact.get_by(
+        alias_id=alias.id,
+        website_email=sanitized_contact_email,
+    )
+
+    block_forward = False
+    if existing_contact is None:
+        # Check the blacklist BEFORE creating the contact.
+        # Otherwise an auto-created contact could allow subsequent emails to bypass the blacklist.
+        block_forward = is_sender_blocked_for_user(
+            alias.user_id,
+            candidates=[mail_from, sanitized_contact_email],
+        )
+
+    if block_forward:
+        LOG.i(
+            "Sender matched sender blacklist (global or user); creating disabled contact: mail_from=%s header_from=%s alias=%s",
+            mail_from,
+            sanitized_contact_email,
+            alias,
+        )
+
     contact_result = contact_utils.create_contact(
-        email=contact_email,
+        email=sanitized_contact_email,
         alias=alias,
         name=contact_name,
         mail_from=mail_from,
         allow_empty_email=True,
         automatic_created=True,
+        block_forward=block_forward,
         from_partner=False,
     )
     if contact_result.error:
diff --git a/migrations/versions/2026_0308_b7c1d6a4f2e1_global_sender_blacklist.py b/migrations/versions/2026_0308_b7c1d6a4f2e1_global_sender_blacklist.py
new file mode 100644
index 000000000..c161a2a3e
--- /dev/null
+++ b/migrations/versions/2026_0308_b7c1d6a4f2e1_global_sender_blacklist.py
@@ -0,0 +1,40 @@
+"""Add global sender blacklist
+
+Revision ID: b7c1d6a4f2e1
+Revises: 3ee37864eb67
+Create Date: 2026-03-08
+
+"""
+
+import sqlalchemy_utils
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "b7c1d6a4f2e1"
+down_revision = "3ee37864eb67"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.create_table(
+        "global_sender_blacklist",
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at", sqlalchemy_utils.types.arrow.ArrowType(), nullable=False
+        ),
+        sa.Column("updated_at", sqlalchemy_utils.types.arrow.ArrowType(), nullable=True),
+        sa.Column("pattern", sa.String(length=255), nullable=False),
+        sa.Column(
+            "enabled", sa.Boolean(), server_default=sa.text("true"), nullable=False
+        ),
+        sa.Column("comment", sa.Text(), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("pattern"),
+    )
+
+
+def downgrade():
+    op.drop_table("global_sender_blacklist")
diff --git a/migrations/versions/2026_0318_9c2a7f3c1b21_user_sender_blacklist.py b/migrations/versions/2026_0318_9c2a7f3c1b21_user_sender_blacklist.py
new file mode 100644
index 000000000..8a6794ab6
--- /dev/null
+++ b/migrations/versions/2026_0318_9c2a7f3c1b21_user_sender_blacklist.py
@@ -0,0 +1,86 @@
+"""User sender blacklist (extend global sender blacklist)
+
+Revision ID: 9c2a7f3c1b21
+Revises: b7c1d6a4f2e1
+Create Date: 2026-03-18
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def _drop_unique_constraint_on_pattern_if_present():
+    """Drop the UNIQUE(pattern) constraint safely.
+
+    Important: On PostgreSQL, attempting to drop a non-existent constraint
+    aborts the transaction. Catching the exception in Python is not enough
+    because the transaction remains in a failed state.
+
+    Therefore we *reflect* existing unique constraints first and only drop
+    when we have an actual name.
+    """
+
+    bind = op.get_bind()
+    insp = sa.inspect(bind)
+    try:
+        uniques = insp.get_unique_constraints("global_sender_blacklist")
+    except Exception:
+        uniques = []
+
+    for uc in uniques:
+        cols = uc.get("column_names") or []
+        if cols == ["pattern"]:
+            name = uc.get("name")
+            if name:
+                op.drop_constraint(name, "global_sender_blacklist", type_="unique")
+            break
+
+
+# revision identifiers, used by Alembic.
+revision = "9c2a7f3c1b21"
+down_revision = "b7c1d6a4f2e1"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # 1) Add user_id nullable so existing global entries stay valid.
+    with op.batch_alter_table("global_sender_blacklist") as batch:
+        batch.add_column(
+            sa.Column(
+                "user_id",
+                sa.Integer(),
+                sa.ForeignKey("users.id", ondelete="cascade"),
+                nullable=True,
+            )
+        )
+        batch.create_index("ix_global_sender_blacklist_user_id", ["user_id"])
+
+    # 2) Drop unique constraint on pattern so users can use the same pattern independently.
+    _drop_unique_constraint_on_pattern_if_present()
+
+
+def downgrade():
+    # Per-user rows cannot exist in the previous schema: once user_id is dropped
+    # they would silently become global entries, so remove them first.
+    op.execute("DELETE FROM global_sender_blacklist WHERE user_id IS NOT NULL")
+
+    with op.batch_alter_table("global_sender_blacklist") as batch:
+        batch.drop_index("ix_global_sender_blacklist_user_id")
+        batch.drop_column("user_id")
+
+    # Duplicate patterns may have been added while UNIQUE(pattern) was absent;
+    # keep the oldest row of each so the constraint can be re-created.
+    op.execute(
+        "DELETE FROM global_sender_blacklist WHERE id NOT IN "
+        "(SELECT MIN(id) FROM global_sender_blacklist GROUP BY pattern)"
+    )
+
+    # No try/except: on PostgreSQL a failed statement aborts the transaction, so
+    # swallowing the error would only hide the failure (see the helper above).
+    op.create_unique_constraint(
+        "global_sender_blacklist_pattern_key",
+        "global_sender_blacklist",
+        ["pattern"],
+    )
diff --git a/pyproject.toml b/pyproject.toml
index 67a6022a2..d03131772 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,6 +65,7 @@ dependencies = [
     "newrelic ~= 8.8.0",
     "flanker ~= 0.9.11",
     "pyre2 ~= 0.3.6",
+    "cachetools ~= 5.3.3",
     "tldextract ~= 3.1.2",
     "flask-debugtoolbar-sqlalchemy ~= 0.2.0",
     "twilio ~= 7.3.2",
diff --git a/templates/dashboard/setting.html b/templates/dashboard/setting.html
index caaf9df1b..45bf8aa21 100644
--- a/templates/dashboard/setting.html
+++ b/templates/dashboard/setting.html
@@ -254,6 +254,58 @@
+
+
+
fullmatch()-style match against the full sender address.
+ .*@(spam|junk)-?domain\.com$, ^no-?reply@.*
+ {{ entry.pattern }}
+ {% if entry.comment %}— {{ entry.comment }}{% endif %}
+
+ {{ entry.pattern }}