Skip to content

Commit 0b6e867

Browse files
kevglisswssheldon
andauthored
Fixing signal dedup and snooze conflict (Netflix#4134)
* Fixing signal dedup and snooze conflict * Add docstrings --------- Co-authored-by: Will Sheldon <[email protected]>
1 parent 724748a commit 0b6e867

File tree

3 files changed

+165
-69
lines changed

3 files changed

+165
-69
lines changed

.vscode/settings.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
],
4040
"[python]": {
4141
"editor.codeActionsOnSave": {
42-
"source.organizeImports": false
42+
"source.organizeImports": "never"
4343
}
4444
},
4545
}

src/dispatch/signal/service.py

+103-68
Original file line numberDiff line numberDiff line change
@@ -568,100 +568,135 @@ def update_instance(
568568
return signal_instance
569569

570570

571-
def filter_signal(*, db_session: Session, signal_instance: SignalInstance) -> bool:
572-
"""
573-
Apply filter actions to the signal instance.
574-
575-
The function first checks if the signal instance is snoozed. If not snoozed,
576-
it checks for a deduplication rule set on the signal instance. If no
577-
deduplication rule is set, a default deduplication rule is applied,
578-
grouping all signal instances together for a 1-hour window, regardless of
579-
the entities in the signal instance.
571+
def filter_snooze(*, db_session: Session, signal_instance: SignalInstance) -> SignalInstance:
572+
"""Filters a signal instance for snoozing.
580573
581574
Args:
582575
db_session (Session): Database session.
583576
signal_instance (SignalInstance): Signal instance to be filtered.
584577
585578
Returns:
586-
bool: True if the signal instance is filtered, False otherwise.
579+
SignalInstance: The filtered signal instance.
587580
"""
588-
589-
filtered = False
590581
for f in signal_instance.signal.filters:
591582
if f.mode != SignalFilterMode.active:
592583
continue
593584

585+
if f.action != SignalFilterAction.snooze:
586+
continue
587+
588+
if f.expiration.replace(tzinfo=timezone.utc) <= datetime.now(timezone.utc):
589+
continue
590+
594591
query = db_session.query(SignalInstance).filter(
595592
SignalInstance.signal_id == signal_instance.signal_id
596593
)
597594
query = apply_filter_specific_joins(SignalInstance, f.expression, query)
598595
query = apply_filters(query, f.expression)
596+
# an expression is not required for snoozing, if absent we snooze regardless of entity
597+
if f.expression:
598+
instances = query.filter(SignalInstance.id == signal_instance.id).all()
599599

600-
# order matters, check for snooze before deduplication
601-
# we check to see if the current instances match's it's signals snooze filter
602-
if f.action == SignalFilterAction.snooze:
603-
if f.expiration.replace(tzinfo=timezone.utc) <= datetime.now(timezone.utc):
604-
continue
605-
606-
# an expression is not required for snoozing, if absent we snooze regardless of entity
607-
if f.expression:
608-
instances = query.filter(SignalInstance.id == signal_instance.id).all()
609-
610-
if instances:
611-
signal_instance.filter_action = SignalFilterAction.snooze
612-
filtered = True
613-
break
614-
else:
600+
if instances:
615601
signal_instance.filter_action = SignalFilterAction.snooze
616-
filtered = True
617602
break
603+
else:
604+
signal_instance.filter_action = SignalFilterAction.snooze
605+
break
618606

619-
elif f.action == SignalFilterAction.deduplicate:
620-
window = datetime.now(timezone.utc) - timedelta(minutes=f.window)
621-
query = query.filter(SignalInstance.created_at >= window)
622-
query = query.join(SignalInstance.entities).filter(
623-
Entity.id.in_([e.id for e in signal_instance.entities])
624-
)
625-
query = query.filter(SignalInstance.id != signal_instance.id)
607+
return signal_instance
626608

627-
# get the earliest instance
628-
query = query.order_by(asc(SignalInstance.created_at))
629-
instances = query.all()
630609

631-
if instances:
632-
# associate with existing case
633-
signal_instance.case_id = instances[0].case_id
634-
signal_instance.filter_action = SignalFilterAction.deduplicate
635-
filtered = True
636-
break
637-
else:
638-
# Check if there's a deduplication rule set on the signal
639-
has_dedup_filter = any(
640-
f.action == SignalFilterAction.deduplicate for f in signal_instance.signal.filters
610+
def filter_dedup(*, db_session: Session, signal_instance: SignalInstance) -> SignalInstance:
611+
"""Filters a signal instance for deduplication.
612+
613+
Args:
614+
db_session (Session): Database session.
615+
signal_instance (SignalInstance): Signal instance to be filtered.
616+
617+
Returns:
618+
SignalInstance: The filtered signal instance.
619+
"""
620+
for f in signal_instance.signal.filters:
621+
if f.mode != SignalFilterMode.active:
622+
continue
623+
624+
if f.action != SignalFilterAction.deduplicate:
625+
continue
626+
627+
query = db_session.query(SignalInstance).filter(
628+
SignalInstance.signal_id == signal_instance.signal_id
641629
)
642-
# Apply the default deduplication rule if there's no deduplication rule set on the signal
643-
# and the signal instance is not snoozed
644-
if not has_dedup_filter and not filtered:
645-
default_dedup_window = datetime.now(timezone.utc) - timedelta(hours=1)
646-
instance = (
647-
db_session.query(SignalInstance)
648-
.filter(
649-
SignalInstance.signal_id == signal_instance.signal_id,
650-
SignalInstance.created_at >= default_dedup_window,
651-
SignalInstance.id != signal_instance.id,
652-
SignalInstance.case_id.isnot(None), # noqa
653-
)
654-
.with_entities(SignalInstance.case_id)
655-
.order_by(desc(SignalInstance.created_at))
656-
.first()
630+
query = apply_filter_specific_joins(SignalInstance, f.expression, query)
631+
query = apply_filters(query, f.expression)
632+
633+
window = datetime.now(timezone.utc) - timedelta(minutes=f.window)
634+
query = query.filter(SignalInstance.created_at >= window)
635+
query = query.join(SignalInstance.entities).filter(
636+
Entity.id.in_([e.id for e in signal_instance.entities])
637+
)
638+
query = query.filter(SignalInstance.id != signal_instance.id)
639+
640+
# get the earliest instance
641+
query = query.order_by(asc(SignalInstance.created_at))
642+
instances = query.all()
643+
644+
if instances:
645+
# associate with existing case
646+
signal_instance.case_id = instances[0].case_id
647+
signal_instance.filter_action = SignalFilterAction.deduplicate
648+
break
649+
# apply default deduplication rule
650+
else:
651+
default_dedup_window = datetime.now(timezone.utc) - timedelta(hours=1)
652+
instance = (
653+
db_session.query(SignalInstance)
654+
.filter(
655+
SignalInstance.signal_id == signal_instance.signal_id,
656+
SignalInstance.created_at >= default_dedup_window,
657+
SignalInstance.id != signal_instance.id,
658+
SignalInstance.case_id.isnot(None), # noqa
657659
)
658-
if instance:
659-
signal_instance.case_id = instance.case_id
660-
signal_instance.filter_action = SignalFilterAction.deduplicate
661-
filtered = True
660+
.with_entities(SignalInstance.case_id)
661+
.order_by(desc(SignalInstance.created_at))
662+
.first()
663+
)
664+
if instance:
665+
signal_instance.case_id = instance.case_id
666+
signal_instance.filter_action = SignalFilterAction.deduplicate
667+
668+
return signal_instance
662669

663-
if not filtered:
670+
671+
def filter_signal(*, db_session: Session, signal_instance: SignalInstance) -> bool:
672+
"""
673+
Apply filter actions to the signal instance.
674+
675+
The function first checks if the signal instance is snoozed. If not snoozed,
676+
it checks for a deduplication rule set on the signal instance. If no
677+
deduplication rule is set, a default deduplication rule is applied,
678+
grouping all signal instances together for a 1-hour window, regardless of
679+
the entities in the signal instance.
680+
681+
Args:
682+
db_session (Session): Database session.
683+
signal_instance (SignalInstance): Signal instance to be filtered.
684+
685+
Returns:
686+
bool: True if the signal instance is filtered, False otherwise.
687+
"""
688+
filtered = False
689+
690+
signal_instance = filter_snooze(db_session=db_session, signal_instance=signal_instance)
691+
692+
# we only dedupe if we haven't been snoozed
693+
if not signal_instance.filter_action:
694+
signal_instance = filter_dedup(db_session=db_session, signal_instance=signal_instance)
695+
696+
if not signal_instance.filter_action:
664697
signal_instance.filter_action = SignalFilterAction.none
698+
else:
699+
filtered = True
665700

666701
db_session.commit()
667702
return filtered

tests/signal/test_signal_service.py

+61
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,67 @@ def test_filter_actions_deduplicate(session, signal, project):
275275
assert signal_instance_2.filter_action == SignalFilterAction.deduplicate
276276

277277

278+
def test_filter_action_with_dedupe_and_snooze(session, signal, project):
279+
from datetime import datetime, timedelta, timezone
280+
from dispatch.signal.models import (
281+
SignalFilter,
282+
SignalInstance,
283+
SignalFilterAction,
284+
)
285+
from dispatch.signal.service import filter_signal
286+
from dispatch.entity_type.models import EntityType
287+
from dispatch.entity.models import Entity
288+
289+
entity_type = EntityType(
290+
name="dedupe1+snooze",
291+
jpath="id",
292+
regular_expression=None,
293+
project=project,
294+
)
295+
session.add(entity_type)
296+
297+
entity = Entity(name="dedupe1+snooze", description="test", value="foo", entity_type=entity_type)
298+
session.add(entity)
299+
300+
# create instance
301+
signal_instance_1 = SignalInstance(
302+
raw=json.dumps({"id": "foo"}), project=project, signal=signal, entities=[entity]
303+
)
304+
session.add(signal_instance_1)
305+
306+
signal_instance_2 = SignalInstance(
307+
raw=json.dumps({"id": "foo"}), project=project, signal=signal, entities=[entity]
308+
)
309+
session.add(signal_instance_2)
310+
session.commit()
311+
# create deduplicate signal filter
312+
signal_filter = SignalFilter(
313+
name="dedupe1",
314+
description="test",
315+
expression=[
316+
{"or": [{"model": "EntityType", "field": "id", "op": "==", "value": entity_type.id}]}
317+
],
318+
action=SignalFilterAction.deduplicate,
319+
window=5,
320+
project=project,
321+
)
322+
signal.filters.append(signal_filter)
323+
324+
signal_filter = SignalFilter(
325+
name="snooze0",
326+
description="test",
327+
expression=[{"or": [{"model": "Entity", "field": "id", "op": "==", "value": entity.id}]}],
328+
action=SignalFilterAction.snooze,
329+
expiration=datetime.now(tz=timezone.utc) + timedelta(minutes=5),
330+
project=project,
331+
)
332+
signal.filters.append(signal_filter)
333+
334+
session.commit()
335+
assert filter_signal(db_session=session, signal_instance=signal_instance_2)
336+
assert signal_instance_2.filter_action == SignalFilterAction.snooze
337+
338+
278339
def test_filter_actions_snooze(session, entity, signal, project):
279340
from datetime import datetime, timedelta, timezone
280341
from dispatch.signal.models import (

0 commit comments

Comments
 (0)