Skip to content

Commit 4c700a2

Browse files
authored
feat(backup): Add DateAddedComparator (#54009)
This comparator accounts for the fact that old JSON exports may have zero millisecond timestamps formatted without the `.000`, as Python is wont to do in its natural datetime serializer. We have since replaced this default behavior with more consistent behavior that always includes the milliseconds, but to work with old exports, we still need to use this comparator. Closes getsentry/team-ospo#155
1 parent b77a4bb commit 4c700a2

File tree

5 files changed

+307
-57
lines changed

5 files changed

+307
-57
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[
2+
{
3+
"model": "sites.site",
4+
"pk": 1,
5+
"fields": {
6+
"domain": "example.com",
7+
"name": "example.com"
8+
}
9+
},
10+
{
11+
"model": "sentry.option",
12+
"pk": 1,
13+
"fields": {
14+
"key": "sentry:latest_version",
15+
"last_updated": "2023-06-22T00:00:00Z",
16+
"last_updated_by": "unknown",
17+
"value": "\"23.6.1\""
18+
}
19+
}
20+
]

src/sentry/backup/comparators.py

Lines changed: 112 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44
from typing import Callable, Dict, List, Literal
55

66
from dateutil import parser
7+
from django.db import models
78

89
from sentry.backup.findings import ComparatorFinding, InstanceID
10+
from sentry.backup.helpers import get_exportable_final_derivations_of
11+
from sentry.db.models import BaseModel
12+
from sentry.db.models.fields.foreignkey import FlexibleForeignKey
913
from sentry.utils.json import JSONData
1014

1115

@@ -25,7 +29,7 @@ class JSONScrubbingComparator(ABC):
2529
have their inputs mangled by one another."""
2630

2731
def __init__(self, *fields: str):
28-
self.fields = fields
32+
self.fields = set(fields)
2933

3034
def check(self, side: str, data: JSONData) -> None:
3135
"""Ensure that we have received valid JSON data at runtime."""
@@ -97,11 +101,9 @@ def __scrub__(
97101

98102
for field in self.fields:
99103
for side in [left, right]:
100-
if field not in side["fields"]:
104+
if not bool(side["fields"].get(field)):
101105
continue
102106
value = side["fields"][field]
103-
if not value:
104-
continue
105107
value = [value] if isinstance(value, str) else value
106108
del side["fields"][field]
107109
side["scrubbed"][f"{self.get_kind()}::{field}"] = f(value)
@@ -130,7 +132,7 @@ def __init__(self, field: str):
130132

131133
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
132134
f = self.field
133-
if f not in left["fields"] and f not in right["fields"]:
135+
if not bool(left["fields"].get(f)) and not bool(right["fields"].get(f)):
134136
return []
135137

136138
left_date_updated = left["fields"][f]
@@ -140,13 +142,40 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
140142
ComparatorFinding(
141143
kind=self.get_kind(),
142144
on=on,
143-
reason=f"""the left date_updated value on `{on}` ({left_date_updated}) was not
144-
less than or equal to the right ({right_date_updated})""",
145+
reason=f"""the left value ({left_date_updated}) of `{f}` was not less than or equal to the right value ({right_date_updated})""",
145146
)
146147
]
147148
return []
148149

149150

151+
class DateAddedComparator(JSONScrubbingComparator):
152+
"""Some exports from before [email protected] may trim milliseconds from timestamps if they end in
153+
exactly `.000` (ie, not milliseconds at all - what are the odds!). Because comparisons may fail
154+
in this case, we use a special comparator for these cases."""
155+
156+
def __init__(self, *fields: str):
157+
super().__init__(*fields)
158+
159+
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
160+
findings = []
161+
fields = sorted(self.fields)
162+
for f in fields:
163+
if not bool(left["fields"].get(f)) and not bool(right["fields"].get(f)):
164+
continue
165+
166+
left_date_added = left["fields"][f]
167+
right_date_added = right["fields"][f]
168+
if parser.parse(left_date_added) != parser.parse(right_date_added):
169+
findings.append(
170+
ComparatorFinding(
171+
kind=self.get_kind(),
172+
on=on,
173+
reason=f"""the left value ({left_date_added}) of `{f}` was not equal to the right value ({right_date_added})""",
174+
)
175+
)
176+
return findings
177+
178+
150179
class ObfuscatingComparator(JSONScrubbingComparator, ABC):
151180
"""Comparator that compares private values, but then safely truncates them to ensure that they
152181
do not leak out in logs, stack traces, etc."""
@@ -156,7 +185,8 @@ def __init__(self, *fields: str):
156185

157186
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
158187
findings = []
159-
for f in self.fields:
188+
fields = sorted(self.fields)
189+
for f in fields:
160190
if f not in left["fields"] and f not in right["fields"]:
161191
continue
162192

@@ -169,8 +199,7 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
169199
ComparatorFinding(
170200
kind=self.get_kind(),
171201
on=on,
172-
reason=f"""the left `{f}` value ("{lv}") on `{on}` was not equal to the
173-
right value ("{rv}")""",
202+
reason=f"""the left value ("{lv}") of `{f}` was not equal to the right value ("{rv}")""",
174203
)
175204
)
176205
return findings
@@ -224,53 +253,89 @@ def truncate(self, data: list[str]) -> list[str]:
224253
return truncated
225254

226255

256+
def auto_assign_date_added_comparator(comps: ComparatorMap) -> None:
257+
"""Automatically assigns the DateAddedComparator to any `DateTimeField` that is not already claimed by the `DateUpdatedComparator`."""
258+
259+
exportable = get_exportable_final_derivations_of(BaseModel)
260+
for e in exportable:
261+
name = "sentry." + e.__name__.lower()
262+
fields = e._meta.get_fields()
263+
assign = set()
264+
for f in fields:
265+
if isinstance(f, models.DateTimeField) and name in comps:
266+
date_updated_comparator = next(
267+
filter(lambda e: isinstance(e, DateUpdatedComparator), comps[name]), None
268+
)
269+
if not date_updated_comparator or f.name not in date_updated_comparator.fields:
270+
assign.add(f.name)
271+
272+
if name in comps:
273+
found = next(filter(lambda e: isinstance(e, DateAddedComparator), comps[name]), None)
274+
if found:
275+
found.fields.update(assign)
276+
else:
277+
comps[name].append(DateAddedComparator(*assign))
278+
else:
279+
comps[name] = [DateAddedComparator(*assign)]
280+
281+
282+
def auto_assign_email_obfuscating_comparator(comps: ComparatorMap) -> None:
283+
"""Automatically assigns the EmailObfuscatingComparator to any field that is an `EmailField` or has a foreign key into the `sentry.User` table."""
284+
285+
exportable = get_exportable_final_derivations_of(BaseModel)
286+
for e in exportable:
287+
name = "sentry." + e.__name__.lower()
288+
fields = e._meta.get_fields()
289+
assign = set()
290+
for f in fields:
291+
if isinstance(f, models.EmailField):
292+
assign.add(f.name)
293+
elif isinstance(f, FlexibleForeignKey) and f.related_model.__name__ == "User":
294+
assign.add(f.name)
295+
elif isinstance(f, models.OneToOneField) and f.related_model.__name__ == "User":
296+
assign.add(f.name)
297+
elif isinstance(f, models.ManyToManyField) and f.related_model.__name__ == "User":
298+
assign.add(f.name)
299+
300+
if name in comps:
301+
found = next(
302+
filter(lambda e: isinstance(e, EmailObfuscatingComparator), comps[name]), None
303+
)
304+
if found:
305+
found.fields.update(assign)
306+
else:
307+
comps[name].append(EmailObfuscatingComparator(*assign))
308+
else:
309+
comps[name] = [EmailObfuscatingComparator(*assign)]
310+
311+
227312
ComparatorList = List[JSONScrubbingComparator]
228313
ComparatorMap = Dict[str, ComparatorList]
314+
315+
# Some comparators (like `DateAddedComparator`) we can automatically assign by inspecting the
316+
# `Field` type on the Django `Model` definition. Others, like the ones in this map, we must assign
317+
# manually, since there is no clever way to derive them automatically.
229318
DEFAULT_COMPARATORS: ComparatorMap = {
230-
"sentry.apitoken": [
231-
EmailObfuscatingComparator("user"),
232-
HashObfuscatingComparator("refresh_token", "token"),
233-
],
234-
"sentry.apiapplication": [
235-
EmailObfuscatingComparator("owner"),
236-
HashObfuscatingComparator("client_id", "client_secret"),
237-
],
238-
"sentry.apiauthorization": [
239-
EmailObfuscatingComparator("user"),
240-
],
241-
"sentry.authidentity": [
242-
EmailObfuscatingComparator("user"),
243-
HashObfuscatingComparator("ident", "token"),
244-
],
245-
"sentry.authenticator": [EmailObfuscatingComparator("user")],
246-
"sentry.email": [EmailObfuscatingComparator("email")],
319+
"sentry.apitoken": [HashObfuscatingComparator("refresh_token", "token")],
320+
"sentry.apiapplication": [HashObfuscatingComparator("client_id", "client_secret")],
321+
"sentry.authidentity": [HashObfuscatingComparator("ident", "token")],
247322
"sentry.alertrule": [DateUpdatedComparator("date_modified")],
248323
"sentry.incidenttrigger": [DateUpdatedComparator("date_modified")],
249324
"sentry.orgauthtoken": [HashObfuscatingComparator("token_hashed", "token_last_characters")],
250-
"sentry.organizationmember": [
251-
EmailObfuscatingComparator("user_email"),
252-
HashObfuscatingComparator("token"),
253-
],
325+
"sentry.organizationmember": [HashObfuscatingComparator("token")],
254326
"sentry.projectkey": [HashObfuscatingComparator("public_key", "secret_key")],
255327
"sentry.querysubscription": [DateUpdatedComparator("date_updated")],
256328
"sentry.relay": [HashObfuscatingComparator("relay_id", "public_key")],
257329
"sentry.relayusage": [HashObfuscatingComparator("relay_id", "public_key")],
258-
"sentry.sentryapp": [EmailObfuscatingComparator("creator_user", "creator_label", "proxy_user")],
330+
"sentry.sentryapp": [EmailObfuscatingComparator("creator_label")],
259331
"sentry.servicehook": [HashObfuscatingComparator("secret")],
260-
"sentry.user": [
261-
EmailObfuscatingComparator("email", "username"),
262-
HashObfuscatingComparator("password"),
263-
],
264-
"sentry.useremail": [
265-
EmailObfuscatingComparator("email", "user"),
266-
HashObfuscatingComparator("validation_hash"),
267-
],
268-
"sentry.userip": [EmailObfuscatingComparator("user")],
269-
"sentry.useroption": [EmailObfuscatingComparator("user")],
270-
"sentry.userpermission": [EmailObfuscatingComparator("user")],
332+
"sentry.user": [HashObfuscatingComparator("password")],
333+
"sentry.useremail": [HashObfuscatingComparator("validation_hash")],
271334
"sentry.userrole": [DateUpdatedComparator("date_updated")],
272-
"sentry.userroleuser": [
273-
DateUpdatedComparator("date_updated"),
274-
EmailObfuscatingComparator("user"),
275-
],
335+
"sentry.userroleuser": [DateUpdatedComparator("date_updated")],
276336
}
337+
338+
# Where possible, we automatically deduce fields that should have special comparators and add them
339+
# to the `DEFAULT_COMPARATORS` map.
340+
auto_assign_date_added_comparator(DEFAULT_COMPARATORS)
341+
auto_assign_email_obfuscating_comparator(DEFAULT_COMPARATORS)

tests/sentry/backup/test_comparators.py

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from sentry.backup.comparators import (
2+
DateAddedComparator,
23
DateUpdatedComparator,
34
EmailObfuscatingComparator,
45
HashObfuscatingComparator,
@@ -63,6 +64,53 @@ def test_bad_comparator_only_one_side_existing():
6364
assert "my_date_field" in res[0].reason
6465

6566

67+
def test_good_date_added_comparator():
68+
cmp = DateAddedComparator("my_date_field")
69+
id = InstanceID("test", 1)
70+
left: JSONData = {
71+
"model": "test",
72+
"pk": 1,
73+
"fields": {
74+
"my_date_field": "2023-06-22T23:00:00.123Z",
75+
},
76+
}
77+
right: JSONData = {
78+
"model": "test",
79+
"pk": 1,
80+
"fields": {
81+
"my_date_field": "2023-06-22T23:00:00.123Z",
82+
},
83+
}
84+
assert not cmp.compare(id, left, right)
85+
86+
87+
def test_bad_date_added_comparator():
88+
cmp = DateAddedComparator("my_date_field")
89+
id = InstanceID("test", 1)
90+
left: JSONData = {
91+
"model": "test",
92+
"pk": 1,
93+
"fields": {
94+
"my_date_field": "2023-06-22T00:00:00.000Z",
95+
},
96+
}
97+
right: JSONData = {
98+
"model": "test",
99+
"pk": 1,
100+
"fields": {
101+
"my_date_field": "2023-06-22T00:00:00.123Z",
102+
},
103+
}
104+
res = cmp.compare(id, left, right)
105+
assert res
106+
assert res[0]
107+
assert res[0].on == id
108+
assert res[0].kind == "DateAddedComparator"
109+
assert "`my_date_field`" in res[0].reason
110+
assert "left value (2023-06-22T00:00:00.000Z)" in res[0].reason
111+
assert "right value (2023-06-22T00:00:00.123Z)" in res[0].reason
112+
113+
66114
def test_good_date_updated_comparator():
67115
cmp = DateUpdatedComparator("my_date_field")
68116
id = InstanceID("test", 1)
@@ -105,6 +153,9 @@ def test_bad_date_updated_comparator():
105153
assert res[0]
106154
assert res[0].on == id
107155
assert res[0].kind == "DateUpdatedComparator"
156+
assert "`my_date_field`" in res[0].reason
157+
assert "left value (2023-06-22T23:12:34.567Z)" in res[0].reason
158+
assert "right value (2023-06-22T23:00:00.001Z)" in res[0].reason
108159

109160

110161
def test_good_email_obfuscating_comparator():
@@ -155,14 +206,14 @@ def test_bad_email_obfuscating_comparator():
155206
assert res[0]
156207
assert res[0].on == id
157208
assert res[0].kind == "EmailObfuscatingComparator"
158-
assert "a[email protected]" in res[0].reason
159-
assert "a[email protected]" in res[0].reason
209+
assert "b[email protected]" in res[0].reason
210+
assert "b[email protected]" in res[0].reason
160211

161212
assert res[1]
162213
assert res[1].on == id
163214
assert res[1].kind == "EmailObfuscatingComparator"
164-
assert "b[email protected]" in res[1].reason
165-
assert "b[email protected]" in res[1].reason
215+
assert "a[email protected]" in res[1].reason
216+
assert "a[email protected]" in res[1].reason
166217

167218

168219
def test_good_email_obfuscating_comparator_scrubbed():
@@ -253,14 +304,14 @@ def test_bad_hash_obfuscating_comparator():
253304
assert res[0]
254305
assert res[0].on == id
255306
assert res[0].kind == "HashObfuscatingComparator"
256-
assert "123...39b" in res[0].reason
257-
assert "124...39c" in res[0].reason
307+
assert "1...e" in res[0].reason
308+
assert "2...f" in res[0].reason
258309

259310
assert res[1]
260311
assert res[1].on == id
261312
assert res[1].kind == "HashObfuscatingComparator"
262-
assert "1...e" in res[1].reason
263-
assert "2...f" in res[1].reason
313+
assert "123...39b" in res[1].reason
314+
assert "124...39c" in res[1].reason
264315

265316

266317
def test_good_hash_obfuscating_comparator_scrubbed():

0 commit comments

Comments
 (0)