diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f3b9c3..a969987 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ All notable changes to Sheaf are documented here. The format is based on [Keep a ## [Unreleased] +### Added + +- **Import deduplication.** Every importer (PluralKit, SimplyPlural, Tupperbox, PluralSpace, Prism, and Sheaf native re-import) now matches each incoming member against the system's existing roster before writing, so re-importing the same export no longer doubles your members. Matching is by PluralKit ID where present (exact, so PK round-trips cleanly) and otherwise by name, scoped so a member and a custom front sharing a name never collide. A new `conflict_strategy` option chooses what happens on a match: `skip` (default - leave the existing member untouched and add nothing), `update` (overwrite the existing member's importable fields from the export), or `create` (the old append-everything behaviour, kept as an escape hatch). The tier member cap now counts only the members an import would actually create, so re-importing into a near-full system no longer trips the cap on members that already exist. Deduplication is member-scoped: fronts, groups, journals, messages, polls, and reminders are still appended on re-import, so re-importing those sections over existing data can still duplicate them. The PluralKit member HID is now also confirmed to land in each member's `pluralkit_id` field, which doubles as the dedup key. ### Fixed - **Build provenance for local compose builds.** `GET /v1/version` reports the commit/tag/build-time the backend was built from; CI-built ghcr images already set these, but a local `docker compose build` left them null because the compose `args` didn't forward them. The app service now accepts `GIT_COMMIT` / `GIT_TAG` / `BUILD_TIME` from the host environment (documented in SELFHOSTING.md), so a compose build can identify itself too. Unset values stay null, same as before. diff --git a/sheaf/schemas/pk_import.py b/sheaf/schemas/pk_import.py index c9b782e..591def7 100644 --- a/sheaf/schemas/pk_import.py +++ b/sheaf/schemas/pk_import.py @@ -9,6 +9,8 @@ from pydantic import BaseModel, ConfigDict, Field +from sheaf.services.import_dedup import ImportConflictStrategy + class PKImportOptions(BaseModel): """What to import from a PluralKit export.""" @@ -18,6 +20,7 @@ class PKImportOptions(BaseModel): # the importer just dropped unknown keys; this is a tightening. model_config = ConfigDict(extra="forbid") + conflict_strategy: ImportConflictStrategy = ImportConflictStrategy.SKIP system_profile: bool = True member_ids: list[str] | None = Field( default=None, diff --git a/sheaf/schemas/pluralspace_import.py b/sheaf/schemas/pluralspace_import.py index cbffb98..3fb0be4 100644 --- a/sheaf/schemas/pluralspace_import.py +++ b/sheaf/schemas/pluralspace_import.py @@ -19,10 +19,13 @@ from pydantic import BaseModel, ConfigDict, Field +from sheaf.services.import_dedup import ImportConflictStrategy + class PluralspaceImportOptions(BaseModel): model_config = ConfigDict(extra="forbid") + conflict_strategy: ImportConflictStrategy = ImportConflictStrategy.SKIP system_profile: bool = True member_ids: list[str] | None = None @@ -78,6 +81,10 @@ class PluralspaceImportResult(BaseModel): members_imported: int = 0 custom_fronts_imported: int = 0 + # Dedup dispositions, covering all roster rows (members + custom + # fronts) that matched an existing row instead of being created. + members_skipped: int = 0 + members_updated: int = 0 avatars_imported: int = 0 tags_imported: int = 0 groups_imported: int = 0 diff --git a/sheaf/schemas/prism_import.py b/sheaf/schemas/prism_import.py index 6f4e08b..118be38 100644 --- a/sheaf/schemas/prism_import.py +++ b/sheaf/schemas/prism_import.py @@ -18,10 +18,13 @@ from pydantic import BaseModel, ConfigDict, Field +from sheaf.services.import_dedup import ImportConflictStrategy + class PrismImportOptions(BaseModel): model_config = ConfigDict(extra="forbid") + conflict_strategy: ImportConflictStrategy = ImportConflictStrategy.SKIP system_profile: bool = True member_ids: list[str] | None = None @@ -82,6 +85,8 @@ class PrismImportResult(BaseModel): model_config = ConfigDict(extra="forbid") members_imported: int = 0 + members_skipped: int = 0 + members_updated: int = 0 avatars_imported: int = 0 groups_imported: int = 0 custom_fields_imported: int = 0 diff --git a/sheaf/schemas/sheaf_import.py b/sheaf/schemas/sheaf_import.py index 0ab94a9..f64b41c 100644 --- a/sheaf/schemas/sheaf_import.py +++ b/sheaf/schemas/sheaf_import.py @@ -10,6 +10,8 @@ from pydantic import BaseModel, ConfigDict, Field +from sheaf.services.import_dedup import ImportConflictStrategy + class SheafImportOptions(BaseModel): """What to import from a Sheaf export. Each flag gates one section; @@ -21,6 +23,7 @@ class SheafImportOptions(BaseModel): # than being silently ignored. model_config = ConfigDict(extra="forbid") + conflict_strategy: ImportConflictStrategy = ImportConflictStrategy.SKIP system_profile: bool = True member_ids: list[str] | None = Field(default=None, max_length=10_000) fronts: bool = True diff --git a/sheaf/schemas/sp_import.py b/sheaf/schemas/sp_import.py index 4ed4329..fd7b072 100644 --- a/sheaf/schemas/sp_import.py +++ b/sheaf/schemas/sp_import.py @@ -2,6 +2,8 @@ from pydantic import BaseModel, ConfigDict, Field +from sheaf.services.import_dedup import ImportConflictStrategy + class SPImportOptions(BaseModel): """What to import from the SP export.""" @@ -10,6 +12,7 @@ class SPImportOptions(BaseModel): # than being silently ignored. model_config = ConfigDict(extra="forbid") + conflict_strategy: ImportConflictStrategy = ImportConflictStrategy.SKIP system_profile: bool = True member_ids: list[str] | None = Field( None, max_length=10_000, description="SP member IDs to import. None = all." @@ -46,6 +49,10 @@ class SPPreviewSummary(BaseModel): class SPImportResult(BaseModel): members_imported: int = 0 custom_fronts_imported: int = 0 + # Dedup dispositions, covering all roster rows (members + custom + # fronts) that matched an existing row instead of being created. + members_skipped: int = 0 + members_updated: int = 0 fronts_imported: int = 0 groups_imported: int = 0 custom_fields_imported: int = 0 diff --git a/sheaf/schemas/tb_import.py b/sheaf/schemas/tb_import.py index 3588ab4..c689d76 100644 --- a/sheaf/schemas/tb_import.py +++ b/sheaf/schemas/tb_import.py @@ -9,6 +9,8 @@ from pydantic import BaseModel, ConfigDict, Field +from sheaf.services.import_dedup import ImportConflictStrategy + class TBImportOptions(BaseModel): """What to import from a Tupperbox export.""" @@ -17,6 +19,7 @@ class TBImportOptions(BaseModel): # than being silently ignored. model_config = ConfigDict(extra="forbid") + conflict_strategy: ImportConflictStrategy = ImportConflictStrategy.SKIP member_ids: list[str] | None = Field( default=None, max_length=10_000, @@ -41,5 +44,7 @@ class TBPreviewSummary(BaseModel): class TBImportResult(BaseModel): members_imported: int = 0 + members_skipped: int = 0 + members_updated: int = 0 groups_imported: int = 0 warnings: list[str] = [] diff --git a/sheaf/services/import_dedup.py b/sheaf/services/import_dedup.py new file mode 100644 index 0000000..b06f370 --- /dev/null +++ b/sheaf/services/import_dedup.py @@ -0,0 +1,206 @@ +"""Member deduplication for re-imports. + +Every importer used to append members blindly, so importing the same +export twice doubled the roster. This module adds a match-and-resolve +layer: an importer builds its candidate Member exactly as before, then +asks `resolve_member()` what to do with it given the chosen strategy and +the members already in the system. + +Match key: the source's stable id where both the candidate and an +existing member carry one (`pluralkit_id`), otherwise the name +blind-index (`name_hash`) scoped by `is_custom_front`. Names are not +guaranteed unique within a system, so the name-hash path is best-effort: +a system that genuinely has two members sharing a name will match the +first. `pluralkit_id` is exact, so PK re-imports round-trip cleanly. + +The name-hash scope matters because some formats (SimplyPlural, +PluralSpace, Prism) store custom fronts as Member rows with +`is_custom_front=True`. Without the scope, a member and a custom front +that happen to share a name would match, and UPDATE would flip +`is_custom_front` and corrupt the member. `pluralkit_id` is member-only +(custom fronts never carry one), so that path needs no scoping. + +Strategies: +- CREATE: always insert (the pre-dedup behaviour). +- SKIP (default): an existing match is left untouched; the candidate is + not added. +- UPDATE: an existing match's importable fields are overwritten from the + candidate. + +The caller is responsible for two things based on the disposition: + * db.add() the candidate ONLY when disposition == "created"; + * use the returned member in its source-id -> member map either way, + so downstream sections (fronts, groups, custom fields) link to the + right row whether it was created, skipped, or updated. +""" + +from __future__ import annotations + +import enum +import uuid +from dataclasses import dataclass, field + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from sheaf.models.member import Member + + +class ImportConflictStrategy(enum.StrEnum): + CREATE = "create" + SKIP = "skip" + UPDATE = "update" + + +# Fields every importer always sets on a new Member, so UPDATE always +# overwrites them. is_custom_front is deliberately NOT here: matching is +# already scoped by it (a member only matches a member, a custom front +# only a custom front), so a match always agrees, and some importers +# leave it None on the candidate (relying on the column server-default), +# which would null out the existing row's NOT NULL column. +_ALWAYS_OVERWRITE = ("name", "name_hash", "privacy") +# Optional fields: UPDATE overwrites only when the candidate carries a +# value, so a re-import never nulls a field the source format doesn't +# model (e.g. PluralKit has no emoji, so a PK update must not wipe an +# emoji the user set after the first import). +_OVERWRITE_IF_SET = ( + "display_name", + "description", + "pronouns", + "avatar_url", + "color", + "birthday", + "pluralkit_id", + "emoji", + "note", +) + + +@dataclass +class MemberMatchIndex: + """In-memory index of a system's existing members, by match key. + + The name-hash index is keyed by `(is_custom_front, name_hash)` so a + member and a custom front sharing a name don't match each other. + """ + + by_pk_id: dict[str, Member] = field(default_factory=dict) + by_name_hash: dict[tuple[bool, str], Member] = field(default_factory=dict) + + def find( + self, + *, + name_hash: str, + is_custom_front: bool, + pluralkit_id: str | None = None, + ) -> Member | None: + if pluralkit_id and pluralkit_id in self.by_pk_id: + return self.by_pk_id[pluralkit_id] + return self.by_name_hash.get((bool(is_custom_front), name_hash)) + + def register(self, member: Member) -> None: + """Record a member so later candidates dedup against it too. + + First-wins on collisions: the earliest existing (or earliest + created-this-run) member is the canonical target. + """ + if member.pluralkit_id: + self.by_pk_id.setdefault(member.pluralkit_id, member) + if member.name_hash: + self.by_name_hash.setdefault( + (bool(member.is_custom_front), member.name_hash), member + ) + + +async def load_member_match_index( + db: AsyncSession, system_id: uuid.UUID +) -> MemberMatchIndex: + """Build the match index from the members already in the system.""" + rows = await db.execute(select(Member).where(Member.system_id == system_id)) + index = MemberMatchIndex() + for m in rows.scalars().all(): + index.register(m) + return index + + +@dataclass +class Resolution: + member: Member + disposition: str # "created" | "skipped" | "updated" + + +def _apply_update(existing: Member, candidate: Member) -> None: + for fld in _ALWAYS_OVERWRITE: + setattr(existing, fld, getattr(candidate, fld)) + for fld in _OVERWRITE_IF_SET: + val = getattr(candidate, fld, None) + if val is not None: + setattr(existing, fld, val) + + +def resolve_member( + candidate: Member, + *, + index: MemberMatchIndex, + strategy: ImportConflictStrategy, +) -> Resolution: + """Decide how a freshly-built candidate relates to existing members. + + On "created" the candidate is registered in the index so a later + intra-import row with the same key dedups against it too. + """ + if strategy == ImportConflictStrategy.CREATE: + return Resolution(candidate, "created") + existing = index.find( + name_hash=candidate.name_hash, + is_custom_front=bool(candidate.is_custom_front), + pluralkit_id=candidate.pluralkit_id, + ) + if existing is None: + index.register(candidate) + return Resolution(candidate, "created") + if strategy == ImportConflictStrategy.SKIP: + return Resolution(existing, "skipped") + _apply_update(existing, candidate) + return Resolution(existing, "updated") + + +def candidate_key(member: Member) -> tuple[str, str | None, bool]: + """The (name_hash, pluralkit_id, is_custom_front) match key for a + freshly-built candidate, as `count_new_members` expects it.""" + return (member.name_hash, member.pluralkit_id, bool(member.is_custom_front)) + + +def count_new_members( + keys: list[tuple[str, str | None, bool]], + *, + index: MemberMatchIndex, + strategy: ImportConflictStrategy, +) -> int: + """Count how many (name_hash, pluralkit_id, is_custom_front) candidate + keys would be created rather than skipped/updated. + + Used to size the tier member-cap check: under SKIP/UPDATE a pure + re-import of members already in the system adds nothing, so it must + not trip the cap. Mirrors `resolve_member`'s matching (including the + intra-batch dedup of earlier new keys) without building Member rows. + """ + if strategy == ImportConflictStrategy.CREATE: + return len(keys) + seen_new_pk: set[str] = set() + seen_new_name: set[tuple[bool, str]] = set() + new_count = 0 + for name_hash, pk_id, is_cf in keys: + if pk_id and (pk_id in index.by_pk_id or pk_id in seen_new_pk): + continue + name_key = (bool(is_cf), name_hash) + if not pk_id and ( + name_key in index.by_name_hash or name_key in seen_new_name + ): + continue + new_count += 1 + if pk_id: + seen_new_pk.add(pk_id) + else: + seen_new_name.add(name_key) + return new_count diff --git a/sheaf/services/pk_import_runner.py b/sheaf/services/pk_import_runner.py index d8d476e..4377561 100644 --- a/sheaf/services/pk_import_runner.py +++ b/sheaf/services/pk_import_runner.py @@ -24,6 +24,12 @@ from sheaf.models.import_job import ImportJob, ImportJobSource from sheaf.models.member import Member from sheaf.schemas.pk_import import PKImportOptions +from sheaf.services.import_dedup import ( + candidate_key, + count_new_members, + load_member_match_index, + resolve_member, +) from sheaf.services.import_parsing import ( ImportPayloadError, expect_dict, @@ -86,10 +92,11 @@ async def _process_pk_export( wanted = set(options.member_ids) pk_members = [m for m in pk_members if m.get("id") in wanted] - # Hard-fail before writing anything if this would blow the member cap. - await enforce_import_member_cap(db, system, len(pk_members)) - - hid_to_member: dict[str, Member] = {} + # Build all candidates first (pure construction, no DB writes), so + # the member-cap check below counts only the rows this run would + # actually CREATE. Under skip/update a pure re-import adds nothing + # and must not trip the cap. + candidates: list[tuple[Member, str]] = [] for pk_m in pk_members: # Defensive: each member runs in its own try so one bad row # doesn't kill the whole batch. Bad rows append an error event @@ -118,11 +125,35 @@ async def _process_pk_export( record_ref=str(hid_for_event) if hid_for_event else None, ) continue - db.add(member) - hid = str(hid_for_event or "") + candidates.append((member, str(hid_for_event or ""))) + + # Match against members already in the system. Hard-fail before + # writing anything if the NEW members would blow the cap. + index = await load_member_match_index(db, system.id) + new_count = count_new_members( + [candidate_key(m) for m, _ in candidates], + index=index, + strategy=options.conflict_strategy, + ) + await enforce_import_member_cap(db, system, new_count) + + hid_to_member: dict[str, Member] = {} + for member, hid in candidates: + resolution = resolve_member( + member, index=index, strategy=options.conflict_strategy + ) + if resolution.disposition == "created": + db.add(resolution.member) + update_counts(job, members_imported=1) + elif resolution.disposition == "updated": + update_counts(job, members_updated=1) + else: + update_counts(job, members_skipped=1) + # Downstream sections (groups, switches) link via this map, so it + # must point at the resolved row whether created, skipped, or + # updated. if hid: - hid_to_member[hid] = member - update_counts(job, members_imported=1) + hid_to_member[hid] = resolution.member await db.flush() diff --git a/sheaf/services/pluralspace_import.py b/sheaf/services/pluralspace_import.py index 80f7cf2..3683f8d 100644 --- a/sheaf/services/pluralspace_import.py +++ b/sheaf/services/pluralspace_import.py @@ -101,6 +101,13 @@ PluralspacePreviewSummary, ) from sheaf.services.custom_fields import encrypt_field_value +from sheaf.services.import_dedup import ( + ImportConflictStrategy, + candidate_key, + count_new_members, + load_member_match_index, + resolve_member, +) from sheaf.services.import_parsing import ( ImportPayloadError, safe_json_loads, @@ -322,6 +329,7 @@ async def run_import( user: User, db: AsyncSession, *, + conflict_strategy: ImportConflictStrategy = ImportConflictStrategy.SKIP, system_profile: bool = True, member_ids: list[str] | None = None, custom_fronts: bool = True, @@ -367,11 +375,11 @@ async def run_import( if bool(m_data.get("is_custom_front")) and not custom_fronts: continue eligible.append(m_data) - await enforce_import_member_cap(db, system, len(eligible)) - - ps_id_to_member: dict[str, Member] = {} - member_name_to_member: dict[str, Member] = {} + # Build candidates first (no DB writes), then size the member-cap + # check on the rows this run would actually CREATE. Both regular + # members and custom fronts are Member rows and count toward the cap. + candidates: list[tuple[Member, str, str, bool]] = [] for m_data in eligible: ps_id = _clean_str(m_data.get("id")) is_cf = bool(m_data.get("is_custom_front")) @@ -392,13 +400,36 @@ async def run_import( is_custom_front=is_cf, privacy=PrivacyLevel.PRIVATE, ) - db.add(member) - ps_id_to_member[ps_id] = member - member_name_to_member[plaintext_name] = member - if is_cf: - result.custom_fronts_imported += 1 + candidates.append((member, ps_id, plaintext_name, is_cf)) + + index = await load_member_match_index(db, system.id) + new_count = count_new_members( + [candidate_key(m) for m, _, _, _ in candidates], + index=index, + strategy=conflict_strategy, + ) + await enforce_import_member_cap(db, system, new_count) + + # Map PS id -> resolved row (created / skipped / updated) so later + # sections (avatars, tags, groups, fronts) link correctly. + ps_id_to_member: dict[str, Member] = {} + member_name_to_member: dict[str, Member] = {} + for member, ps_id, plaintext_name, is_cf in candidates: + resolution = resolve_member( + member, index=index, strategy=conflict_strategy + ) + if resolution.disposition == "created": + db.add(resolution.member) + if is_cf: + result.custom_fronts_imported += 1 + else: + result.members_imported += 1 + elif resolution.disposition == "updated": + result.members_updated += 1 else: - result.members_imported += 1 + result.members_skipped += 1 + ps_id_to_member[ps_id] = resolution.member + member_name_to_member[plaintext_name] = resolution.member if not ps_id_to_member: # Nothing else has anywhere to live; bail before walking the rest. diff --git a/sheaf/services/pluralspace_import_runner.py b/sheaf/services/pluralspace_import_runner.py index bc532bb..ebf72df 100644 --- a/sheaf/services/pluralspace_import_runner.py +++ b/sheaf/services/pluralspace_import_runner.py @@ -59,6 +59,7 @@ async def handle_pluralspace_file(job: ImportJob, db: AsyncSession) -> None: system, user, db, + conflict_strategy=options.conflict_strategy, system_profile=options.system_profile, member_ids=options.member_ids, custom_fronts=options.custom_fronts, @@ -76,6 +77,8 @@ async def handle_pluralspace_file(job: ImportJob, db: AsyncSession) -> None: job, members_imported=result.members_imported, custom_fronts_imported=result.custom_fronts_imported, + members_skipped=result.members_skipped, + members_updated=result.members_updated, avatars_imported=result.avatars_imported, tags_imported=result.tags_imported, groups_imported=result.groups_imported, diff --git a/sheaf/services/prism_import.py b/sheaf/services/prism_import.py index 3f2f686..fba7f3a 100644 --- a/sheaf/services/prism_import.py +++ b/sheaf/services/prism_import.py @@ -105,6 +105,13 @@ PrismPreviewSummary, ) from sheaf.services.custom_fields import encrypt_field_value +from sheaf.services.import_dedup import ( + ImportConflictStrategy, + candidate_key, + count_new_members, + load_member_match_index, + resolve_member, +) from sheaf.services.import_parsing import ( ImportPayloadError, sanitize_external_avatar_url, @@ -269,6 +276,7 @@ async def run_import( user: User, db: AsyncSession, *, + conflict_strategy: ImportConflictStrategy = ImportConflictStrategy.SKIP, system_profile: bool = True, member_ids: list[str] | None = None, member_avatars: bool = True, @@ -313,9 +321,10 @@ async def run_import( if selected is not None and ps_id not in selected: continue eligible.append(m) - await enforce_import_member_cap(db, system, len(eligible)) - ps_id_to_handle: dict[str, _MemberHandle] = {} + # Build candidates first (no DB writes), then size the member-cap + # check on the rows this run would actually CREATE. + candidates: list[tuple[Member, str, str, dict]] = [] for m in eligible: ps_id = _clean_str(m.get("id")) plaintext_name = (_clean_str(m.get("name")) or "unnamed")[:100] @@ -346,11 +355,33 @@ async def run_import( created_at = _parse_iso(m.get("createdAt")) if created_at: member.created_at = created_at - db.add(member) + candidates.append((member, ps_id, plaintext_name, m)) + + index = await load_member_match_index(db, system.id) + new_count = count_new_members( + [candidate_key(c) for c, _, _, _ in candidates], + index=index, + strategy=conflict_strategy, + ) + await enforce_import_member_cap(db, system, new_count) + + # Map PS id -> handle wrapping the resolved row (created / skipped / + # updated) so every later section attributes to the right member. + ps_id_to_handle: dict[str, _MemberHandle] = {} + for member, ps_id, plaintext_name, source in candidates: + resolution = resolve_member( + member, index=index, strategy=conflict_strategy + ) + if resolution.disposition == "created": + db.add(resolution.member) + result.members_imported += 1 + elif resolution.disposition == "updated": + result.members_updated += 1 + else: + result.members_skipped += 1 ps_id_to_handle[ps_id] = _MemberHandle( - member=member, plaintext_name=plaintext_name, source=m + member=resolution.member, plaintext_name=plaintext_name, source=source ) - result.members_imported += 1 if not ps_id_to_handle: return result diff --git a/sheaf/services/prism_import_runner.py b/sheaf/services/prism_import_runner.py index e21f2d0..608cdf9 100644 --- a/sheaf/services/prism_import_runner.py +++ b/sheaf/services/prism_import_runner.py @@ -76,6 +76,7 @@ async def handle_prism_file(job: ImportJob, db: AsyncSession) -> None: system, user, db, + conflict_strategy=options.conflict_strategy, system_profile=options.system_profile, member_ids=options.member_ids, member_avatars=options.member_avatars, @@ -96,6 +97,8 @@ async def handle_prism_file(job: ImportJob, db: AsyncSession) -> None: update_counts( job, members_imported=result.members_imported, + members_skipped=result.members_skipped, + members_updated=result.members_updated, avatars_imported=result.avatars_imported, groups_imported=result.groups_imported, custom_fields_imported=result.custom_fields_imported, diff --git a/sheaf/services/sheaf_import.py b/sheaf/services/sheaf_import.py index 543d513..1f5528f 100644 --- a/sheaf/services/sheaf_import.py +++ b/sheaf/services/sheaf_import.py @@ -16,10 +16,14 @@ historical audit actors are dropped, since the original account UUIDs are meaningless on the target instance. -Re-import is additive, with one exception: custom field *definitions* -are deduped by (name, type) against the target system, so restoring a -backup into a populated system doesn't leave a second copy of every -field. Members and their data are still added rather than merged. +Re-import deduplicates members against the target system (see +`import_dedup`): the chosen conflict strategy decides whether a member +that already exists is skipped (default) or updated, so restoring a +backup into a populated system doesn't double the roster. Custom field +*definitions* are likewise deduped by (name, type). Everything else a +member owns (fronts, journals, messages, polls, reminders) is still +added rather than merged, so re-importing those sections over existing +data appends duplicates - dedup is member-scoped only. """ import logging @@ -63,6 +67,13 @@ from sheaf.models.tag import Tag from sheaf.models.watch_token import WatchToken from sheaf.services.custom_fields import encrypt_field_value +from sheaf.services.import_dedup import ( + ImportConflictStrategy, + candidate_key, + count_new_members, + load_member_match_index, + resolve_member, +) from sheaf.services.import_image_strip import ( strip_internal_avatar_url, strip_internal_image_keys, @@ -148,6 +159,8 @@ def __init__(self): class SheafImportResult: def __init__(self): self.members_imported: int = 0 + self.members_skipped: int = 0 + self.members_updated: int = 0 self.fronts_imported: int = 0 self.groups_imported: int = 0 self.tags_imported: int = 0 @@ -196,6 +209,7 @@ async def run_import( system: System, db: AsyncSession, *, + conflict_strategy: ImportConflictStrategy = ImportConflictStrategy.SKIP, system_profile: bool = True, member_ids: list[str] | None = None, fronts: bool = True, @@ -305,12 +319,11 @@ async def run_import( selected = set(member_ids) export_members = [m for m in export_members if m.get("id") in selected] - # Hard-fail before writing anything if this would blow the member cap. - await enforce_import_member_cap(db, system, len(export_members)) - - # Map old export ID → new Member - old_id_to_member: dict[str, Member] = {} - + # Build candidates first (no DB writes), so the member-cap check + # below counts only the rows this run would actually CREATE. Under + # skip/update a pure re-import (e.g. restoring a backup over the same + # roster) adds nothing and must not trip the cap. + candidates: list[tuple[Member, str]] = [] for m_data in export_members: old_id = m_data.get("id", "") plaintext_name = (m_data.get("name") or "unnamed")[:100] @@ -357,18 +370,48 @@ async def run_import( # notify_on_front_member_ids points at other members by id; remapped # in the second pass below once every member row exists. ) - db.add(member) - old_id_to_member[old_id] = member - result.members_imported += 1 + candidates.append((member, old_id)) + + index = await load_member_match_index(db, system.id) + new_count = count_new_members( + [candidate_key(m) for m, _ in candidates], + index=index, + strategy=conflict_strategy, + ) + await enforce_import_member_cap(db, system, new_count) + + # Map old export ID → resolved Member (created / skipped / updated). + # `written_old_ids` tracks the rows this run actually wrote, so the + # second-pass notify remap below leaves skipped members untouched. + old_id_to_member: dict[str, Member] = {} + written_old_ids: set[str] = set() + for member, old_id in candidates: + resolution = resolve_member( + member, index=index, strategy=conflict_strategy + ) + if resolution.disposition == "created": + db.add(resolution.member) + result.members_imported += 1 + written_old_ids.add(old_id) + elif resolution.disposition == "updated": + result.members_updated += 1 + written_old_ids.add(old_id) + else: + result.members_skipped += 1 + old_id_to_member[old_id] = resolution.member await db.flush() # Second pass: notify_on_front_member_ids references other members by their # export id, which only fully resolve once every member exists. Remap old # ids to the new member ids, dropping any member that didn't import. Members - # with no such preference settle to the model default ([]). + # with no such preference settle to the model default ([]). Skipped members + # are left untouched (they keep whatever the existing row already had). for m_data in export_members: - member = old_id_to_member.get(m_data.get("id", "")) + old_id = m_data.get("id", "") + if old_id not in written_old_ids: + continue + member = old_id_to_member.get(old_id) if member is None: continue member.notify_on_front_member_ids = [ @@ -451,10 +494,25 @@ async def run_import( await db.flush() # Import field values. A reused definition can be referenced by more - # than one export field (a file with duplicate defs, or future deduped + # than one export field (a file with duplicate defs, or deduped # members), so guard (field, member) uniqueness ourselves to avoid # tripping the UNIQUE(field_id, member_id) constraint mid-import. seen_values: set[tuple[uuid.UUID, uuid.UUID]] = set() + # Pre-seed the guard with values already in the system: a re-import + # that reused an existing definition AND matched an existing + # (deduped) member would otherwise try to insert a second value for + # the same pair. Skip-if-present keeps re-import idempotent for + # field values (dedup is member-scoped; values ride along). + field_def_ids = {fd.id for fd in old_field_to_def.values()} + if field_def_ids: + existing_values = await db.execute( + select(CustomFieldValue.field_id, CustomFieldValue.member_id).where( + CustomFieldValue.field_id.in_(field_def_ids) + ) + ) + seen_values.update( + (row.field_id, row.member_id) for row in existing_values + ) for fd_data in data.get("custom_fields", []): old_fid = fd_data.get("id", "") field_def = old_field_to_def.get(old_fid) diff --git a/sheaf/services/sheaf_import_runner.py b/sheaf/services/sheaf_import_runner.py index 60648ff..4cc3a67 100644 --- a/sheaf/services/sheaf_import_runner.py +++ b/sheaf/services/sheaf_import_runner.py @@ -64,6 +64,7 @@ async def handle_sheaf_file(job: ImportJob, db: AsyncSession) -> None: parsed, system, db, + conflict_strategy=options.conflict_strategy, system_profile=options.system_profile, member_ids=options.member_ids, fronts=options.fronts, @@ -80,6 +81,8 @@ async def handle_sheaf_file(job: ImportJob, db: AsyncSession) -> None: update_counts( job, members_imported=result.members_imported, + members_skipped=result.members_skipped, + members_updated=result.members_updated, fronts_imported=result.fronts_imported, groups_imported=result.groups_imported, tags_imported=result.tags_imported, diff --git a/sheaf/services/sp_import.py b/sheaf/services/sp_import.py index 7d85cba..15f74f3 100644 --- a/sheaf/services/sp_import.py +++ b/sheaf/services/sp_import.py @@ -32,6 +32,12 @@ SPPreviewSummary, ) from sheaf.services.custom_fields import encrypt_field_value +from sheaf.services.import_dedup import ( + candidate_key, + count_new_members, + load_member_match_index, + resolve_member, +) from sheaf.services.import_parsing import sanitize_external_avatar_url from sheaf.services.member_limits import enforce_import_member_cap @@ -123,16 +129,11 @@ async def run_import( selected = set(options.member_ids) sp_members = [m for m in sp_members if m.get("_id") in selected] - # Custom fronts also become Member rows and count toward the cap, so fold - # them into the headroom check. Hard-fail before writing anything. - incoming = len(sp_members) - if options.custom_fronts: - incoming += len(_get_collection(data, "frontStatuses")) - await enforce_import_member_cap(db, system, incoming) - - # Map SP member _id → new Sheaf Member for cross-referencing - sp_id_to_member: dict[str, Member] = {} - + # Build member + custom-front candidates first (no DB writes), so the + # member-cap check below counts only the rows this run would CREATE. + # Both members and custom fronts are Member rows and count toward the + # cap; under skip/update a pure re-import adds nothing. + member_candidates: list[tuple[Member, str]] = [] for sp_m in sp_members: sp_id = sp_m.get("_id", "") plaintext_name = (sp_m.get("name") or "unnamed")[:100] @@ -153,16 +154,14 @@ async def run_import( color=_normalize_color(sp_m.get("color")), privacy=_map_privacy(sp_m.get("private", True)), ) - db.add(member) - sp_id_to_member[sp_id] = member - result.members_imported += 1 + member_candidates.append((member, sp_id)) # --- Custom fronts → imported as Members with is_custom_front=True --- # SP's "frontStatuses" are non-counting fronting entities like "Asleep" # or "Away". Sheaf models them as Members carrying the is_custom_front # flag, which the UI uses to list them separately from headcounted # members and exclude them from member-count statistics. - sp_id_to_custom_front: dict[str, Member] = {} + custom_front_candidates: list[tuple[Member, str]] = [] if options.custom_fronts: for sp_cf in _get_collection(data, "frontStatuses"): sp_id = sp_cf.get("_id", "") @@ -183,9 +182,51 @@ async def run_import( privacy=_map_privacy(sp_cf.get("private", True)), is_custom_front=True, ) - db.add(member) - sp_id_to_custom_front[sp_id] = member + custom_front_candidates.append((member, sp_id)) + + # Match against existing roster, then hard-fail before writing anything + # if the NEW rows would blow the cap. + index = await load_member_match_index(db, system.id) + new_count = count_new_members( + [ + candidate_key(m) + for m, _ in (*member_candidates, *custom_front_candidates) + ], + index=index, + strategy=options.conflict_strategy, + ) + await enforce_import_member_cap(db, system, new_count) + + # Map SP member _id → resolved Sheaf Member for cross-referencing. + # The map points at the resolved row (created / skipped / updated) so + # later sections (fronts, custom fields, groups) link correctly. + sp_id_to_member: dict[str, Member] = {} + for member, sp_id in member_candidates: + resolution = resolve_member( + member, index=index, strategy=options.conflict_strategy + ) + if resolution.disposition == "created": + db.add(resolution.member) + result.members_imported += 1 + elif resolution.disposition == "updated": + result.members_updated += 1 + else: + result.members_skipped += 1 + sp_id_to_member[sp_id] = resolution.member + + sp_id_to_custom_front: dict[str, Member] = {} + for member, sp_id in custom_front_candidates: + resolution = resolve_member( + member, index=index, strategy=options.conflict_strategy + ) + if resolution.disposition == "created": + db.add(resolution.member) result.custom_fronts_imported += 1 + elif resolution.disposition == "updated": + result.members_updated += 1 + else: + result.members_skipped += 1 + sp_id_to_custom_front[sp_id] = resolution.member # Flush to get member IDs assigned await db.flush() diff --git a/sheaf/services/sp_import_runner.py b/sheaf/services/sp_import_runner.py index 01ff9e6..46ad54a 100644 --- a/sheaf/services/sp_import_runner.py +++ b/sheaf/services/sp_import_runner.py @@ -66,6 +66,8 @@ async def handle_simplyplural_file(job: ImportJob, db: AsyncSession) -> None: job, members_imported=result.members_imported, custom_fronts_imported=result.custom_fronts_imported, + members_skipped=result.members_skipped, + members_updated=result.members_updated, fronts_imported=result.fronts_imported, groups_imported=result.groups_imported, custom_fields_imported=result.custom_fields_imported, diff --git a/sheaf/services/tb_import.py b/sheaf/services/tb_import.py index 5b86d1c..8c0ed4a 100644 --- a/sheaf/services/tb_import.py +++ b/sheaf/services/tb_import.py @@ -43,6 +43,12 @@ TBPreviewMember, TBPreviewSummary, ) +from sheaf.services.import_dedup import ( + candidate_key, + count_new_members, + load_member_match_index, + resolve_member, +) from sheaf.services.import_parsing import sanitize_external_avatar_url from sheaf.services.member_limits import enforce_import_member_cap @@ -97,24 +103,44 @@ async def run_import( wanted = set(options.member_ids) tuppers = [t for t in tuppers if _tupper_id(t) in wanted] - # Hard-fail before writing anything if this would blow the member cap. - await enforce_import_member_cap(db, system, len(tuppers)) - - id_to_member: dict[str, Member] = {} + # Build candidates first (no DB writes), so the member-cap check + # below counts only the rows this run would actually CREATE. + candidates: list[tuple[Member, str | None]] = [] tuppers_no_name = 0 - tuppers_no_id = 0 for tupper in tuppers: member = _build_member(tupper, system.id) if member is None: tuppers_no_name += 1 continue - db.add(member) - tid = _tupper_id(tupper) + candidates.append((member, _tupper_id(tupper))) + + index = await load_member_match_index(db, system.id) + new_count = count_new_members( + [candidate_key(m) for m, _ in candidates], + index=index, + strategy=options.conflict_strategy, + ) + await enforce_import_member_cap(db, system, new_count) + + id_to_member: dict[str, Member] = {} + tuppers_no_id = 0 + for member, tid in candidates: + resolution = resolve_member( + member, index=index, strategy=options.conflict_strategy + ) + if resolution.disposition == "created": + db.add(resolution.member) + result.members_imported += 1 + elif resolution.disposition == "updated": + result.members_updated += 1 + else: + result.members_skipped += 1 + # Map by tupper id (when present) to the resolved row so groups + # wire onto the right member whether created, skipped, or updated. if tid is None: tuppers_no_id += 1 else: - id_to_member[tid] = member - result.members_imported += 1 + id_to_member[tid] = resolution.member await db.flush() diff --git a/sheaf/services/tb_import_runner.py b/sheaf/services/tb_import_runner.py index c2398fa..66e64fa 100644 --- a/sheaf/services/tb_import_runner.py +++ b/sheaf/services/tb_import_runner.py @@ -64,6 +64,8 @@ async def handle_tupperbox_file(job: ImportJob, db: AsyncSession) -> None: update_counts( job, members_imported=result.members_imported, + members_skipped=result.members_skipped, + members_updated=result.members_updated, groups_imported=result.groups_imported, ) for warning in result.warnings: diff --git a/tests/test_import_dedup_unit.py b/tests/test_import_dedup_unit.py new file mode 100644 index 0000000..b6f167f --- /dev/null +++ b/tests/test_import_dedup_unit.py @@ -0,0 +1,168 @@ +"""Unit tests for the shared member-dedup logic. + +Pure logic, no DB and no docker stack: every test constructs detached +Member objects and drives `import_dedup` directly. Covers the bits that +are easy to get subtly wrong - pk-id-before-name-hash matching, the +is_custom_front scoping of the name-hash index, the update field policy, +intra-batch dedup, and the cap-sizing count. +""" + +from __future__ import annotations + +import uuid + +from sheaf.models.member import Member +from sheaf.services.import_dedup import ( + ImportConflictStrategy, + MemberMatchIndex, + candidate_key, + count_new_members, + resolve_member, +) + + +def _m(name_hash: str, *, pk_id: str | None = None, is_cf: bool = False, **extra): + """A detached Member carrying just the attrs dedup reads.""" + return Member( + id=uuid.uuid4(), + name_hash=name_hash, + pluralkit_id=pk_id, + is_custom_front=is_cf, + **extra, + ) + + +# --- MemberMatchIndex.find ------------------------------------------------- + + +def test_find_prefers_pk_id_over_name_hash(): + existing = _m("hashA", pk_id="abcd") + idx = MemberMatchIndex() + idx.register(existing) + # Different name hash, same pk id -> still a match (pk id wins). + assert ( + idx.find(name_hash="hashZ", pluralkit_id="abcd", is_custom_front=False) + is existing + ) + + +def test_find_name_hash_scoped_by_is_custom_front(): + member = _m("shared", is_cf=False) + idx = MemberMatchIndex() + idx.register(member) + # A custom front sharing the name must NOT match the regular member, + # else update would flip is_custom_front and corrupt the member. + assert idx.find(name_hash="shared", is_custom_front=True) is None + assert idx.find(name_hash="shared", is_custom_front=False) is member + + +def test_find_missing_returns_none(): + idx = MemberMatchIndex() + assert idx.find(name_hash="nope", is_custom_front=False) is None + + +def test_register_is_first_wins(): + first = _m("dup", pk_id="zz") + second = _m("dup", pk_id="zz") + idx = MemberMatchIndex() + idx.register(first) + idx.register(second) + assert idx.find(name_hash="dup", is_custom_front=False) is first + assert idx.find(name_hash="x", pluralkit_id="zz", is_custom_front=False) is first + + +# --- resolve_member -------------------------------------------------------- + + +def test_create_strategy_always_creates_even_on_match(): + idx = MemberMatchIndex() + idx.register(_m("dup")) + cand = _m("dup") + res = resolve_member(cand, index=idx, strategy=ImportConflictStrategy.CREATE) + assert res.disposition == "created" + assert res.member is cand + + +def test_skip_returns_existing_untouched(): + existing = _m("dup", display_name="keep") + idx = MemberMatchIndex() + idx.register(existing) + cand = _m("dup", display_name="ignored") + res = resolve_member(cand, index=idx, strategy=ImportConflictStrategy.SKIP) + assert res.disposition == "skipped" + assert res.member is existing + assert existing.display_name == "keep" + + +def test_update_overwrites_set_fields_preserves_unset(): + existing = _m("dup", display_name="old", pronouns="they/them", emoji=None) + idx = MemberMatchIndex() + idx.register(existing) + cand = _m("dup", display_name="new", pronouns=None, emoji="star") + res = resolve_member(cand, index=idx, strategy=ImportConflictStrategy.UPDATE) + assert res.disposition == "updated" + assert res.member is existing + assert existing.display_name == "new" # candidate had a value -> overwrite + assert existing.pronouns == "they/them" # candidate None -> preserved + assert existing.emoji == "star" # candidate set -> filled in + + +def test_no_match_creates_and_registers_for_intra_batch(): + idx = MemberMatchIndex() + first = _m("dup") + r1 = resolve_member(first, index=idx, strategy=ImportConflictStrategy.SKIP) + assert r1.disposition == "created" + # A later row in the same batch with the same key dedups against the + # one just created, not a fresh insert. + second = _m("dup") + r2 = resolve_member(second, index=idx, strategy=ImportConflictStrategy.SKIP) + assert r2.disposition == "skipped" + assert r2.member is first + + +def test_member_and_custom_front_same_name_both_created(): + idx = MemberMatchIndex() + member = _m("alex", is_cf=False) + cf = _m("alex", is_cf=True) + r1 = resolve_member(member, index=idx, strategy=ImportConflictStrategy.SKIP) + r2 = resolve_member(cf, index=idx, strategy=ImportConflictStrategy.SKIP) + assert r1.disposition == "created" + assert r2.disposition == "created" # different scope, no false match + + +# --- count_new_members (cap sizing) ---------------------------------------- + + +def test_count_new_members_excludes_existing_and_intra_batch_dupes(): + idx = MemberMatchIndex() + idx.register(_m("exists")) + keys = [ + ("exists", None, False), # already in system -> not new + ("fresh", None, False), # new + ("fresh", None, False), # intra-batch dup of the previous -> not new + ("fresh", None, True), # custom front, different scope -> new + ] + assert count_new_members(keys, index=idx, strategy=ImportConflictStrategy.SKIP) == 2 + + +def test_count_new_members_pk_id_path(): + idx = MemberMatchIndex() + idx.register(_m("hashA", pk_id="abcd")) + keys = [ + ("hashZ", "abcd", False), # matches existing by pk id -> not new + ("hashY", "wxyz", False), # new pk id + ("hashY", "wxyz", False), # intra-batch dup pk id -> not new + ] + assert count_new_members(keys, index=idx, strategy=ImportConflictStrategy.UPDATE) == 1 + + +def test_count_new_members_create_counts_everything(): + idx = MemberMatchIndex() + idx.register(_m("exists")) + keys = [("exists", None, False), ("exists", None, False)] + assert count_new_members(keys, index=idx, strategy=ImportConflictStrategy.CREATE) == 2 + + +def test_candidate_key_shape(): + m = _m("h", pk_id="abcd", is_cf=True) + assert candidate_key(m) == ("h", "abcd", True) diff --git a/tests/test_imports_pk_runner.py b/tests/test_imports_pk_runner.py index 29b9c94..2e2d37b 100644 --- a/tests/test_imports_pk_runner.py +++ b/tests/test_imports_pk_runner.py @@ -172,3 +172,92 @@ def test_pk_file_runner_finalize_wipes_storage_key(auth_client: httpx.Client): # row stays even after the blob is gone. resp = auth_client.get(f"/v1/imports/{job['id']}") assert resp.status_code == 200 + + +# --- pluralkit_id + deduplication ------------------------------------------ + + +def test_pk_file_runner_populates_pluralkit_id(auth_client: httpx.Client): + """Regression (#349): the PK member HID is imported into the Sheaf + member's pluralkit_id, which is also the dedup match key for + re-imports.""" + job = _post_pk_file(auth_client) + drive_import_runner() + final = wait_for_terminal(auth_client, job["id"]) + assert final["status"] == "complete" + + members = auth_client.get("/v1/members").json() + by_name = {m["name"]: m for m in members} + assert by_name["Alice"]["pluralkit_id"] == "alice" + assert by_name["Bob"]["pluralkit_id"] == "bobxyz" + assert by_name["Carol"]["pluralkit_id"] == "carol1" + + +def test_pk_reimport_skips_duplicates_by_default(auth_client: httpx.Client): + """Importing the same export twice leaves a single roster: the second + run matches every member by pluralkit_id and skips it (default + strategy), so nothing is appended.""" + first = _post_pk_file(auth_client) + drive_import_runner() + assert wait_for_terminal(auth_client, first["id"])["status"] == "complete" + + second = _post_pk_file(auth_client) + drive_import_runner() + final = wait_for_terminal(auth_client, second["id"]) + + assert final["status"] == "complete" + assert final["counts"].get("members_imported", 0) == 0 + assert final["counts"].get("members_skipped", 0) == 3 + + members = auth_client.get("/v1/members").json() + names = [m["name"] for m in members] + assert sorted(names) == ["Alice", "Bob", "Carol"], names + + +def test_pk_reimport_update_overwrites_existing(auth_client: httpx.Client): + """conflict_strategy=update re-points an existing member's importable + fields at the new payload instead of skipping or duplicating.""" + first = _post_pk_file(auth_client) + drive_import_runner() + assert wait_for_terminal(auth_client, first["id"])["status"] == "complete" + + # Re-upload the same export with Alice's display name changed. + data = json.loads(PK_FIXTURE.read_bytes()) + for m in data["members"]: + if m["id"] == "alice": + m["display_name"] = "Alice (updated)" + payload = json.dumps(data).encode() + + second = _post_pk_file( + auth_client, options={"conflict_strategy": "update"}, payload=payload + ) + drive_import_runner() + final = wait_for_terminal(auth_client, second["id"]) + + assert final["status"] == "complete" + assert final["counts"].get("members_imported", 0) == 0 + assert final["counts"].get("members_updated", 0) == 3 + + members = auth_client.get("/v1/members").json() + assert len(members) == 3 # no duplicates + alice = next(m for m in members if m["name"] == "Alice") + assert alice["display_name"] == "Alice (updated)" + + +def test_pk_reimport_create_strategy_appends(auth_client: httpx.Client): + """conflict_strategy=create is the explicit escape hatch: it restores + the old append-everything behaviour, doubling a re-imported roster.""" + first = _post_pk_file(auth_client) + drive_import_runner() + assert wait_for_terminal(auth_client, first["id"])["status"] == "complete" + + second = _post_pk_file(auth_client, options={"conflict_strategy": "create"}) + drive_import_runner() + final = wait_for_terminal(auth_client, second["id"]) + + assert final["status"] == "complete" + assert final["counts"].get("members_imported", 0) == 3 + assert final["counts"].get("members_skipped", 0) == 0 + + members = auth_client.get("/v1/members").json() + assert len(members) == 6 diff --git a/tests/test_imports_sheaf_runner.py b/tests/test_imports_sheaf_runner.py index 1d831f6..02d6a00 100644 --- a/tests/test_imports_sheaf_runner.py +++ b/tests/test_imports_sheaf_runner.py @@ -69,9 +69,13 @@ def test_sheaf_runner_roundtrip_from_export(auth_client: httpx.Client): drive_import_runner() final = wait_for_terminal(auth_client, job["id"]) assert final["status"] == "complete", final - # The exported member comes back in on re-import (alongside the - # original — re-import is additive, not a replace). - assert final["counts"]["members_imported"] >= 1, final["counts"] + # Re-importing an export into the system it came from now deduplicates + # (skip is the default conflict strategy): the member matches the + # existing one by name and is skipped rather than doubled. + assert final["counts"].get("members_imported", 0) == 0, final["counts"] + assert final["counts"].get("members_skipped", 0) == 1, final["counts"] + members = auth_client.get("/v1/members").json() + assert len([m for m in members if m["name"] == "RoundtripMember"]) == 1 def test_sheaf_runner_roundtrips_notify_prefs_and_coalesce(auth_client: httpx.Client): diff --git a/web/src/routes/import.tsx b/web/src/routes/import.tsx index 767e07c..6f54bc5 100644 --- a/web/src/routes/import.tsx +++ b/web/src/routes/import.tsx @@ -196,6 +196,8 @@ function SheafImportFlow({ onBack }: { onBack: () => void }) { const [systemProfile, setSystemProfile] = useState(true); const [allMembers, setAllMembers] = useState(true); + const [conflictStrategy, setConflictStrategy] = + useState("skip"); const [selectedMembers, setSelectedMembers] = useState>(new Set()); const [importFronts, setImportFronts] = useState(true); const [importGroups, setImportGroups] = useState(true); @@ -237,6 +239,7 @@ function SheafImportFlow({ onBack }: { onBack: () => void }) { options: { system_profile: systemProfile, member_ids: allMembers ? null : Array.from(selectedMembers), + conflict_strategy: conflictStrategy, fronts: importFronts, groups: importGroups, tags: importTags, @@ -383,6 +386,11 @@ function SheafImportFlow({ onBack }: { onBack: () => void }) { )} + + void }) { const [systemProfile, setSystemProfile] = useState(true); const [allMembers, setAllMembers] = useState(true); + const [conflictStrategy, setConflictStrategy] = + useState("skip"); const [selectedMembers, setSelectedMembers] = useState>(new Set()); const [customFronts, setCustomFronts] = useState(true); const [customFields, setCustomFields] = useState(true); @@ -454,6 +464,7 @@ function SPImportFlow({ onBack }: { onBack: () => void }) { options: { system_profile: systemProfile, member_ids: allMembers ? null : Array.from(selectedMembers), + conflict_strategy: conflictStrategy, custom_fronts: customFronts, custom_fields: customFields, groups, @@ -556,6 +567,11 @@ function SPImportFlow({ onBack }: { onBack: () => void }) { onChange={setFrontHistory} /> + + void }) { const [systemProfile, setSystemProfile] = useState(true); const [allMembers, setAllMembers] = useState(true); + const [conflictStrategy, setConflictStrategy] = + useState("skip"); const [selectedMembers, setSelectedMembers] = useState>(new Set()); const [groups, setGroups] = useState(true); const [frontHistory, setFrontHistory] = useState(false); @@ -649,6 +667,7 @@ function PKImportFlow({ onBack }: { onBack: () => void }) { const options = { system_profile: systemProfile, member_ids: allMembers ? null : Array.from(selectedMembers), + conflict_strategy: conflictStrategy, groups, front_history: frontHistory, }; @@ -864,6 +883,11 @@ function PKImportFlow({ onBack }: { onBack: () => void }) { onChange={setFrontHistory} /> + + void }) { const [idemKey] = useState(newIdempotencyKey); const [allMembers, setAllMembers] = useState(true); + const [conflictStrategy, setConflictStrategy] = + useState("skip"); const [selectedMembers, setSelectedMembers] = useState>(new Set()); const [groups, setGroups] = useState(true); @@ -929,6 +955,7 @@ function TBImportFlow({ onBack }: { onBack: () => void }) { idempotencyKey: idemKey, options: { member_ids: allMembers ? null : Array.from(selectedMembers), + conflict_strategy: conflictStrategy, groups, }, }); @@ -1003,6 +1030,11 @@ function TBImportFlow({ onBack }: { onBack: () => void }) { onChange={setGroups} /> + + void }) { const [idemKey] = useState(newIdempotencyKey); const [allMembers, setAllMembers] = useState(true); + const [conflictStrategy, setConflictStrategy] = + useState("skip"); const [selectedMembers, setSelectedMembers] = useState>(new Set()); const [systemProfile, setSystemProfile] = useState(true); const [customFronts, setCustomFronts] = useState(true); @@ -1079,6 +1113,7 @@ function PSImportFlow({ onBack }: { onBack: () => void }) { options: { system_profile: systemProfile, member_ids: allMembers ? null : Array.from(selectedMembers), + conflict_strategy: conflictStrategy, custom_fronts: customFronts, member_avatars: memberAvatars, roles_as_tags: rolesAsTags, @@ -1229,6 +1264,11 @@ function PSImportFlow({ onBack }: { onBack: () => void }) { onChange={setPolls} /> + + void }) { const [idemKey] = useState(newIdempotencyKey); const [allMembers, setAllMembers] = useState(true); + const [conflictStrategy, setConflictStrategy] = + useState("skip"); const [selectedMembers, setSelectedMembers] = useState>(new Set()); const [systemProfile, setSystemProfile] = useState(true); const [memberAvatars, setMemberAvatars] = useState(true); @@ -1323,6 +1365,7 @@ function PrismImportFlow({ onBack }: { onBack: () => void }) { options: { system_profile: systemProfile, member_ids: allMembers ? null : Array.from(selectedMembers), + conflict_strategy: conflictStrategy, member_avatars: memberAvatars, member_groups: memberGroups, custom_fields: customFields, @@ -1505,6 +1548,11 @@ function PrismImportFlow({ onBack }: { onBack: () => void }) { onChange={setMediaAttachments} /> + + void; +}) { + const options: { value: ConflictStrategy; label: string; hint: string }[] = [ + { + value: "skip", + label: "Skip members that already exist", + hint: "Re-importing the same export won't duplicate your roster. Existing members are left as they are.", + }, + { + value: "update", + label: "Update members that already exist", + hint: "Overwrite an existing member's details from the import. Use this to refresh from a newer export.", + }, + { + value: "create", + label: "Always add as new members", + hint: "Add every member even if one with the same name or PluralKit ID already exists. Can create duplicates.", + }, + ]; + return ( +
+ If a member already exists + {options.map((o) => ( + + ))} +
+ ); +} + function Checkbox({ label, checked,