Skip to content

Commit f12bb3f

Browse files
Merge pull request #616 from Labelbox/kkim/AL-2592
[AL-2592] [AL-2593] [AL-2594] Create/Update/Delete custom metadata schema
2 parents 0197e0a + 75fdafa commit f12bb3f

File tree

3 files changed

+340
-39
lines changed

3 files changed

+340
-39
lines changed

labelbox/schema/data_row_metadata.py

Lines changed: 260 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,19 @@ class _DeleteBatchDataRowMetadata(_CamelCaseMixin):
9393
_BatchFunction = Callable[[_BatchInputs], List[DataRowMetadataBatchResponse]]
9494

9595

96+
class _UpsertCustomMetadataSchemaEnumOptionInput(_CamelCaseMixin):
    """Input model for a single Enum option of a custom metadata schema.

    Instances are nested under `_UpsertCustomMetadataSchemaInput.options`
    when a custom metadata schema is created or updated.
    """
    # Schema id of an existing option. Defaults to None explicitly (rather
    # than relying on the implicit default of a bare `Optional[...]` field),
    # so the field can always be omitted when the option does not exist yet.
    id: Optional[SchemaId] = None
    # Option name: surrounding whitespace is stripped, 1-100 characters.
    name: constr(strip_whitespace=True, min_length=1, max_length=100)
    # Kind string; callers in this module pass a `DataRowMetadataKind` value.
    kind: str
100+
101+
102+
class _UpsertCustomMetadataSchemaInput(_CamelCaseMixin):
    """Input model for creating or updating a custom metadata schema.

    Serialized with `dict(exclude_none=True)` before being sent, so fields
    left as None are dropped from the payload.
    """
    # Schema id of the schema being updated; None when creating a new schema.
    # The default is explicit so the field never becomes required.
    id: Optional[SchemaId] = None
    # Schema name: surrounding whitespace is stripped, 1-100 characters.
    name: constr(strip_whitespace=True, min_length=1, max_length=100)
    # Kind string; callers in this module pass a `DataRowMetadataKind` value.
    kind: str
    # Enum options of the schema; None when the schema has no options.
    options: Optional[List[_UpsertCustomMetadataSchemaEnumOptionInput]] = None
107+
108+
96109
class DataRowMetadataOntology:
97110
""" Ontology for data row metadata
98111
@@ -122,21 +135,30 @@ def _build_ontology(self):
122135
f for f in self.fields if f.reserved
123136
]
124137
self.reserved_by_id = self._make_id_index(self.reserved_fields)
125-
self.reserved_by_name: Dict[
126-
str,
127-
DataRowMetadataSchema] = self._make_name_index(self.reserved_fields)
138+
self.reserved_by_name: Dict[str, Union[DataRowMetadataSchema, Dict[
139+
str, DataRowMetadataSchema]]] = self._make_name_index(
140+
self.reserved_fields)
141+
self.reserved_by_name_normalized: Dict[
142+
str, DataRowMetadataSchema] = self._make_normalized_name_index(
143+
self.reserved_fields)
128144

129145
# custom fields
130146
self.custom_fields: List[DataRowMetadataSchema] = [
131147
f for f in self.fields if not f.reserved
132148
]
133149
self.custom_by_id = self._make_id_index(self.custom_fields)
134-
self.custom_by_name: Dict[
150+
self.custom_by_name: Dict[str, Union[DataRowMetadataSchema, Dict[
135151
str,
136-
DataRowMetadataSchema] = self._make_name_index(self.custom_fields)
152+
DataRowMetadataSchema]]] = self._make_name_index(self.custom_fields)
153+
self.custom_by_name_normalized: Dict[
154+
str, DataRowMetadataSchema] = self._make_normalized_name_index(
155+
self.custom_fields)
137156

138157
@staticmethod
139-
def _make_name_index(fields: List[DataRowMetadataSchema]):
158+
def _make_name_index(
159+
fields: List[DataRowMetadataSchema]
160+
) -> Dict[str, Union[DataRowMetadataSchema, Dict[str,
161+
DataRowMetadataSchema]]]:
140162
index = {}
141163
for f in fields:
142164
if f.options:
@@ -147,6 +169,15 @@ def _make_name_index(fields: List[DataRowMetadataSchema]):
147169
index[f.name] = f
148170
return index
149171

172+
@staticmethod
173+
def _make_normalized_name_index(
174+
fields: List[DataRowMetadataSchema]
175+
) -> Dict[str, DataRowMetadataSchema]:
176+
index = {}
177+
for f in fields:
178+
index[f.name] = f
179+
return index
180+
150181
@staticmethod
151182
def _make_id_index(
152183
fields: List[DataRowMetadataSchema]
@@ -200,9 +231,144 @@ def _parse_ontology(raw_ontology) -> List[DataRowMetadataSchema]:
200231
return fields
201232

202233
def refresh_ontology(self):
234+
""" Update the `DataRowMetadataOntology` instance with the latest
235+
metadata ontology schemas
236+
"""
203237
self._raw_ontology = self._get_ontology()
204238
self._build_ontology()
205239

240+
def create_schema(
        self,
        name: str,
        kind: DataRowMetadataKind,
        options: Optional[List[str]] = None) -> DataRowMetadataSchema:
    """ Create metadata schema

    >>> mdo.create_schema(name, kind, options)

    Args:
        name (str): Name of metadata schema
        kind (DataRowMetadataKind): Kind of metadata schema as `DataRowMetadataKind`
        options (List[str]): List of Enum option names. May only be given
            when `kind` is `DataRowMetadataKind.enum`; None or an empty
            list means no options are sent.

    Returns:
        Created metadata schema as `DataRowMetadataSchema`

    Raises:
        ValueError: When `kind` is not a `DataRowMetadataKind`, or when
            options are provided for a kind other than Enum
    """
    if not isinstance(kind, DataRowMetadataKind):
        raise ValueError(f"kind '{kind}' must be a `DataRowMetadataKind`")

    # No id is passed: the schema does not exist yet.
    upsert_schema = _UpsertCustomMetadataSchemaInput(name=name,
                                                     kind=kind.value)
    if options:
        if kind != DataRowMetadataKind.enum:
            raise ValueError(
                f"Kind '{kind}' must be an Enum, if Enum options are provided"
            )
        # Each option name becomes its own input of kind `option`.
        upsert_schema.options = [
            _UpsertCustomMetadataSchemaEnumOptionInput(
                name=o, kind=DataRowMetadataKind.option.value)
            for o in options
        ]

    return self._upsert_schema(upsert_schema)
277+
278+
def update_schema(self, name: str, new_name: str) -> DataRowMetadataSchema:
    """ Rename a custom metadata schema

    >>> mdo.update_schema(name, new_name)

    Args:
        name (str): Current name of metadata schema
        new_name (str): New name of metadata schema

    Returns:
        Updated metadata schema as `DataRowMetadataSchema`

    Raises:
        KeyError: When provided name is not a valid custom metadata
    """
    current = self._validate_custom_schema_by_name(name)
    payload = _UpsertCustomMetadataSchemaInput(id=current.uid,
                                               name=new_name,
                                               kind=current.kind.value)

    existing_options = current.options
    if not existing_options:
        # Nothing to carry over; send the rename on its own.
        return self._upsert_schema(payload)

    # Re-send every existing option unchanged (same id and name).
    option_kind = DataRowMetadataKind.option.value
    carried_over = []
    for opt in existing_options:
        carried_over.append(
            _UpsertCustomMetadataSchemaEnumOptionInput(id=opt.uid,
                                                       name=opt.name,
                                                       kind=option_kind))
    payload.options = carried_over

    return self._upsert_schema(payload)
308+
309+
def update_enum_option(self, name: str, option: str,
                       new_option: str) -> DataRowMetadataSchema:
    """ Update Enum metadata schema option

    >>> mdo.update_enum_option(name, option, new_option)

    Args:
        name (str): Name of metadata schema to update
        option (str): Name of Enum option to update
        new_option (str): New name of Enum option

    Returns:
        Updated metadata schema as `DataRowMetadataSchema`

    Raises:
        KeyError: When provided name is not a valid custom metadata
        ValueError: When the metadata schema is not an Enum, or when
            `option` is not one of its current options
    """
    schema = self._validate_custom_schema_by_name(name)
    if schema.kind != DataRowMetadataKind.enum:
        raise ValueError(
            "Updating Enum option is only supported for Enum metadata schema"
        )

    # An Enum schema may have no options; treat that as an empty list
    # instead of failing while iterating over None.
    current_options = schema.options or []
    valid_options = [o.name for o in current_options]

    # Fail loudly rather than sending an upsert that changes nothing.
    if option not in valid_options:
        raise ValueError(
            f"Enum option '{option}' is not a valid option for Enum '{name}', valid options are: {valid_options}"
        )

    upsert_schema = _UpsertCustomMetadataSchemaInput(id=schema.uid,
                                                     name=schema.name,
                                                     kind=schema.kind.value)
    # The new name goes through the constructor so it gets the same `constr`
    # validation as every other option name.
    upsert_schema.options = [
        _UpsertCustomMetadataSchemaEnumOptionInput(
            id=o.uid,
            name=new_option if o.name == option else o.name,
            kind=o.kind.value) for o in current_options
    ]

    return self._upsert_schema(upsert_schema)
345+
346+
def delete_schema(self, name: str) -> bool:
    """ Delete a custom metadata schema by name

    >>> mdo.delete_schema(name)

    Args:
        name: Name of metadata schema to delete

    Returns:
        The `success` value reported by the delete mutation: True if
        deletion is successful, False if unsuccessful

    Raises:
        KeyError: When provided name is not a valid custom metadata
    """
    target = self._validate_custom_schema_by_name(name)
    query = """mutation DeleteCustomMetadataSchemaPyApi($where: WhereUniqueIdInput!) {
        deleteCustomMetadataSchema(schema: $where){
            success
        }
    }"""
    variables = {'where': {'id': target.uid}}
    response = self._client.execute(query, variables)
    outcome = response['deleteCustomMetadataSchema']

    return outcome['success']
371+
206372
def parse_metadata(
207373
self, unparsed: List[Dict[str,
208374
List[Union[str,
@@ -248,7 +414,7 @@ def parse_metadata_fields(
248414

249415
for f in unparsed:
250416
if f["schemaId"] not in self.fields_by_id:
251-
# Update metadata ontology if field can't be found
417+
# Fetch latest metadata ontology if metadata can't be found
252418
self.refresh_ontology()
253419
if f["schemaId"] not in self.fields_by_id:
254420
raise ValueError(
@@ -422,13 +588,69 @@ def _bulk_export(_data_row_ids: List[str]) -> List[DataRowMetadata]:
422588
data_row_ids,
423589
batch_size=self._batch_size)
424590

591+
def parse_upsert_metadata(self, metadata_fields) -> List[Dict[str, Any]]:
    """ Validate metadata fields and flatten them into upsert dictionaries.

    Accepts `DataRowMetadataField` objects and/or their dictionary
    representation. Used internally in `Dataset.create_data_rows()`

    Args:
        metadata_fields: List of `DataRowMetadataField` or a dictionary representation
            of `DataRowMetadataField`
    Returns:
        List of dictionaries representing a flattened view of metadata fields
    """

    def _as_field(candidate):
        # Already the right type: pass through untouched.
        if isinstance(candidate, DataRowMetadataField):
            return candidate
        if not isinstance(candidate, dict):
            raise ValueError(
                f"Metadata field '{candidate}' is neither 'DataRowMetadataField' type or a dictionary"
            )
        if any(key not in candidate for key in ("schema_id", "value")):
            raise ValueError(
                f"Custom metadata field '{candidate}' must have 'schema_id' and 'value' keys"
            )
        return DataRowMetadataField(schema_id=candidate["schema_id"],
                                    value=candidate["value"])

    # Normalize every entry first, so a malformed entry fails before any
    # field is parsed.
    normalized = list(map(_as_field, metadata_fields))

    # One field can parse into several upsert inputs; collect them all.
    flattened = []
    for field in normalized:
        flattened.extend(self._parse_upsert(field))

    return [entry.dict(by_alias=True) for entry in flattened]
626+
627+
def _upsert_schema(
    self, upsert_schema: _UpsertCustomMetadataSchemaInput
) -> DataRowMetadataSchema:
    """Run the upsert mutation for a custom metadata schema and parse the reply.

    The input is serialized with `exclude_none=True`, so unset fields such
    as a missing `id` are left out of the request variables.
    """
    query = """mutation UpsertCustomMetadataSchemaPyApi($data: UpsertCustomMetadataSchemaInput!) {
        upsertCustomMetadataSchema(data: $data){
            id
            name
            kind
            options {
                id
                name
                kind
            }
        }
    }"""
    variables = {"data": upsert_schema.dict(exclude_none=True)}
    response = self._client.execute(query, variables)
    raw_schema = response['upsertCustomMetadataSchema']
    return _parse_metadata_schema(raw_schema)
646+
425647
def _parse_upsert(
426648
self, metadatum: DataRowMetadataField
427649
) -> List[_UpsertDataRowMetadataInput]:
428650
"""Format for metadata upserts to GQL"""
429651

430652
if metadatum.schema_id not in self.fields_by_id:
431-
# Update metadata ontology if field can't be found
653+
# Fetch latest metadata ontology if metadata can't be found
432654
self.refresh_ontology()
433655
if metadatum.schema_id not in self.fields_by_id:
434656
raise ValueError(
@@ -453,41 +675,14 @@ def _parse_upsert(
453675

454676
return [_UpsertDataRowMetadataInput(**p) for p in parsed]
455677

456-
# Convert metadata to DataRowMetadataField objects, parse all fields
457-
# and return a dictionary of metadata fields for upsert
458-
def parse_upsert_metadata(self, metadata_fields):
459-
460-
def _convert_metadata_field(metadata_field):
461-
if isinstance(metadata_field, DataRowMetadataField):
462-
return metadata_field
463-
elif isinstance(metadata_field, dict):
464-
if not all(key in metadata_field
465-
for key in ("schema_id", "value")):
466-
raise ValueError(
467-
f"Custom metadata field '{metadata_field}' must have 'schema_id' and 'value' keys"
468-
)
469-
return DataRowMetadataField(
470-
schema_id=metadata_field["schema_id"],
471-
value=metadata_field["value"])
472-
else:
473-
raise ValueError(
474-
f"Metadata field '{metadata_field}' is neither 'DataRowMetadataField' type or a dictionary"
475-
)
476-
477-
# Convert all metadata fields to DataRowMetadataField type
478-
metadata_fields = [_convert_metadata_field(m) for m in metadata_fields]
479-
parsed_metadata = list(
480-
chain.from_iterable(self._parse_upsert(m) for m in metadata_fields))
481-
return [m.dict(by_alias=True) for m in parsed_metadata]
482-
483678
def _validate_delete(self, delete: DeleteDataRowMetadata):
484679
if not len(delete.fields):
485680
raise ValueError(f"No fields specified for {delete.data_row_id}")
486681

487682
deletes = set()
488683
for schema_id in delete.fields:
489684
if schema_id not in self.fields_by_id:
490-
# Update metadata ontology if field can't be found
685+
# Fetch latest metadata ontology if metadata can't be found
491686
self.refresh_ontology()
492687
if schema_id not in self.fields_by_id:
493688
raise ValueError(
@@ -504,6 +699,16 @@ def _validate_delete(self, delete: DeleteDataRowMetadata):
504699
data_row_id=delete.data_row_id,
505700
schema_ids=list(delete.fields)).dict(by_alias=True)
506701

702+
def _validate_custom_schema_by_name(self,
703+
name: str) -> DataRowMetadataSchema:
704+
if name not in self.custom_by_name_normalized:
705+
# Fetch latest metadata ontology if metadata can't be found
706+
self.refresh_ontology()
707+
if name not in self.custom_by_name_normalized:
708+
raise KeyError(f"'{name}' is not a valid custom metadata")
709+
710+
return self.custom_by_name_normalized[name]
711+
507712

508713
def _batch_items(iterable: List[Any], size: int) -> Generator[Any, None, None]:
509714
l = len(iterable)
@@ -596,3 +801,22 @@ def _validate_enum_parse(
596801
"schemaId": field.value,
597802
"value": {}
598803
}]
804+
805+
806+
def _parse_metadata_schema(
        unparsed: Dict[str, Union[str, List]]) -> DataRowMetadataSchema:
    """Convert a raw schema dictionary into a `DataRowMetadataSchema`.

    Args:
        unparsed: Dictionary with `id`, `name` and `kind` keys and an
            optional `options` list whose items carry `id` and `name`.

    Returns:
        A non-reserved `DataRowMetadataSchema`. Each option becomes a child
        schema of kind `option` whose parent is the schema's id; `options`
        is None when there are no options.

    Raises:
        KeyError: When `id`, `name` or `kind` is missing
        ValueError: When `kind` is not a valid `DataRowMetadataKind` value
    """
    uid = unparsed['id']
    name = unparsed['name']
    kind = DataRowMetadataKind(unparsed['kind'])
    # A missing or null `options` entry is treated as "no options" instead
    # of failing while iterating.
    raw_options = unparsed.get('options') or []
    options = [
        DataRowMetadataSchema(uid=o['id'],
                              name=o['name'],
                              reserved=False,
                              kind=DataRowMetadataKind.option,
                              parent=uid) for o in raw_options
    ]
    return DataRowMetadataSchema(uid=uid,
                                 name=name,
                                 reserved=False,
                                 kind=kind,
                                 options=options or None)

tests/integration/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def graphql_url(environ: str) -> str:
4747
if environ == Environ.PROD:
4848
return 'https://api.labelbox.com/graphql'
4949
elif environ == Environ.STAGING:
50-
return 'https://staging-api.labelbox.com/graphql'
50+
return 'https://api.lb-stage.xyz/graphql'
5151
elif environ == Environ.ONPREM:
5252
hostname = os.environ.get('LABELBOX_TEST_ONPREM_HOSTNAME', None)
5353
if hostname is None:

0 commit comments

Comments
 (0)