
Commit e6f523c

Merge pull request #615 from Labelbox/bulk-upload-convo
Ability to bulk upload conversations using Dataset.create_data_rows() + update staging URL
2 parents 8e0c042 + a8fca14 commit e6f523c

5 files changed: +186 additions, −3 deletions

labelbox/schema/dataset.py

Lines changed: 49 additions & 0 deletions
@@ -226,6 +226,7 @@ def _create_descriptor_file(self, items, max_attachments_per_data_row=None):
             >>> {DataRow.row_data:"/path/to/file1.jpg"},
             >>> "path/to/file2.jpg",
             >>> {"tileLayerUrl" : "http://", ...}
+            >>> {"conversationalData" : [...], ...}
             >>> ])

         For an example showing how to upload tiled data_rows see the following notebook:
@@ -280,6 +281,33 @@ def validate_attachments(item):
                     )
             return attachments

+        def validate_conversational_data(conversational_data: list) -> None:
+            """
+            Checks each conversational message for keys expected as per https://docs.labelbox.com/reference/text-conversational#sample-conversational-json
+
+            Args:
+                conversational_data (list): list of dictionaries.
+            """
+
+            def check_message_keys(message):
+                accepted_message_keys = set([
+                    "messageId", "timestampUsec", "content", "user", "align",
+                    "canLabel"
+                ])
+                for key in message.keys():
+                    if not key in accepted_message_keys:
+                        raise KeyError(
+                            f"Invalid {key} key found! Accepted keys in messages list is {accepted_message_keys}"
+                        )
+
+            if conversational_data and not isinstance(conversational_data,
+                                                      list):
+                raise ValueError(
+                    f"conversationalData must be a list. Found {type(conversational_data)}"
+                )
+
+            [check_message_keys(message) for message in conversational_data]
+
         def parse_metadata_fields(item):
             metadata_fields = item.get('metadata_fields')
             if metadata_fields:
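
Since validate_conversational_data is nested inside _create_descriptor_file and not importable on its own, here is a standalone re-implementation sketch of what the key check accepts and rejects (illustrative only, not part of the PR):

ACCEPTED_MESSAGE_KEYS = {
    "messageId", "timestampUsec", "content", "user", "align", "canLabel"
}


def check_message_keys(message: dict) -> None:
    # Mirrors the nested validator: reject any top-level key
    # outside the documented message schema.
    for key in message:
        if key not in ACCEPTED_MESSAGE_KEYS:
            raise KeyError(f"Invalid {key} key found!")


check_message_keys({"messageId": "message-0", "content": "hi"})  # passes

try:
    check_message_keys({"messageId": "message-0", "emoji": ":wave:"})
except KeyError as err:
    print(err)  # the unexpected "emoji" key is rejected

Note that the check is shallow: keys nested inside the user object are never inspected.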
@@ -321,6 +349,27 @@ def convert_item(item):
            if "tileLayerUrl" in item:
                validate_attachments(item)
                return item
+
+            if "conversationalData" in item:
+                messages = item.pop("conversationalData")
+                version = item.pop("version")
+                type = item.pop("type")
+                if "externalId" in item:
+                    external_id = item.pop("externalId")
+                    item["external_id"] = external_id
+                validate_conversational_data(messages)
+                one_conversation = \
+                    {
+                        "type": type,
+                        "version": version,
+                        "messages": messages
+                    }
+                conversationUrl = self.client.upload_data(
+                    json.dumps(one_conversation),
+                    content_type="application/json",
+                    filename="conversational_data.json")
+                item["row_data"] = conversationUrl
+
            # Convert all payload variations into the same dict format
            item = format_row(item)
            # Make sure required keys exist (and there are no extra keys)
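
With this change, conversational data rows go through the same bulk path as files and tiled imagery: each item's messages are uploaded as a JSON file and the hosted URL is swapped in as row_data. A minimal usage sketch, assuming an API key and an existing dataset; lb_client, my_dataset, and the inline payload are illustrative (the payload shape comes from the sample bulk_conversation.json added below):

import labelbox

lb_client = labelbox.Client(api_key="<YOUR_API_KEY>")
my_dataset = lb_client.create_dataset(name="bulk-conversations-demo")

# "type" and "version" are popped unconditionally in convert_item,
# so every conversational item must carry them.
task = my_dataset.create_data_rows([{
    "externalId": "Convo-123",
    "type": "application/vnd.labelbox.conversational",
    "version": 1,
    "conversationalData": [{
        "messageId": "message-0",
        "timestampUsec": 1530718491,
        "content": "I love iphone! i just bought new iphone!",
        "user": {"userId": "Bot 002", "name": "Bot"},
        "align": "left",
        "canLabel": False,
    }],
}])
task.wait_till_done()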

tests/integration/conftest.py

Lines changed: 9 additions & 1 deletion
@@ -1,3 +1,4 @@
+import json
 import os
 import re
 import time
@@ -190,10 +191,17 @@ def iframe_url(environ) -> str:
 @pytest.fixture
 def sample_video() -> str:
     path_to_video = 'tests/integration/media/cat.mp4'
-    assert os.path.exists(path_to_video)
     return path_to_video


+@pytest.fixture
+def sample_bulk_conversation() -> list:
+    path_to_conversation = 'tests/integration/media/bulk_conversation.json'
+    with open(path_to_conversation) as json_file:
+        conversations = json.load(json_file)
+    return conversations
+
+
 @pytest.fixture
 def organization(client):
     # Must have at least one seat open in your org to run these tests

tests/integration/media/bulk_conversation.json

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
+[
+  {
+    "externalId": "Convo-123",
+    "attachments": [
+      {
+        "type": "TEXT",
+        "value": "IOWA, Zone 2232, June 2022 [Text string]"
+      },
+      {
+        "type": "IMAGE",
+        "value": "https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg"
+      }
+    ],
+    "type": "application/vnd.labelbox.conversational",
+    "version": 1,
+    "conversationalData": [
+      {
+        "messageId": "message-0",
+        "timestampUsec": 1530718491,
+        "content": "I love iphone! i just bought new iphone! :smiling_face_with_3_hearts: :calling:",
+        "user": {
+          "userId": "Bot 002",
+          "name": "Bot"
+        },
+        "align": "left",
+        "canLabel": false
+      },
+      {
+        "messageId": "message-1",
+        "timestampUsec": 1530718503,
+        "content": "Thats good for you, i'm not very into new tech",
+        "user": {
+          "userId": "User 00686",
+          "name": "User"
+        },
+        "align": "right",
+        "canLabel": true
+      },
+      {
+        "messageId": "message-2",
+        "timestampUsec": 1530718516,
+        "content": "I am a college student and i am a college student :female-teacher::skin-tone-2:",
+        "user": {
+          "userId": "Bot 002",
+          "name": "Bot"
+        },
+        "align": "left",
+        "canLabel": false
+      },
+      {
+        "messageId": "message-3",
+        "timestampUsec": 1530718528,
+        "content": "I am go to gym and live on donations :woman-lifting-weights::skin-tone-6:",
+        "user": {
+          "userId": "User 00686",
+          "name": "User"
+        },
+        "align": "right",
+        "canLabel": true
+      }
+    ]
+  },
+  {
+    "externalId": "Convo-456",
+    "attachments": [
+      {
+        "type": "TEXT",
+        "value": "IOWA, Zone 1234567, July 2022 [Text string]"
+      },
+      {
+        "type": "IMAGE",
+        "value": "https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg"
+      }
+    ],
+    "type": "application/vnd.labelbox.conversational",
+    "version": 1,
+    "conversationalData": [
+      {
+        "messageId": "message-0",
+        "timestampUsec": 1530718491,
+        "content": "I love iphone! i just bought new iphone! :smiling_face_with_3_hearts: :calling:",
+        "user": {
+          "userId": "Bot 002",
+          "name": "Bot"
+        },
+        "align": "left",
+        "canLabel": false
+      },
+      {
+        "messageId": "message-1",
+        "timestampUsec": 1530718503,
+        "content": "Thats good for you, i'm not very into new tech",
+        "user": {
+          "userId": "User 00686",
+          "name": "User"
+        },
+        "align": "right",
+        "canLabel": true
+      },
+      {
+        "messageId": "message-2",
+        "timestampUsec": 1530718516,
+        "content": "I am a college student and i am a college student :female-teacher::skin-tone-2:",
+        "user": {
+          "userId": "Bot 002",
+          "name": "Bot"
+        },
+        "align": "left",
+        "canLabel": false
+      }
+    ]
+  }
+]
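
This sample pairs with the sample_bulk_conversation fixture and the new validator. A quick sanity-check sketch, assuming the file lives at the path used in conftest.py:

import json

# Load the sample and confirm every message uses only the accepted keys.
with open("tests/integration/media/bulk_conversation.json") as f:
    conversations = json.load(f)

accepted = {"messageId", "timestampUsec", "content", "user", "align", "canLabel"}
for conversation in conversations:
    for message in conversation["conversationalData"]:
        assert set(message) <= accepted, f"unexpected keys in {message['messageId']}"
print(f"{len(conversations)} conversations validated")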

tests/integration/test_client_errors.py

Lines changed: 3 additions & 2 deletions
@@ -43,7 +43,6 @@ def test_semantic_error(client):


 def test_timeout_error(client, project):
-    time.sleep(60)  #Fails to connect if we don't wait
     with pytest.raises(labelbox.exceptions.TimeoutError) as excinfo:
        query_str = """query getOntology {
            project (where: {id: $%s}) {
@@ -52,7 +51,9 @@ def test_timeout_error(client, project):
                }
            }
        } """ % (project.uid)
-        client.execute(query_str, check_naming=False, timeout=0.01)
+
+        # Setting connect timeout to 30s, and read timeout to 0.01s
+        client.execute(query_str, check_naming=False, timeout=(30.0, 0.01))


 def test_query_complexity_error(client):
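
The tuple form follows the (connect, read) timeout convention from the requests library, which the SDK client appears to pass through; this is an assumption based on the diff, not confirmed here. A small sketch of the same semantics against an illustrative endpoint:

import requests

try:
    # 30s to establish the connection, then 0.01s to wait for the first
    # response byte; the tiny read timeout is what trips the test reliably.
    requests.get("https://api.labelbox.com", timeout=(30.0, 0.01))
except requests.exceptions.Timeout as err:
    print(f"timed out as expected: {err}")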

tests/integration/test_dataset.py

Lines changed: 12 additions & 0 deletions
@@ -1,3 +1,4 @@
+import json
 import pytest
 import requests
 from labelbox import Dataset
@@ -101,6 +102,17 @@ def test_upload_video_file(dataset, sample_video: str) -> None:
     assert response.headers['Content-Type'] == 'video/mp4'


+def test_bulk_conversation(dataset, sample_bulk_conversation: list) -> None:
+    """
+    Tests that bulk conversations can be uploaded.
+
+    """
+    task = dataset.create_data_rows(sample_bulk_conversation)
+    task.wait_till_done()
+
+    assert len(list(dataset.data_rows())) == len(sample_bulk_conversation)
+
+
 def test_data_row_export(dataset, image_url):
     n_data_rows = 5
     ids = set()
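
One possible hardening of the new test, sketched here as a hypothetical variant (not part of the PR), assuming the SDK's Task entity exposes status and errors fields: asserting on them surfaces server-side failures more directly than the row-count check alone.

def test_bulk_conversation_reports_errors(dataset, sample_bulk_conversation: list) -> None:
    task = dataset.create_data_rows(sample_bulk_conversation)
    task.wait_till_done()
    # If any row failed server-side, task.errors gives the reason
    # in the assertion message instead of a bare count mismatch.
    assert task.status == "COMPLETE", task.errors
    assert len(list(dataset.data_rows())) == len(sample_bulk_conversation)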
