Skip to content

Commit 3d7db22

Browse files
authored
Feature/datasets (#9)
* Add support for Dataset CRUD operations
1 parent 5747d56 commit 3d7db22

20 files changed

+1435
-78
lines changed

.github/workflows/main.yml .github/workflows/test.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: main
1+
name: test
22

33
on:
44
push:

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Azure ML Go SDK
22

3-
[![Actions Status](https://github.com/telemaco019/azureml-go-sdk/workflows/main/badge.svg)](https://github.com/telemaco019/azureml-go-sdk/actions)
3+
[![Actions Status](https://github.com/telemaco019/azureml-go-sdk/workflows/test/badge.svg)](https://github.com/telemaco019/azureml-go-sdk/actions)
44
[![codecov](https://codecov.io/gh/telemaco019/azureml-go-sdk/branch/main/graph/badge.svg)](https://codecov.io/gh/telemaco019/azureml-go-sdk)
55

66
Go SDK for configuring [Azure Machine Learning](https://azure.microsoft.com/en-us/services/machine-learning/)
@@ -53,5 +53,5 @@ datastores, err := ws.GetDatastores( "rg-name", "workspace-name" )
5353
### Get a specific Datastore of a workspace
5454

5555
```go
56-
datastore, err := ws.GetDatastores( "rg-name", "workspace-name", "datastore-name" )
56+
datastore, err := ws.GetDatastore( "rg-name", "workspace-name", "datastore-name" )
5757
```

go.mod

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ require (
1515
github.com/google/uuid v1.1.1 // indirect
1616
github.com/kylelemons/godebug v1.1.0 // indirect
1717
github.com/pmezard/go-difflib v1.0.0 // indirect
18+
github.com/stretchr/objx v0.1.0 // indirect
1819
github.com/tidwall/match v1.1.1 // indirect
1920
github.com/tidwall/pretty v1.2.0 // indirect
2021
go.uber.org/atomic v1.9.0 // indirect

go.sum

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
2121
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
2222
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
2323
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
24+
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
2425
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
2526
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
2627
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:f628698c5df5da698ecd9f3de18513c4574d17adcbaf5dd17c28ca5c026e523e
3+
size 603
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:5036ebcf8938c5f59a7561a29efd38e3fce6509a1dcde0ec5d58cc30dc3a0d0d
3+
size 603
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:4e4e897a11dc2b854a9acced8fa094a5b9e7adc007170a353be0a6c664a992f2
3+
size 496
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:0202b4e9ddf132ec87a6795852fe46ee6f16516d31fc4eb9c9cd290b00aa7229
3+
size 2145
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:05a675413bcb5f8d8d771ea085d457b68be2b72e7de6bc30dab6ba9b4b41e4fd
3+
size 1600

workspace/const.go

+1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ package workspace
22

33
// Package-level defaults of the workspace package.
const (
	// DefaultAmlOauthScope is the OAuth scope string
	// "https://management.azure.com/.default".
	DefaultAmlOauthScope string = "https://management.azure.com/.default"

	// NConcurrentWorkers is a worker count fixed at 8.
	// NOTE(review): presumably bounds the number of concurrent requests;
	// confirm against the code that reads it.
	NConcurrentWorkers = 8
)

workspace/converters.go

+93-17
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,21 @@ package workspace
33
import (
44
"fmt"
55
"github.com/tidwall/gjson"
6+
"go.uber.org/zap"
7+
"regexp"
68
)
79

8-
func unmarshalDatastore(json []byte) *Datastore {
9-
sysData := SystemData{
10-
CreationDate: gjson.GetBytes(json, "systemData.createdAt").Time(),
11-
CreationUserType: gjson.GetBytes(json, "systemData.createdByType").Str,
12-
CreationUser: gjson.GetBytes(json, "systemData.createdBy").Str,
13-
LastModifiedDate: gjson.GetBytes(json, "systemData.lastModifiedAt").Time(),
14-
LastModifiedUserType: gjson.GetBytes(json, "systemData.lastModifiedByType").Str,
15-
LastModifiedUser: gjson.GetBytes(json, "systemData.lastModifiedBy").Str,
10+
func unmarshalDatastoreArray(json []byte) []Datastore {
11+
jsonDatastoreArray := gjson.GetBytes(json, "value").Array()
12+
datastoreSlice := make([]Datastore, gjson.GetBytes(json, "value.#").Int())
13+
for i, jsonDatastore := range jsonDatastoreArray {
14+
datastore := unmarshalDatastore([]byte(jsonDatastore.Raw))
15+
datastoreSlice[i] = *datastore
1616
}
17+
return datastoreSlice
18+
}
19+
20+
func unmarshalDatastore(json []byte) *Datastore {
1721
auth := DatastoreAuth{
1822
CredentialsType: gjson.GetBytes(json, "properties.contents.credentials.credentialsType").Str,
1923
TenantId: gjson.GetBytes(json, "properties.contents.credentials.tenantId").Str,
@@ -32,20 +36,75 @@ func unmarshalDatastore(json []byte) *Datastore {
3236
StorageContainerName: gjson.GetBytes(json, "properties.contents.containerName").Str,
3337
StorageType: gjson.GetBytes(json, "properties.contents.contentsType").Str,
3438

35-
SystemData: &sysData,
39+
SystemData: unmarshalSystemData(json),
3640
Auth: &auth,
3741
}
3842
}
3943

40-
func unmarshalDatastoreArray(json []byte) []Datastore {
41-
jsonDatastoreArray := gjson.GetBytes(json, "value").Array()
42-
datastoreSlice := make([]Datastore, gjson.GetBytes(json, "value.#").Int())
43-
for i, jsonDatastore := range jsonDatastoreArray {
44-
datastore := unmarshalDatastore([]byte(jsonDatastore.Raw))
45-
datastoreSlice[i] = *datastore
46-
fmt.Println(datastore)
44+
type DatasetConverter struct {
45+
logger *zap.SugaredLogger
46+
}
47+
48+
func (d DatasetConverter) unmarshalDatasetVersionArray(datasetName string, json []byte) []Dataset {
49+
jsonDatasetArray := gjson.GetBytes(json, "value").Array()
50+
datasetSlice := make([]Dataset, gjson.GetBytes(json, "value.#").Int())
51+
for i, jsonDataset := range jsonDatasetArray {
52+
dataset := d.unmarshalDatasetVersion(datasetName, []byte(jsonDataset.Raw))
53+
datasetSlice[i] = *dataset
54+
}
55+
return datasetSlice
56+
}
57+
58+
func (d DatasetConverter) unmarshalDatasetVersion(datasetName string, json []byte) *Dataset {
59+
return &Dataset{
60+
Id: gjson.GetBytes(json, "id").Str,
61+
Name: datasetName,
62+
Description: gjson.GetBytes(json, "properties.description").Str,
63+
DatastoreId: gjson.GetBytes(json, "properties.datastoreId").Str,
64+
Version: int(gjson.GetBytes(json, "name").Int()),
65+
FilePaths: d.unmarshalDatasetPaths(gjson.GetBytes(json, "properties.paths"), "file"),
66+
DirectoryPaths: d.unmarshalDatasetPaths(gjson.GetBytes(json, "properties.paths"), "folder"),
67+
SystemData: unmarshalSystemData(json),
68+
}
69+
}
70+
71+
func (d DatasetConverter) unmarshalDatasetNextVersion(json []byte) int {
72+
return int(gjson.GetBytes(json, "properties.nextVersion").Int())
73+
}
74+
75+
func (d DatasetConverter) unmarshalDatasetPaths(jsonDatasetPaths gjson.Result, pathType string) []DatasetPath {
76+
result := make([]DatasetPath, 0)
77+
jsonDatasetPaths.ForEach(func(key, value gjson.Result) bool {
78+
path := value.Get(pathType)
79+
if path.Exists() == false {
80+
d.logger.Errorf("cannot unmarshal dataset path: path type %q does exist", pathType)
81+
return false
82+
}
83+
if path.Type != gjson.Null {
84+
isDatastorePath, _ := regexp.MatchString(fmt.Sprintf("%s.*", datastorePathPrefix), path.Str)
85+
if isDatastorePath == true {
86+
datastorePath, err := NewDatastorePath(path.Str)
87+
if err != nil {
88+
d.logger.Errorf("error unmarshalling dataset path: %s", err.Error())
89+
} else {
90+
result = append(result, datastorePath)
91+
}
92+
}
93+
}
94+
return true
95+
})
96+
return result
97+
}
98+
99+
func unmarshalSystemData(json []byte) *SystemData {
100+
return &SystemData{
101+
CreationDate: gjson.GetBytes(json, "systemData.createdAt").Time(),
102+
CreationUserType: gjson.GetBytes(json, "systemData.createdByType").Str,
103+
CreationUser: gjson.GetBytes(json, "systemData.createdBy").Str,
104+
LastModifiedDate: gjson.GetBytes(json, "systemData.lastModifiedAt").Time(),
105+
LastModifiedUserType: gjson.GetBytes(json, "systemData.lastModifiedByType").Str,
106+
LastModifiedUser: gjson.GetBytes(json, "systemData.lastModifiedBy").Str,
47107
}
48-
return datastoreSlice
49108
}
50109

51110
func toWriteDatastoreSchema(datastore *Datastore) *SchemaWrapper {
@@ -81,3 +140,20 @@ func toWriteDatastoreSchema(datastore *Datastore) *SchemaWrapper {
81140
},
82141
}
83142
}
143+
144+
func toWriteDatasetSchema(dataset *Dataset) *SchemaWrapper {
145+
pathSchemas := make([]DatasetPathsSchema, len(dataset.FilePaths)+len(dataset.DirectoryPaths))
146+
for i, filePath := range dataset.FilePaths {
147+
pathSchemas[i] = DatasetPathsSchema{FilePath: filePath.String()}
148+
}
149+
for i, directoryPath := range dataset.DirectoryPaths {
150+
pathSchemas[i] = DatasetPathsSchema{DirectoryPath: directoryPath.String()}
151+
}
152+
153+
return &SchemaWrapper{
154+
Properties: WriteDatasetSchema{
155+
Description: dataset.Description,
156+
Paths: pathSchemas,
157+
},
158+
}
159+
}

workspace/convertes_test.go

+176
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package workspace
22

33
import (
4+
"fmt"
45
"github.com/stretchr/testify/assert"
6+
"github.com/tidwall/gjson"
7+
"go.uber.org/zap"
58
"testing"
69
"time"
710
)
@@ -153,3 +156,176 @@ func TestToWriteDatastoreSchema_NilAuth(t *testing.T) {
153156
}
154157
assert.Equal(t, expected, writeSchema)
155158
}
159+
160+
func TestToWriteDatasetSchema(t *testing.T) {
161+
a := assert.New(t)
162+
l, _ := zap.NewDevelopment()
163+
logger := l.Sugar()
164+
165+
testCases := []struct {
166+
testCaseName string
167+
testCase func()
168+
}{
169+
{
170+
testCaseName: "Test convert empty dataset",
171+
testCase: func() {
172+
d := &Dataset{}
173+
schema := toWriteDatasetSchema(d)
174+
props := schema.Properties.(WriteDatasetSchema)
175+
a.Empty(props.Description)
176+
a.Empty(props.Paths)
177+
},
178+
},
179+
{
180+
testCaseName: "Test convert dataset with datastore paths",
181+
testCase: func() {
182+
d := &Dataset{
183+
Id: "id",
184+
Name: "name",
185+
Description: "description",
186+
DatastoreId: "datastore-id",
187+
Version: 1,
188+
FilePaths: []DatasetPath{
189+
DatastorePath{
190+
DatastoreName: "foo",
191+
Path: "file.json",
192+
},
193+
DatastorePath{
194+
DatastoreName: "foo2",
195+
Path: "file2.json",
196+
},
197+
DatastorePath{
198+
DatastoreName: "foo3",
199+
Path: "file3.json",
200+
},
201+
},
202+
DirectoryPaths: []DatasetPath{
203+
DatastorePath{
204+
DatastoreName: "foo1",
205+
Path: "/dir1",
206+
},
207+
DatastorePath{
208+
DatastoreName: "foo2",
209+
Path: "/dir2",
210+
},
211+
},
212+
SystemData: &SystemData{},
213+
}
214+
props := toWriteDatasetSchema(d)
215+
writeSchema := props.Properties.(WriteDatasetSchema)
216+
217+
a.Equal(d.Description, writeSchema.Description)
218+
a.Equal(len(d.DirectoryPaths)+len(d.FilePaths), len(writeSchema.Paths))
219+
},
220+
},
221+
{
222+
testCaseName: "Test datastore directory paths conversion",
223+
testCase: func() {
224+
d := &Dataset{
225+
DirectoryPaths: []DatasetPath{
226+
DatastorePath{
227+
DatastoreName: "datastore",
228+
Path: "/foo/bar/",
229+
},
230+
},
231+
}
232+
props := toWriteDatasetSchema(d)
233+
schema := props.Properties.(WriteDatasetSchema)
234+
schemaPath := schema.Paths[0]
235+
a.Empty(schemaPath.FilePath)
236+
a.Equal(d.DirectoryPaths[0].String(), schemaPath.DirectoryPath)
237+
},
238+
},
239+
{
240+
testCaseName: "Test file paths conversion",
241+
testCase: func() {
242+
243+
},
244+
},
245+
}
246+
for _, test := range testCases {
247+
logger.Infof("Running test %q", test.testCaseName)
248+
test.testCase()
249+
}
250+
}
251+
252+
func TestUnmarshalDatasetPaths(t *testing.T) {
253+
a := assert.New(t)
254+
l, _ := zap.NewDevelopment()
255+
logger := l.Sugar()
256+
converter := &DatasetConverter{logger: logger}
257+
258+
testCases := []struct {
259+
testCaseName string
260+
testCase func()
261+
}{
262+
{
263+
testCaseName: "Test unmarshal dataset paths empty list",
264+
testCase: func() {
265+
paths := gjson.Parse("[]")
266+
result := converter.unmarshalDatasetPaths(paths, "")
267+
a.Empty(result)
268+
},
269+
},
270+
{
271+
testCaseName: "Test unmarshal dataset paths invalid path type",
272+
testCase: func() {
273+
paths := gjson.Parse("[{\"file\": null, \"folder\": \"azureml://datastores/datastore/foo\"}]")
274+
result := converter.unmarshalDatasetPaths(paths, "foo")
275+
a.Empty(result)
276+
},
277+
},
278+
{
279+
testCaseName: "Test unmarshal dataset paths not matching datastore regex",
280+
testCase: func() {
281+
paths := gjson.Parse("[{\"file\": null, \"folder\": \"path\"}]")
282+
result := converter.unmarshalDatasetPaths(paths, "folder")
283+
a.Empty(result)
284+
},
285+
},
286+
{
287+
testCaseName: "Test unmarshal dataset folder datastore paths",
288+
testCase: func() {
289+
firstPath := "azureml://datastores/datastore/paths/path/bar"
290+
secondPath := "azureml://datastores/datastore2/paths/foo2"
291+
paths := gjson.Parse(fmt.Sprintf("[{\"file\": null, \"folder\": \"%s\"}, {\"file\": null, \"folder\": \"%s\"}]", firstPath, secondPath))
292+
filePaths := converter.unmarshalDatasetPaths(paths, "file")
293+
folderPaths := converter.unmarshalDatasetPaths(paths, "folder")
294+
a.Empty(filePaths)
295+
a.Equal(2, len(folderPaths))
296+
a.Equal(firstPath, folderPaths[0].String())
297+
},
298+
},
299+
{
300+
testCaseName: "Test unmarshal dataset file datastore paths",
301+
testCase: func() {
302+
firstPath := "azureml://datastores/datastore/paths/foo/bar/foo"
303+
secondPath := "azureml://datastores/datastore2/paths/foo2"
304+
paths := gjson.Parse(fmt.Sprintf("[{\"folder\": null, \"file\": \"%s\"}, {\"folder\": null, \"file\": \"%s\"}]", firstPath, secondPath))
305+
folderPaths := converter.unmarshalDatasetPaths(paths, "folder")
306+
filePaths := converter.unmarshalDatasetPaths(paths, "file")
307+
a.Empty(folderPaths)
308+
a.Equal(2, len(filePaths))
309+
a.Equal(firstPath, filePaths[0].String())
310+
},
311+
},
312+
{
313+
testCaseName: "Test unmarshal dataset malformed datastore paths",
314+
testCase: func() {
315+
firstPath := "azureml://datastores/datastore/paths/foo/bar/foo"
316+
secondPath := "azureml://datastores/malformed"
317+
paths := gjson.Parse(fmt.Sprintf("[{\"folder\": null, \"file\": \"%s\"}, {\"folder\": null, \"file\": \"%s\"}]", firstPath, secondPath))
318+
folderPaths := converter.unmarshalDatasetPaths(paths, "folder")
319+
filePaths := converter.unmarshalDatasetPaths(paths, "file")
320+
a.Empty(folderPaths)
321+
a.Equal(1, len(filePaths))
322+
a.Equal(firstPath, filePaths[0].String())
323+
},
324+
},
325+
}
326+
327+
for _, test := range testCases {
328+
logger.Infof("Running test %q", test.testCaseName)
329+
test.testCase()
330+
}
331+
}

0 commit comments

Comments
 (0)