1
+ {
2
+ "$schema" : " https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#" ,
3
+ "contentVersion" : " 1.0.0.0" ,
4
+ "metadata" : {
5
+ "_generator" : {
6
+ "name" : " bicep" ,
7
+ "version" : " 0.10.61.36676" ,
8
+ "templateHash" : " 4722508883802150279"
9
+ }
10
+ },
11
+ "parameters" : {
12
+ "location" : {
13
+ "type" : " string" ,
14
+ "defaultValue" : " [resourceGroup().location]" ,
15
+ "metadata" : {
16
+ "description" : " Specifies the location for all resources."
17
+ }
18
+ },
19
+ "workspaceName" : {
20
+ "type" : " string" ,
21
+ "metadata" : {
22
+ "description" : " Specifies the name of the Azure Machine Learning workspace where sweep job will be deployed"
23
+ }
24
+ },
25
+ "jobName" : {
26
+ "type" : " string" ,
27
+ "metadata" : {
28
+ "description" : " Specifies the unique name for sweep job."
29
+ }
30
+ },
31
+ "computeName" : {
32
+ "type" : " string" ,
33
+ "metadata" : {
34
+ "description" : " Specifies the name of the Azure Machine Learning amlcompute cluster on which job will be run."
35
+ }
36
+ },
37
+ "storageAccountName" : {
38
+ "type" : " string" ,
39
+ "metadata" : {
40
+ "description" : " The name for the storage account to be created and associated with the workspace."
41
+ }
42
+ },
43
+ "experimentName" : {
44
+ "type" : " string" ,
45
+ "metadata" : {
46
+ "description" : " Specifies the name of the Azure Machine Learning experiment under which job will be created."
47
+ }
48
+ },
49
+ "_artifactsLocation" : {
50
+ "type" : " string" ,
51
+ "defaultValue" : " [deployment().properties.templateLink.uri]" ,
52
+ "metadata" : {
53
+ "description" : " The base URI where artifacts required by this template are located including a trailing '/'."
54
+ }
55
+ },
56
+ "_artifactsLocationSasToken" : {
57
+ "type" : " secureString" ,
58
+ "defaultValue" : " " ,
59
+ "metadata" : {
60
+ "description" : " The sasToken required to access _artifactsLocation."
61
+ }
62
+ },
63
+ "inputs" : {
64
+ "type" : " object" ,
65
+ "defaultValue" : {
66
+ "iris_csv" : {
67
+ "mode" : " ReadOnlyMount" ,
68
+ "uri" : " [uri(parameters('_artifactsLocation'), format('data/iris.csv{0}', parameters('_artifactsLocationSasToken')))]" ,
69
+ "jobInputType" : " uri_file"
70
+ }
71
+ },
72
+ "metadata" : {
73
+ "description" : " Specifies the dictionary of inputs for the sweep job."
74
+ }
75
+ },
76
+ "limits" : {
77
+ "type" : " object" ,
78
+ "defaultValue" : {
79
+ "jobLimitsType" : " Sweep" ,
80
+ "timeout" : " PT20M" ,
81
+ "trialTimeout" : " PT50S" ,
82
+ "maxConcurrentTrials" : 3 ,
83
+ "maxTotalTrials" : 5
84
+ },
85
+ "metadata" : {
86
+ "description" : " Specifies execution constraints for sweep job."
87
+ }
88
+ },
89
+ "objective" : {
90
+ "type" : " object" ,
91
+ "defaultValue" : {
92
+ "goal" : " maximize" ,
93
+ "primaryMetric" : " result"
94
+ },
95
+ "metadata" : {
96
+ "description" : " Specifies objective for sweep job."
97
+ }
98
+ },
99
+ "samplingAlgorithmType" : {
100
+ "type" : " string" ,
101
+ "defaultValue" : " Random" ,
102
+ "metadata" : {
103
+ "description" : " Specifies sampling algorithm for sweep job."
104
+ }
105
+ },
106
+ "searchSpace" : {
107
+ "type" : " object" ,
108
+ "defaultValue" : {
109
+ "learning_rate" : [
110
+ " uniform" ,
111
+ [
112
+ " [json('0.01')]" ,
113
+ " [json('0.9')]"
114
+ ]
115
+ ],
116
+ "boosting" : [
117
+ " choice" ,
118
+ [
119
+ [
120
+ " gbdt" ,
121
+ " dart"
122
+ ]
123
+ ]
124
+ ]
125
+ },
126
+ "metadata" : {
127
+ "description" : " Specifies the hyperparameter search space for sweep job."
128
+ }
129
+ },
130
+ "command" : {
131
+ "type" : " string" ,
132
+ "defaultValue" : " python main.py --iris-csv ${{inputs.iris_csv}} --learning-rate ${{search_space.learning_rate}} --boosting ${{search_space.boosting}}" ,
133
+ "metadata" : {
134
+ "description" : " Specifies command to be executed by trials of sweep job."
135
+ }
136
+ },
137
+ "environmentName" : {
138
+ "type" : " string" ,
139
+ "defaultValue" : " AzureML-lightgbm-3.2-ubuntu18.04-py37-cpu" ,
140
+ "metadata" : {
141
+ "description" : " Specifies the curated environment to run sweep job."
142
+ }
143
+ }
144
+ },
145
+ "resources" : [
146
+ {
147
+ "type" : " Microsoft.MachineLearningServices/workspaces/jobs" ,
148
+ "apiVersion" : " 2022-06-01-preview" ,
149
+ "name" : " [format('{0}/{1}', parameters('workspaceName'), parameters('jobName'))]" ,
150
+ "properties" : {
151
+ "description" : " Sweep Job Resource from ARM Template" ,
152
+ "properties" : {},
153
+ "tags" : {
154
+ "referenceNotebook" : " https://github.com/Azure/azureml-examples/blob/main/sdk/jobs/single-step/lightgbm/iris/lightgbm-iris-sweep.ipynb"
155
+ },
156
+ "computeId" : " [resourceId('Microsoft.MachineLearningServices/workspaces/computes', parameters('workspaceName'), parameters('computeName'))]" ,
157
+ "displayName" : " Sweep Job Resource" ,
158
+ "experimentName" : " [parameters('experimentName')]" ,
159
+ "isArchived" : false ,
160
+ "jobType" : " Sweep" ,
161
+ "inputs" : " [parameters('inputs')]" ,
162
+ "limits" : " [parameters('limits')]" ,
163
+ "objective" : " [parameters('objective')]" ,
164
+ "samplingAlgorithm" : {
165
+ "samplingAlgorithmType" : " [parameters('samplingAlgorithmType')]"
166
+ },
167
+ "searchSpace" : " [parameters('searchSpace')]" ,
168
+ "trial" : {
169
+ "codeId" : " [reference(resourceId('Microsoft.Resources/deployments', 'blob')).outputs.codeId.value]" ,
170
+ "command" : " [parameters('command')]" ,
171
+ "environmentId" : " [resourceId('Microsoft.MachineLearningServices/workspaces/environments/versions', parameters('workspaceName'), parameters('environmentName'), reference(resourceId('Microsoft.MachineLearningServices/workspaces/environments', split(format('{0}/{1}', parameters('workspaceName'), parameters('environmentName')), '/')[0], split(format('{0}/{1}', parameters('workspaceName'), parameters('environmentName')), '/')[1]), '2022-05-01').latestVersion)]" ,
172
+ "environmentVariables" : {}
173
+ }
174
+ },
175
+ "dependsOn" : [
176
+ " [resourceId('Microsoft.Resources/deployments', 'blob')]"
177
+ ]
178
+ },
179
+ {
180
+ "type" : " Microsoft.Resources/deployments" ,
181
+ "apiVersion" : " 2020-10-01" ,
182
+ "name" : " blob" ,
183
+ "properties" : {
184
+ "expressionEvaluationOptions" : {
185
+ "scope" : " inner"
186
+ },
187
+ "mode" : " Incremental" ,
188
+ "parameters" : {
189
+ "location" : {
190
+ "value" : " [parameters('location')]"
191
+ },
192
+ "workspaceName" : {
193
+ "value" : " [parameters('workspaceName')]"
194
+ },
195
+ "storageAccountName" : {
196
+ "value" : " [parameters('storageAccountName')]"
197
+ }
198
+ },
199
+ "template" : {
200
+ "$schema" : " https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#" ,
201
+ "contentVersion" : " 1.0.0.0" ,
202
+ "metadata" : {
203
+ "_generator" : {
204
+ "name" : " bicep" ,
205
+ "version" : " 0.10.61.36676" ,
206
+ "templateHash" : " 17993837818224864413"
207
+ }
208
+ },
209
+ "parameters" : {
210
+ "workspaceName" : {
211
+ "type" : " string" ,
212
+ "metadata" : {
213
+ "description" : " Specifies the name of the Azure Machine Learning workspace where sweep job will be deployed"
214
+ }
215
+ },
216
+ "filename" : {
217
+ "type" : " string" ,
218
+ "defaultValue" : " main.py" ,
219
+ "metadata" : {
220
+ "description" : " Name of the blob as it is stored in the blob container"
221
+ }
222
+ },
223
+ "containerName" : {
224
+ "type" : " string" ,
225
+ "defaultValue" : " hdscript" ,
226
+ "metadata" : {
227
+ "description" : " Name of the blob container"
228
+ }
229
+ },
230
+ "location" : {
231
+ "type" : " string" ,
232
+ "defaultValue" : " [resourceGroup().location]" ,
233
+ "metadata" : {
234
+ "description" : " Azure region where resources should be deployed"
235
+ }
236
+ },
237
+ "storageAccountName" : {
238
+ "type" : " string" ,
239
+ "metadata" : {
240
+ "description" : " Desired name of the storage account"
241
+ }
242
+ },
243
+ "codeVersion" : {
244
+ "type" : " string" ,
245
+ "defaultValue" : " 1" ,
246
+ "metadata" : {
247
+ "description" : " Specifies the code version for sweep job."
248
+ }
249
+ },
250
+ "codeId" : {
251
+ "type" : " string" ,
252
+ "defaultValue" : " code" ,
253
+ "metadata" : {
254
+ "description" : " Specifies the name prefix of the code asset for sweep job."
255
+ }
256
+ }
257
+ },
258
+ "variables" : {
259
+ "$fxv#0": "# imports\r\nimport os\r\nimport mlflow\r\nimport argparse\r\n\r\nimport pandas as pd\r\nimport lightgbm as lgbm\r\nimport matplotlib.pyplot as plt\r\n\r\nfrom sklearn.metrics import log_loss, accuracy_score\r\nfrom sklearn.preprocessing import LabelEncoder\r\nfrom sklearn.model_selection import train_test_split\r\n\r\n# define functions\r\ndef main(args):\r\n # enable auto logging\r\n mlflow.autolog()\r\n\r\n # setup parameters\r\n num_boost_round = args.num_boost_round\r\n params = {\r\n \"objective\": \"multiclass\",\r\n \"num_class\": 3,\r\n \"boosting\": args.boosting,\r\n \"num_iterations\": args.num_iterations,\r\n \"num_leaves\": args.num_leaves,\r\n \"num_threads\": args.num_threads,\r\n \"learning_rate\": args.learning_rate,\r\n \"metric\": args.metric,\r\n \"seed\": args.seed,\r\n \"verbose\": args.verbose,\r\n }\r\n\r\n # read in data\r\n df = pd.read_csv(args.iris_csv)\r\n\r\n # process data\r\n X_train, X_test, y_train, y_test, enc = process_data(df)\r\n\r\n # train model\r\n model = train_model(params, num_boost_round, X_train, X_test, y_train, y_test)\r\n\r\n\r\ndef process_data(df):\r\n # split dataframe into X and y\r\n X = df.drop([\"species\"], axis=1)\r\n y = df[\"species\"]\r\n\r\n # encode label\r\n enc = LabelEncoder()\r\n y = enc.fit_transform(y)\r\n\r\n # train/test split\r\n X_train, X_test, y_train, y_test = train_test_split(\r\n X, y, test_size=0.2, random_state=42\r\n )\r\n\r\n # return splits and encoder\r\n return X_train, X_test, y_train, y_test, enc\r\n\r\n\r\ndef train_model(params, num_boost_round, X_train, X_test, y_train, y_test):\r\n # create lightgbm datasets\r\n train_data = lgbm.Dataset(X_train, label=y_train)\r\n test_data = lgbm.Dataset(X_test, label=y_test)\r\n\r\n # train model\r\n model = lgbm.train(\r\n params,\r\n train_data,\r\n num_boost_round=num_boost_round,\r\n valid_sets=[test_data],\r\n valid_names=[\"test\"],\r\n )\r\n\r\n # return model\r\n return model\r\n\r\n\r\ndef parse_args():\r\n # setup 
arg parser\r\n parser = argparse.ArgumentParser()\r\n\r\n # add arguments\r\n parser.add_argument(\"--iris-csv\", type=str)\r\n parser.add_argument(\"--num-boost-round\", type=int, default=10)\r\n parser.add_argument(\"--boosting\", type=str, default=\"gbdt\")\r\n parser.add_argument(\"--num-iterations\", type=int, default=16)\r\n parser.add_argument(\"--num-leaves\", type=int, default=31)\r\n parser.add_argument(\"--num-threads\", type=int, default=0)\r\n parser.add_argument(\"--learning-rate\", type=float, default=0.1)\r\n parser.add_argument(\"--metric\", type=str, default=\"multi_logloss\")\r\n parser.add_argument(\"--seed\", type=int, default=42)\r\n parser.add_argument(\"--verbose\", type=int, default=0)\r\n\r\n # parse args\r\n args = parser.parse_args()\r\n\r\n # return args\r\n return args\r\n\r\n\r\n# run script\r\nif __name__ == \"__main__\":\r\n # parse args\r\n args = parse_args()\r\n\r\n # run main function\r\n main(args)"
260
+ },
261
+ "resources" : [
262
+ {
263
+ "type" : " Microsoft.Storage/storageAccounts/blobServices/containers" ,
264
+ "apiVersion" : " 2021-04-01" ,
265
+ "name" : " [format('{0}/{1}/{2}', parameters('storageAccountName'), 'default', parameters('containerName'))]" ,
266
+ "properties" : {
267
+ "publicAccess" : " Container"
268
+ },
269
+ "dependsOn" : [
270
+ " [resourceId('Microsoft.Storage/storageAccounts/blobServices', parameters('storageAccountName'), 'default')]"
271
+ ]
272
+ },
273
+ {
274
+ "type" : " Microsoft.Storage/storageAccounts/blobServices" ,
275
+ "apiVersion" : " 2021-04-01" ,
276
+ "name" : " [format('{0}/{1}', parameters('storageAccountName'), 'default')]"
277
+ },
278
+ {
279
+ "type" : " Microsoft.Resources/deploymentScripts" ,
280
+ "apiVersion" : " 2020-10-01" ,
281
+ "name" : " [format('deployscript-upload-blob-{0}', uniqueString(resourceId('Microsoft.Storage/storageAccounts/blobServices/containers', parameters('storageAccountName'), 'default', parameters('containerName'))))]" ,
282
+ "location" : " [parameters('location')]" ,
283
+ "kind" : " AzureCLI" ,
284
+ "properties" : {
285
+ "azCliVersion" : " 2.26.1" ,
286
+ "timeout" : " PT5M" ,
287
+ "retentionInterval" : " PT1H" ,
288
+ "environmentVariables" : [
289
+ {
290
+ "name" : " AZURE_STORAGE_ACCOUNT" ,
291
+ "value" : " [parameters('storageAccountName')]"
292
+ },
293
+ {
294
+ "name" : " AZURE_STORAGE_KEY" ,
295
+ "secureValue" : " [listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-04-01').keys[0].value]"
296
+ },
297
+ {
298
+ "name" : " CONTENT" ,
299
+ "value" : " [variables('$fxv#0')]"
300
+ }
301
+ ],
302
+ "scriptContent" : " [format('echo \" $CONTENT\" > {0} && az storage blob upload -f {1} -c {2} -n {3}', parameters('filename'), parameters('filename'), parameters('containerName'), parameters('filename'))]"
303
+ },
304
+ "dependsOn" : [
305
+ " [resourceId('Microsoft.Storage/storageAccounts/blobServices/containers', parameters('storageAccountName'), 'default', parameters('containerName'))]"
306
+ ]
307
+ },
308
+ {
309
+ "type" : " Microsoft.MachineLearningServices/workspaces/codes/versions" ,
310
+ "apiVersion" : " 2022-05-01" ,
311
+ "name" : " [format('{0}/{1}-{2}/{3}', parameters('workspaceName'), parameters('codeId'), uniqueString(resourceId('Microsoft.Storage/storageAccounts/blobServices/containers', parameters('storageAccountName'), 'default', parameters('containerName'))), parameters('codeVersion'))]" ,
312
+ "properties" : {
313
+ "codeUri" : " [uri(format('https://{0}.blob.{1}/', parameters('storageAccountName'), environment().suffixes.storage), format('{0}/', parameters('containerName')))]" ,
314
+ "isAnonymous" : false
315
+ },
316
+ "dependsOn" : [
317
+ " [resourceId('Microsoft.Storage/storageAccounts/blobServices/containers', parameters('storageAccountName'), 'default', parameters('containerName'))]" ,
318
+ " [resourceId('Microsoft.Resources/deploymentScripts', format('deployscript-upload-blob-{0}', uniqueString(resourceId('Microsoft.Storage/storageAccounts/blobServices/containers', parameters('storageAccountName'), 'default', parameters('containerName')))))]"
319
+ ]
320
+ }
321
+ ],
322
+ "outputs" : {
323
+ "codeId" : {
324
+ "type" : " string" ,
325
+ "value" : " [resourceId('Microsoft.MachineLearningServices/workspaces/codes/versions', split(format('{0}/{1}-{2}/{3}', parameters('workspaceName'), parameters('codeId'), uniqueString(resourceId('Microsoft.Storage/storageAccounts/blobServices/containers', parameters('storageAccountName'), 'default', parameters('containerName'))), parameters('codeVersion')), '/')[0], split(format('{0}/{1}-{2}/{3}', parameters('workspaceName'), parameters('codeId'), uniqueString(resourceId('Microsoft.Storage/storageAccounts/blobServices/containers', parameters('storageAccountName'), 'default', parameters('containerName'))), parameters('codeVersion')), '/')[1], split(format('{0}/{1}-{2}/{3}', parameters('workspaceName'), parameters('codeId'), uniqueString(resourceId('Microsoft.Storage/storageAccounts/blobServices/containers', parameters('storageAccountName'), 'default', parameters('containerName'))), parameters('codeVersion')), '/')[2])]"
326
+ }
327
+ }
328
+ }
329
+ }
330
+ }
331
+ ],
332
+ "outputs" : {
333
+ "Job_Studio_Endpoint" : {
334
+ "type" : " string" ,
335
+ "value" : " [reference(resourceId('Microsoft.MachineLearningServices/workspaces/jobs', split(format('{0}/{1}', parameters('workspaceName'), parameters('jobName')), '/')[0], split(format('{0}/{1}', parameters('workspaceName'), parameters('jobName')), '/')[1])).services.Studio.endpoint]"
336
+ }
337
+ }
338
+ }
0 commit comments