@@ -38,6 +38,8 @@ namespace neutron {
3838 +----------------------------------------+------------------------------------------+
3939 | 1st output map (1B) | [nth* output map (1B)] |
4040 +----------------------------------------+------------------------------------------+
41+ | Payload version (1B) |
42+ +-----------------------------------------------------------------------------------+
4143*/
4244// clang-format on
4345#define ITEM_SIZE 1 // 1 Byte
@@ -53,10 +55,13 @@ namespace neutron {
// Address of the output tensor map array inside the payload header.
// `base` points at the first header byte. The offset skips 3 fixed-size
// header items, two arrays whose length is the byte stored at
// INPUT_TENSOR_FORMAT_LEN_POS and one array whose length is the byte stored
// at OUTPUT_TENSOR_FORMAT_LEN_POS (presumably the input formats, the output
// formats and the input map -- confirm against the header schema comment).
// `base` is parenthesized so that pointer expressions such as `p + 1` index
// correctly. NOTE: `base` is evaluated more than once; pass no side effects.
#define OUTPUT_TENSOR_MAP_ARRAY_ADDR(base)                             \
  ((base) + 3 * ITEM_SIZE + 2 * (base)[INPUT_TENSOR_FORMAT_LEN_POS] + \
   1 * (base)[OUTPUT_TENSOR_FORMAT_LEN_POS])
// Address of the 1-byte payload version field inside the payload header.
// `base` points at the first header byte. The version byte sits after 3
// fixed-size header items, two arrays sized by the byte at
// INPUT_TENSOR_FORMAT_LEN_POS and two arrays sized by the byte at
// OUTPUT_TENSOR_FORMAT_LEN_POS, i.e. directly after the output map row shown
// in the header schema comment.
// `base` is parenthesized so that pointer expressions such as `p + 1` index
// correctly. NOTE: `base` is evaluated more than once; pass no side effects.
#define PAYLOAD_VERSION_ADDR(base)                                     \
  ((base) + 3 * ITEM_SIZE + 2 * (base)[INPUT_TENSOR_FORMAT_LEN_POS] + \
   2 * (base)[OUTPUT_TENSOR_FORMAT_LEN_POS])
// Address of the payload that follows the header.
// The header length is 4 fixed-size items (one more than the offset used for
// the payload version byte, so the version byte is counted as part of the
// header) plus two arrays sized by the byte at INPUT_TENSOR_FORMAT_LEN_POS
// and two arrays sized by the byte at OUTPUT_TENSOR_FORMAT_LEN_POS. That
// length is rounded with ALIGN_SIZE and added to `base`, so the alignment
// applies to the offset from `base`, not to the absolute address.
// The outermost parentheses are required: the macro is used directly after
// `static_cast<const void*>` and supplies the cast's parentheses.
// `base` is parenthesized so that pointer expressions such as `p + 1` index
// correctly. NOTE: `base` is evaluated more than once; pass no side effects.
#define PAYLOAD_ADDR(base)                                          \
  ((base) +                                                         \
   ALIGN_SIZE(                                                      \
       4 * ITEM_SIZE + 2 * (base)[INPUT_TENSOR_FORMAT_LEN_POS] +    \
       2 * (base)[OUTPUT_TENSOR_FORMAT_LEN_POS]))
6166
6267// Aggregate neutron model handle and data structures into one.
@@ -65,6 +70,8 @@ typedef struct {
6570 int numOutputs = 0 ;
6671 int numInputArgs = 0 ;
6772 uint32_t scratchSize = 0 ;
73+ uint32_t profileSize = 0 ;
74+ uint32_t debugSize = 0 ;
6875 NeutronModelConfig mcfg;
6976 NeutronDataConfig dcfg;
7077 NeutronModelHandle nmh = NULL ;
@@ -269,6 +276,7 @@ class NeutronBackend final : public PyTorchBackendInterface {
269276 OUTPUT_TENSOR_FORMAT_ARRAY_ADDR (payloadFlags);
270277 cfg->inputMap = INPUT_TENSOR_MAP_ARRAY_ADDR (payloadFlags);
271278 cfg->outputMap = OUTPUT_TENSOR_MAP_ARRAY_ADDR (payloadFlags);
279+ uint8_t payloadVersion = *PAYLOAD_VERSION_ADDR (payloadFlags);
272280
273281 const uint32_t * buffer = static_cast <const uint32_t *>(
274282 static_cast <const void *> PAYLOAD_ADDR (payloadFlags));
@@ -282,9 +290,28 @@ class NeutronBackend final : public PyTorchBackendInterface {
282290 }
283291 uint32_t microcodeSize = buffer[6 ];
284292 uint32_t weightsSize = buffer[7 ];
285- cfg->scratchSize = buffer[9 ];
286- cfg->numInputs = buffer[11 ];
287- cfg->numOutputs = buffer[12 ];
293+ switch (payloadVersion) {
294+ case 0 :
295+ cfg->scratchSize = buffer[9 ];
296+ cfg->profileSize = 0 ;
297+ cfg->debugSize = 0 ;
298+ cfg->numInputs = buffer[11 ];
299+ cfg->numOutputs = buffer[12 ];
300+ break ;
301+ case 1 :
302+ cfg->scratchSize = buffer[9 ];
303+ cfg->profileSize = buffer[10 ];
304+ cfg->debugSize = buffer[11 ];
305+ cfg->numInputs = buffer[13 ];
306+ cfg->numOutputs = buffer[14 ];
307+ break ;
308+ default :
309+ ET_LOG (
310+ Error,
311+ " Unknown payload version %d. Please update the backend" ,
312+ payloadVersion);
313+ return Error::InvalidProgram;
314+ }
288315 if (cfg->numInputs != numInputs) {
289316 ET_LOG (
290317 Error,
@@ -336,10 +363,16 @@ class NeutronBackend final : public PyTorchBackendInterface {
336363 // Allocate place for input and output pointers.
337364 cfg->dcfg .inputs = static_cast <const void **>(
338365 context.allocate (cfg->numInputs * sizeof (void *)));
339- cfg->dcfg .outputs =
340- static_cast <void **>(context.allocate (cfg->numOutputs * sizeof (void *)));
366+ // There are 3 extra entries: scratch, profile and debug. The scratch
367+ // pointer was allocated implicitly in the previous versions.
368+ cfg->dcfg .outputs = static_cast <void **>(
369+ context.allocate ((cfg->numOutputs + 3 ) * sizeof (void *)));
341370 cfg->dcfg .outputs [cfg->numOutputs ] =
342371 static_cast <void *>(context.allocate (cfg->scratchSize , 16 ));
372+ cfg->dcfg .outputs [cfg->numOutputs + 1 ] =
373+ static_cast <void *>(context.allocate (cfg->profileSize , 16 ));
374+ cfg->dcfg .outputs [cfg->numOutputs + 2 ] =
375+ static_cast <void *>(context.allocate (cfg->debugSize , 16 ));
343376
344377 // Set inputs from args.
345378 // Transpose inputs if needed.
@@ -352,7 +385,7 @@ class NeutronBackend final : public PyTorchBackendInterface {
352385 return Error::InvalidProgram;
353386 }
354387 // Allocate buffer, the allocator is reset after each PTE instruction.
355- void * buffer = context.allocate (arg.nbytes ());
388+ void * buffer = context.allocate (arg.nbytes (), 16 );
356389 transposeInput (
357390 arg.const_data_ptr (), buffer, arg.sizes (), arg.element_size ());
358391 cfg->dcfg .inputs [i] = buffer;
@@ -368,7 +401,7 @@ class NeutronBackend final : public PyTorchBackendInterface {
368401 if (cfg->outputTranspositionFlags [i] &&
369402 multipleChannelsPresent (arg.sizes ())) {
370403 // Allocate buffer, the allocator is reset after each PTE instruction.
371- void * buffer = context.allocate (arg.nbytes ());
404+ void * buffer = context.allocate (arg.nbytes (), 16 );
372405 cfg->dcfg .outputs [i] = buffer;
373406 } else {
374407 cfg->dcfg .outputs [i] = arg.mutable_data_ptr ();
0 commit comments