@@ -370,6 +370,13 @@ def benchmark_bin(self) -> Path:
370370 """Returns the path to the benchmark binary"""
371371 return self .bench .project .build_dir / "bin" / self .bench_name
372372
# Optional name suffix: yields `separator` followed by the "CPU count" label
# when self.profiler_type equals PROFILERS.CPU_COUNTER, and "" otherwise.
# NOTE(review): this text is a diff rendering; the spacing around "{ ... }"
# inside the f-string may not match the real file - confirm before relying on it.
373+ def cpu_count_str (self , separator : str = " " ) -> str :
374+ return (
375+ f"{ separator } CPU count"
376+ if self .profiler_type == PROFILERS .CPU_COUNTER
377+ else ""
378+ )
379+
373380 def get_iters (self , run_trace : TracingType ):
374381 """Returns the number of iterations to run for the given tracing type."""
375382 return (
@@ -539,11 +546,16 @@ def supported_runtimes(self) -> list[RUNTIMES]:
539546 return super ().supported_runtimes () + [RUNTIMES .SYCL_PREVIEW ]
540547
541548 def enabled (self ) -> bool :
542- # This is a workaround for the BMG server where we have old results for self.KernelExecTime=20
543- # The benchmark instance gets created just to make metadata for these old results
544549 if not super ().enabled ():
545550 return False
546551
552+ if (
553+ self .runtime in (RUNTIMES .SYCL , RUNTIMES .UR )
554+ ) and options .profiler_type != self .profiler_type .value :
555+ return False
556+
557+ # This is a workaround for the BMG server where we have old results for self.KernelExecTime=20
558+ # The benchmark instance gets created just to make metadata for these old results
547559 device_arch = getattr (options , "device_architecture" , "" )
548560 if "bmg" in device_arch and self .KernelExecTime == 20 :
549561 # Disable this benchmark for BMG server, just create metadata
@@ -568,7 +580,7 @@ def name(self):
568580 f" KernelExecTime={ self .KernelExecTime } " if self .KernelExecTime != 1 else ""
569581 )
570582
571- return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } "
583+ return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } { self . cpu_count_str () } "
572584
573585 def display_name (self ) -> str :
574586 order = "in order" if self .ioq else "out of order"
@@ -580,7 +592,7 @@ def display_name(self) -> str:
580592 if self .KernelExecTime != 1 :
581593 info .append (f"KernelExecTime={ self .KernelExecTime } " )
582594 additional_info = f" { ' ' .join (info )} " if info else ""
583- return f"{ self .runtime .value .upper ()} SubmitKernel { order } { additional_info } , NumKernels { self .NumKernels } "
595+ return f"{ self .runtime .value .upper ()} SubmitKernel { order } { additional_info } , NumKernels { self .NumKernels } { self . cpu_count_str ( ', ' ) } "
584596
585597 def explicit_group (self ):
586598 order = "in order" if self .ioq else "out of order"
@@ -589,7 +601,7 @@ def explicit_group(self):
589601
590602 kernel_exec_time_str = f" long kernel" if self .KernelExecTime != 1 else ""
591603
592- return f"SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } "
604+ return f"SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } { self . cpu_count_str ( ', ' ) } "
593605
594606 def description (self ) -> str :
595607 order = "in-order" if self .ioq else "out-of-order"
@@ -607,18 +619,16 @@ def range(self) -> tuple[float, float]:
607619
# Builds the argument list as "--name=value" strings; the iteration count is
# taken from self.get_iters(run_trace). Presumably these are passed to the
# benchmark binary - confirm against the caller.
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
608620 def bin_args (self , run_trace : TracingType = TracingType .NONE ) -> list [str ]:
609621 iters = self .get_iters (run_trace )
610- bin_args = [
622+ return [
611623 f"--iterations={ iters } " ,
612624 f"--Ioq={ self .ioq } " ,
613625 f"--MeasureCompletion={ self .MeasureCompletion } " ,
614626 "--Profiling=0" ,
615627 f"--NumKernels={ self .NumKernels } " ,
616628 f"--KernelExecTime={ self .KernelExecTime } " ,
617629 f"--UseEvents={ self .UseEvents } " ,
# Added side: --profilerType is now part of the list for every runtime; the
# removed lines below appended it only for RUNTIMES.SYCL and RUNTIMES.UR.
630+ f"--profilerType={ self .profiler_type .value } " ,
618631 ]
619- if self .runtime == RUNTIMES .SYCL or self .runtime == RUNTIMES .UR :
620- bin_args .append (f"--profilerType={ self .profiler_type .value } " )
621- return bin_args
622632
623633 def get_metadata (self ) -> dict [str , BenchmarkMetadata ]:
624634 metadata_dict = super ().get_metadata ()
@@ -656,13 +666,18 @@ def __init__(
656666 profiler_type = profiler_type ,
657667 )
658668
def enabled(self) -> bool:
    """Tell whether this benchmark should run.

    Returns False when `options.profiler_type` differs from this benchmark's
    `profiler_type.value`; otherwise returns the parent class's `enabled()`.
    """
    profiler_mismatch = options.profiler_type != self.profiler_type.value
    return False if profiler_mismatch else super().enabled()
673+
# Benchmark identifier: the fixed "api_overhead_benchmark_sycl
# ExecImmediateCopyQueue" prefix, the queue ordering derived from self.ioq,
# source, destination and size. The added (+) line also appends
# self.cpu_count_str() with its default separator.
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
659674 def name (self ):
660675 order = "in order" if self .ioq else "out of order"
661- return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue { order } from { self .source } to { self .destination } , size { self .size } "
676+ return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue { order } from { self .source } to { self .destination } , size { self .size } { self . cpu_count_str () } "
662677
# Display label: "SYCL ExecImmediateCopyQueue", the queue ordering derived
# from self.ioq, source, destination and size. The added (+) line also appends
# self.cpu_count_str(', '), i.e. the suffix with a comma-space separator.
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
663678 def display_name (self ) -> str :
664679 order = "in order" if self .ioq else "out of order"
665- return f"SYCL ExecImmediateCopyQueue { order } from { self .source } to { self .destination } , size { self .size } "
680+ return f"SYCL ExecImmediateCopyQueue { order } from { self .source } to { self .destination } , size { self .size } { self . cpu_count_str ( ', ' ) } "
666681
667682 def description (self ) -> str :
668683 order = "in-order" if self .ioq else "out-of-order"
@@ -706,11 +721,16 @@ def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
706721 profiler_type = profiler_type ,
707722 )
708723
def enabled(self) -> bool:
    """Decide whether this benchmark is enabled.

    A benchmark whose `profiler_type.value` is not the one in
    `options.profiler_type` is reported as disabled; in every other case the
    decision is delegated to the parent class.
    """
    wrong_profiler = options.profiler_type != self.profiler_type.value
    if wrong_profiler:
        return False
    parent_verdict = super().enabled()
    return parent_verdict
728+
# Benchmark identifier: the fixed "memory_benchmark_sycl QueueInOrderMemcpy"
# prefix followed by source, destination and size. The added (+) line also
# appends self.cpu_count_str() with its default separator.
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
709729 def name (self ):
710- return f"memory_benchmark_sycl QueueInOrderMemcpy from { self .source } to { self .destination } , size { self .size } "
730+ return f"memory_benchmark_sycl QueueInOrderMemcpy from { self .source } to { self .destination } , size { self .size } { self . cpu_count_str () } "
711731
# Display label: "SYCL QueueInOrderMemcpy" followed by source, destination and
# size. The added (+) line also appends self.cpu_count_str(', ').
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
712732 def display_name (self ) -> str :
713- return f"SYCL QueueInOrderMemcpy from { self .source } to { self .destination } , size { self .size } "
733+ return f"SYCL QueueInOrderMemcpy from { self .source } to { self .destination } , size { self .size } { self . cpu_count_str ( ', ' ) } "
714734
715735 def description (self ) -> str :
716736 operation = "copy-only" if self .isCopyOnly else "copy and command submission"
@@ -748,11 +768,16 @@ def __init__(self, bench, source, destination, size, profiler_type):
748768 bench , "memory_benchmark_sycl" , "QueueMemcpy" , profiler_type = profiler_type
749769 )
750770
def enabled(self) -> bool:
    """Return whether this benchmark should be run.

    Disabled (False) when `options.profiler_type` is not equal to
    `self.profiler_type.value`; otherwise whatever the parent class reports.
    """
    selected_profiler = options.profiler_type
    if selected_profiler != self.profiler_type.value:
        return False
    return super().enabled()
775+
# Benchmark identifier: the fixed "memory_benchmark_sycl QueueMemcpy" prefix
# followed by source, destination and size. The added (+) line also appends
# self.cpu_count_str() with its default separator.
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
751776 def name (self ):
752- return f"memory_benchmark_sycl QueueMemcpy from { self .source } to { self .destination } , size { self .size } "
777+ return f"memory_benchmark_sycl QueueMemcpy from { self .source } to { self .destination } , size { self .size } { self . cpu_count_str () } "
753778
# Display label: "SYCL QueueMemcpy" followed by source, destination and size.
# The added (+) line also appends self.cpu_count_str(', ').
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
754779 def display_name (self ) -> str :
755- return f"SYCL QueueMemcpy from { self .source } to { self .destination } , size { self .size } "
780+ return f"SYCL QueueMemcpy from { self .source } to { self .destination } , size { self .size } { self . cpu_count_str ( ', ' ) } "
756781
757782 def description (self ) -> str :
758783 return (
@@ -1038,8 +1063,16 @@ def __init__(
def supported_runtimes(self) -> list[RUNTIMES]:
    """Return the parent's supported runtimes plus `RUNTIMES.SYCL_PREVIEW`."""
    inherited = super().supported_runtimes()
    return inherited + [RUNTIMES.SYCL_PREVIEW]
10401065
def enabled(self) -> bool:
    """Tell whether this benchmark should run.

    For the `RUNTIMES.SYCL` runtime the benchmark is disabled when
    `options.profiler_type` differs from `self.profiler_type.value`. In all
    other cases the parent class's `enabled()` result is returned.
    """
    if self.runtime == RUNTIMES.SYCL:
        # The profiler filter is only consulted for the SYCL runtime.
        if options.profiler_type != self.profiler_type.value:
            return False
    return super().enabled()
1073+
# Group label: "SubmitGraph", the ioq/measure/use-events/host-tasks strings
# held on self, and the kernel count. The added (+) line also appends
# self.cpu_count_str(', ').
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
10411074 def explicit_group (self ):
1042- return f"SubmitGraph { self .ioq_str } { self .measure_str } { self .use_events_str } { self .host_tasks_str } , { self .numKernels } kernels"
1075+ return f"SubmitGraph { self .ioq_str } { self .measure_str } { self .use_events_str } { self .host_tasks_str } , { self .numKernels } kernels{ self . cpu_count_str ( ', ' ) } "
10431076
10441077 def description (self ) -> str :
10451078 return (
@@ -1048,10 +1081,10 @@ def description(self) -> str:
10481081 )
10491082
# Benchmark identifier: "graph_api_benchmark_<runtime value> SubmitGraph", the
# use-events and host-tasks strings, then numKernels, inOrderQueue and
# measureCompletionTime. The added (+) line also appends
# self.cpu_count_str() with its default separator.
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
10501083 def name (self ):
1051- return f"graph_api_benchmark_{ self .runtime .value } SubmitGraph{ self .use_events_str } { self .host_tasks_str } numKernels:{ self .numKernels } ioq { self .inOrderQueue } measureCompletion { self .measureCompletionTime } "
1084+ return f"graph_api_benchmark_{ self .runtime .value } SubmitGraph{ self .use_events_str } { self .host_tasks_str } numKernels:{ self .numKernels } ioq { self .inOrderQueue } measureCompletion { self .measureCompletionTime } { self . cpu_count_str () } "
10521085
# Display label: upper-cased runtime value, "SubmitGraph", the
# ioq/measure/use-events/host-tasks strings held on self, and the kernel
# count. The added (+) line also appends self.cpu_count_str(', ').
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
10531086 def display_name (self ) -> str :
1054- return f"{ self .runtime .value .upper ()} SubmitGraph { self .ioq_str } { self .measure_str } { self .use_events_str } { self .host_tasks_str } , { self .numKernels } kernels"
1087+ return f"{ self .runtime .value .upper ()} SubmitGraph { self .ioq_str } { self .measure_str } { self .use_events_str } { self .host_tasks_str } , { self .numKernels } kernels{ self . cpu_count_str ( ', ' ) } "
10551088
10561089 def get_tags (self ):
10571090 return [
@@ -1064,7 +1097,7 @@ def get_tags(self):
10641097
10651098 def bin_args (self , run_trace : TracingType = TracingType .NONE ) -> list [str ]:
10661099 iters = self .get_iters (run_trace )
1067- bin_args = [
1100+ return [
10681101 f"--iterations={ iters } " ,
10691102 f"--NumKernels={ self .numKernels } " ,
10701103 f"--MeasureCompletionTime={ self .measureCompletionTime } " ,
@@ -1074,10 +1107,8 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
10741107 f"--UseEvents={ self .useEvents } " ,
10751108 "--UseExplicit=0" ,
10761109 f"--UseHostTasks={ self .useHostTasks } " ,
1110+ f"--profilerType={ self .profiler_type .value } " ,
10771111 ]
1078- if self .runtime == RUNTIMES .SYCL :
1079- bin_args .append (f"--profilerType={ self .profiler_type .value } " )
1080- return bin_args
10811112
10821113 def get_metadata (self ) -> dict [str , BenchmarkMetadata ]:
10831114 metadata_dict = super ().get_metadata ()
@@ -1116,33 +1147,39 @@ def __init__(
def supported_runtimes(self) -> list[RUNTIMES]:
    """Return the two supported runtimes: SYCL first, then Level Zero."""
    runtimes = [RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]
    return runtimes
11181149
def enabled(self) -> bool:
    """Decide whether this benchmark is enabled.

    When the runtime is `RUNTIMES.SYCL` and `options.profiler_type` does not
    equal `self.profiler_type.value`, the benchmark is disabled. Otherwise
    the parent class makes the decision.
    """
    is_sycl = self.runtime == RUNTIMES.SYCL
    # `and` short-circuits, so the profiler comparison only runs for SYCL.
    if is_sycl and options.profiler_type != self.profiler_type.value:
        return False
    return super().enabled()
1157+
# Group label: "EmptyKernel" with the wgc and wgs values. The added (+) lines
# also append self.cpu_count_str(', ').
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
11191158 def explicit_group (self ):
1120- return f"EmptyKernel, wgc: { self .wgc } , wgs: { self .wgs } "
1159+ return (
1160+ f"EmptyKernel, wgc: { self .wgc } , wgs: { self .wgs } { self .cpu_count_str (', ' )} "
1161+ )
11211162
def description(self) -> str:
    """Return the description text, which is an empty string."""
    no_description = ""
    return no_description
11241165
# Benchmark identifier: "ulls_benchmark_<runtime value> EmptyKernel" with the
# wgc and wgs values. The added (+) line also appends self.cpu_count_str()
# with its default separator.
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
11251166 def name (self ):
1126- return f"ulls_benchmark_{ self .runtime .value } EmptyKernel wgc:{ self .wgc } , wgs:{ self .wgs } "
1167+ return f"ulls_benchmark_{ self .runtime .value } EmptyKernel wgc:{ self .wgc } , wgs:{ self .wgs } { self . cpu_count_str () } "
11271168
# Display label: upper-cased runtime value, "EmptyKernel", and the wgc and wgs
# values. The added (+) line collapses the removed parenthesised return into
# one line and also appends self.cpu_count_str(', ').
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
11281169 def display_name (self ) -> str :
1129- return (
1130- f"{ self .runtime .value .upper ()} EmptyKernel, wgc { self .wgc } , wgs { self .wgs } "
1131- )
1170+ return f"{ self .runtime .value .upper ()} EmptyKernel, wgc { self .wgc } , wgs { self .wgs } { self .cpu_count_str (', ' )} "
11321171
def get_tags(self):
    """Return the tag list: the runtime's tag name, then three fixed tags."""
    runtime_tag = runtime_to_tag_name(self.runtime)
    return [runtime_tag, "micro", "latency", "submit"]
11351174
# Builds the argument list as "--name=value" strings; the iteration count is
# taken from self.get_iters(run_trace). Presumably these are passed to the
# benchmark binary - confirm against the caller.
# NOTE(review): spacing around "{ ... }" in the f-strings comes from the diff
# rendering and may not match the real file.
11361175 def bin_args (self , run_trace : TracingType = TracingType .NONE ) -> list [str ]:
11371176 iters = self .get_iters (run_trace )
1138- bin_args = [
1177+ return [
11391178 f"--iterations={ iters } " ,
11401179 f"--wgs={ self .wgs } " ,
11411180 f"--wgc={ self .wgc } " ,
# Added side: --profilerType is now part of the list for every runtime; the
# removed lines below appended it only when the runtime was RUNTIMES.SYCL.
1181+ f"--profilerType={ self .profiler_type .value } " ,
11421182 ]
1143- if self .runtime == RUNTIMES .SYCL :
1144- bin_args .append (f"--profilerType={ self .profiler_type .value } " )
1145- return bin_args
11461183
11471184
11481185class UllsKernelSwitch (ComputeBenchmark ):
0 commit comments