diff --git a/.github/workflows/build-test-python-package.yml b/.github/workflows/build-test-python-package.yml index 51a77ac..aae2424 100644 --- a/.github/workflows/build-test-python-package.yml +++ b/.github/workflows/build-test-python-package.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/publish-package.yml b/.github/workflows/publish-package.yml index c513a5c..f95b353 100644 --- a/.github/workflows/publish-package.yml +++ b/.github/workflows/publish-package.yml @@ -23,10 +23,10 @@ jobs: steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v3 - - name: Set up Python 3.9 + - name: Set up Python 3.12 uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: 3.12 cache: pip # Publishes to PyPi diff --git a/.gitignore b/.gitignore index f15a257..92749ae 100755 --- a/.gitignore +++ b/.gitignore @@ -10,9 +10,16 @@ cached_data_used/kernels/* cached_data_used/visualizations/* cached_data_used/last_run/* cached_data_used/import_runs/* +methodology_paper_evaluation/run/* +hyperparametertuning/* +hyperparametertuning_*/* +test_run_experiment/* + +# ignore setup files +*/setup/*_input.json # exceptions -!cached_data_used/cachefiles/ktt_values_to_kerneltuner.py +!cached_data_used/cachefiles/*.py # ignore specific experiment files experiment_files/milo.json diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..19bab30 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "benchmark_hub"] + path = benchmark_hub + url = https://github.com/AutoTuningAssociation/benchmark_hub.git diff --git a/README.md b/README.md index dad96e6..64c8fe4 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Currently, the stable releases of this software package are compatible with [Ker ## Installation The package can be installed with `pip install autotuning_methodology`. Alternatively, it can be installed by cloning this repository and running `pip install .` in the root of the cloned project. -Python >= 3.9 is supported. +Like most Python packages, installing in a virtual environment or with `pipx` is recommended. Python >= 3.10 is supported. ## Notable features - Official software by the authors of the methodology-defining paper. diff --git a/benchmark_hub b/benchmark_hub new file mode 160000 index 0000000..ff76e2c --- /dev/null +++ b/benchmark_hub @@ -0,0 +1 @@ +Subproject commit ff76e2c86a7d9b3f389038589660e8b6ef4b4a5e diff --git a/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py b/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py deleted file mode 100644 index 5b3d27f..0000000 --- a/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py +++ /dev/null @@ -1,116 +0,0 @@ -"""Script to overwrite Kernel Tuner brute forced cache files with the objective values of a KTT brute force search. - -Notes: this requires a fully bruteforced KTT and fully bruteforced KernelTuner (KT) cachefile on the same search space. -Objective value is assumed to be time by default. Time is assumed to be in microseconds for KTT and miliseconds for KT. 
-""" - -import json -from pathlib import Path - -from autotuning_methodology.runner import ktt_param_mapping - -kerneltuner_cachefiles_path = Path(__file__).parent.resolve() -assert kerneltuner_cachefiles_path.exists() -ktt_data_path = kerneltuner_cachefiles_path / "KTT data" -assert ktt_data_path.exists() - -files_to_import = [f for f in ktt_data_path.iterdir() if f.is_file() and f.suffix == ".json"] -ktt_objective_name = "Duration" -kt_objective_name = "time" - -error_status_mapping = { - "ok": None, - "devicelimitsexceeded": '"CompilationFailedConfig"', - "computationfailed": '"RuntimeFailedConfig"', -} - -for file in files_to_import: - # find the associated KernelTuner cachefile to write to - ktt_data = dict(json.loads(file.read_bytes())) - metadata = ktt_data["Metadata"] - device = str(metadata["Device"]) - device_filename = device.replace("NVIDIA GeForce ", "").replace(" ", "_") - kernel = str(ktt_data["Results"][0]["KernelName"]) - kernel_filename = kernel.lower() - kerneltuner_cachefile = kerneltuner_cachefiles_path / kernel_filename / f"{device_filename}.json" - assert kerneltuner_cachefile.exists() - ktt_param_mapping_kernel = ktt_param_mapping[kernel_filename] - print(f"Importing objective values from KTT to KernelTuner file for '{kernel}' on {device}") - - # for each configuration in the KTT file, use the value in the KernelTuner file - config_to_change = dict() - kerneltuner_data = dict(json.loads(kerneltuner_cachefile.read_bytes())) - ktt_results = ktt_data["Results"] - cache = kerneltuner_data["cache"] - assert len(cache) == len(ktt_results) - for ktt_config in ktt_results: - # convert the configuration to T4 style dictionary for fast lookups in the mapping - configuration_ktt = dict() - for param in ktt_config["Configuration"]: - configuration_ktt[param["Name"]] = param["Value"] - - # convert the configuration data with the mapping in the correct order - configuration = dict() - param_map = ktt_param_mapping_kernel - assert len(param_map) == len( - configuration_ktt - ), f"Mapping provided for {len(param_map)} params, but configuration has {len(configuration_ktt)}" - for param_name, mapping in param_map.items(): - param_value = configuration_ktt[param_name] - # if the mapping is None, do not include the parameter - if mapping is None: - pass - # if the mapping is a tuple, the first argument is the new parameter name and the second the value - elif isinstance(mapping, tuple): - param_mapped_name, param_mapped_value = mapping - if callable(param_mapped_value): - param_mapped_value = param_mapped_value(param_value) - configuration[param_mapped_name] = param_mapped_value - # if it's a list of tuples, map to multiple parameters - elif isinstance(mapping, list): - for param_mapped_name, param_mapped_value in mapping: - if callable(param_mapped_value): - param_mapped_value = param_mapped_value(param_value) - configuration[param_mapped_name] = param_mapped_value - else: - raise ValueError(f"Can not apply parameter mapping of {type(mapping)} ({mapping})") - - # get and validate the Kernel Tuner configuration - lookup_string = ",".join(str(v) for v in configuration.values()) # the key to lookup the configuration - assert lookup_string in cache - kt_config = cache[lookup_string] - for param, value in configuration.items(): - assert kt_config[param] == value - - # replace the objective in the KT configuration with the objective in the KTT configuration - kt_old_objective_value = kt_config[kt_objective_name] - kt_new_objective_value = "" - status = 
error_status_mapping[str(ktt_config["Status"]).lower()] - if status is None: - kt_new_objective_value = ktt_config["ComputationResults"][0][ktt_objective_name] / 1000 - else: - kt_new_objective_value = status - kerneltuner_data["cache"][lookup_string][kt_objective_name] = kt_new_objective_value - config_to_change[lookup_string] = (kt_old_objective_value, kt_new_objective_value) - # print(f"Replacing {kt_old_objective_value} with {kt_new_objective_value}") - - # load the individual lines of the file - with kerneltuner_cachefile.open(mode="r", encoding="utf-8") as fp: - lines = fp.readlines() - cache_start = False - # write the new data to file - with kerneltuner_cachefile.open(mode="w") as fp: - # for each line in the cache part of the file, lookup the config string in the changes dictionary and replace - for line in lines: - if '"cache":' in line: - cache_start = True - fp.write(line) - elif not cache_start or line[:1] == "}" or len(line) < 3: - fp.write(line) - else: - lookup_string = line.split(":")[0].replace('"', "").strip() - old_value, new_value = config_to_change[lookup_string] - line = line.replace(f'"time": {old_value},', f'"time": {new_value},', 1) - fp.write(line) - - # kerneltuner_cachefile.write_text(json.dumps(kerneltuner_data, indent=3)) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 6949605..e55b60c 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -3,7 +3,7 @@ Getting Started Start out by installing the package. The simplest way to do this is ``pip install autotuning_methodology``. -Python 3.9 and up are supported. +Python 3.10 and up are supported. Defining an experiment ^^^^^^^^^^^^^^^^^^^^^^ diff --git a/experiment_files/compare_bo.json b/experiment_files/compare_bo.json new file mode 100644 index 0000000..3328abf --- /dev/null +++ b/experiment_files/compare_bo.json @@ -0,0 +1,72 @@ +{ + "version": "1.0.0", + "name": "Methodology paper evaluation", + "parent_folder": "./methodology_paper_evaluation", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "folder": "./cached_data_used/kernels", + "input_file": "convolution.json" + }, + { + "name": "pnpoly", + "folder": "./cached_data_used/kernels", + "input_file": "pnpoly.json" + } + ], + "gpus": [ + "RTX_3090", + "RTX_2080_Ti" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "bayes_opt_og", + "search_method": "bayes_opt", + "display_name": "Bayesian Optimization", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals", + "time", + "aggregated" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/compare_simple.json b/experiment_files/compare_simple.json new file mode 100644 index 
0000000..372e529 --- /dev/null +++ b/experiment_files/compare_simple.json @@ -0,0 +1,132 @@ +{ + "version": "1.2.0", + "name": "Compare constrained strategies pyATF vs KT", + "parent_folder": "/var/scratch/fjwillem/constrained_optimization", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "genetic_algorithm_constrained", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "KT GA constrained", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm_non_constrained", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "KT GA non-constrained", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_constrained" + }, + { + "name": "pso_constrained", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "KT PSO constrained", + "autotuner": "KernelTuner" + }, + { + "name": "pso_non_constrained", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "KT PSO non-constrained", + "autotuner": "KernelTuner", + "color_parent": "pso_constrained" + }, + { + "name": "simulated_annealing_constrained", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "KT SA constrained", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_non_constrained", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "KT SA non-constrained", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_constrained" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate", + "style": "head2head", + "comparison": { + "unit": "time", + "relative_time": 0.5 + }, + "annotate": true + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/convert_old_to_new_format.py b/experiment_files/convert_old_to_new_format.py new file mode 100644 index 0000000..d2cf5f8 --- /dev/null +++ b/experiment_files/convert_old_to_new_format.py @@ -0,0 +1,89 @@ +"""Module to convert the old experiments file format into the new format.""" + +# script to convert the old experiments file format into the new format +import json +from pathlib import Path + +from autotuning_methodology.validators import validate_experimentsfile + +# set input and output files 
+folderpath = Path(__file__).parent +old_file_path = folderpath / Path("../tests/autotuning_methodology/integration/mockfiles/test_import_runs.json") +new_file_path = folderpath / Path("../tests/autotuning_methodology/integration/mockfiles/test_import_runs_new.json") +encoding = "utf-8" +assert old_file_path.exists(), f"Old file does not exist at {old_file_path}" +assert not new_file_path.exists(), f"New file does already exists at {new_file_path}" + +# read input file to dictionary +with old_file_path.open("r", encoding=encoding) as fp: + old_experiment: dict = json.load(fp) + +# convert the dictionary to the new format +new_experiment = { + "version": "1.2.0", + "name": old_experiment["name"], + "parent_folder": f"./{old_experiment['folder_id']}", + "experimental_groups_defaults": { + "applications": [ + { + "name": kernel, + "input_file": f"{old_experiment['kernels_path']}/{kernel}", + "folder": f"{old_experiment['visualization_caches_path']}/{kernel}", + } + for kernel in old_experiment["kernels"] + ], + "gpus": old_experiment["GPUs"], + "pattern_for_full_search_space_filenames": { + "regex": f"{old_experiment['bruteforced_caches_path']}/" + "${applications}/${gpus}.json" + }, + "stochastic": old_experiment["strategy_defaults"]["stochastic"], + "repeats": old_experiment["strategy_defaults"]["repeats"], + "samples": old_experiment["strategy_defaults"]["iterations"], + "minimum_fraction_of_budget_valid": old_experiment.get("minimum_fraction_of_budget_valid", 0.5), + "minimum_number_of_valid_search_iterations": old_experiment["strategy_defaults"][ + "minimum_number_of_evaluations" + ], + "ignore_cache": False, + }, + "search_strategies": [ + { + "name": strategy["name"], + "search_method": strategy["strategy"], + "display_name": strategy["display_name"], + "autotuner": ( + "KernelTuner" if strategy["name"] != "ktt_profile_searcher" else "KTT" + ), # Assuming autotuner is KernelTuner for all strategies + } + for strategy in old_experiment["strategies"] + ], + "statistics_settings": { + "minimization": old_experiment["minimization"], + "cutoff_percentile": old_experiment["cutoff_percentile"], + "cutoff_percentile_start": old_experiment["cutoff_percentile_start"], + "cutoff_type": old_experiment["cutoff_type"], + "objective_time_keys": ["all"], # Mapped to 'all' + "objective_performance_keys": old_experiment["objective_performance_keys"], + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate" if "aggregated" in plottype else "searchspace", + "style": "scatter" if "scatter" in plottype else "line", + "x_axis_value_types": [plottype if plottype != "aggregated" else "time"], + "y_axis_value_types": old_experiment["plot"]["plot_y_value_types"], + } + for plottype in old_experiment["plot"]["plot_x_value_types"] + ], + "resolution": old_experiment["resolution"], + "confidence_level": old_experiment["plot"]["confidence_level"], + "compare_baselines": old_experiment["plot"]["compare_baselines"], + "compare_split_times": old_experiment["plot"]["compare_split_times"], + }, +} + +# validate using schema +validate_experimentsfile(new_experiment, encoding=encoding) + +# write converted dictionary to file +with new_file_path.open("w", encoding=encoding) as fp: + json.dump(new_experiment, fp) diff --git a/experiment_files/diff_evo.json b/experiment_files/diff_evo.json new file mode 100644 index 0000000..d67dbf5 --- /dev/null +++ b/experiment_files/diff_evo.json @@ -0,0 +1,195 @@ +{ + "version": "1.2.0", + "name": "Compare Differential Evolution strategies Kernel Tuner", + 
"parent_folder": "/var/scratch/fjwillem/constrained_optimization", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "A6000", + "W6600", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "firefly_constrained", + "search_method": "firefly_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "Firefly constrained", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm_constrained", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "GA constrained", + "autotuner": "KernelTuner" + }, + { + "name": "pso_constrained", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "PSO constrained", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_constrained", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "SA constrained", + "autotuner": "KernelTuner" + }, + { + "name": "diff_evo", + "search_method": "diff_evo", + "display_name": "Diff Evolution", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "absolute", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "absolute", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + "include_y_labels": true, + "include_colorbar": false, + "annotate": true + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + "cnum": 10, + "include_y_labels": false, + "include_colorbar": true, + "annotate": true + }, + { + "scope": "aggregate", + "style": "head2head", + "comparison": { + "unit": "time", + "relative_time": 0.5 + }, + "annotate": true + }, + { + "scope": 
"aggregate", + "style": "line", + "ylabel": "Aggregate performance relative to baseline" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/example_visualizations.json b/experiment_files/example_visualizations.json index 682147f..ed3991e 100644 --- a/experiment_files/example_visualizations.json +++ b/experiment_files/example_visualizations.json @@ -44,7 +44,7 @@ "strategy_defaults": { "repeats": 100, "minimum_number_of_evaluations": 20, - "cutoff_margin": 1.1, + "cutoff_margin": 0.1, "stochastic": true, "record_data": [ "time", diff --git a/experiment_files/ktt_coulomb_two_autotuner_versions.json b/experiment_files/ktt_coulomb_two_autotuner_versions.json new file mode 100644 index 0000000..e57af50 --- /dev/null +++ b/experiment_files/ktt_coulomb_two_autotuner_versions.json @@ -0,0 +1,60 @@ +{ + "version": "1.0.0", + "name": "Random vs. Random KTT 2.1 and KTT 2.2 on Coulomb", + "parent_folder": "/home/janka/autotuning_methodology_experiments/test_coulomb_two_versions_KTT", + "experimental_groups_defaults": { + "applications": [ + { + "name": "coulomb", + "input_file" : "/home/janka/KTT/Examples/CoulombSum3d/CoulombSum3dCudaScript.json", + "folder": "/home/janka/KTT/Examples/CoulombSum3d" + } + ], + "gpus": ["2080"], + "stochastic": true, + "repeats": 2, + "samples": 1, + "minimum_number_of_valid_search_iterations": 5, + "ignore_cache": true + }, + "search_strategies": [ + { + "name": "random-2.1", + "search_method": "Random", + "display_name": "Random with KTT 2.1", + "autotuner": "KTT", + "autotuner_path": "/home/janka/KTT/Build/x86_64_Release/" + }, + { + "name": "random-2.2", + "autotuner": "KTT", + "autotuner_path": "/home/janka/KTT-2.2/Build/x86_64_Release/", + "search_method": "Random", + "display_name": "Random with KTT 2.2" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.2, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "TotalDuration" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "absolute" + ], + "resolution": 2, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} diff --git a/experiment_files/ktt_coulomb_two_search_methods.json b/experiment_files/ktt_coulomb_two_search_methods.json new file mode 100644 index 0000000..c83023b --- /dev/null +++ b/experiment_files/ktt_coulomb_two_search_methods.json @@ -0,0 +1,69 @@ +{ + "version": "1.0.0", + "name": "Random vs. 
profbased searcher KTT 2.1 on Coulomb", + "parent_folder": "/home/janka/autotuning_methodology_experiments/test_coulomb_two_methods", + "experimental_groups_defaults": { + "applications": [ + { + "name": "coulomb", + "input_file" : "/home/janka/KTT/Examples/CoulombSum3d/CoulombSum3dCudaScript.json", + "folder": "/home/janka/KTT/Examples/CoulombSum3d" + } + ], + "gpus": ["2080"], + "autotuner": "KTT", + "autotuner_path": "/home/janka/KTT/Build/x86_64_Release/", + "set_this_to_pythonpath": "/home/janka/KTT/Build/x86_64_Release/:/home/janka/KTT/Scripts", + "stochastic": true, + "repeats": 2, + "samples": 1, + "minimum_number_of_valid_search_iterations": 5, + "ignore_cache": true + }, + "search_strategies": [ + { + "name": "random", + "search_method": "Random", + "display_name": "Random" + }, + { + "name": "profbased", + "search_method": "ProfileBased", + "search_method_hyperparameters": [ + { + "name": "modelPath", + "value": "/home/janka/KTT/Examples/CoulombSum3d/Models/2080-coulomb_output_DT.sav" + }, + { + "name": "batchSize", + "value": "5" + } + ], + "display_name": "Profile-based searcher" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.2, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "TotalDuration" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "absolute" + ], + "resolution": 2, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} diff --git a/experiment_files/ktt_vectorAddition_two_search_methods.json b/experiment_files/ktt_vectorAddition_two_search_methods.json new file mode 100644 index 0000000..25cf5d0 --- /dev/null +++ b/experiment_files/ktt_vectorAddition_two_search_methods.json @@ -0,0 +1,69 @@ +{ + "version": "1.0.0", + "name": "Random vs. 
profbased searcher KTT 2.1", + "parent_folder": "/home/janka/autotuning_methodology_experiments/test_vectorAddition_two_methods", + "experimental_groups_defaults": { + "applications": [ + { + "name": "vectorAddition", + "input_file" : "/home/janka/KTT/Tutorials/03KernelTuning/KernelTuningCudaScript.json", + "folder": "/home/janka/KTT/Tutorials/03KernelTuning/" + } + ], + "gpus": ["2080"], + "autotuner": "KTT", + "autotuner_path": "/home/janka/KTT/Build/x86_64_Release/", + "set_this_to_pythonpath": "/home/janka/KTT/Build/x86_64_Release:/home/janka/KTT/Scripts", + "stochastic": true, + "repeats": 2, + "samples": 1, + "minimum_number_of_valid_search_iterations": 2, + "ignore_cache": true + }, + "search_strategies": [ + { + "name": "random", + "search_method": "Random", + "display_name": "Random" + }, + { + "name": "profbased", + "search_method": "ProfileBased", + "search_method_hyperparameters": [ + { + "name": "modelPath", + "value": "/home/janka/KTT/Tutorials/03KernelTuning/2080-vectorAdd_output_DT.sav" + }, + { + "name": "batchSize", + "value": "1" + } + ], + "display_name": "Profile-based searcher" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.1, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "TotalDuration" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "absolute" + ], + "resolution": 2, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} diff --git a/experiment_files/methodology_paper_evaluation_2.json b/experiment_files/methodology_paper_evaluation_2.json new file mode 100644 index 0000000..bc5f0ba --- /dev/null +++ b/experiment_files/methodology_paper_evaluation_2.json @@ -0,0 +1,78 @@ +{ + "version": "1.0.0", + "name": "Methodology paper evaluation", + "parent_folder": "./methodology_paper_evaluation", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "input_file": "../cached_data_used/kernels/convolution", + "folder": "../cached_data_used/visualizations/convolution" + }, + { + "name": "pnpoly", + "input_file": "../cached_data_used/kernels/pnpoly", + "folder": "../cached_data_used/visualizations/pnpoly" + } + ], + "gpus": [ + "RTX_3090", + "RTX_2080_Ti" + ], + "pattern_for_full_search_space_filenames": { + "regex": "../cached_data_used/cachefiles/${applications}/${gpus}.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", + "autotuner": "KernelTuner" + }, + { + "name": "greedy_ils", + "search_method": "greedy_ils", + "display_name": "Greedy ILS", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals", + "time", + "aggregated" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ], + "resolution": 1e3, + "confidence_level": 0.95, + "compare_baselines": false, + 
"compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/methodology_paper_evaluation_new.json b/experiment_files/methodology_paper_evaluation_new.json new file mode 100644 index 0000000..462431e --- /dev/null +++ b/experiment_files/methodology_paper_evaluation_new.json @@ -0,0 +1,91 @@ +{ + "version": "1.1.0", + "name": "Methodology paper evaluation", + "parent_folder": "./methodology_paper_evaluation", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "folder": "./cached_data_used/kernels", + "input_file": "convolution.json" + }, + { + "name": "pnpoly", + "folder": "./cached_data_used/kernels", + "input_file": "pnpoly.json" + } + ], + "gpus": [ + "RTX_3090", + "RTX_2080_Ti" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "aggregate", + "style": "line" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index ad6ba41..b9b5049 100755 --- a/mypy.ini +++ b/mypy.ini @@ -1,6 +1,6 @@ # Global options: [mypy] -python_version=3.9 +python_version=3.12 [mypy-isotonic.isotonic.*] ignore_missing_imports = True diff --git a/noxfile.py b/noxfile.py index b1e111f..e3a19ad 100644 --- a/noxfile.py +++ b/noxfile.py @@ -16,11 +16,13 @@ def lint(session: nox.Session) -> None: """Ensure the code is formatted as expected.""" session.install("ruff") - session.run("ruff", "check", "--output-format=github", "--config=pyproject.toml", ".") + session.run("ruff", "check", "--output-format=github", "--config=pyproject.toml", "src") # @nox.session # uncomment this line to only run on the current python interpreter -@nox.session(python=["3.9", "3.10", "3.11", "3.12"]) # missing versions can be installed with `pyenv install ...` +@nox.session( + python=["3.10", "3.11", "3.12", "3.13"] +) # missing versions can be installed with `pyenv install ...` # do not forget check / set the versions with `pyenv global`, or `pyenv local` in case of virtual environment def tests(session: nox.Session) -> None: """Run the tests for the specified Python versions.""" diff --git a/pyproject.toml b/pyproject.toml index 108a48d..cbd86f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,19 +10,19 @@ description = "Software package easing implementation of the guidelines of the 2 keywords = ["autotuning", 
"auto-tuning", "methodology", "scientific"] readme = "README.md" license = { file = "LICENSE" } -requires-python = ">=3.9" +requires-python = ">=3.10,<4" # NOTE when updating python version, also update classifiers and Nox test versions classifiers = [ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13" ] # https://pypi.org/classifiers/ dependencies = [ - "numpy >= 1.22.4", + "numpy (>=2.0.0)", "scipy >= 1.10.1", "scikit-learn >= 1.0.2", "matplotlib >= 3.7.1", @@ -30,7 +30,7 @@ dependencies = [ "progressbar2 >= 4.2.0", "jsonschema >= 4.17.3", "nonconformist >= 2.1.0", - "kernel_tuner >= 1.0.1", + "kernel_tuner >= 1.3.0", ] [project.optional-dependencies] @@ -64,7 +64,7 @@ minversion = "7.3" pythonpath = [ "src", ] # necessary to get coverage reports without installing with `-e` -addopts = "--cov --cov-config=.coveragerc --cov-report html --cov-report term-missing --cov-fail-under 80" +addopts = "--cov --cov-config=.coveragerc --cov-report html --cov-report term-missing --cov-fail-under 75" testpaths = ["tests/unit", "tests/integration", "tests/release"] [tool.black] @@ -80,6 +80,8 @@ select = [ "E", # pycodestyle "F", # pyflakes, "D", # pydocstyle, + "NPY201", # Numpy 2.0 compatibility ] +ignore = ["E501"] [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/src/autotuning_methodology/baseline.py b/src/autotuning_methodology/baseline.py index ef3e571..f59328b 100644 --- a/src/autotuning_methodology/baseline.py +++ b/src/autotuning_methodology/baseline.py @@ -39,7 +39,14 @@ def get_standardised_curves(self, range: np.ndarray, strategy_curves: list[np.nd standardised_curves.append(None) continue assert strategy_curve.shape == random_curve.shape, "strategy_curve shape must match random_curve shape" - standardised_curve = (strategy_curve - random_curve) / (absolute_optimum - random_curve) + division = absolute_optimum - random_curve + + # check for division by zero + if 0.0 in division: + raise ValueError(f"Division by zero encountered, first at index {np.argmax(division == 0.0)}.") + + # calculate the standardised curve + standardised_curve = (strategy_curve - random_curve) / division standardised_curves.append(standardised_curve) return tuple(standardised_curves) @@ -138,6 +145,10 @@ def _get_random_curve(self, fevals_range: np.ndarray, smoothing=True) -> np.ndar x = fevals_range y = draws + # if there are too few data points left to interpolate on, return draws + if len(x) < 2 or len(y) < 2: + return draws + # apply the monotonicity-preserving Piecewise Cubic Hermite Interpolating Polynomial smooth_fevals_range = np.linspace(fevals_range[0], fevals_range[-1], len(fevals_range)) smooth_draws = PchipInterpolator(x, y)(smooth_fevals_range) @@ -167,13 +178,15 @@ def get_curve(self, range: np.ndarray, x_type: str, dist=None, confidence_level= return super().get_curve(range, x_type, dist, confidence_level) def get_curve_over_fevals( # noqa: D102 - self, fevals_range: np.ndarray, dist=None, confidence_level=None + self, fevals_range: np.ndarray, dist=None, confidence_level=None, return_split=False ) -> np.ndarray: if self.simulate: return self._get_random_curve_means(fevals_range) return self._get_random_curve(fevals_range) - def get_curve_over_time(self, time_range: 
np.ndarray, dist=None, confidence_level=None) -> np.ndarray: # noqa: D102 + def get_curve_over_time( # noqa: D102 + self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False + ) -> np.ndarray: fevals_range = self.time_to_fevals(time_range) curve_over_time = self.get_curve_over_fevals(fevals_range, dist, confidence_level) smoothing_factor = 0.0 @@ -208,7 +221,7 @@ def get_split_times_at_feval( # noqa: D102 # for each key, obtain the time at a feval objective_time_keys = searchspace_stats.objective_time_keys - split_time_per_feval = np.full((len(objective_time_keys), index_at_feval.shape[0]), np.NaN) + split_time_per_feval = np.full((len(objective_time_keys), index_at_feval.shape[0]), np.nan) for key_index, key in enumerate(objective_time_keys): split_time_per_feval[key_index] = searchspace_stats.objective_times_array[key_index, index_at_feval] @@ -316,7 +329,7 @@ def get_curve(self, range: np.ndarray, x_type: str, dist=None, confidence_level= return super().get_curve(range, x_type, dist, confidence_level) def get_curve_over_fevals( # noqa: D102 - self, fevals_range: np.ndarray, dist=None, confidence_level=None + self, fevals_range: np.ndarray, dist=None, confidence_level=None, return_split=False ) -> np.ndarray: if self.use_index: return self.searchspace_stats.objective_performances_total_sorted_nan[ @@ -326,7 +339,9 @@ def get_curve_over_fevals( # noqa: D102 assert self.y_array.ndim == 1 return self.y_array[fevals_range] - def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None) -> np.ndarray: # noqa: D102 + def get_curve_over_time( # noqa: D102 + self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False + ) -> np.ndarray: predicted_y_values = self._ir.predict(time_range) if not self.use_index: return predicted_y_values @@ -405,7 +420,7 @@ def get_curve(self, range: np.ndarray, x_type: str, dist=None, confidence_level= return self.stochastic_curve_to_deterministic(range=range, curve=stochastic_curve) def get_curve_over_fevals( # noqa: D102 - self, fevals_range: np.ndarray, dist=None, confidence_level=None + self, fevals_range: np.ndarray, dist=None, confidence_level=None, return_split=False ) -> np.ndarray: if dist is None: dist = self.searchspace_stats.objective_performances_total_sorted @@ -416,7 +431,9 @@ def get_curve_over_fevals( # noqa: D102 ) return self.stochastic_curve_to_deterministic(range=fevals_range, curve=stochastic_curve) - def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None) -> np.ndarray: # noqa: D102 + def get_curve_over_time( # noqa: D102 + self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False + ) -> np.ndarray: if dist is None: dist = self.searchspace_stats.objective_performances_total_sorted if confidence_level is None: diff --git a/src/autotuning_methodology/caching.py b/src/autotuning_methodology/caching.py index 2daa221..43556ec 100755 --- a/src/autotuning_methodology/caching.py +++ b/src/autotuning_methodology/caching.py @@ -30,44 +30,41 @@ class ResultsDescription: def __init__( self, - folder_id: str, - kernel_name: str, + run_folder: Path, + application_name: str, device_name: str, - strategy_name: str, - strategy_display_name: str, + group_name: str, + group_display_name: str, stochastic: bool, objective_time_keys: list[str], objective_performance_keys: list[str], minimization: bool, - visualization_caches_path: Path, ) -> None: """Initialization method for the ResultsDescription object. 
Args: - folder_id: the unique ID of the folder to store in. - kernel_name: the name of the kernel used. + run_folder: a folder to store all files generated during experiments + application_name: the name of the application. device_name: the name of the device used. - strategy_name: the name of the optimization algorithm used, must not contain spaces or special characters. - strategy_display_name: the name of the optimization algorithm used in printing / visualization. - stochastic: whether the optimization algorithm is stochastic. + group_name: the name of the experimental group, usually search method used, must not contain spaces or special characters. + group_display_name: the name of the experimental group used in printing / visualization. + stochastic: whether the search method is stochastic. objective_time_keys: the objective time keys used. objective_performance_keys: the objective performance keys used. - minimization: whether the optimization algorithm performed minimization (attempted to find the minimum). - visualization_caches_path: path to visualization caches relative to the experiments file, creation allowed. + minimization: whether the search method performed minimization (attempted to find the minimum). """ # all attributes must be hashable for symetric difference checking self._version = "1.3.0" self.__stored = False - self.__folder_id = folder_id - self.kernel_name = kernel_name + self.application_name = application_name self.device_name = device_name - self.strategy_name = strategy_name - self.strategy_display_name = strategy_display_name + self.group_name = group_name + self.group_display_name = group_display_name self.stochastic = stochastic self.objective_time_keys = objective_time_keys self.objective_performance_keys = objective_performance_keys self.minimization = minimization - self.visualization_caches_path = visualization_caches_path + self.run_folder = run_folder self.numpy_arrays_keys = [ "fevals_results", "objective_time_results", @@ -85,7 +82,7 @@ def __get_as_dict(self) -> dict: a dictionary, similar to self.__dict__ but with some keys removed. 
""" dictionary = vars(self) - not_saved_keys = ["strategy_display_name", "visualization_caches_path"] + not_saved_keys = ["group_display_name", "visualization_caches_path"] for not_saved_key in not_saved_keys: if not_saved_key in dictionary.keys(): del dictionary[not_saved_key] @@ -124,21 +121,25 @@ def is_same_as(self, other: ResultsDescription) -> bool: # check if same value for each key for attribute_key, attribute_value in self.__get_as_dict().items(): - if attribute_key == "strategy_display_name" or attribute_key == "visualization_caches_path": + if ( + attribute_key == "group_display_name" + or attribute_key == "visualization_caches_path" + or attribute_key == "run_folder" + ): continue else: - assert ( - attribute_value == other.__get_as_dict()[attribute_key] - ), f"{attribute_key} has different values: {attribute_value} != {other.__get_as_dict()[attribute_key]}" + assert attribute_value == other.__get_as_dict()[attribute_key], ( + f"{attribute_key} has different values: {attribute_value} != {other.__get_as_dict()[attribute_key]}" + ) return True def __get_cache_filename(self) -> str: - return f"{self.device_name}_{self.strategy_name}.npz" + return f"{self.device_name}_{self.application_name}.npz" def __get_cache_filepath(self) -> Path: """Get the filepath to this experiment.""" - return self.visualization_caches_path / self.__folder_id / self.kernel_name + return self.run_folder def __get_cache_full_filepath(self) -> Path: """Get the filepath for this file, including the filename and extension.""" @@ -147,7 +148,7 @@ def __get_cache_full_filepath(self) -> Path: def __check_for_file(self) -> bool: """Check whether the file exists.""" full_filepath = self.__get_cache_full_filepath() - self.__stored = full_filepath.exists() and np.DataSource().exists(full_filepath) + self.__stored = full_filepath.exists() and np.lib.npyio.DataSource().exists(full_filepath) return self.__stored def __write_to_file(self, arrays: dict): @@ -172,14 +173,22 @@ def __read_from_file(self) -> list[np.ndarray]: raise ValueError(f"File {full_filepath} does not exist") # load the data and verify the resultsdescription object is the same - data = np.load(full_filepath, allow_pickle=True) + try: + data = np.load(full_filepath, allow_pickle=True) + except Exception as e: + print(f"/!\\ Error loading file: {full_filepath} /!\\") + raise e data_results_description = data["resultsdescription"].item() assert self.is_same_as(data_results_description), "The results description of the results is not the same" # get the numpy arrays numpy_arrays = list() for numpy_array_key in self.numpy_arrays_keys: - numpy_arrays.append(data[numpy_array_key]) + try: + numpy_arrays.append(data[numpy_array_key]) + except Exception as e: + print(f"/!\\ Error adding numpy array {numpy_array_key} from file: {full_filepath} /!\\") + raise e return numpy_arrays def get_results(self) -> Results: @@ -190,3 +199,11 @@ def get_results(self) -> Results: def has_results(self) -> bool: """Checks whether there are results or the file exists.""" return self.__stored or self.__check_for_file() + + def delete(self) -> bool: + """Deletes the file if it exists, returns true if succesfully deleted.""" + fp = self.__get_cache_full_filepath() + if fp.exists() and fp.is_file(): + fp.unlink() + return True + return False diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index a839b4f..d05d0ab 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -33,35 +33,36 @@ def 
get_indices_in_distribution( A NumPy array of type float of the same shape as `draws`, with NaN where not found in `dist`. """ assert dist.ndim == 1, f"distribution can not have more than one dimension, has {dist.ndim}" + if draws.dtype != dist.dtype: + warn( + f"Draws dtype {draws.dtype} does not match distribution dtype {dist.dtype}, converting dist to draws dtype", + ) + dist = dist.astype(draws.dtype) # check whether the distribution is correctly ordered if not skip_dist_check: strictly_ascending_sort = dist[:-1] <= dist[1:] - assert np.all( - strictly_ascending_sort - ), f"""Distribution is not sorted ascendingly, + assert np.all(strictly_ascending_sort), f"""Distribution is not sorted ascendingly, {np.count_nonzero(~strictly_ascending_sort)} violations in {len(dist)} values: {dist}""" # check whether each value of draws (excluding NaN) is in dist if not skip_draws_check: - assert np.all( - np.in1d(draws[~np.isnan(draws)], dist) - ), f""" + assert np.all(np.isin(draws[~np.isnan(draws)], dist)), f""" Each value in draws should be in dist, - but {np.size(draws[~np.isnan(draws)][~np.in1d(draws[~np.isnan(draws)], dist)])} values - of the {np.size(draws)} are missing: {draws[~np.isnan(draws)][~np.in1d(draws[~np.isnan(draws)], dist)]}""" + but {np.size(draws[~np.isnan(draws)][~np.isin(draws[~np.isnan(draws)], dist)])} values + of the {np.size(draws)} are missing: {draws[~np.isnan(draws)][~np.isin(draws[~np.isnan(draws)], dist)]}""" # check the sorter if sorter is not None: assert sorter.shape == dist.shape, "The shape of the sorter must be the same as the distribution" # find the index of each draw in the distribution - indices_found = np.searchsorted(dist, draws, side="left", sorter=sorter).astype(float) + indices_found = np.searchsorted(dist, draws, side="left", sorter=sorter).astype(draws.dtype) assert indices_found.shape == draws.shape, "The shape of the indices must match the shape of the draws" # if indices found are outside the array, make them NaN - indices_found[indices_found < 0] = np.NaN - indices_found[indices_found >= len(dist)] = np.NaN + indices_found[indices_found < 0] = np.nan + indices_found[indices_found >= len(dist)] = np.nan return indices_found @@ -87,7 +88,7 @@ def get_indices_in_array(values: np.ndarray, array: np.ndarray) -> np.ndarray: # replace the indices found with the original, unsorted indices of array nan_mask = ~np.isnan(indices_found) - indices_found_unsorted = np.full_like(indices_found, fill_value=np.NaN) + indices_found_unsorted = np.full_like(indices_found, fill_value=np.nan) indices_found_unsorted[nan_mask] = array_sorter[indices_found[nan_mask].astype(int)] return indices_found_unsorted @@ -116,7 +117,14 @@ class CurveBasis(ABC): """Abstract object providing minimals for visualization and analysis. Implemented by ``Curve`` and ``Baseline``.""" @abstractmethod - def get_curve(self, range: np.ndarray, x_type: str, dist: np.ndarray = None, confidence_level: float = None): + def get_curve( + self, + range: np.ndarray, + x_type: str, + dist: np.ndarray = None, + confidence_level: float = None, + return_split: bool = True, + ): """Get the curve over the specified range of time or function evaluations. Args: @@ -124,6 +132,7 @@ def get_curve(self, range: np.ndarray, x_type: str, dist: np.ndarray = None, con x_type: the type of the x-axis range (either time or function evaluations). dist: the distribution, used for looking up indices. Ignored in ``Baseline``. Defaults to None. confidence_level: confidence level for the confidence interval. 
Ignored in ``Baseline``. Defaults to None. + return_split: whether to return the arrays split at the real / fictional point. Defaults to True. Raises: ValueError: on invalid ``x_type`` argument. @@ -133,19 +142,26 @@ def get_curve(self, range: np.ndarray, x_type: str, dist: np.ndarray = None, con See ``get_curve_over_fevals()`` and ``get_curve_over_time()`` for more precise return values. """ if x_type == "fevals": - return self.get_curve_over_fevals(range, dist, confidence_level) + return self.get_curve_over_fevals(range, dist, confidence_level, return_split=return_split) elif x_type == "time": - return self.get_curve_over_time(range, dist, confidence_level) + return self.get_curve_over_time(range, dist, confidence_level, return_split=return_split) raise ValueError(f"x_type must be 'fevals' or 'time', is {x_type}") @abstractmethod - def get_curve_over_fevals(self, fevals_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None): + def get_curve_over_fevals( + self, + fevals_range: np.ndarray, + dist: np.ndarray = None, + confidence_level: float = None, + return_split: bool = True, + ): """Get the curve over function evaluations. Args: fevals_range: the range of function evaluations. dist: the distribution, used for looking up indices. Ignored in ``Baseline``. Defaults to None. confidence_level: confidence level for the confidence interval. Ignored in ``Baseline``. Defaults to None. + return_split: whether to return the arrays split at the real / fictional point. Defaults to True. Returns: Two possible returns, for ``Baseline`` and ``Curve`` respectively: @@ -155,13 +171,16 @@ def get_curve_over_fevals(self, fevals_range: np.ndarray, dist: np.ndarray = Non raise NotImplementedError @abstractmethod - def get_curve_over_time(self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None): + def get_curve_over_time( + self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True + ): """Get the curve over time. Args: time_range: the range of time. dist: the distribution, used for looking up indices. Ignored in ``Baseline``. Defaults to None. confidence_level: confidence level for the confidence interval. Ignored in ``Baseline``. Defaults to None. + return_split: whether to return the arrays split at the real / fictional point. Defaults to True. Returns: Two possible returns, for ``Baseline`` and ``Curve`` respectively: @@ -230,10 +249,13 @@ def __init__(self, results_description: ResultsDescription) -> None: results_description: the ResultsDescription object containing the data for the Curve. 
""" # inputs - self.name = results_description.strategy_name - self.display_name = results_description.strategy_display_name + self.name = results_description.group_name + try: + self.display_name = results_description.group_display_name + except AttributeError: + self.display_name = results_description.group_name.replace("_", " ").capitalize() self.device_name = results_description.device_name - self.kernel_name = results_description.kernel_name + self.application_name = results_description.application_name self.stochastic = results_description.stochastic self.minimization = results_description.minimization @@ -265,7 +287,7 @@ def check_attributes(self) -> None: assert isinstance(self.name, str) assert isinstance(self.display_name, str) assert isinstance(self.device_name, str) - assert isinstance(self.kernel_name, str) + assert isinstance(self.application_name, str) assert isinstance(self.stochastic, bool) assert isinstance(self._x_fevals, np.ndarray) assert isinstance(self._x_time, np.ndarray) @@ -504,9 +526,9 @@ def _check_curve_real_fictional_consistency( if x_axis_range_fictional.ndim > 0: # if there's a fictional part, ensure that all the expected data is in the combined real and fictional parts x_axis_range_combined = np.concatenate([x_axis_range_real, x_axis_range_fictional]) - assert ( - x_axis_range.shape == x_axis_range_combined.shape - ), f"The shapes of {x_axis_range.shape=} and {x_axis_range_combined.shape=} do not match" + assert x_axis_range.shape == x_axis_range_combined.shape, ( + f"The shapes of {x_axis_range.shape=} and {x_axis_range_combined.shape=} do not match" + ) assert np.array_equal( x_axis_range, np.concatenate([x_axis_range_real, x_axis_range_fictional]), equal_nan=True ) @@ -519,24 +541,29 @@ def _check_curve_real_fictional_consistency( ) else: # if there is no fictional part, ensure that all the expected data is in the real part - assert ( - x_axis_range.shape == x_axis_range_real.shape - ), f"The shapes of {x_axis_range.shape=} and {x_axis_range_real.shape=} do not match" - assert np.array_equal( - x_axis_range, x_axis_range_real, equal_nan=True - ), f"Unequal arrays: {x_axis_range}, {x_axis_range_real}" + assert x_axis_range.shape == x_axis_range_real.shape, ( + f"The shapes of {x_axis_range.shape=} and {x_axis_range_real.shape=} do not match" + ) + assert np.array_equal(x_axis_range, x_axis_range_real, equal_nan=True), ( + f"Unequal arrays: {x_axis_range}, {x_axis_range_real}" + ) assert np.array_equal(curve, curve_real, equal_nan=True), f"Unequal arrays: {curve}, {curve_real}" - assert np.array_equal( - curve_lower_err, curve_lower_err_real, equal_nan=True - ), f"Unequal arrays: {curve_lower_err}, {curve_lower_err_real}" - assert np.array_equal( - curve_upper_err, curve_upper_err_real, equal_nan=True - ), f"Unequal arrays: {curve_upper_err}, {curve_upper_err_real}" + assert np.array_equal(curve_lower_err, curve_lower_err_real, equal_nan=True), ( + f"Unequal arrays: {curve_lower_err}, {curve_lower_err_real}" + ) + assert np.array_equal(curve_upper_err, curve_upper_err_real, equal_nan=True), ( + f"Unequal arrays: {curve_upper_err}, {curve_upper_err_real}" + ) def get_curve( # noqa: D102 - self, range: np.ndarray, x_type: str, dist: np.ndarray = None, confidence_level: float = None + self, + range: np.ndarray, + x_type: str, + dist: np.ndarray = None, + confidence_level: float = None, + return_split: bool = True, ): - return super().get_curve(range, x_type, dist, confidence_level) + return super().get_curve(range, x_type, dist, confidence_level, 
return_split=return_split) def _get_matching_feval_indices_in_range(self, fevals_range: np.ndarray) -> np.ndarray: """Get a mask of where the fevals range matches with the data.""" @@ -551,6 +578,8 @@ def _get_matching_feval_indices_in_range(self, fevals_range: np.ndarray) -> np.n def _get_curve_over_fevals_values_in_range(self, fevals_range: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """Get the valid fevals and values that are in the given range.""" + if len(fevals_range) == 0: + raise ValueError("The fevals range must have at least one value") target_index: int = fevals_range[-1] - 1 # filter to only get data in the fevals range @@ -592,9 +621,9 @@ def _get_curve_over_fevals_values_in_range(self, fevals_range: np.ndarray) -> tu masked_fevals[:, greatest_common_non_NaN_index + 1 :] = np.nan # check that the filtered fevals are consistent - assert np.allclose( - masked_fevals, masked_fevals[0], equal_nan=True - ), "Every repeat must have the same array of function evaluations" + assert np.allclose(masked_fevals, masked_fevals[0], equal_nan=True), ( + "Every repeat must have the same array of function evaluations" + ) # as every repeat has the same array of fevals, check whether they match the range fevals = masked_fevals[ @@ -610,7 +639,11 @@ def _get_curve_over_fevals_values_in_range(self, fevals_range: np.ndarray) -> tu return fevals, masked_values def get_curve_over_fevals( # noqa: D102 - self, fevals_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None + self, + fevals_range: np.ndarray, + dist: np.ndarray = None, + confidence_level: float = None, + return_split: bool = True, ): fevals, masked_values = self._get_curve_over_fevals_values_in_range(fevals_range) @@ -631,6 +664,8 @@ def get_curve_over_fevals( # noqa: D102 # get the confidence interval indices_lower_err, indices_upper_err = self.get_confidence_interval(indices, confidence_level) indices_lower_err, indices_upper_err = indices_lower_err.astype(int), indices_upper_err.astype(int) + indices_lower_err = np.clip(indices_lower_err, a_min=0, a_max=dist.shape[0] - 1) + indices_upper_err = np.clip(indices_upper_err, a_min=0, a_max=dist.shape[0] - 1) # obtain the curves by looking up the associated values curve = dist[indices_mean] curve_lower_err = dist[indices_lower_err] @@ -682,10 +717,12 @@ def get_curve_over_fevals( # noqa: D102 assert np.all(~np.isnan(curve_lower_err)), f"NaNs at {np.nonzero(np.isnan(curve_lower_err))[0]}" assert np.all(~np.isnan(curve_upper_err)), f"NaNs at {np.nonzero(np.isnan(curve_upper_err))[0]}" - # return the curves split in real and fictional - return self._get_curve_split_real_fictional_parts( - real_stopping_point_index + 1, fevals_range, curve, curve_lower_err, curve_upper_err - ) + if return_split: + # return the curves split in real and fictional + return self._get_curve_split_real_fictional_parts( + real_stopping_point_index + 1, fevals_range, curve, curve_lower_err, curve_upper_err + ) + return real_stopping_point_index + 1, fevals_range, curve, curve_lower_err, curve_upper_err def _get_curve_over_time_values_in_range( self, time_range: np.ndarray, return_1d=True @@ -703,21 +740,39 @@ def _get_curve_over_time_values_in_range( times: np.ndarray = _times[nan_mask].reshape(-1, num_repeats) values: np.ndarray = _values[nan_mask].reshape(-1, num_repeats) - # get the highest time of each run of the algorithm, take the median + # find the stopping point times_no_nan = times times_no_nan[np.isnan(values)] = np.nan # to count only valid configurations towards highest time 
highest_time_per_repeat = np.nanmax(times_no_nan, axis=0) assert highest_time_per_repeat.shape[0] == num_repeats - real_stopping_point_time: float = np.nanmedian(highest_time_per_repeat) + highest_time_per_repeat = np.sort(highest_time_per_repeat) + # get the highest time of each run of the algorithm, take the average, stopping point is next highest time + try: + real_stopping_point_time: float = highest_time_per_repeat[ + np.nanmedian(highest_time_per_repeat) < highest_time_per_repeat + ][0] + except IndexError: + # there is no next highest time, so we return the last time in the range + real_stopping_point_time = time_range[-1] # filter to get the time range with a margin on both ends for the isotonic regression - time_range_margin = 0.1 - range_mask_margin = (time_range[0] * (1 - time_range_margin) <= times) & ( - times <= time_range[-1] * (1 + time_range_margin) - ) - assert np.all( - np.count_nonzero(range_mask_margin, axis=0) > 1 - ), "Not enough overlap in time range and time values" + time_range_margin_modifier = 0.25 * ( + num_repeats / times.size + ) # give more margin when there are few values relative to the number of repeats + time_range_margin = 0.1 + time_range_margin_modifier + time_range_start = time_range[0] * (1 - time_range_margin) + time_range_end = time_range[-1] * (1 + time_range_margin) + range_mask_margin = (time_range_start <= times) & (times <= time_range_end) + + # make sure there is enough overlap in the time ranges + if not np.all(np.count_nonzero(range_mask_margin, axis=0) > 1): + raise ValueError( + f"Not enough overlap in time range and time values: should be {time_range_start=} <= {times} <= {time_range_end=} ({time_range_margin=}, {num_repeats=}, {times.size=})", + np.count_nonzero(range_mask_margin, axis=0), + self.name, + self.application_name, + self.device_name, + ) times = np.where(range_mask_margin, times, np.nan) values = np.where(range_mask_margin, values, np.nan) num_repeats = values.shape[1] @@ -749,7 +804,12 @@ def _get_curve_over_time_values_in_range( return times, values, real_stopping_point_time, num_fevals, num_repeats def get_curve_over_time( # noqa: D102 - self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, use_bagging=True + self, + time_range: np.ndarray, + dist: np.ndarray = None, + confidence_level: float = None, + return_split: bool = True, + use_bagging=True, ): # check the distribution if dist is None: @@ -809,9 +869,9 @@ def get_curve_over_time( # noqa: D102 indices_curve = prediction_interval[:, 2] curve = dist[indices_curve] curve_lower_err, curve_upper_err = dist[prediction_interval[:, 0]], dist[prediction_interval[:, 1]] - assert ( - curve_lower_err.shape == curve_upper_err.shape == curve.shape - ), f"{curve_lower_err.shape=} != {curve_upper_err.shape=} != {curve.shape=}" + assert curve_lower_err.shape == curve_upper_err.shape == curve.shape, ( + f"{curve_lower_err.shape=} != {curve_upper_err.shape=} != {curve.shape=}" + ) # print(f"{self.display_name}: {np.median(curve - curve_lower_err)}, {np.median(curve_upper_err - curve)}") # for t, e, i in zip(time_range, curve_lower_err, prediction_interval[:, 0]): @@ -833,9 +893,11 @@ def get_curve_over_time( # noqa: D102 curve_lower_err[real_stopping_point_index:] = curve_lower_err[real_stopping_point_index] curve_upper_err[real_stopping_point_index:] = curve_upper_err[real_stopping_point_index] - return self._get_curve_split_real_fictional_parts( - real_stopping_point_index, time_range, curve, curve_lower_err, curve_upper_err - ) + if return_split: 
+ return self._get_curve_split_real_fictional_parts( + real_stopping_point_index, time_range, curve, curve_lower_err, curve_upper_err + ) + return real_stopping_point_index, time_range, curve, curve_lower_err, curve_upper_err def get_split_times( # noqa: D102 self, range: np.ndarray, x_type: str, searchspace_stats: SearchspaceStatistics @@ -850,7 +912,7 @@ def get_split_times_at_feval( # noqa: D102 objective_time_keys = searchspace_stats.objective_time_keys num_keys = len(objective_time_keys) num_repeats = matching_indices_mask.shape[1] - masked_time_per_key = np.full((num_keys, matching_indices_mask.shape[0], num_repeats), np.NaN) + masked_time_per_key = np.full((num_keys, matching_indices_mask.shape[0], num_repeats), np.nan) # for each key, apply the boolean mask for key_index in range(num_keys): @@ -859,13 +921,13 @@ def get_split_times_at_feval( # noqa: D102 ] # remove where every repeat has NaN - time_in_range_per_key = np.full((num_keys, fevals_range.shape[0], num_repeats), np.NaN) + time_in_range_per_key = np.full((num_keys, fevals_range.shape[0], num_repeats), np.nan) for key_index in range(num_keys): all_nan_mask = ~np.all(np.isnan(masked_time_per_key[key_index]), axis=1) time_in_range_per_key[key_index] = masked_time_per_key[key_index][all_nan_mask] # get the median time per key at each repeat - split_time_per_feval = np.full((num_keys, fevals_range.shape[0]), np.NaN) + split_time_per_feval = np.full((num_keys, fevals_range.shape[0]), np.nan) for key_index in range(num_keys): split_time_per_feval[key_index] = np.mean(time_in_range_per_key[key_index], axis=1) assert split_time_per_feval.shape == ( @@ -884,7 +946,7 @@ def get_split_times_at_time( # noqa: D102 # for each key, interpolate the split times to the time range num_keys = len(searchspace_stats.objective_time_keys) - split_time_per_timestamp = np.full((num_keys, time_range.shape[0]), np.NaN) + split_time_per_timestamp = np.full((num_keys, time_range.shape[0]), np.nan) for key_index in range(num_keys): # remove NaN times_split_key = times_split[key_index] @@ -951,7 +1013,7 @@ def _get_prediction_interval_separated( num_repeats = values.shape[1] # predict an isotonic curve for the time range for each run - predictions = np.full((num_repeats, time_range.shape[0]), fill_value=np.NaN) + predictions = np.full((num_repeats, time_range.shape[0]), fill_value=np.nan) for run in range(num_repeats): # get the data of this run _x = times[:, run] @@ -980,9 +1042,9 @@ def _get_prediction_interval_separated( predictions = predictions.transpose() # set to (time_range, num_repeats) y_lower_err, y_upper_err = self.get_confidence_interval(predictions, confidence_level=confidence_level) mean_prediction = np.median(predictions, axis=1) - assert ( - y_lower_err.shape == y_upper_err.shape == mean_prediction.shape == time_range.shape - ), f"{y_lower_err.shape=} != {y_upper_err.shape=} != {mean_prediction.shape=} != {time_range.shape=}" + assert y_lower_err.shape == y_upper_err.shape == mean_prediction.shape == time_range.shape, ( + f"{y_lower_err.shape=} != {y_upper_err.shape=} != {mean_prediction.shape=} != {time_range.shape=}" + ) # combine the data and return as a prediction interval prediction_interval = np.concatenate([y_lower_err, y_upper_err, mean_prediction]).reshape((3, -1)).transpose() diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 382347b..0c64184 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -1,21 +1,25 @@ """Main 
experiments code.""" -from __future__ import annotations # for correct nested type hints e.g. list[str], tuple[dict, str] +from __future__ import ( + annotations, # for correct nested type hints e.g. list[str], tuple[dict, str] +) import json -import sys from argparse import ArgumentParser -from importlib import import_module -from importlib.resources import files from math import ceil -from os import getcwd +from os import getcwd, makedirs from pathlib import Path +from random import randint -from jsonschema import validate +from jsonschema import ValidationError from autotuning_methodology.caching import ResultsDescription from autotuning_methodology.runner import collect_results from autotuning_methodology.searchspace_statistics import SearchspaceStatistics +from autotuning_methodology.validators import validate_experimentsfile +from autotuning_methodology.formats_interface import load_T4_format + +PACKAGE_ROOT = Path(__file__).parent.parent.parent def get_args_from_cli(args=None) -> str: @@ -30,26 +34,31 @@ def get_args_from_cli(args=None) -> str: Returns: The filepath to the experiments file. """ - CLI = ArgumentParser() - CLI.add_argument("experiment", type=str, help="The experiment.json to execute, see experiments/template.json") - args = CLI.parse_args(args) + cli = ArgumentParser() + cli.add_argument( + "experiment", type=str, help="The experiment setup json file to execute, see experiments/template.json" + ) + args = cli.parse_args(args) filepath: str = args.experiment if filepath is None or filepath == "": - raise ValueError( - "Invalid '-experiment' option. Run 'visualize_experiments.py -h' to read more about the options." - ) + raise ValueError("Invalid '--experiment' option. Run 'visualize_experiments.py -h' to read more.") return filepath -def get_experiment_schema_filepath(): - """Obtains and checks the filepath to the JSON schema. - - Returns: - the filepath to the schema in Traversable format. 
- """ - schemafile = files("autotuning_methodology").joinpath("schema.json") - assert schemafile.is_file(), f"Path to schema.json does not exist, attempted path: {schemafile}" - return schemafile +def make_and_check_path(filename: str, parent=None, extension=None) -> Path: + """Checks if the file exists, if not, tries to add the extension.""" + filename_path = Path(filename) + if filename_path.is_absolute() is False and parent is not None: + filename_path = PACKAGE_ROOT / Path(parent).joinpath(filename).resolve() + if filename_path.exists(): + return filename_path + # try and add extension + if extension is None: + raise FileNotFoundError(f"{filename_path.resolve()} does not exist.") + filename_path = Path(str(filename_path) + extension) + if filename_path.exists(): + return filename_path + raise FileNotFoundError(f"{filename_path.resolve()} does not exist.") def get_experiment(filename: str) -> dict: @@ -73,153 +82,506 @@ def get_experiment(filename: str) -> dict: # path = Path(filename) assert path.exists(), f"Path to experiment file does not exist, attempted path: {path}, CWD: {getcwd()}" - # get the path to the schema - schemafile = get_experiment_schema_filepath() - # open the experiment file and validate using the schema file - with open(path, "r", encoding="utf-8") as file, open(schemafile, "r", encoding="utf-8") as schemafile: - schema = json.load(schemafile) + with path.open("r", encoding="utf-8") as file: experiment: dict = json.load(file) - validate(instance=experiment, schema=schema) - return experiment + try: + validate_experimentsfile(experiment) + return experiment + except ValidationError as e: + print(e) + raise ValidationError("Experiment file does not comply with schema") -def get_strategies(experiment: dict) -> dict: - """Gets the strategies from an experiments file by augmenting it with the defaults. +def get_experimental_groups(experiment: dict) -> list[dict]: + """Prepares all the experimental groups as all combinations of application and gpus (from experimental_groups_defaults) and big experimental groups from setup file (experimental_groups, usually search methods). Check additional settings for each experimental group. Prepares the directory structure for the whole experiment. Args: experiment: the experiment dictionary object. Returns: - The strategies in the experiment dictionary object, augmented where necessery. + The experimental groups in the experiment dictionary object. 
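+
+    Example (a minimal sketch of typical usage; the experiments file path is illustrative and must follow the experiments setup schema):
+
+        experiment = get_experiment("experiment_files/my_experiment.json")
+        groups = get_experimental_groups(experiment)
+        print([group["full_name"] for group in groups])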
+    """
+    experimental_groups_defaults = experiment["experimental_groups_defaults"]
+    search_strategies = experiment["search_strategies"]
+
+    # set up the directory structure
+    experiment["parent_folder_absolute_path"] = Path(experiment["parent_folder"]).resolve()
+    # if the folders "run" and "setup" do not exist, create them
+    makedirs(experiment["parent_folder_absolute_path"].joinpath("run"), exist_ok=True)
+    makedirs(experiment["parent_folder_absolute_path"].joinpath("setup"), exist_ok=True)
+
+    # create folders for each experimental group from file
+    for strategy in search_strategies:
+        makedirs(experiment["parent_folder_absolute_path"].joinpath("run").joinpath(strategy["name"]), exist_ok=True)
+
+    # generate all experimental groups
+    # with applications and gpus provided in experimental_groups_defaults
+    # and search strategies provided in search_strategies
+    all_experimental_groups = generate_all_experimental_groups(
+        search_strategies, experimental_groups_defaults, experiment["parent_folder_absolute_path"]
+    )
+
+    # additional check beyond validation:
+    # every experimental group must have 'autotuner' set;
+    # set autotuner_path to the default installation if not set by the user
+    for group in all_experimental_groups:
+        if group.get("autotuner") is None:
+            raise KeyError(
+                "Property 'autotuner' must be set for all groups, either in experimental_groups_defaults "
+                f"or in experimental_groups. It is not set for {group['full_name']}."
+            )
+        if group["autotuner"] == "KTT":
+            if group["samples"] != 1:
+                raise NotImplementedError(
+                    f"KTT currently supports only one sample per run and output. Please set samples=1 for {group['full_name']}."
+                )
+            if group.get("autotuner_path") is None:
+                raise NotImplementedError(
+                    f"Default autotuner_path is not yet supported for KTT, please set autotuner_path for {group['full_name']} "
+                    "to a directory with KttTuningLauncher and pyktt.so, e.g. /home/user/KTT/Build/x86_64_Release."
+                )
+            elif Path(group["autotuner_path"]).exists() is False:
+                raise FileNotFoundError(
+                    f"Directory {group['autotuner_path']} does not exist. Try setting the absolute path."
+                )
+            elif Path(group["autotuner_path"]).joinpath("KttTuningLauncher").exists() is False:
+                raise FileNotFoundError(
+                    f"Directory {group['autotuner_path']} does not contain KttTuningLauncher. Have you used --tuning-loader when premaking KTT?"
+                )
+            elif Path(group["autotuner_path"]).joinpath("pyktt.so").exists() is False:
+                raise FileNotFoundError(
+                    f"Directory {group['autotuner_path']} does not contain pyktt.so. Have you used --python when premaking KTT?"
+                )
+    # TODO make and set default autotuner path
+
+    return all_experimental_groups
+
+
+def generate_all_experimental_groups(
+    search_strategies: list[dict], experimental_groups_defaults: dict, parent_folder_path: Path
+) -> list[dict]:
+    """Generates all experimental groups as combinations of the applications, GPUs and search strategies in the experiments setup file.
+
+    Args:
+        search_strategies: list of dictionaries with settings for the search strategies, from the experiments setup file section search_strategies.
+        experimental_groups_defaults: a dictionary with default settings for experimental groups, from the experiments setup file section experimental_groups_defaults.
+        parent_folder_path: path to the experiment parent folder that stores all files generated in the experiment.
+
+    Returns:
+        A list of dictionaries, one for each experimental group.
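+
+    Example (an illustrative sketch; ``experiment`` is assumed to be a dict loaded by ``get_experiment``):
+
+        groups = generate_all_experimental_groups(
+            search_strategies=experiment["search_strategies"],
+            experimental_groups_defaults=experiment["experimental_groups_defaults"],
+            parent_folder_path=Path(experiment["parent_folder"]).resolve(),
+        )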
+ """ + experimental_groups = [] + + for gpu in experimental_groups_defaults["gpus"]: + for application in experimental_groups_defaults["applications"]: + for strategy in search_strategies: + group = strategy.copy() + + for default in experimental_groups_defaults: + if default not in group and default not in [ + "applications", + "gpus", + "pattern_for_full_search_space_filenames", + ]: + group[default] = experimental_groups_defaults[default] + + group["full_name"] = "_".join([gpu, application["name"], group["name"]]) + + group["gpu"] = gpu + group["application_name"] = application["name"] + + group["application_folder"] = Path(application["folder"]) + group["application_input_file"] = make_and_check_path( + application["input_file"], application["folder"], None + ) + group["input_file"] = parent_folder_path.joinpath("setup").joinpath( + "_".join([group["full_name"], "input.json"]) + ) + group["parent_folder_path"] = parent_folder_path + + if experimental_groups_defaults.get("pattern_for_full_search_space_filenames") is None: + group["full_search_space_file"] = get_full_search_space_filename_from_input_file( + group["application_input_file"] + ) + else: + group["full_search_space_file"] = get_full_search_space_filename_from_pattern( + experimental_groups_defaults["pattern_for_full_search_space_filenames"], + gpu, + application["name"], + ) + + # get the objective performance keys + if "objective_performance_keys" in application: + group["objective_performance_keys"] = application["objective_performance_keys"] + else: + # load the full search space file and derive the objective performance keys + print( + f"Loading full search space file {group['full_search_space_file']} to infer the objective performance keys. Consider setting them explicititely in the experiments file." + ) + data = load_T4_format(group["full_search_space_file"], validate=True) + objectives = data["results"][0]["objectives"] + assert len(objectives) == 1, "Only one objective is supported for now" + group["objective_performance_keys"] = objectives + objective = group["objective_performance_keys"][0] + + # derive the optimization direction + if "minimization" in application: + group["minimization"] = application["minimization"] + elif "time" in objective.lower(): + group["minimization"] = True + elif any(k in objective.lower() for k in ["score", "gflop/s", "gflops", "gb/s"]): + group["minimization"] = False + else: + raise NotImplementedError( + f"Optimization direction can not be automatically inferred from '{objective=}' ({gpu=}, {application=}, {strategy=}). Please set 'minimization' for this application in the experiments file." + ) + + if group["autotuner"] == "KTT": + raise NotImplementedError( + "KTT is working on supporting the shared interface. The old conversions have been deprecated. An older build can be used to use these functions." + ) + + group["output_file"]: Path = ( + parent_folder_path.joinpath("run") + .joinpath(group["name"]) + .joinpath(group["full_name"] + ".json") + .resolve() + ) + + generate_input_file(group) + experimental_groups.append(group) + + return experimental_groups + + +def get_full_search_space_filename_from_input_file(input_filename: Path) -> Path: + """Returns a path to full search space file that is provided in the input json file in KernelSpecification.SimulationInput. + + Args: + input_filename: path to input json file. + + Raises: + KeyError: if the path is not provided, but is expected. + + Returns: + A path to full search space file that was written in the input json file. 
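+
+    Example (a sketch; the input file path is illustrative and its KernelSpecification.SimulationInput must be set):
+
+        path = get_full_search_space_filename_from_input_file(Path("benchmark_hub/kernels/dedispersion_milo.json"))
+        # the returned path deliberately has no '.json' suffix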
+ """ + with open(input_filename, "r", encoding="utf-8") as input_file: + input_json = json.load(input_file) + if input_json["KernelSpecification"].get("SimulationInput") is None: + raise KeyError( + "SimulationInput, i.e. full search space file is expected and not defined in", + input_filename, + ". Please set the path to that file in KernelSpecification.SimulationInput in input json file or set pattern_for_full_search_space_filename in experiments setup json file.", + ) + full_search_space_filename = make_and_check_path( + input_json["KernelSpecification"]["SimulationInput"], str(input_filename.parent), ".json" + ) + # need to return filename WITHOUT .json, KTT (and probably also others) needs that in SimulationInput in input json as other autotuner can take other formats + return full_search_space_filename.parent.joinpath(full_search_space_filename.stem) + + +def get_full_search_space_filename_from_pattern(pattern: dict, gpu: str, application_name: str) -> Path: + """Returns a path to full search space file that is generated from the pattern provided in experiments setup file. + + Args: + pattern: pattern regex string + gpu: name of the gpu, needs to be plugged into the pattern + application_name: name of the application, needs to be plugged into the pattern + + Raises: + NotImplementedError: if the regex expects other variables than just application name and gpu. + + Returns: + A path to full search file generated from the pattern. """ - strategy_defaults = experiment["strategy_defaults"] - strategies = experiment["strategies"] - # # get a baseline index if it exists - # baseline_index = list( - # strategy_index for strategy_index, strategy in enumerate(strategies) if "is_baseline" in strategy - # ) - # if len(baseline_index) != 1: - # raise ValueError(f"There must be exactly one baseline, found {len(baseline_index)} baselines") - # if strategies[baseline_index[0]]["is_baseline"] is not True: - # raise ValueError(f"is_baseline must be true, yet is set to {strategies[0]['is_baseline']}!") - # # if the baseline index is not 0, put the baseline strategy first - # if baseline_index[0] != 0: - # raise ValueError("The baseline strategy must be the first strategy in the experiments file!") - # # strategies.insert(0, strategies.pop(baseline_index[0])) - - # augment the strategies with the defaults - for strategy in strategies: - for default in strategy_defaults: - if default not in strategy: - strategy[default] = strategy_defaults[default] - return strategies - - -def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, dict, dict]: + filename = pattern["regex"].replace("${applications}", application_name).replace("${gpus}", gpu) + if "${" in filename: + raise NotImplementedError( + f"Variables other than applications and gpus are not yet supported for pattern matching. Unresolved: {filename}." + ) + full_search_space_filename = make_and_check_path(filename) + return full_search_space_filename + + +def calculate_budget(group: dict, statistics_settings: dict, searchspace_stats: SearchspaceStatistics) -> dict: + """Calculates the budget for the experimental group, given cutoff point provided in experiments setup file. + + Args: + group: a dictionary with settings for experimental group + statistics_settings: a dictionary with settings related to statistics + searchspace_stats: a SearchspaceStatistics instance with cutoff points determined from related full search space files + + Returns: + A modified group dictionary. 
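+
+    Example (illustrative; assumes ``searchspace_stats`` was initialized for this group's full search space file):
+
+        group = calculate_budget(group, experiment["statistics_settings"], searchspace_stats)
+        print(group["budget"])  # either {"time_limit": ...} or {"max_fevals": ...}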
+ """ + # get cutoff points + _, cutoff_point_fevals, cutoff_point_start_time, cutoff_point_time = ( + searchspace_stats.cutoff_point_fevals_time_start_end( + statistics_settings["cutoff_percentile_start"], statistics_settings["cutoff_percentile"] + ) + ) + + # +10% margin, to make sure cutoff_point is reached by compensating for potential non-valid evaluations # noqa: E501 + cutoff_margin = group.get("cutoff_margin", 0.1) + + # register in the group + group["budget"] = {} + group["cutoff_times"] = { + "cutoff_time_start": max(cutoff_point_start_time, 0.0) + if statistics_settings["cutoff_percentile_start"] > 0.0 + else 0.0, + "cutoff_time": cutoff_point_time * (1 + cutoff_margin), + } + + # set when to stop + if statistics_settings["cutoff_type"] == "time": + group["budget"]["time_limit"] = group["cutoff_times"]["cutoff_time"] + else: + budget = min(int(ceil(cutoff_point_fevals * (1 + cutoff_margin))), searchspace_stats.size) + group["budget"]["max_fevals"] = budget + + # write to group's input file as Budget + with open(group["input_file"], "r", encoding="utf-8") as fp: + input_json = json.load(fp) + if input_json.get("Budget") is None: + input_json["Budget"] = [] + input_json["Budget"].append({}) + if group["budget"].get("time_limit") is not None: + input_json["Budget"][0]["Type"] = "TuningDuration" + input_json["Budget"][0]["BudgetValue"] = group["budget"]["time_limit"] + else: # it's max_fevals + input_json["Budget"][0]["Type"] = "ConfigurationCount" + input_json["Budget"][0]["BudgetValue"] = group["budget"]["max_fevals"] + + # write the results and return the adjusted group + with open(group["input_file"], "w", encoding="utf-8") as fp: + json.dump(input_json, fp, indent=4) + return group + + +def generate_input_file(group: dict): + """Creates a input json file specific for a given application, gpu and search method. + + Args: + group: dictionary with settings for a given experimental group. + """ + with open(group["application_input_file"], "r", encoding="utf-8") as fp: + input_json = json.load(fp) + input_json["KernelSpecification"]["SimulationInput"] = str(group["full_search_space_file"]) + + # TODO dirty fix below for Kernel Tuner compatibility, instead implement reading T4 as cache in Kernel Tuner + input_json["KernelSpecification"]["SimulationInput"] = str( + input_json["KernelSpecification"]["SimulationInput"] + ).replace("_T4", "") + + input_json["General"]["OutputFile"] = str(group["output_file"].parent.joinpath(group["output_file"].stem)) + if input_json["General"]["OutputFormat"] != "JSON": + raise RuntimeError( + f"Only JSON output format is supported. Please set General.OutputFormat to JSON in {group['application_input_file']}." 
+ ) + if "TimeUnit" not in input_json["General"]: + input_json["General"]["TimeUnit"] = "Milliseconds" + if input_json["KernelSpecification"].get("Device") is None: + input_json["KernelSpecification"]["Device"] = {} + input_json["KernelSpecification"]["Device"]["Name"] = group["gpu"] + else: + input_json["KernelSpecification"]["Device"]["Name"] = group["gpu"] + input_json["KernelSpecification"]["KernelFile"] = str( + Path( + Path(group["application_input_file"]).parent / Path(input_json["KernelSpecification"]["KernelFile"]) + ).resolve() + ) + + input_json["Search"] = {} + input_json["Search"]["Name"] = group["search_method"] + if group.get("search_method_hyperparameters") is not None: + input_json["Search"]["Attributes"] = [] + for param in group["search_method_hyperparameters"]: + attribute = {} + attribute["Name"] = param["name"] + attribute["Value"] = param["value"] + input_json["Search"]["Attributes"].append(attribute) + # note that this is written to a different file, specific for gpu, application and search method + with open(group["input_file"], "w", encoding="utf-8") as fp: + json.dump(input_json, fp, indent=4) + + +def get_random_unique_filename(prefix="", suffix=""): + """Get a random, unique filename that does not yet exist.""" + + def randpath(): + return Path(f"{prefix}{randint(1000, 9999)}{suffix}") + + path = randpath() + while path.exists(): + path = randpath() + return path + + +def generate_experiment_file( + name: str, + parent_folder: Path, + search_strategies: list[dict], + applications: list[dict] = None, + gpus: list[str] = None, + override: dict = None, + generate_unique_file=False, + overwrite_existing_file=False, +): + """Creates an experiment file based on the given inputs and opinionated defaults.""" + assert isinstance(name, str) and len(name) > 0, f"Name for experiment file must be valid, is '{name}'" + experiment_file_path = Path(f"./{name.replace(' ', '_')}.json") + if generate_unique_file is True: + experiment_file_path = get_random_unique_filename(f"{name.replace(' ', '_')}_", ".json") + if experiment_file_path.exists(): + if overwrite_existing_file is False: + raise FileExistsError(f"Experiments file '{experiment_file_path}' already exists") + defaults_path = Path(__file__).parent / "experiments_defaults.json" + with defaults_path.open() as fp: + experiment: dict = json.load(fp) + + # write the arguments to the experiment file + experiment["name"] = name + experiment["parent_folder"] = str(parent_folder.resolve()) + experiment["search_strategies"] = search_strategies + if applications is not None: + experiment["experimental_groups_defaults"]["applications"] = applications + if gpus is not None: + experiment["experimental_groups_defaults"]["gpus"] = gpus + if override is not None: + for key, value in override.items(): + experiment[key].update(value) + + # validate and write to experiments file + validate_experimentsfile(experiment) + with experiment_file_path.open("w", encoding="utf-8") as fp: + json.dump(experiment, fp) + + # return the location of the experiments file + return experiment_file_path.resolve() + + +def execute_experiment(filepath: str, profiling: bool = False, full_validate_on_load: bool = True): """Executes the experiment by retrieving it from the cache or running it. Args: filepath: path to the experiments .json file. profiling: whether profiling is enabled. Defaults to False. + full_validate_on_load: whether to fully validate the searchspace statistics file on load. Defaults to True. 
Raises: FileNotFoundError: if the path to the kernel specified in the experiments file is not found. Returns: - A tuple of the experiment dictionary, the strategies executed, and the resulting list of ``ResultsDescription``. + A tuple of the experiment dictionary, the experimental groups executed, the dictionary of ``Searchspace statistics`` and the resulting list of ``ResultsDescription``. """ experiment = get_experiment(filepath) - experiment_folderpath = Path(filepath).parent + experiment_folderpath = Path(experiment["parent_folder"]) print(f"Starting experiment '{experiment['name']}'") - experiment_folder_id: str = experiment["folder_id"] - minimization: bool = experiment.get("minimization", True) - cutoff_percentile: float = experiment.get("cutoff_percentile", 1) - cutoff_type: str = experiment.get("cutoff_type", "fevals") - assert cutoff_type == "fevals" or cutoff_type == "time", f"cutoff_type must be 'fevals' or 'time', is {cutoff_type}" - curve_segment_factor: float = experiment.get("curve_segment_factor", 0.05) - assert isinstance(curve_segment_factor, float), f"curve_segment_factor is not float, {type(curve_segment_factor)}" - strategies: list[dict] = get_strategies(experiment) - - # add the kernel directory to the path to import the module, relative to the experiment file - kernels_path = experiment_folderpath / Path(experiment["kernels_path"]) - if not kernels_path.exists(): - raise FileNotFoundError(f"No such path {kernels_path.resolve()}, CWD: {getcwd()}") - sys.path.append(str(kernels_path)) - kernel_names = experiment["kernels"] - kernels = list(import_module(kernel_name) for kernel_name in kernel_names) - - # variables for comparison - objective_time_keys: list[str] = experiment["objective_time_keys"] - objective_performance_keys: list[str] = experiment["objective_performance_keys"] - - # execute each strategy in the experiment per GPU and kernel - results_descriptions: dict[str, dict[str, dict[str, ResultsDescription]]] = dict() - gpu_name: str - for gpu_name in experiment["GPUs"]: - print(f" | running on GPU '{gpu_name}'") - results_descriptions[gpu_name] = dict() - for index, kernel in enumerate(kernels): - kernel_name = kernel_names[index] - searchspace_stats = SearchspaceStatistics( - kernel_name=kernel_name, + + all_experimental_groups = get_experimental_groups(experiment) + + # prepare objective_time_keys, in case it was defined as all, explicitly list all keys + objective_time_keys: list[str] = experiment["statistics_settings"]["objective_time_keys"] + if "all" in objective_time_keys: + objective_time_keys = [] + # open the experiment file and validate using the schema file + schema = validate_experimentsfile(experiment) + objective_time_keys = schema["properties"]["statistics_settings"]["properties"]["objective_time_keys"]["items"][ + "enum" + ] + objective_time_keys.remove("all") + experiment["statistics_settings"]["objective_time_keys"] = objective_time_keys + + experiment["experimental_groups_defaults"]["applications_names"] = [] + for application in experiment["experimental_groups_defaults"]["applications"]: + experiment["experimental_groups_defaults"]["applications_names"].append(application["name"]) + + # initialize the matrix of results_descriptions based on provided gpus and applications + # initialize searchspace statistics, one for each full search file + results_descriptions: dict[str, dict[str, dict[str, ResultsDescription]]] = {} + searchspace_statistics: dict[str, dict[str, SearchspaceStatistics]] = {} + + for gpu in 
experiment["experimental_groups_defaults"]["gpus"]: + results_descriptions[gpu] = {} + searchspace_statistics[gpu] = {} + for application in experiment["experimental_groups_defaults"]["applications_names"]: + results_descriptions[gpu][application] = {} + + # just iterate over experimental_groups, collect results and write to proper place + for group in all_experimental_groups: + # get the experimental group settings + application_name = group["application_name"] + gpu_name = group["gpu"] + minimization = group["minimization"] + objective_performance_keys = group["objective_performance_keys"] + + # show the progress in the console + print(f" | - running on GPU '{gpu_name}'") + print(f" | - | tuning application '{application_name}'") + print(f" | - | - | with settings of experimental group '{group['display_name']}'") + + # overwrite the experiment statistics settings with the group settings + experiment["statistics_settings"]["minimization"] = minimization + experiment["statistics_settings"]["objective_performance_keys"] = objective_performance_keys + + # create SearchspaceStatistics for full search space file associated with this group, if it does not exist + if any( + searchspace_statistics.get(group["gpu"], {}).get(group["application_name"], {}) == null_val + for null_val in [None, {}] + ): + full_search_space_file_path = None + if group.get("converted_full_search_space_file") is None: + full_search_space_file_path = group["full_search_space_file"] + else: + full_search_space_file_path = group["converted_full_search_space_file"] + + searchspace_statistics[gpu_name][application_name] = SearchspaceStatistics( + application_name=application_name, device_name=gpu_name, minimization=minimization, objective_time_keys=objective_time_keys, objective_performance_keys=objective_performance_keys, - bruteforced_caches_path=experiment_folderpath / experiment["bruteforced_caches_path"], + full_search_space_file_path=full_search_space_file_path, + full_validate=full_validate_on_load, ) - # set cutoff point - _, cutoff_point_fevals, cutoff_point_time = searchspace_stats.cutoff_point_fevals_time(cutoff_percentile) - - print(f" | - optimizing kernel '{kernel_name}'") - results_descriptions[gpu_name][kernel_name] = dict() - for strategy in strategies: - strategy_name: str = strategy["name"] - strategy_display_name: str = strategy["display_name"] - stochastic = strategy["stochastic"] - cutoff_margin = strategy.get( - "cutoff_margin", 1.1 - ) # +10% margin, to make sure cutoff_point is reached by compensating for potential non-valid evaluations # noqa: E501 - print(f" | - | using strategy '{strategy['display_name']}'") - - # setup the results description - if "options" not in strategy: - strategy["options"] = dict() - - # set when to stop - if cutoff_type == "time": - strategy["options"]["time_limit"] = cutoff_point_time * cutoff_margin - else: - strategy["options"]["max_fevals"] = min( - int(ceil(cutoff_point_fevals * cutoff_margin)), searchspace_stats.size - ) - results_description = ResultsDescription( - experiment_folder_id, - kernel_name, - gpu_name, - strategy_name, - strategy_display_name, - stochastic, - objective_time_keys=objective_time_keys, - objective_performance_keys=objective_performance_keys, - minimization=minimization, - visualization_caches_path=experiment_folderpath / experiment["visualization_caches_path"], - ) + # calculation of budget can be done only now, after searchspace statistics have been initialized + group = calculate_budget( + group, experiment["statistics_settings"], 
searchspace_statistics[group["gpu"]][group["application_name"]] + ) - # if the strategy is in the cache, use cached data - if "ignore_cache" not in strategy and results_description.has_results(): - print(" | - |-> retrieved from cache") - else: # execute each strategy that is not in the cache - results_description = collect_results( - kernel, strategy, results_description, searchspace_stats, profiling=profiling - ) + results_description = ResultsDescription( + run_folder=experiment_folderpath / "run" / group["name"], + application_name=application_name, + device_name=gpu_name, + group_name=group["name"], + group_display_name=group["display_name"], + stochastic=group["stochastic"], + objective_time_keys=objective_time_keys, + objective_performance_keys=objective_performance_keys, + minimization=minimization, + ) + + # if the strategy is in the cache, use cached data + if ("ignore_cache" not in group or group["ignore_cache"] is False) and results_description.has_results(): + print(" | - | - | -> retrieved from cache") + else: # execute each strategy that is not in the cache + results_description = collect_results( + group["input_file"], + group, + results_description, + searchspace_statistics[group["gpu"]][group["application_name"]], + profiling=profiling, + ) - # set the results - results_descriptions[gpu_name][kernel_name][strategy_name] = results_description + # set the results + results_descriptions[group["gpu"]][group["application_name"]][group["name"]] = results_description - return experiment, strategies, results_descriptions + return experiment, all_experimental_groups, searchspace_statistics, results_descriptions def entry_point(): # pragma: no cover diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json new file mode 100644 index 0000000..96cbf54 --- /dev/null +++ b/src/autotuning_methodology/experiments_defaults.json @@ -0,0 +1,105 @@ +{ + "version": "1.2.0", + "name": "", + "parent_folder": ".", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X" + ], + "pattern_for_full_search_space_filenames": { + "regex": "../autotuning_methodology/benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 25, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.25, + "minimum_number_of_valid_search_iterations": 25, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + 
"plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "aggregate", + "style": "line" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/src/autotuning_methodology/formats_interface.py b/src/autotuning_methodology/formats_interface.py new file mode 100644 index 0000000..4d6fa58 --- /dev/null +++ b/src/autotuning_methodology/formats_interface.py @@ -0,0 +1,35 @@ +"""Interface for reading and writing input and output formats.""" + +import json +from pathlib import Path +from autotuning_methodology.validators import validate_T4 + + +def load_T4_format(filepath: Path, validate: True) -> dict: + """Load and optionally validate a T4 format file.""" + with open(filepath, "r", encoding="utf-8") as fh: + # get the cache from the .json file + orig_contents = fh.read() + try: + data: dict = json.loads(orig_contents) + except json.decoder.JSONDecodeError: + contents = orig_contents[:-1] + "}\n}" + try: + data = json.loads(contents) + except json.decoder.JSONDecodeError: + contents = orig_contents[:-2] + "}\n}" + data = json.loads(contents) + + # validate the data + if validate: + # validate it is in T4 format + validate_T4(data) + else: + # if not validating, we still want to do a basic check of the format + assert isinstance(data, dict), "T4 format file should be a dictionary." + assert "metadata" in data, "T4 format file should contain metadata." + assert "schema_version" in data, "T4 format file should contain schema_version." + assert "results" in data, "T4 format file should contain results." + + # return the T4 data + return data diff --git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index 2f64de0..a37b2aa 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -14,28 +14,28 @@ from autotuning_methodology.searchspace_statistics import SearchspaceStatistics -def get_aggregation_data_key(gpu_name: str, kernel_name: str): +def get_aggregation_data_key(gpu_name: str, application_name: str): """Utility function to get the key for data in the aggregation data dictionary. Args: gpu_name: the GPU name - kernel_name: the kernel name + application_name: the application name Returns: The key as a string. """ - return f"{gpu_name}+{kernel_name}" + return f"{gpu_name}+{application_name}" def get_aggregation_data( experiment_folderpath: Path, experiment: dict, + searchspace_statistics: dict[str, dict[str, SearchspaceStatistics]], strategies: dict, results_descriptions: dict, cutoff_percentile: float, cutoff_percentile_start=0.01, confidence_level=0.95, - minimization: bool = True, time_resolution: int = 1e4, use_strategy_as_baseline=None, ): @@ -44,10 +44,10 @@ def get_aggregation_data( Args: experiment_folderpath: _description_ experiment: _description_ + searchspace_statistics: _description_ strategies: _description_ results_descriptions: _description_ cutoff_percentile: _description_ - minimization: _description_. Defaults to True. cutoff_percentile_start: _description_. Defaults to 0.01. confidence_level: _description_. Defaults to 0.95. time_resolution: _description_. Defaults to 1e4. 
@@ -61,26 +61,19 @@ def get_aggregation_data( time_resolution = int(time_resolution) aggregation_data: dict[str, tuple[Baseline, list[Curve], SearchspaceStatistics, np.ndarray]] = dict() - for gpu_name in experiment["GPUs"]: - for kernel_name in experiment["kernels"]: + for gpu_name in experiment["experimental_groups_defaults"]["gpus"]: + for application_name in experiment["experimental_groups_defaults"]["applications_names"]: # get the statistics - searchspace_stats = SearchspaceStatistics( - kernel_name=kernel_name, - device_name=gpu_name, - minimization=minimization, - objective_time_keys=experiment["objective_time_keys"], - objective_performance_keys=experiment["objective_performance_keys"], - bruteforced_caches_path=experiment_folderpath / experiment["bruteforced_caches_path"], - ) + searchspace_stats = searchspace_statistics[gpu_name][application_name] # get the cached strategy results as curves strategies_curves: list[Curve] = list() baseline_executed_strategy = None for strategy in strategies: - results_description = results_descriptions[gpu_name][kernel_name][strategy["name"]] + results_description = results_descriptions[gpu_name][application_name][strategy["name"]] if results_description is None: raise ValueError( - f"""Strategy {strategy['display_name']} not in results_description, + f"""Strategy {strategy["display_name"]} not in results_description, make sure execute_experiment() has ran first""" ) curve = StochasticOptimizationAlgorithm(results_description) @@ -91,11 +84,10 @@ def get_aggregation_data( raise ValueError(f"Could not find '{use_strategy_as_baseline}' in executed strategies") # set the x-axis range - _, cutoff_point_fevals, cutoff_point_time = searchspace_stats.cutoff_point_fevals_time(cutoff_percentile) - _, cutoff_point_fevals_start, cutoff_point_time_start = searchspace_stats.cutoff_point_fevals_time( - cutoff_percentile_start + cutoff_point_fevals_start, cutoff_point_fevals, cutoff_point_time_start, cutoff_point_time = ( + searchspace_stats.cutoff_point_fevals_time_start_end(cutoff_percentile_start, cutoff_percentile) ) - fevals_range = np.arange(start=cutoff_point_fevals_start, stop=cutoff_point_fevals) + fevals_range = np.arange(start=cutoff_point_fevals_start, stop=cutoff_point_fevals + 1) time_range = np.linspace(start=cutoff_point_time_start, stop=cutoff_point_time, num=time_resolution) # get the random baseline @@ -108,7 +100,7 @@ def get_aggregation_data( ) # collect aggregatable data - aggregation_data[get_aggregation_data_key(gpu_name, kernel_name)] = tuple( + aggregation_data[get_aggregation_data_key(gpu_name, application_name)] = tuple( [random_baseline, strategies_curves, searchspace_stats, time_range, fevals_range] ) @@ -192,48 +184,68 @@ def get_strategies_aggregated_performance( ) -def get_strategy_scores(experiment_filepath: str, use_strategy_as_baseline=None): +def get_strategy_scores(experiment_filepath: str, use_strategy_as_baseline=None, full_validate_on_load=True): """Function to get performance scores per strategy by running the passed experiments file. Args: experiment_filepath: the path to the experiment-filename.json to run. use_strategy_as_baseline: whether to use an executed strategy as the baseline. Defaults to None. + full_validate_on_load: whether to fully validate the T4 format file. Defaults to True. Returns: a dictionary of the strategies, with the performance score and error for each strategy. 
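+
+    Example (a minimal sketch; the experiments file path is illustrative):
+
+        scores = get_strategy_scores("experiment_files/my_experiment.json")
+        for group_name, result in scores.items():
+            print(group_name, result)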
""" # execute the experiment if necessary, else retrieve it - experiment, strategies, results_descriptions = execute_experiment(experiment_filepath, profiling=False) - experiment_folderpath = Path(experiment_filepath).parent + experiment, strategies, searchspace_statistics, results_descriptions = execute_experiment( + experiment_filepath, profiling=False, full_validate_on_load=full_validate_on_load + ) # get the settings - minimization: bool = experiment.get("minimization", True) - cutoff_percentile: float = experiment["cutoff_percentile"] - cutoff_percentile_start: float = experiment.get("cutoff_percentile_start", 0.01) - time_resolution: float = experiment.get("resolution", 1e4) - confidence_level: float = experiment["plot"].get("confidence_level", 0.95) + experiment_folderpath = experiment["parent_folder_absolute_path"] + cutoff_percentile: float = experiment["statistics_settings"]["cutoff_percentile"] + cutoff_percentile_start: float = experiment["statistics_settings"]["cutoff_percentile_start"] + time_resolution: float = experiment["visualization_settings"]["resolution"] + confidence_level: float = experiment["visualization_settings"]["confidence_level"] # aggregate the data - aggregation_data = get_aggregation_data( - experiment_folderpath, - experiment, - strategies, - results_descriptions, - cutoff_percentile, - cutoff_percentile_start, - confidence_level, - minimization, - time_resolution, - use_strategy_as_baseline, - ) + def get_agg_data(): + return get_aggregation_data( + experiment_folderpath, + experiment, + searchspace_statistics, + strategies, + results_descriptions, + cutoff_percentile, + cutoff_percentile_start, + confidence_level, + time_resolution, + use_strategy_as_baseline, + ) - # get the aggregated performance per strategy - ( - strategies_performance, - strategies_lower_err, - strategies_upper_err, - strategies_real_stopping_point_fraction, - ) = get_strategies_aggregated_performance(list(aggregation_data.values()), confidence_level) + try: + # get the aggregated performance per strategy + aggregation_data = get_agg_data() + strategies_performance, _, _, _ = get_strategies_aggregated_performance( + list(aggregation_data.values()), confidence_level + ) + except ValueError as e: + if "Not enough overlap in time range and time values" in str(e.args[0]): + # delete the broken cachefile + _, strategy_name, application_name, device_name = e.args + assert results_descriptions[device_name][application_name][strategy_name].delete(), ( + "Failed to delete cachefile" + ) + + # re-execute the experiment and recollect the data to see if the issue is resolved + experiment, strategies, searchspace_statistics, results_descriptions = execute_experiment( + experiment_filepath, profiling=False + ) + aggregation_data = get_agg_data() + strategies_performance, _, _, _ = get_strategies_aggregated_performance( + list(aggregation_data.values()), confidence_level + ) + else: + raise e # calculate the average performance score and error per strategy results: dict[str, dict[str, float]] = dict() diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index ec9cb45..6035a97 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -7,24 +7,27 @@ import os import time as python_time import warnings -from inspect import getfile from pathlib import Path +# compression libraries if necessary for collecting results +import pickle +import gzip + import numpy as np import progressbar import yappi from 
autotuning_methodology.caching import ResultsDescription -from autotuning_methodology.searchspace_statistics import SearchspaceStatistics +from autotuning_methodology.searchspace_statistics import SearchspaceStatistics, convert_from_time_unit from autotuning_methodology.validators import ( is_invalid_objective_performance, is_invalid_objective_time, is_valid_config_result, + validate_T4, ) +# TODO this does not conform to new intended dicrectory structure folder = Path(__file__).parent.parent.parent -import_runs_path = Path(folder, "cached_data_used/import_runs") - # Imported runs must be remapped to have the same keys, values and order of parameters as the other runs. # This mapping provides both the order and mapping, so all keys must be present. @@ -55,19 +58,19 @@ @contextlib.contextmanager -def temporary_working_directory_change(new_WD: Path): +def temporary_working_directory_change(new_wd: Path): """Temporarily change to the given working directory in a context. Based on https://stackoverflow.com/questions/75048986/way-to-temporarily-change-the-directory-in-python-to-execute-code-without-affect. Args: - new_WD: path of the working directory to temporarily change to. + new_wd: path of the working directory to temporarily change to. """ - assert new_WD.exists() + assert new_wd.exists() # save the current working directory so we can revert to it original_working_directory = os.getcwd() # potentially raises an exception, left to the caller - os.chdir(new_WD) + os.chdir(new_wd) # yield control to the caller try: @@ -81,12 +84,12 @@ def temporary_working_directory_change(new_WD: Path): def load_json(path: Path): """Helper function to load a JSON file.""" - assert path.exists(), f"File {path.name} does not exist relative to {os.getcwd()}" + assert path.exists(), f"File {str(path)} does not exist relative to {os.getcwd()}" with path.open() as file_results: return json.load(file_results) -def get_results_and_metadata( +def get_kerneltuner_results_and_metadata( filename_results: str = f"{folder}../last_run/_tune_configuration-results.json", filename_metadata: str = f"{folder}../last_run/_tune_configuration-metadata.json", ) -> tuple[list, list]: @@ -105,12 +108,12 @@ def get_results_and_metadata( def tune( - run_number: int, - kernel, - kernel_name: str, + input_file, + application_name: str, device_name: str, - strategy: dict, - tune_options: dict, + group: dict, + objective: str, + objective_higher_is_better: bool, profiling: bool, searchspace_stats: SearchspaceStatistics, ) -> tuple[list, list, int]: @@ -119,12 +122,12 @@ def tune( Optionally collects profiling statistics. Args: - run_number: the run number (only relevant when importing). - kernel: the program (kernel) to tune. - kernel_name: the name of the program to tune. + input_file: the json input file for tuning the application. + application_name: the name of the program to tune. device_name: the device (GPU) to tune on. - strategy: the optimization algorithm to optimize with. - tune_options: a special options dictionary passed along to the autotuning framework. + group: the experimental group (usually the search method). + objective: the key to optimize for. + objective_higher_is_better: whether to maximize or minimize the objective. profiling: whether profiling statistics should be collected. searchspace_stats: a ``SearchspaceStatistics`` object passed to convert imported runs. @@ -132,192 +135,57 @@ def tune( ValueError: if tuning fails multiple times in a row. 
Returns: - A tuple of the metadata, the results, and the total runtime in miliseconds. + A tuple of the metadata, the results, and the total runtime in milliseconds. """ def tune_with_kerneltuner(): - """Interface with kernel tuner to tune the kernel and return the results.""" - # get the path to the directory the kernel is in; can't use importlib.resources.files because its not a package - kernel_directory = Path(getfile(kernel)).parent - assert kernel_directory.is_dir() - - # change CWD to the directory of the kernel - with temporary_working_directory_change(kernel_directory): - if profiling: - yappi.set_clock_type("cpu") - yappi.start() - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - res, env = kernel.tune( - device_name=device_name, - strategy=strategy["strategy"], - strategy_options=strategy["options"], - **tune_options, - ) - if profiling: - yappi.stop() - metadata, results = get_results_and_metadata( - filename_results=kernel.file_path_results, filename_metadata=kernel.file_path_metadata + """Interface with Kernel Tuner to tune the kernel and return the results.""" + from kernel_tuner import tune_kernel_T1 + + samples = group["samples"] + strategy_options = group.get("budget", {}) + if "custom_search_method_path" in group: + # if a custom search method is specified, use it + strategy_options["custom_search_method_path"] = group["custom_search_method_path"] + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + metadata, results = tune_kernel_T1( + input_file, + objective=objective, + objective_higher_is_better=objective_higher_is_better, + simulation_mode=True, + output_T4=True, + iterations=samples, + strategy_options=strategy_options, ) - # check that the number of iterations is correct - if "iterations" in strategy: - for result in results: - if "runtime" in result: - num_iters = len(results[0]["runtimes"]) - assert ( - strategy["iterations"] == num_iters - ), f"Specified {strategy['iterations']=} not equal to actual number of iterations ({num_iters})" - break - if "max_fevals" in strategy["options"]: - max_fevals = strategy["options"]["max_fevals"] - if len(results) < max_fevals * 0.1: - warnings.warn(f"Much fewer configurations were returned ({len(res)}) than the requested {max_fevals}") - if len(results) < 2: - raise ValueError("Less than two configurations were returned") + if "max_fevals" in group["budget"]: + max_fevals = group["budget"]["max_fevals"] + num_results = len(results["results"]) + if num_results < max_fevals * 0.1: + warnings.warn( + f"Much fewer configurations were returned ({num_results}) than the requested {max_fevals}" + ) + if num_results < 2 and group["budget"]["max_fevals"] > 2: + raise ValueError( + f"Less than two configurations were returned ({len(results['results'])}, budget {group['budget']}) \n" + ) return metadata, results def tune_with_BAT(): """Interface to tune with the BAT benchmarking suite.""" # TODO integrate with BAT + raise NotImplementedError("This will be implemented in the future.") - def import_from_KTT(use_param_mapping=True, use_bruteforce_objective=True): - """Import a KTT output file.""" - # import the file - assert import_runs_path.exists() and import_runs_path.is_dir() - expected_filename = ( - f"t~'ktt'd~'{device_name}'k~'{kernel_name}'s~'{strategy['strategy']}'r~{run_number}.json".lower() + def tune_with_KTT(): + """Interface with KTT to tune the kernel and return the results.""" + raise NotImplementedError( + "KTT is working on supporting the shared interface. 
The old conversions have been deprecated. An older build can be used to use these functions." ) - matching_runs: list[dict] = list() - for file in import_runs_path.iterdir(): - if file.name == expected_filename: - matching_runs.append(load_json(file)) - if len(matching_runs) < 1: - raise FileNotFoundError(f"No files to import found with name '{expected_filename}'") - if len(matching_runs) > 1: - raise FileExistsError( - f"{len(matching_runs)} files exist with name '{expected_filename}', there can be only one" - ) - run = matching_runs[0] - - # map all timeunits to miliseconds - ktt_timeunit_mapping = { - "seconds": lambda x: x * 1000, - "miliseconds": lambda x: x, - "microseconds": lambda x: x / 1000, - } - ktt_status_mapping = { - "ok": "correct", - "devicelimitsexceeded": "compile", - "computationfailed": "runtime", - } - - # convert to the T4 format - metadata = None # TODO implement the metadata conversion when necessary - results = list() - run_metadata: dict = run["Metadata"] - run_results: list[dict] = run["Results"] - timemapper = ktt_timeunit_mapping[str(run_metadata["TimeUnit"]).lower()] - total_time_ms = 0 - for config_attempt in run_results: - - # convert the configuration to T4 style dictionary for fast lookups in the mapping - configuration_ktt = dict() - for param in config_attempt["Configuration"]: - configuration_ktt[param["Name"]] = param["Value"] - - # convert the configuration data with the mapping in the correct order - configuration = dict() - if use_param_mapping and kernel_name in ktt_param_mapping: - param_map = ktt_param_mapping[kernel_name] - assert len(param_map) == len( - configuration_ktt - ), f"Mapping provided for {len(param_map)} params, but configuration has {len(configuration_ktt)}" - for param_name, mapping in param_map.items(): - param_value = configuration_ktt[param_name] - # if the mapping is None, do not include the parameter - if mapping is None: - pass - # if the mapping is a tuple, the first argument is the new parameter name and the second the value - elif isinstance(mapping, tuple): - param_mapped_name, param_mapped_value = mapping - if callable(param_mapped_value): - param_mapped_value = param_mapped_value(param_value) - configuration[param_mapped_name] = param_mapped_value - # if it's a list of tuples, map to multiple parameters - elif isinstance(mapping, list): - for param_mapped_name, param_mapped_value in mapping: - if callable(param_mapped_value): - param_mapped_value = param_mapped_value(param_value) - configuration[param_mapped_name] = param_mapped_value - else: - raise ValueError(f"Can not apply parameter mapping of {type(mapping)} ({mapping})") - else: - configuration = configuration_ktt - - # add to total time - total_duration = timemapper(config_attempt["TotalDuration"]) - total_overhead = timemapper(config_attempt["TotalOverhead"]) - total_time_ms += total_duration + total_overhead - - # convert the times data - times_runtimes = [] - duration = "" - if len(config_attempt["ComputationResults"]) > 0: - for config_result in config_attempt["ComputationResults"]: - times_runtimes.append(timemapper(config_result["Duration"])) - if use_bruteforce_objective: - config_string_key = ",".join(str(x) for x in configuration.values()) - duration = searchspace_stats.get_value_in_config(config_string_key, "time") - else: - duration = np.mean(times_runtimes) - assert ( - "iterations" in strategy - ), "For imported KTT runs, the number of iterations must be specified in the experiments file" - if strategy["iterations"] != len(times_runtimes): - 
times_runtimes = [np.mean(times_runtimes)] * strategy["iterations"] - warnings.warn( - f"The specified number of iterations ({strategy['iterations']}) did not equal" - + f"the actual number of iterations ({len(times_runtimes)}). " - + "The average has been used." - ) - if (not isinstance(duration, (float, int, np.number))) or np.isnan(duration): - duration = "" - times_search_algorithm = timemapper(config_attempt.get("SearcherOverhead", 0)) - times_validation = timemapper(config_attempt.get("ValidationOverhead", 0)) - times_framework = timemapper(config_attempt.get("DataMovementOverhead", 0)) - times_benchmark = total_duration - times_compilation = total_overhead - times_search_algorithm - times_validation - times_framework - - # assemble the converted data - converted = { - "configuration": configuration, - "invalidity": ktt_status_mapping[str(config_attempt["Status"]).lower()], - "correctness": 1, - "measurements": [ - { - "name": "time", - "value": duration, - "unit": "ms", - } - ], - "objectives": ["time"], - "times": { - "compilation": times_compilation, - "benchmark": times_benchmark, - "framework": times_framework, - "search_algorithm": times_search_algorithm, - "validation": times_validation, - "runtimes": times_runtimes, - }, - } - results.append(converted) - - return metadata, results, round(total_time_ms) - - strategy_name = str(strategy["name"]).lower() - if strategy_name.startswith("ktt_"): - metadata, results, total_time_ms = import_from_KTT() - elif strategy_name.startswith("kerneltuner_") or True: + + if group["autotuner"] == "KTT": + metadata, results, total_time_ms = tune_with_KTT() + elif group["autotuner"] == "KernelTuner": total_start_time = python_time.perf_counter() warnings.simplefilter("ignore", UserWarning) try: @@ -329,48 +197,113 @@ def import_from_KTT(use_param_mapping=True, use_bruteforce_objective=True): total_end_time = python_time.perf_counter() total_time_ms = round((total_end_time - total_start_time) * 1000) else: - raise ValueError(f"Invalid autotuning framework '{strategy_name}'") + raise ValueError(f"Invalid autotuning framework '{group['autotuner']}'") + + # convert time units + timeunit: str = results.get("metadata", {}).get("timeunit", "seconds") + for result in results["results"]: + for k, v in result["times"].items(): + result["times"][k] = convert_from_time_unit(v, timeunit) + # performance should not be auto-converted + # for i, m in enumerate(result["measurements"]): + # if "unit" in m and not isinstance(m["value"], str): + # result["measurements"][i]["value"] = convert_from_time_unit(m["value"], m["unit"]) + # be careful not to rely on total_time_ms when profiling, because it will include profiling time + validate_T4(results) return metadata, results, total_time_ms def collect_results( - kernel, - strategy: dict, + input_file, + group: dict, results_description: ResultsDescription, searchspace_stats: SearchspaceStatistics, profiling: bool, + compress: bool = True, ) -> ResultsDescription: """Executes optimization algorithms on tuning problems to capture their behaviour. Args: - kernel: the program (kernel) to tune. - strategy: the optimization algorithm to optimize with. - searchspace_stats: the ``SearchspaceStatistics`` object, only used for conversion of imported runs. + input_file: an input json file to tune. + group: a dictionary with settings for experimental group. results_description: the ``ResultsDescription`` object to write the results to. 
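[Editor's example] For reference, a sketch of the experimental-group dictionary that tune() and collect_results() consume; the keys below are taken from the surrounding code and the experiments schema, the values are purely illustrative:

# Hypothetical experimental-group settings (keys from the code above, values invented).
group = {
    "autotuner": "KernelTuner",
    "samples": 32,                                    # measurement samples per configuration
    "repeats": 25,                                    # independent tuning runs for this group
    "budget": {"max_fevals": 200},                    # passed on to the autotuner as strategy_options
    "cutoff_times": {"cutoff_time_start": 0.0, "cutoff_time": 60.0},
    "minimum_number_of_valid_search_iterations": 20,
    # "custom_search_method_path": "path/to/custom_method.py",  # optional custom search method
}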
+ searchspace_stats: the ``SearchspaceStatistics`` object, used for conversion of imported runs. profiling: whether profiling statistics must be collected. + compress: whether the results should be compressed. Returns: The ``ResultsDescription`` object with the results. """ - min_num_evals: int = strategy["minimum_number_of_evaluations"] - # TODO put the tune options in the .json in strategy_defaults? Make it Kernel Tuner independent - tune_options = {"verbose": False, "quiet": True, "simulation_mode": True} + if profiling: + import psutil + from os import getpid + process = psutil.Process(getpid()) + warnings.warn(f"Memory usage at start of collect_results: {process.memory_info().rss / 1e6:.1f} MB") + + # calculate the minimum number of function evaluations that must be valid + minimum_fraction_of_budget_valid = group.get("minimum_fraction_of_budget_valid", None) + if minimum_fraction_of_budget_valid is not None: + assert isinstance(minimum_fraction_of_budget_valid, float) + assert 0.0 < minimum_fraction_of_budget_valid <= 1.0 + max_fevals = None + budget = group["budget"] + if "max_fevals" in budget: + max_fevals = budget["max_fevals"] + elif "time_limit" in budget: + time_limit = budget["time_limit"] + time_per_feval = searchspace_stats.get_time_per_feval("mean_per_feval") + max_fevals = max(round(time_limit / time_per_feval), 2) + else: + raise ValueError(f"Unkown budget {budget}, can not calculate minimum fraction of budget valid") + min_num_evals = max(round(minimum_fraction_of_budget_valid * min(max_fevals, searchspace_stats.size)), 2) + if "minimum_number_of_valid_search_iterations" in group: + min_num_evals = max(min(min_num_evals, group["minimum_number_of_valid_search_iterations"]), 2) + warnings.warn( + f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}/{max_fevals=}) are set, the minimum ({min_num_evals}) is used." + ) + else: + min_num_evals: int = group["minimum_number_of_valid_search_iterations"] + + if len(results_description.objective_performance_keys) != 1: + raise NotImplementedError( + f"Multi objective tuning is not yet supported ({results_description.objective_performance_keys})" + ) + objective = results_description.objective_performance_keys[0] + objective_higher_is_better = not results_description.minimization - def report_multiple_attempts(rep: int, len_res: int, strategy_repeats: int): + def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt: int): """If multiple attempts are necessary, report the reason.""" if len_res < 1: - print(f"({rep+1}/{strategy_repeats}) No results found, trying once more...") + print(f"({rep + 1}/{group_repeats}) No results found, trying once more...") elif len_res < min_num_evals: - print(f"Too few results found ({len_res} of {min_num_evals} required), trying once more...") + print( + f"Too few results found ({len_res} of {min_num_evals} required, attempt {attempt}), trying once more..." 
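[Editor's example] To make the budget arithmetic above concrete, a small worked example with invented numbers (when a time_limit budget is given instead, max_fevals is first estimated as time_limit divided by the mean time per function evaluation):

# Worked example (invented numbers) of the minimum-evaluations rule above:
# at least this fraction of the feval budget must consist of valid configurations.
group = {"budget": {"max_fevals": 200}, "minimum_fraction_of_budget_valid": 0.5}
searchspace_size = 1000  # stand-in for searchspace_stats.size

max_fevals = group["budget"]["max_fevals"]
min_num_evals = max(round(group["minimum_fraction_of_budget_valid"] * min(max_fevals, searchspace_size)), 2)
assert min_num_evals == 100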
+ ) else: - print(f"({rep+1}/{strategy_repeats}) Only invalid results found, trying once more...") - - # repeat the strategy as specified - repeated_results = list() + print(f"({rep + 1}/{group_repeats}) Only invalid results found, trying once more...") + + def cumulative_time_taken(results: list) -> list: + """Calculates the cumulative time taken for each of the configurations in results.""" + config_times = [] + cumulative_time_taken = 0 + for config in results: + config_sum = 0 + for key in config["times"]: + if key in searchspace_stats.objective_time_keys: + time = config["times"][key] + if isinstance(time, (list, tuple)): + time = sum(time) + config_sum += time + cumulative_time_taken += config_sum + config_times.append(cumulative_time_taken) + return config_times + + # repeat the run as specified + repeated_results = [] total_time_results = np.array([]) for rep in progressbar.progressbar( - range(strategy["repeats"]), + range(group["repeats"]), redirect_stdout=True, prefix=" | - |-> running: ", widgets=[ @@ -389,57 +322,121 @@ def report_multiple_attempts(rep: int, len_res: int, strategy_repeats: int): len_res: int = -1 while only_invalid or len_res < min_num_evals: if attempt > 0: - report_multiple_attempts(rep, len_res, strategy["repeats"]) - metadata, results, total_time_ms = tune( - rep, - kernel, - results_description.kernel_name, + report_multiple_attempts(rep, len_res, group["repeats"], attempt) + if attempt >= 20: + raise RuntimeError( + f"Could not find enough results for {results_description.application_name} on {results_description.device_name} in {attempt} attempts ({'only invalid, ' if only_invalid else ''}{len_res}/{min_num_evals}), quiting..." + ) + _, results, total_time_ms = tune( + input_file, + results_description.application_name, results_description.device_name, - strategy, - tune_options, + group, + objective, + objective_higher_is_better, profiling, searchspace_stats, ) - len_res = len(results) + results = results["results"] + + # check without results that are beyond the cutoff times + time_taken = cumulative_time_taken(results) + cutoff_time = group["cutoff_times"]["cutoff_time"] + cutoff_time_start = group["cutoff_times"]["cutoff_time_start"] + temp_results = [res for res, time in zip(results, time_taken) if cutoff_time_start <= time <= cutoff_time] + # if len(temp_results) < len(results): + # print( + # f"Dropped {len(results) - len(temp_results)} configurations beyond cutoff time {round(cutoff_time, 3)}, {len(temp_results)} left" + # ) + # check if there are only invalid configs in the first min_num_evals, if so, try again - temp_res_filtered = list(filter(lambda config: is_valid_config_result(config), results)) - only_invalid = len(temp_res_filtered) < 1 + len_res = len(temp_results) + temp_res_filtered = list(filter(lambda config: is_valid_config_result(config), temp_results)) + only_invalid = len(temp_res_filtered) < 2 # there must be at least two valid configurations attempt += 1 + + # compress the results if necessary + if compress: + results = gzip.compress(pickle.dumps(results)) + # register the results repeated_results.append(results) total_time_results = np.append(total_time_results, total_time_ms) + # report the memory usage + if profiling: + warnings.warn(f"Memory usage after iteration {rep}: {process.memory_info().rss / 1e6:.1f} MB") + # gather profiling data and clear the profiler before the next round if profiling: stats = yappi.get_func_stats() # stats.print_all() - path = "../old_experiments/profilings/random/profile-v2.prof" + path = 
results_description.run_folder + "/profile-v2.prof" stats.save(path, type="pstat") # pylint: disable=no-member yappi.clear_stats() + warnings.warn(f"Memory usage before writing in collect_results: {process.memory_info().rss / 1e6:.1f} MB") # combine the results to numpy arrays and write to a file - write_results(repeated_results, results_description) + write_results(repeated_results, results_description, compressed=compress) + if profiling: + warnings.warn(f"Memory usage at end of of collect_results: {process.memory_info().rss / 1e6:.1f} MB") assert results_description.has_results(), "No results in ResultsDescription after writing results." return results_description -def write_results(repeated_results: list, results_description: ResultsDescription): +def write_results(repeated_results: list, results_description: ResultsDescription, compressed=False): """Combine the results and write them to a NumPy file. Args: repeated_results: a list of tuning results, one per tuning session. results_description: the ``ResultsDescription`` object to write the results to. + compressed: whether the repeated_results are compressed. """ # get the objective value and time keys objective_time_keys = results_description.objective_time_keys objective_performance_keys = results_description.objective_performance_keys - # find the maximum number of function evaluations - max_num_evals = max(len(repeat) for repeat in repeated_results) + # find the maximum (reasonable) number of function evaluations + num_evals = [] + for repeat in repeated_results: + if compressed: + repeat = pickle.loads(gzip.decompress(repeat)) + num_evals.append(len(repeat)) + max_num_evals = max(num_evals) if num_evals else 0 + mean_num_evals = np.mean(num_evals) if num_evals else 0 + if max_num_evals > mean_num_evals * 2: + # the maximum number of evaluations is more than twice the mean, this is likely an outlier, cut to save memory + max_num_evals = int(mean_num_evals * 2) + if max_num_evals > 1e8: + # more than 100 million evaluations, set to the mean number of evaluations + max_num_evals = int(mean_num_evals) + + # set the dtype + dtype = np.float64 + if max_num_evals * len(repeated_results) > 1e9: + warnings.warn( + f"More than 1 billion entries ({max_num_evals * len(repeated_results)}) in the results, using float16 to save memory." + ) + dtype = np.float16 + elif max_num_evals * len(repeated_results) > 1e8: + warnings.warn( + f"More than 100 million entries ({max_num_evals * len(repeated_results)}) in the results, using float32 to save memory." + ) + dtype = np.float32 + estimated_memory_usage = max_num_evals * len(repeated_results) * ( + 8 if dtype == np.float64 else 2 if dtype == np.float16 else 4 + ) # 8 bytes for float64, 4 bytes for float32, 2 bytes for float16 + if estimated_memory_usage > 1e9*10: # more than 10 GB + warnings.warn( + f"Estimated memory usage of {estimated_memory_usage / 1e9:.2f} GB for the results arrays, may go out of memory." 
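[Editor's example] A quick sanity check of the dtype and memory heuristic above, with invented array sizes:

# Back-of-the-envelope check of the dtype selection and memory estimate (sizes invented).
import numpy as np

max_num_evals, num_repeats = 500_000, 300     # 1.5e8 entries per results array
entries = max_num_evals * num_repeats
dtype = np.float16 if entries > 1e9 else np.float32 if entries > 1e8 else np.float64
estimated_bytes = entries * np.dtype(dtype).itemsize
print(dtype, f"~{estimated_bytes / 1e9:.2f} GB per array")  # float32, ~0.60 GB per array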
+ ) def get_nan_array() -> np.ndarray: """Get an array of NaN so they are not counted as zeros inadvertedly.""" - return np.full((max_num_evals, len(repeated_results)), np.nan) + # return np.full((max_num_evals, len(repeated_results)), np.nan, dtype=dtype) + arr = np.empty((max_num_evals, len(repeated_results)), dtype=dtype) + arr.fill(np.nan) + return arr # set the arrays to write to fevals_results = get_nan_array() @@ -447,17 +444,22 @@ def get_nan_array() -> np.ndarray: objective_performance_results = get_nan_array() objective_performance_best_results = get_nan_array() objective_performance_stds = get_nan_array() - objective_time_results_per_key = np.full((len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan) + objective_time_results_per_key = np.full((len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan, dtype=dtype) objective_performance_results_per_key = np.full( - (len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan + (len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan, dtype=dtype ) # combine the results opt_func = np.nanmin if results_description.minimization is True else np.nanmax for repeat_index, repeat in enumerate(repeated_results): + if compressed: + repeat = pickle.loads(gzip.decompress(repeat)) cumulative_objective_time = 0 objective_performance_best = np.nan for evaluation_index, evaluation in enumerate(repeat): + if evaluation_index >= max_num_evals: + break + # set the number of function evaluations fevals_results[evaluation_index, repeat_index] = ( evaluation_index + 1 @@ -470,15 +472,19 @@ def get_nan_array() -> np.ndarray: # TODO continue here with implementing switch in output format # obtain the objective time per key - objective_times_list = list() + objective_times_list = [] for key_index, key in enumerate(objective_time_keys): evaluation_times = evaluation["times"] - assert ( - key in evaluation_times - ), f"Objective time key {key} not in evaluation['times'] ({evaluation_times})" - value = evaluation_times[key] + assert key in evaluation_times, ( + f"Objective time key {key} not in evaluation['times'] ({evaluation_times})" + ) + if isinstance(evaluation_times[key], list): + # this happens when runtimes are in objective_time_keys + value = sum(evaluation_times[key]) + else: + value = evaluation_times[key] if value is not None and not is_invalid_objective_time(value): - value = value / 1000 # TODO this miliseconds to seconds conversion is specific to Kernel Tuner + # value = value / 1000 # TODO this milliseconds to seconds conversion is specific to Kernel Tuner objective_time_results_per_key[key_index, evaluation_index, repeat_index] = value objective_times_list.append(value) # sum the objective times of the keys @@ -489,16 +495,14 @@ def get_nan_array() -> np.ndarray: objective_time_results[evaluation_index, repeat_index] = cumulative_objective_time # obtain the objective performance per key (called 'measurements' in the T4 format) - objective_performances_list = list() + objective_performances_list = [] for key_index, key in enumerate(objective_performance_keys): evaluation_measurements = evaluation["measurements"] measurements = list(filter(lambda m: m["name"] == key, evaluation_measurements)) - assert ( - len(measurements) > 0 - ), f"Objective performance key name {key} not in evaluation['measurements'] ({evaluation_measurements})" - assert ( - len(measurements) == 1 - ), f"""Objective performance key name {key} multiply defined + assert len(measurements) > 0, ( + f"Objective 
performance key name {key} not in evaluation['measurements'] ({evaluation_measurements})" + ) + assert len(measurements) == 1, f"""Objective performance key name {key} multiply defined in evaluation['measurements'] ({evaluation_measurements})""" value = measurements[0]["value"] if value is not None and not is_invalid_objective_performance(value): diff --git a/src/autotuning_methodology/schema.json b/src/autotuning_methodology/schema.json deleted file mode 100755 index c985a95..0000000 --- a/src/autotuning_methodology/schema.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://example.com/schemas/experiments/v0.1.1.schema.json", - "version": "0.1.2", - "title": "Experiment", - "description": "An experiment configuration file", - "type": "object", - "properties": { - "version": { - "description": "Version number of the experiment file standard", - "type": "string" - }, - "name": { - "description": "Name of the experiment", - "type": "string" - }, - "folder_id": { - "description": "Unique ID of the folder to store the results of this experiment in", - "type": "string" - }, - "kernels_path": { - "description": "Path to the directory that has the tuning scripts specified in `kernels`, relative to the experiments file.", - "type": "string" - }, - "bruteforced_caches_path": { - "description": "Path to the directory that has the bruteforced caches, relative to the experiments file.", - "type": "string" - }, - "visualization_caches_path": { - "description": "Path to the directory to write / look for visualization caches, relative to the experiments file.", - "type": "string" - }, - "kernels": { - "description": "Kernels to optimize", - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1, - "uniqueItems": true - }, - "GPUs": { - "description": "GPUs to optimize on", - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1, - "uniqueItems": true - }, - "minimization": { - "description": "Direction of optimization (minimize or maximize)", - "type": "boolean", - "default": true - }, - "resolution": { - "description": "The resolution of the time range", - "type": "integer", - "minimum": 2 - }, - "cutoff_percentile": { - "description": "Fraction of difference between median and absolute optimum at which to stop the time range", - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - }, - "cutoff_percentile_start": { - "description": "Fraction of difference between median and absolute optimum at which to start the time range", - "type": "number", - "minimum": 0, - "exclusiveMaximum": 1 - }, - "cutoff_type": { - "description": "Whether to base the cutoff on function evaluations or time", - "type": "string", - "enum": [ - "fevals", - "time" - ] - }, - "plot": { - "type": "object", - "properties": { - "plot_x_value_types": { - "description": "Types of value on the x-axis", - "type": "array", - "items": { - "type": "string", - "enum": [ - "fevals", - "time", - "aggregated" - ] - }, - "minItems": 1, - "uniqueItems": true - }, - "plot_y_value_types": { - "description": "Types of value on the y-axis (absolute values, median-absolute normalized, improvement over baseline)", - "type": "array", - "items": { - "type": "string", - "enum": [ - "absolute", - "scatter", - "normalized", - "baseline" - ] - }, - "minItems": 1, - "uniqueItems": true - }, - "confidence_level": { - "type": [ - "number", - "null" - ], - "exclusiveMinimum": 0, - "maximum": 1 - }, - "compare_baselines": { - "type": "boolean", - "default": 
false - }, - "compare_split_times": { - "type": "boolean", - "default": false - } - }, - "required": [ - "plot_x_value_types", - "plot_y_value_types", - "confidence_level" - ] - } - }, - "required": [ - "version", - "name", - "folder_id", - "kernels_path", - "bruteforced_caches_path", - "visualization_caches_path", - "kernels", - "GPUs", - "minimization", - "resolution", - "cutoff_percentile", - "cutoff_percentile_start", - "cutoff_type", - "plot" - ] -} \ No newline at end of file diff --git a/src/autotuning_methodology/schemas/T4.json b/src/autotuning_methodology/schemas/T4.json new file mode 100644 index 0000000..82db64e --- /dev/null +++ b/src/autotuning_methodology/schemas/T4.json @@ -0,0 +1,91 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/odgaard/TuningSchema/blob/T4/results-schema.json", + "description": "Open Autotuning Results Schema", + "type": "object", + "properties": { + "schema_version": { + "description": "The version number of the schema in major.minor.patch format.", + "type": "string", + "pattern": "^[0-9]{1,}.[0-9]{1,}.[0-9]{1,}$", + "example": "1.0.0" + }, + "results": { + "type": "array", + "items": { + "type": "object", + "properties": { + "timestamp": { + "type": "string" + }, + "configuration": { + "type": "object" + }, + "objectives": { + "type": "array" + }, + "times": { + "type": "object", + "properties": { + "compilation_time": { + "type": "number" + }, + "runtimes": { + "type": "array" + }, + "framework": { + "type": "number" + }, + "search_algorithm": { + "type": "number" + }, + "validation": { + "type": "number" + } + } + }, + "invalidity": { + "enum": [ + "timeout", + "compile", + "runtime", + "correctness", + "constraints", + "correct" + ] + }, + "correctness": { + "type": "number" + }, + "measurements": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "value": { + "type": [ + "number", + "string", + "array" + ] + }, + "unit": { + "type": "string" + } + } + } + } + }, + "required": [ + "configuration", + "times", + "invalidity", + "correctness" + ] + } + } + } +} \ No newline at end of file diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json new file mode 100755 index 0000000..81bb87d --- /dev/null +++ b/src/autotuning_methodology/schemas/experiments.json @@ -0,0 +1,518 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/schemas/experiments/v0.1.1.schema.json", + "version": "1.2.0", + "title": "Experiment", + "description": "An experiment setup configuration file", + "type": "object", + "required": [ + "name", + "parent_folder", + "experimental_groups_defaults", + "search_strategies", + "statistics_settings", + "visualization_settings" + ], + "properties": { + "version": { + "description": "Version number of the experiment setup configuration file standard", + "type": "string" + }, + "name": { + "description": "Name of the experiment", + "type": "string" + }, + "parent folder": { + "description": "Absolute or relative path of the folder to store all related files for this experiment. This folder needs to already exist.", + "type": "string", + "default": "./" + }, + "experimental_groups_defaults": { + "description": "Default settings for experimental groups", + "type": "object", + "required": [ + "applications", + "gpus" + ], + "properties": { + "autotuner": { + "description": "Autotuner that will be used to tune the experimental group. 
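[Editor's example] For reference, a minimal results document in the T4 format above could look as follows (all values invented); per result, the required fields are configuration, times, invalidity and correctness:

# Hypothetical minimal T4 results document matching the T4 schema above (values invented).
minimal_t4 = {
    "schema_version": "1.0.0",
    "results": [
        {
            "configuration": {"block_size_x": 128, "tile_size": 2},
            "objectives": ["time"],
            "times": {"compilation_time": 0.8, "runtimes": [0.0051, 0.0049], "search_algorithm": 0.002},
            "invalidity": "correct",
            "correctness": 1,
            "measurements": [{"name": "time", "value": 0.005, "unit": "s"}],
        }
    ],
}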
Has to be specified either in experimental_groups_defaults or in experimental group.", + "enum": [ + "KernelTuner", + "KTT" + ] + }, + "autotuner_path": { + "description": "Path to the library of the autotuner", + "type": "string" + }, + "applications": { + "description": "List of applications for which measurements were taken and written to full search space files. Can be used in pattern_for_full_search_space_filename.", + "type": "array", + "items": { + "type": "object", + "required": [ + "name", + "input_file" + ], + "properties": { + "name": { + "type": "string" + }, + "input_file": { + "type": "string" + }, + "minimization": { + "description": "Whether the optimization objective is to minimize (maximize if false).", + "type": "boolean", + "default": true + }, + "objective_performance_keys": { + "description": "The performance key(s) to use as the performance objective. In case of multiple keys, the values are summed.", + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true, + "minItems": 1 + } + } + } + }, + "gpus": { + "description": "List of GPUs where measurements were taken and written to full search space files. Can be used in pattern_for_full_search_space_filename.", + "type": "array", + "items": { + "type": "string" + } + }, + "pattern_for_full_search_space_filenames": { + "description": "Pattern for filenames of full search space files", + "type": "object", + "required": [ + "regex" + ], + "properties": { + "regex": { + "type": "string", + "pattern": ".*.json", + "examples": [ + "${gpus}_${applications}_output.json", + "full-search-space-${applications}-${gpus}.json" + ] + }, + "regex_variables": { + "type": "array", + "items": { + "type": "string" + }, + "default": [ + "applications", + "gpus" + ] + } + } + }, + "stochastic": { + "description": "Whether the repeated runs of the same experimental group (combination of application, GPU and search strategy) exhibit stochastic behaviour, e.g. 
due to stochastic search strategy", + "type": "boolean", + "default": true + }, + "repeats": { + "description": "How many times to repeat the run for a single experimental group (combination of application, GPU and search strategy)", + "type": "integer", + "minimum": 1, + "default": 100 + }, + "samples": { + "description": "How many samples of measurements for a single configuration are present in full search space file", + "type": "integer", + "minimum": 1, + "default": 32 + }, + "minimum_fraction_of_budget_valid": { + "description": "The fraction of the total searchspace size that must be non-error, valid configurations for each single run of search algorithm", + "type": "number", + "exclusiveMinimum": 0.0, + "maximum": 1.0, + "default": 0.5 + }, + "minimum_number_of_valid_search_iterations": { + "description": "How many non-error, valid configurations account for a single run of search algorithm", + "type": "integer", + "minimum": 1, + "default": 10 + }, + "ignore_cache": { + "description": "If true, always re-run the experiments, even though results from previously executed experiments are stored in run folder.", + "type": "boolean", + "default": false + } + } + }, + "search_strategies": { + "description": "Settings for search strategies", + "type": "array", + "minItems": 1, + "uniqueItems": true, + "items": { + "type": "object", + "required": [ + "name", + "search_method", + "display_name" + ], + "properties": { + "name": { + "description": "Name of the search strategy", + "type": "string" + }, + "autotuner": { + "description": "Autotuner that will be used for tuning. Has to be specified either in experimental_groups_defaults or in search_strategies.", + "enum": [ + "KernelTuner", + "KTT" + ] + }, + "autotuner_path": { + "description": "Path to the library of the autotuner", + "type": "string" + }, + "custom_search_method_path": { + "description": "Optional path to a custom search method implementation.", + "type": "string" + }, + "search_method": { + "description": "Name of the search method as recognized by the autotuner, Class name if custom search method", + "type": "string" + }, + "search_method_hyperparameters": { + "description": "A list of hyperparameters for the search method as recognized by the autotuner", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "value": { + "type": [ + "number", + "string", + "boolean", + "object", + "array" + ] + } + } + } + }, + "display_name": { + "description": "Name for the search strategy used in visualizations", + "type": "string" + }, + "color_parent": { + "description": "Name of the parent search strategy to use a shade of its color for this search strategy.", + "type": "string" + }, + "color_index": { + "description": "Override of the index of the color in the matplotlib colormap to use for this search strategy. Only use on strategy parents. If set, all strategy parents must have it.", + "type": "integer", + "minimum": 0 + }, + "stochastic": { + "description": "Whether the repeated runs of the same experimental group (combination of application, GPU and search strategy) exhibit stochastic behaviour, e.g. 
due to stochastic search strategy", + "type": "boolean", + "default": true + }, + "repeats": { + "description": "How many times to repeat the run for a single experimental group (combination of application, GPU and search strategy)", + "type": "integer", + "minimum": 1, + "default": 100 + }, + "samples": { + "description": "How many samples of measurements for a single configuration are present in full search space file", + "type": "integer", + "minimum": 1, + "default": 32 + }, + "minimum_number_of_valid_search_iterations": { + "description": "How many non-error, valid configurations account for a single run of search strategy", + "type": "integer", + "minimum": 1, + "default": 20 + }, + "ignore_cache": { + "description": "If true, always re-run the experiments, even though results from previously executed experiments are stored in run folder.", + "type": "boolean", + "default": false + } + } + } + }, + "statistics_settings": { + "description": "Settings for the statistics calculation", + "type": "object", + "required": [ + "cutoff_percentile", + "cutoff_percentile_start", + "cutoff_type", + "objective_time_keys" + ], + "properties": { + "cutoff_percentile": { + "description": "Fraction of difference between median and absolute optimum at which to stop the time range", + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + }, + "cutoff_percentile_start": { + "description": "Fraction of difference between median and absolute optimum at which to start the time range", + "type": "number", + "minimum": 0, + "exclusiveMaximum": 1 + }, + "cutoff_type": { + "description": "Whether to base the cutoff on function evaluations or time", + "type": "string", + "enum": [ + "fevals", + "time" + ] + }, + "objective_time_keys": { + "description": "Time key(s) to use as the time objective. In case of multiple keys, the values are summed.", + "type": "array", + "items": { + "enum": [ + "compilation", + "runtimes", + "framework", + "search_algorithm", + "validation", + "all" + ] + }, + "uniqueItems": true, + "minItems": 1 + } + } + }, + "visualization_settings": { + "description": "Settings for the visualizations", + "type": "object", + "required": [ + "plots", + "resolution", + "confidence_level" + ], + "properties": { + "plots": { + "description": "Specification of the plots to produce.", + "type": "array", + "items": { + "type": "object", + "required": [ + "scope", + "style" + ], + "properties": { + "scope": { + "description": "The scope of the plot: whether it's a plot per searchspace, per search strategy, or the aggregate.", + "type": "string", + "enum": [ + "searchspace", + "search_strategy", + "aggregate" + ] + }, + "style": { + "description": "The type of plot.", + "type": "string", + "enum": [ + "line", + "scatter", + "heatmap", + "head2head" + ] + }, + "tmin": { + "description": "The minimum value on the time axis of the aggregate plot.", + "type": [ + "number", + "string" + ], + "default": 1.0 + }, + "vmin": { + "description": "The minimum value on the heatmap color scale or aggregate plot.", + "type": [ + "number", + "string" + ], + "default": -0.5 + }, + "vmax": { + "description": "The maximum value on the heatmap color scale.", + "type": "number", + "default": 1.0 + }, + "cmin": { + "description": "The minimum value shown in the heatmap colorbar. Must be vmin <= cmin < cmax <= vmax.", + "type": "number", + "default": -0.5 + }, + "cmax": { + "description": "The maximum value shown in the heatmap colorbar. 
Must be vmin <= cmin < cmax <= vmax", + "type": "number", + "default": 1.0 + }, + "cnum": { + "description": "The number of evenly placed ticks on the heatmap colorbar to display.", + "type": "integer", + "minimum": 2, + "default": 5 + }, + "cap_to_vmin": { + "description": "Whether to allow values below vmin to be capped at vmin in the heatmap (must be reported if applied).", + "type": "boolean" + }, + "comparison": { + "description": "For head2head plots, the description of how to compare the search strategies.", + "type": "object", + "required": [ + "relative_time", + "unit" + ], + "properties": { + "relative_time": { + "description": "The relative time at which to compare the search strategies.", + "type": "number", + "exclusiveMinimum": 0, + "inclusiveMaximum": 1 + }, + "unit": { + "description": "The unit of the value with which to compare the search strategies (difference in time or performance).", + "type": "string", + "enum": [ + "objective", + "time" + ] + } + } + }, + "annotate": { + "description": "Whether to annotate the heatmaps with the values in their cells.", + "type": "boolean", + "default": true + }, + "include_y_labels": { + "description": "Whether to show the y-axis labels on the heatmap. Displayed if not set.", + "type": "boolean" + }, + "include_colorbar": { + "description": "Whether to show the heatmap colorbar. Displayed if not set.", + "type": "boolean" + }, + "bins": { + "description": "The number of bins to use when plotting over a heatmap over a non-discrete range (e.g. time)", + "type": "integer", + "default": 10 + }, + "x_axis_value_types": { + "description": "Types of value on the x-axis. Multiple values may produce multiple (sub) plots.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "fevals", + "time", + "applications", + "gpus", + "searchspaces" + ] + }, + "minItems": 1, + "uniqueItems": true + }, + "y_axis_value_types": { + "description": "Types of value on the y-axis. 
Multiple values may produce multiple (sub) plots.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "absolute", + "normalized", + "baseline", + "time", + "applications", + "gpus", + "searchspaces" + ] + }, + "minItems": 1, + "uniqueItems": true + }, + "x_label": { + "description": "Override the default generated label for the x-axis.", + "type": "string" + }, + "y_label": { + "description": "Override the default generated label for the y-axis.", + "type": "string" + }, + "print_mean_of_columns": { + "description": "Print the mean of the columns in the heatmap to the console.", + "type": "boolean", + "default": false + }, + "print_mean_of_rows": { + "description": "Print the mean of the rows in the heatmap to the console.", + "type": "boolean", + "default": false + }, + "divide_train_test_axis": { + "description": "Axis to divide the train and test data on in the heatmap.", + "type": "string", + "enum": [ + "gpus", + "applications" + ], + "default": "gpus" + }, + "divide_train_test_after_num": { + "description": "Number of items in the divide_train_test_axis after which to divide the train and test data.", + "type": "integer", + "default": 3 + } + } + } + }, + "resolution": { + "description": "The resolution of the time range", + "type": "integer", + "default": 1000, + "minimum": 2 + }, + "confidence_level": { + "description": "The confidence level used for the confidence / prediction interval, visualized as an error shade", + "type": "number", + "default": 0.95, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "compare_baselines": { + "description": "[preview feature] Compare baselines to each other. Requires editing the baselines list in the `plot_baselines_comparison` function.", + "type": "boolean", + "default": false + }, + "compare_split_times": { + "description": "[preview feature] Plot a comparison of split times for strategies and baselines", + "type": "boolean", + "default": false + } + } + } + } +} \ No newline at end of file diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 9fe2c06..43d46c7 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -2,12 +2,14 @@ from __future__ import annotations # for correct nested type hints e.g. 
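[Editor's example] Pulling the experiments schema above together, a minimal setup file that should validate against it could look like the sketch below (application, GPU, paths and numbers are all invented):

# Hypothetical minimal experiment setup conforming to the experiments schema above.
minimal_experiment = {
    "version": "1.2.0",
    "name": "example_experiment",
    "parent_folder": "./",
    "experimental_groups_defaults": {
        "autotuner": "KernelTuner",
        "applications": [{"name": "gemm", "input_file": "gemm_input.json", "objective_performance_keys": ["time"]}],
        "gpus": ["A100"],
        "repeats": 25,
        "samples": 32,
    },
    "search_strategies": [{"name": "random", "search_method": "random_sample", "display_name": "Random sampling"}],
    "statistics_settings": {
        "cutoff_percentile": 0.95,
        "cutoff_percentile_start": 0.01,
        "cutoff_type": "time",
        "objective_time_keys": ["compilation", "runtimes", "search_algorithm"],
    },
    "visualization_settings": {
        "plots": [{"scope": "searchspace", "style": "line", "x_axis_value_types": ["time"], "y_axis_value_types": ["normalized"]}],
        "resolution": 1000,
        "confidence_level": 0.95,
    },
}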
list[str], tuple[dict, str] -import json from math import ceil, floor from pathlib import Path +from warnings import warn +import matplotlib.pyplot as plt import numpy as np +from autotuning_methodology.formats_interface import load_T4_format from autotuning_methodology.validators import is_invalid_objective_performance, is_invalid_objective_time @@ -27,8 +29,118 @@ def nansumwrapper(array: np.ndarray, **kwargs) -> np.ndarray: return summed_array +def convert_from_time_unit(value, from_unit: str): + """Convert the value or list of values from the specified time unit to seconds.""" + if from_unit is None: + return None + elif isinstance(value, list): + return [convert_from_time_unit(v, from_unit) for v in value] + elif not isinstance(value, (int, float, complex)): + return value + unit = from_unit.lower() + if unit == "seconds" or unit == "s": + return value + elif unit == "milliseconds" or unit == "miliseconds" or unit == "ms": + return value / 1000 + elif unit == "microseconds": + return value / 1000000 + elif unit == "nanoseconds" or unit == "ns": + return value / 1000000000 + else: + raise ValueError(f"Conversion unit {from_unit} is not supported") + + +def is_not_invalid_value(value, performance: bool) -> bool: + """Checks if a performance or time value is an array or is not invalid.""" + if isinstance(value, str): + return False + if isinstance(value, (list, tuple, np.ndarray)): + return True + invalid_check_function = is_invalid_objective_performance if performance else is_invalid_objective_time + return not invalid_check_function(value) + +def filter_invalids(values, performance: bool) -> list: + """Filter out invalid values from the array. + + Assumes that `values` is a list or array of values. + If changes are made here, also change `is_invalid_objective_time`. + """ + if performance or any([isinstance(v, (str, list, tuple, np.ndarray)) for v in values]): + # if there are any non-numeric values, fall back to a list comprehension + return list([v for v in values if is_not_invalid_value(v, performance)]) + # invalid time values can be checked for the entire array at once, much faster than iterating + array = np.array(values) + return array[(~np.isnan(array)) & (array >= 0.0)].tolist() + +def to_valid_array( + results: list[dict], + key: str, + performance: bool, + from_time_unit: str = None, + replace_missing_measurement_from_times_key: str = None, +) -> np.ndarray: + """Convert results performance or time values to a numpy array, sum if the input is a list of arrays. + + replace_missing_measurement_from_times_key: if key is missing from measurements, use the mean value from times. 
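[Editor's example] A quick illustration of the convert_from_time_unit helper added above: scalars and lists are converted to seconds, and non-numeric values pass through unchanged.

# Minimal usage sketch of convert_from_time_unit (defined above).
assert convert_from_time_unit(250.0, "milliseconds") == 0.25
assert convert_from_time_unit([1000, 2500], "microseconds") == [0.001, 0.0025]
assert convert_from_time_unit("InvalidConfig", "ms") == "InvalidConfig"  # non-numeric values are returned as-is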
+ """ + # make a list of all valid values + if performance: + values = list() + for r in results: + val = None + # get the performance value from the measurements + measurements = list(filter(lambda m: m["name"] == key, r["measurements"])) + if len(measurements) == 0: + if replace_missing_measurement_from_times_key is not None: + val = np.mean(r["times"][replace_missing_measurement_from_times_key]) + else: + raise ValueError(f"Measurement with name {key} not found in {r['measurements']}") + if len(measurements) == 1: + m = measurements[0] + if key == m["name"]: + val = m["value"] + elif len(measurements) > 1: + raise ValueError(f"Multiple measurements with the same name {key} found in results") + # register the value + if is_not_invalid_value(val, performance): + # performance should not be auto-converted + # if len(m["unit"]) > 0: + # val = convert_from_time_unit(val, m["unit"]) + values.append(val) + else: + values.append(np.nan) + else: + values = list( + ( + convert_from_time_unit(v["times"][key], from_time_unit) + if key in v["times"] and is_not_invalid_value(v["times"][key], performance) + else np.nan + ) + for v in results + ) + # TODO other that time, performance such as power usage are in results["measurements"]. or not? + # check if there are values that are arrays + for value_index, value in enumerate(values): + if isinstance(value, (list, tuple, np.ndarray)): + # if the value is an array, sum the valid values + list_to_sum = filter_invalids(value, performance) + try: + sum_of_list = sum(list_to_sum) + values[value_index] = ( + sum_of_list + if len(list_to_sum) > 0 and is_not_invalid_value(sum_of_list, performance) + else np.nan + ) + except TypeError as e: + raise TypeError( + f"Invalid type for {key=}, {value=}, {list_to_sum=}, {values=}, {performance=}, {from_time_unit=} ({e})" + ) + assert all(isinstance(v, (int, float)) for v in values) + return np.array(values) + + class SearchspaceStatistics: - """Object for obtaining information from a raw, brute-forced cache file.""" + """Object for obtaining information from a full search space file.""" size: int repeats: int @@ -44,7 +156,7 @@ class SearchspaceStatistics: T4_time_keys_to_kernel_tuner_time_keys_mapping = { "compilation": "compile_time", - "benchmark": "benchmark_time", + "runtimes": "benchmark_time", "framework": "framework_time", "search_algorithm": "strategy_time", "validation": "verification_time", @@ -55,33 +167,35 @@ class SearchspaceStatistics: def __init__( self, - kernel_name: str, + application_name: str, device_name: str, minimization: bool, objective_time_keys: list[str], objective_performance_keys: list[str], - bruteforced_caches_path=Path("cached_data_used/cachefiles"), + full_search_space_file_path: str, + full_validate: bool = True, ) -> None: """Initialization method for a Searchspace statistics object. Args: - kernel_name: the name of the kernel. + application_name: the name of the kernel. device_name: the name of the device (GPU) used. minimization: whether the optimization algorithm was minimizing. objective_time_keys: the objective time keys used. objective_performance_keys: the objective performance keys used. - bruteforced_caches_path: the path to the bruteforced caches. + full_search_space_file_path: the path to the full search space file. + full_validate: whether to fully validate the searchspace statistics file on load. Defaults to True. 
""" self.loaded = False - self.kernel_name = kernel_name + self.application_name = application_name self.device_name = device_name self.minimization = minimization - self.objective_time_keys = self.T4_time_keys_to_kernel_tuner_time_keys(objective_time_keys) + self.objective_time_keys = objective_time_keys self.objective_performance_keys = objective_performance_keys - self.bruteforced_caches_path = bruteforced_caches_path + self.full_search_space_file_path = full_search_space_file_path # load the data into the arrays - self.loaded = self._load() + self.loaded = self._load(validate=full_validate) def T4_time_keys_to_kernel_tuner_time_keys(self, time_keys: list[str]) -> list[str]: """Temporary utility function to use the kernel tuner search space files with the T4 output format. @@ -131,9 +245,8 @@ def plot_histogram(self, cutoff_percentile: float): cutoff_percentile: the desired cutoff percentile to reach before stopping. """ # prepare plot - import matplotlib.pyplot as plt - fig, axs = plt.subplots(1, 1, sharey=True, tight_layout=True) + _, axs = plt.subplots(1, 1, sharey=True, tight_layout=True) if not isinstance(axs, list): axs = [axs] @@ -147,7 +260,7 @@ def plot_histogram(self, cutoff_percentile: float): n_bins = 200 axs[0].hist(performances, bins=n_bins) axs[0].set_ylabel("Number of configurations in bin") - axs[0].set_xlabel("Performance in miliseconds") + axs[0].set_xlabel("Performance in milliseconds") axs[0].axvline(x=[mean], label="Mean", c="red") axs[0].axvline(x=[median], label="Median", c="orange") axs[0].axvline(x=[cutoff_performance], label="Cutoff point", c="green") @@ -165,7 +278,11 @@ def cutoff_point(self, cutoff_percentile: float) -> tuple[float, int]: Returns: A tuple of the objective value at the cutoff point and the fevals to the cutoff point. 
""" - inverted_sorted_performance_arr = self.objective_performances_total_sorted[::-1] + inverted_sorted_performance_arr = ( + self.objective_performances_total_sorted[::-1] + if self.minimization + else self.objective_performances_total_sorted + ) N = inverted_sorted_performance_arr.shape[0] # get the objective performance at the cutoff point @@ -194,9 +311,27 @@ def cutoff_point(self, cutoff_percentile: float) -> tuple[float, int]: # iterate over the inverted_sorted_performance_arr until we have # i = next(x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] > cutoff_percentile * arr[-1]) - i = next( - x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] <= objective_performance_at_cutoff_point - ) + if self.minimization: + i = next( + x[0] + for x in enumerate(inverted_sorted_performance_arr) + if x[1] <= objective_performance_at_cutoff_point + ) + else: + i = next( + x[0] + for x in enumerate(inverted_sorted_performance_arr) + if x[1] >= objective_performance_at_cutoff_point + ) + if cutoff_percentile != 1.0 and inverted_sorted_performance_arr[i] == self.total_performance_absolute_optimum(): + if i == 0: + raise ValueError( + f"The optimum is directly reached ({inverted_sorted_performance_arr[i]})", + inverted_sorted_performance_arr, + ) + else: + i = i - 1 + warn(f"Scaled down cutoff point as {cutoff_percentile} is equal to optimum (1.0) for this distribution") # In case of x <= (1+p) * f_opt # i = next(x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] <= (1 + (1 - cutoff_percentile)) * arr[-1]) # noqa: E501 # In case of p*x <= f_opt @@ -207,6 +342,17 @@ def cutoff_point(self, cutoff_percentile: float) -> tuple[float, int]: # exit(0) return objective_performance_at_cutoff_point, fevals_to_cutoff_point + def cutoff_point_time_from_fevals(self, cutoff_point_fevals: int) -> float: + """Calculates the time to the cutoff point from the number of function evaluations. + + Args: + cutoff_point_fevals: the number of function evaluations to reach the cutoff point. + + Returns: + The time to the cutoff point. + """ + return cutoff_point_fevals * self.total_time_median() + def cutoff_point_fevals_time(self, cutoff_percentile: float) -> tuple[float, int, float]: """Calculates the cutoff point. @@ -217,18 +363,43 @@ def cutoff_point_fevals_time(self, cutoff_percentile: float) -> tuple[float, int A tuple of the objective value at cutoff point, fevals to cutoff point, and the mean time to cutoff point. """ cutoff_point_value, cutoff_point_fevals = self.cutoff_point(cutoff_percentile) - cutoff_point_time = cutoff_point_fevals * self.total_time_median() + cutoff_point_time = self.cutoff_point_time_from_fevals(cutoff_point_fevals) return cutoff_point_value, cutoff_point_fevals, cutoff_point_time - def _get_filepath(self, lowercase=True) -> Path: - """Returns the filepath.""" - kernel_directory = self.kernel_name - if lowercase: - kernel_directory = kernel_directory.lower() - filename = f"{self.device_name}.json" - if lowercase: - filename = filename.lower() - return self.bruteforced_caches_path / kernel_directory / filename + def cutoff_point_fevals_time_start_end( + self, cutoff_percentile_start: float, cutoff_percentile: float + ) -> tuple[int, int, float, float]: + """Calculates the cutoff point for both the start and end, and ensures there is enough margin between the two. + + Args: + cutoff_percentile_start: the desired cutoff percentile to reach before starting the plot. + cutoff_percentile: the desired cutoff percentile to reach before stopping. 
+ + Returns: + A tuple of the fevals to cutoff point start and end, and the mean time to cutoff point start and end. + """ + # get the cutoff points + _, cutoff_point_fevals_start = self.cutoff_point(cutoff_percentile_start) + _, cutoff_point_fevals_end = self.cutoff_point(cutoff_percentile) + + # apply a safe margin if needed + if cutoff_point_fevals_end - cutoff_point_fevals_start < 2: + if cutoff_point_fevals_start == 0: + cutoff_point_fevals_end = min(self.cutoff_point(1.0)[0], cutoff_point_fevals_end + 2) + else: + cutoff_point_fevals_end = min(self.cutoff_point(1.0)[0], cutoff_point_fevals_end + 1) + cutoff_point_fevals_start -= 1 + if cutoff_point_fevals_end - cutoff_point_fevals_start == 0: + raise ValueError("Cutoff point start and end are the same") + + # get the times + cutoff_point_time_start = self.cutoff_point_time_from_fevals( + cutoff_point_fevals_start if cutoff_percentile_start > 0.0 else 0 + ) + cutoff_point_time_end = self.cutoff_point_time_from_fevals(cutoff_point_fevals_end) + + # return the values + return cutoff_point_fevals_start, cutoff_point_fevals_end, cutoff_point_time_start, cutoff_point_time_end def get_valid_filepath(self) -> Path: """Returns the filepath to the Searchspace statistics .json file if it exists. @@ -239,9 +410,9 @@ def get_valid_filepath(self) -> Path: Returns: Filepath to the Searchspace statistics .json file. """ - filepath = self._get_filepath() + filepath = self.full_search_space_file_path if not filepath.exists(): - filepath = self._get_filepath(lowercase=False) + filepath = Path(str(self.full_search_space_file_path) + ".json") if not filepath.exists(): # if the file is not found, raise an error from os import getcwd @@ -251,148 +422,102 @@ def get_valid_filepath(self) -> Path: ) return filepath - def _is_not_invalid_value(self, value, performance: bool) -> bool: - """Checks if a cache performance or time value is an array or is not invalid.""" - if isinstance(value, str): - return False - if isinstance(value, (list, tuple, np.ndarray)): - return True - invalid_check_function = is_invalid_objective_performance if performance else is_invalid_objective_time - return not invalid_check_function(value) - - def _to_valid_array(self, cache_values: list[dict], key: str, performance: bool) -> np.ndarray: - """Convert valid cache performance or time values to a numpy array, sum if the input is a list of arrays.""" - # make a list of all valid values - values = list( - v[key] if key in v and self._is_not_invalid_value(v[key], performance) else np.nan for v in cache_values - ) - # check if there are values that are arrays - for value_index, value in enumerate(values): - if isinstance(value, (list, tuple, np.ndarray)): - # if the cache value is an array, sum the valid values - array = value - list_to_sum = list(v for v in array if self._is_not_invalid_value(v, performance)) - values[value_index] = ( - sum(list_to_sum) - if len(list_to_sum) > 0 and self._is_not_invalid_value(sum(list_to_sum), performance) - else np.nan - ) - assert all(isinstance(v, (int, float)) for v in values) - return np.array(values) - - def _load(self) -> bool: - """Load the contents of the cache file.""" + def _load(self, validate=True) -> bool: + """Load the contents of the full search space file.""" + # if not, use a script to create a file with values from KTT output and formatting of KernelTuner filepath = self.get_valid_filepath() - with open(filepath, "r", encoding="utf-8") as fh: - print(f"Loading statistics for {filepath}...") - # get the cache from the .json file - 
orig_contents = fh.read() - try: - data = json.loads(orig_contents) - except json.decoder.JSONDecodeError: - contents = orig_contents[:-1] + "}\n}" - try: - data = json.loads(contents) - except json.decoder.JSONDecodeError: - contents = orig_contents[:-2] + "}\n}" - data = json.loads(contents) - cache: dict = data["cache"] - self.cache = cache - - # get the time values per configuration - cache_values = list(cache.values()) - self.size = len(cache_values) - self.objective_times = dict() - for key in self.objective_time_keys: - self.objective_times[key] = self._to_valid_array(cache_values, key, performance=False) - self.objective_times[key] = ( - self.objective_times[key] / 1000 - ) # TODO Kernel Tuner specific miliseconds to seconds conversion - assert ( - self.objective_times[key].ndim == 1 - ), f"Should have one dimension, has {self.objective_times[key].ndim}" - assert self.objective_times[key].shape[0] == len( - cache_values - ), f"Should have the same size as cache_values ({self.size}), has {self.objective_times[key].shape[0]}" - assert not np.all( - np.isnan(self.objective_times[key]) - ), f"""All values for {key=} are NaN. - Likely the experiment did not collect time values for objective_time_keys '{key}'.""" - - # get the performance values per configuration - self.objective_performances = dict() - for key in self.objective_performance_keys: - self.objective_performances[key] = self._to_valid_array(cache_values, key, performance=True) - assert ( - self.objective_performances[key].ndim == 1 - ), f"Should have one dimension, has {self.objective_performances[key].ndim}" - assert self.objective_performances[key].shape[0] == len( - cache_values - ), f"""Should have the same size as cache_values ({self.size}), - has {self.objective_performances[key].shape[0]}""" - assert not np.all( - np.isnan(self.objective_performances[key]) - ), f"""All values for {key=} are NaN. - Likely the experiment did not collect performance values for objective_performance_key '{key}'.""" - - # get the number of repeats - valid_cache_index: int = 0 - while "times" not in cache_values[valid_cache_index]: - valid_cache_index += 1 - self.repeats = len(cache_values[valid_cache_index]["times"]) - - # combine the arrays to the shape [len(objective_keys), self.size] - self.objective_times_array = np.array(list(self.objective_times[key] for key in self.objective_time_keys)) - assert self.objective_times_array.shape == tuple([len(self.objective_time_keys), self.size]) - self.objective_performances_array = np.array( - list(self.objective_performances[key] for key in self.objective_performance_keys) - ) - assert self.objective_performances_array.shape == tuple([len(self.objective_performance_keys), self.size]) - - # get the totals - self.objective_times_total = nansumwrapper(self.objective_times_array, axis=0) - assert self.objective_times_total.shape == tuple([self.size]) - # more of a test than a necessary assert - assert ( - np.nansum(self.objective_times_array[:, 0]) == self.objective_times_total[0] - ), f"""Sums of objective performances do not match: - {np.nansum(self.objective_times_array[:, 0])} vs. 
{self.objective_times_total[0]}""" - self.objective_performances_total = nansumwrapper(self.objective_performances_array, axis=0) - assert self.objective_performances_total.shape == tuple([self.size]) - # more of a test than a necessary assert - assert ( - np.nansum(self.objective_performances_array[:, 0]) == self.objective_performances_total[0] - ), f"""Sums of objective performances do not match: - {np.nansum(self.objective_performances_array[:, 0])} vs. {self.objective_performances_total[0]}""" - - # sort - self.objective_times_total_sorted = np.sort( - self.objective_times_total[~np.isnan(self.objective_times_total)] - ) - self.objective_times_number_of_nan = ( - self.objective_times_total.shape[0] - self.objective_times_total_sorted.shape[0] + data = load_T4_format(filepath, validate=validate) + metadata: dict = data.get("metadata", {}) + timeunit = metadata.get("timeunit", "seconds") + results: dict = data["results"] + self.results = results + + # get the time values per configuration + self.size = len(data["results"]) + self.objective_times = dict() + for key in self.objective_time_keys: + self.objective_times[key] = to_valid_array(results, key, performance=False, from_time_unit=timeunit) + assert self.objective_times[key].ndim == 1, ( + f"Should have one dimension, has {self.objective_times[key].ndim}" ) - objective_performances_nan_mask = np.isnan(self.objective_performances_total) - self.objective_performances_number_of_nan = np.count_nonzero(objective_performances_nan_mask) - self.objective_performances_total_sorted = np.sort( - self.objective_performances_total[~objective_performances_nan_mask] + assert self.objective_times[key].shape[0] == self.size, ( + f"Should have the same size as results ({self.size}), has {self.objective_times[key].shape[0]}" ) - # make sure the best values are at the start, because NaNs are appended to the end - sorted_best_first = ( - self.objective_performances_total_sorted - if self.minimization - else self.objective_performances_total_sorted[::-1] + assert not np.all(np.isnan(self.objective_times[key])), f"""All values for {key=} are NaN. + Likely the experiment did not collect time values for objective_time_keys '{key}'.""" + + # get the performance values per configuration + self.objective_performances = dict() + for key in self.objective_performance_keys: + self.objective_performances[key] = to_valid_array( + results, + key, + performance=True, + replace_missing_measurement_from_times_key="runtimes" if key == "time" else None, ) - self.objective_performances_total_sorted_nan = np.concatenate( - (sorted_best_first, [np.nan] * self.objective_performances_number_of_nan) + assert self.objective_performances[key].ndim == 1, ( + f"Should have one dimension, has {self.objective_performances[key].ndim}" ) + assert ( + self.objective_performances[key].shape[0] == self.size + ), f"""Should have the same size as results ({self.size}), + has {self.objective_performances[key].shape[0]}""" + assert not np.all(np.isnan(self.objective_performances[key])), f"""All values for {key=} are NaN. + Likely the experiment did not collect performance values for objective_performance_key '{key}'.""" + + # get the number of repeats + # TODO is this necessary? 
number of repeats is given in experiments setup file
+        # valid_cache_index: int = 0
+        # while "times" not in cache_values[valid_cache_index]:
+        #     valid_cache_index += 1
+        # self.repeats = len(cache_values[valid_cache_index]["times"])
+
+        # combine the arrays to the shape [len(objective_keys), self.size]
+        self.objective_times_array = np.array(list(self.objective_times[key] for key in self.objective_time_keys))
+        assert self.objective_times_array.shape == tuple([len(self.objective_time_keys), self.size])
+        self.objective_performances_array = np.array(
+            list(self.objective_performances[key] for key in self.objective_performance_keys)
+        )
+        assert self.objective_performances_array.shape == tuple([len(self.objective_performance_keys), self.size])
+
+        # get the totals
+        self.objective_times_total = nansumwrapper(self.objective_times_array, axis=0)
+        assert self.objective_times_total.shape == tuple([self.size])
+        # more of a test than a necessary assert
+        assert (
+            np.nansum(self.objective_times_array[:, 0]) == self.objective_times_total[0]
+        ), f"""Sums of objective times do not match:
+            {np.nansum(self.objective_times_array[:, 0])} vs. {self.objective_times_total[0]}"""
+        self.objective_performances_total = nansumwrapper(self.objective_performances_array, axis=0)
+        assert self.objective_performances_total.shape == tuple([self.size])
+        # more of a test than a necessary assert
+        assert (
+            np.nansum(self.objective_performances_array[:, 0]) == self.objective_performances_total[0]
+        ), f"""Sums of objective performances do not match:
+            {np.nansum(self.objective_performances_array[:, 0])} vs. {self.objective_performances_total[0]}"""
+
+        # sort
+        self.objective_times_total_sorted = np.sort(self.objective_times_total[~np.isnan(self.objective_times_total)])
+        self.objective_times_number_of_nan = (
+            self.objective_times_total.shape[0] - self.objective_times_total_sorted.shape[0]
+        )
+        objective_performances_nan_mask = np.isnan(self.objective_performances_total)
+        self.objective_performances_number_of_nan = np.count_nonzero(objective_performances_nan_mask)
+        self.objective_performances_total_sorted = np.sort(
+            self.objective_performances_total[~objective_performances_nan_mask]
+        )
+        # make sure the best values are at the start, because NaNs are appended to the end
+        sorted_best_first = (
+            self.objective_performances_total_sorted
+            if self.minimization
+            else self.objective_performances_total_sorted[::-1]
+        )
+        self.objective_performances_total_sorted_nan = np.concatenate(
+            (sorted_best_first, [np.nan] * self.objective_performances_number_of_nan)
+        )
         return True
 
-    def get_value_in_config(self, config: str, key: str):
-        """Get the value for a key given a configuration."""
-        return self.cache[config][key]
-
     def get_num_duplicate_values(self, value: float) -> int:
         """Get the number of duplicate values in the searchspace."""
         duplicates = np.count_nonzero(np.where(self.objective_performances_total == value, 1, 0)) - 1
@@ -402,7 +527,7 @@ def get_num_duplicate_values(self, value: float) -> int:
 
     def mean_strategy_time_per_feval(self) -> float:
         """Gets the average time spent on the strategy per function evaluation."""
-        if "strategy" in self.objective_times:
+        if "search_algorithm" in self.objective_times:
             strategy_times = self.objective_times
             invalid_mask = np.isnan(self.objective_performances_total)
             if not all(invalid_mask):
diff --git a/src/autotuning_methodology/validators.py b/src/autotuning_methodology/validators.py
index 1fcab0b..7c4469d 100644
--- a/src/autotuning_methodology/validators.py
+++ b/src/autotuning_methodology/validators.py @@ -1,9 +1,56 @@ """Module containing various checks for validity.""" +from importlib.resources import files +from json import load + import numpy as np +from jsonschema import validate error_types_strings = ["", "InvalidConfig", "CompilationFailedConfig", "RuntimeFailedConfig"] kernel_tuner_error_value = 1e20 +schemas_path = files("autotuning_methodology").joinpath("schemas") + + +def get_experiment_schema_filepath(): + """Obtains and checks the filepath to the JSON schema. + + Returns: + the filepath to the schema in Traversable format. + """ + schemafile = schemas_path.joinpath("experiments.json") + assert schemafile.is_file(), f"Path to experiments.json does not exist, attempted path: {schemafile}" + return schemafile + + +def get_T4_schema_filepath(): + """Obtains and checks the filepath to the JSON schema. + + Returns: + the filepath to the schema in Traversable format. + """ + schemafile = schemas_path.joinpath("T4.json") + assert schemafile.is_file(), f"Path to T4.json does not exist, attempted path: {schemafile}" + return schemafile + + +def validate_with_schema_path(instance: dict, schemafile_path, encoding: str) -> dict: + """Validates the passed instance against the passed schema path. Returns schema or throws ValidationError.""" + with schemafile_path.open("r", encoding=encoding) as fp: + schema = load(fp) + validate(instance=instance, schema=schema) + return schema + + +def validate_experimentsfile(instance: dict, encoding="utf-8") -> dict: + """Validates the passed instance against the experiments file schema. Returns schema or throws ValidationError.""" + schemafile_path = get_experiment_schema_filepath() + return validate_with_schema_path(instance, schemafile_path, encoding) + + +def validate_T4(instance: dict, encoding="utf-8") -> dict: + """Validates the passed instance against the T4 schema. Returns schema or throws ValidationError.""" + schemafile_path = get_T4_schema_filepath() + return validate_with_schema_path(instance, schemafile_path, encoding) def is_invalid_objective_performance(objective_performance: float) -> bool: @@ -29,7 +76,7 @@ def is_invalid_objective_performance(objective_performance: float) -> bool: def is_invalid_objective_time(objective_time: float) -> bool: - """Returns whether an objective time is invalid. + """Returns whether an objective time is invalid. If changes are made here, also change `filter_invalids`. Args: objective_time: the objective time value to check. 
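Note on the validators added above: a minimal usage sketch, illustrative only and not part of the patch. The path "experiment.json" is a placeholder; on success the helpers return the loaded schema, and on failure jsonschema raises a ValidationError.

    import json

    from jsonschema import ValidationError

    from autotuning_methodology.validators import validate_experimentsfile

    # load a hypothetical experiments setup file and check it against the bundled schema
    with open("experiment.json", "r", encoding="utf-8") as fp:
        instance = json.load(fp)
    try:
        schema = validate_experimentsfile(instance)  # returns the schema when the instance is valid
    except ValidationError as err:
        print(f"Experiments file does not match the schema: {err.message}")
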
diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py
index a2ee993..fb2cd0e 100755
--- a/src/autotuning_methodology/visualize_experiments.py
+++ b/src/autotuning_methodology/visualize_experiments.py
@@ -4,12 +4,13 @@
 
 import warnings
 from collections import defaultdict
+from math import ceil
 from pathlib import Path
 
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.cm import get_cmap
-from matplotlib.colors import rgb2hex
+from matplotlib.colors import LinearSegmentedColormap, rgb2hex, to_hex, to_rgb
 
 from autotuning_methodology.baseline import (
     Baseline,
@@ -29,17 +30,88 @@
 
 # The kernel information per device and device information for visualization purposes
 marker_variatons = ["v", "s", "*", "1", "2", "d", "P", "X"]
 
+remove_from_gpus_label = ""
+remove_from_applications_label = " milo"
+remove_from_searchspace_label = " milo"
+
 # total set of objective time keys
 objective_time_keys_values = ["compilation", "benchmark", "framework", "search_algorithm", "validation"]
 
 
-def get_colors(strategies: list[dict], scale_margin_left=0.4, scale_margin_right=0.15):
+def lighten_color(color, amount: float = 0.5):
+    """Lightens the given color by interpolating it toward white."""
+    r, g, b = to_rgb(color)
+    return to_hex([(1 - amount) * c + amount for c in (r, g, b)])
+
+
+def get_colors(strategies: list[dict]) -> list:
+    """Assign colors using the tab10 colormap, with lighter shades for children."""
+    tab10 = plt.get_cmap("tab10").colors
+    tab10 = [c for i, c in enumerate(tab10) if i != 1]  # remove the second color (orange) to avoid confusion with the fourth (red)
+    max_parents = len(tab10)
+    strategy_parents = defaultdict(list)
+    override_index = False
+
+    # Group children under their parents and check for overridden color indices
+    for i, strategy in enumerate(strategies):
+        if "color_parent" in strategy:
+            strategy_parents[strategy["color_parent"]].append(i)
+        if "color_index" in strategy:
+            override_index = True
+            if "color_parent" in strategy:
+                raise ValueError(
+                    f"Strategy '{strategy['name']}' has both 'color_index' and 'color_parent' defined, which is not allowed."
+                )
+
+    if len(strategy_parents) > max_parents:
+        raise ValueError(f"Too many color parents: max supported is {max_parents} using tab10")
+
+    parent_colors = {}
+    colors = [None] * len(strategies)
+    color_index = 0
+
+    for i, strategy in enumerate(strategies):
+        name = strategy["name"]
+        if name in strategy_parents:
+            children_indices = strategy_parents[name]
+            if len(children_indices) > 2:
+                raise ValueError(f"Color parent '{name}' has more than two children")
+            if override_index:
+                assert "color_index" in strategy, f"All strategies, including '{name}', must have either 'color_index' or 'color_parent' if 'color_index' is used anywhere."
+                color_index = strategy["color_index"]
+                if color_index >= len(tab10):
+                    raise ValueError(f"Color index {color_index} for strategy '{name}' is out of bounds for tab10 colormap (max {len(tab10) - 1})")
+            base_color = tab10[color_index]
+            parent_colors[name] = {
+                idx: lighten_color(base_color, amount=0.4 + 0.3 * j) for j, idx in enumerate(children_indices)
+            }
+            colors[i] = to_hex(base_color)
+            color_index += 1
+        elif "color_parent" in strategy:
+            parent = strategy["color_parent"]
+            assert parent in parent_colors, f"Parent '{parent}' for strategy '{name}' not found in parent colors - child strategies must be defined after their parents."
+ colors[i] = parent_colors[parent][i] + else: + if override_index: + assert "color_index" in strategy, f"All strategies, including '{name}', must have either 'color_index' or 'color_parent' if 'color_index' is used anywhere." + color_index = strategy["color_index"] + if color_index >= len(tab10): + raise ValueError("Too many unparented strategies for tab10 colormap") + colors[i] = to_hex(tab10[color_index]) + color_index += 1 + + return colors + + +def get_colors_old(strategies: list[dict], scale_margin_left=0.4, scale_margin_right=0.15) -> list: """Function to get the colors for each of the strategies.""" default_colors = plt.rcParams["axes.prop_cycle"].by_key()["color"] - main_colors = ["Blues", "Greens", "Reds", "Purples", "Greys"] + main_colors = ["Blues", "Greens", "Reds", "Purples", "Greys", "Oranges"] main_color_counter = 0 strategy_parents = defaultdict(list) + # TODO switch to qualitative colormaps, e.g. tab10 if no children, otherwise tab20 (https://matplotlib.org/stable/users/explain/colors/colormaps.html#qualitative) + # get the dictionary of parents with the index of their child strategies for strategy_index, strategy in enumerate(strategies): if "color_parent" in strategy: @@ -96,10 +168,14 @@ class Visualize: "time_total": "Total time in seconds", "aggregate_time": "Relative time to cutoff point", "time_partial_framework_time": "framework time", + "time_partial_framework": "framework time", "time_partial_strategy_time": "strategy time", + "time_partial_search_algorithm": "strategy time", "time_partial_compile_time": "compile time", + "time_partial_compilation": "compile time", "time_partial_benchmark_time": "kernel runtime", "time_partial_times": "kernel runtime", + "time_partial_runtimes": "kernel runtime", "time_partial_verification_time": "verification time", } ) @@ -114,7 +190,7 @@ class Visualize: "objective_baseline_max": "Improvement over random sampling", "aggregate_objective": "Aggregate best-found objective value relative to baseline", "aggregate_objective_max": "Aggregate improvement over random sampling", - "time": "Best-found kernel time in miliseconds", + "time": "Best-found kernel time in milliseconds", "GFLOP/s": "GFLOP/s", } ) @@ -127,8 +203,6 @@ class Visualize: "baseline", ] # absolute values, scatterplot, median-absolute normalized, improvement over baseline - plot_filename_prefix_parent = "generated_plots" - def __init__( self, experiment_filepath: str, @@ -154,71 +228,72 @@ def __init__( # # silently execute the experiment # with warnings.catch_warnings(): # warnings.simplefilter("ignore") - self.experiment, self.strategies, self.results_descriptions = execute_experiment( - experiment_filepath, profiling=False + self.experiment, self.all_experimental_groups, self.searchspace_statistics, self.results_descriptions = ( + execute_experiment(experiment_filepath, profiling=False) ) - experiment_folderpath = Path(experiment_filepath).parent - experiment_folder_id: str = self.experiment["folder_id"] - assert isinstance(experiment_folder_id, str) and len(experiment_folder_id) > 0 - self.plot_filename_prefix = f"{self.plot_filename_prefix_parent}/{experiment_folder_id}/" + experiment_folder: Path = self.experiment["parent_folder_absolute_path"] + assert isinstance(experiment_folder, Path) + self.plot_filename_prefix = experiment_folder.joinpath("run", "generated_graphs") print("\n") print("Visualizing") # preparing filesystem if save_figs or save_extra_figs: - Path(self.plot_filename_prefix_parent).mkdir(exist_ok=True) 
Path(self.plot_filename_prefix).mkdir(exist_ok=True) + # search strategies are search methods defined in experiments setup file + # self.all_experimental_groups are all combinations of gpu+application+search method that got executed + self.strategies = self.experiment["search_strategies"] # settings - self.minimization: bool = self.experiment.get("minimization", True) - cutoff_percentile: float = self.experiment["cutoff_percentile"] - cutoff_percentile_start: float = self.experiment.get("cutoff_percentile_start", 0.01) - cutoff_type: str = self.experiment.get("cutoff_type", "fevals") + cutoff_percentile: float = self.experiment["statistics_settings"]["cutoff_percentile"] + cutoff_percentile_start: float = self.experiment["statistics_settings"]["cutoff_percentile_start"] + cutoff_type: str = self.experiment["statistics_settings"]["cutoff_type"] assert cutoff_type == "fevals" or cutoff_type == "time", f"cutoff_type != 'fevals' or 'time', is {cutoff_type}" - time_resolution: float = self.experiment.get("resolution", 1e4) + time_resolution: float = self.experiment["visualization_settings"]["resolution"] if int(time_resolution) != time_resolution: raise ValueError(f"The resolution must be an integer, yet is {time_resolution}.") time_resolution = int(time_resolution) - objective_time_keys: list[str] = self.experiment["objective_time_keys"] + objective_time_keys: list[str] = self.experiment["statistics_settings"]["objective_time_keys"] # plot settings - plot_settings: dict = self.experiment["plot"] - plot_x_value_types: list[str] = plot_settings["plot_x_value_types"] - plot_y_value_types: list[str] = plot_settings["plot_y_value_types"] - compare_baselines: bool = plot_settings.get("compare_baselines", False) - compare_split_times: bool = plot_settings.get("compare_split_times", False) - confidence_level: float = plot_settings.get("confidence_level", 0.95) - self.colors = get_colors( - self.strategies, - scale_margin_left=plot_settings.get("color_parent_scale_margin_left", 0.4), - scale_margin_right=plot_settings.get("color_parent_scale_margin_right", 0.1), - ) + plots: list[dict] = self.experiment["visualization_settings"]["plots"] + compare_baselines: bool = self.experiment["visualization_settings"]["compare_baselines"] + compare_split_times: bool = self.experiment["visualization_settings"]["compare_split_times"] + confidence_level: float = self.experiment["visualization_settings"]["confidence_level"] + self.colors = get_colors(self.strategies) + # self.colors = get_colors_old( + # self.strategies, + # scale_margin_left=self.experiment["visualization_settings"].get("color_parent_scale_margin_left", 0.4), + # scale_margin_right=self.experiment["visualization_settings"].get("color_parent_scale_margin_right", 0.1), + # ) self.plot_skip_strategies: list[str] = list() if use_strategy_as_baseline is not None: self.plot_skip_strategies.append(use_strategy_as_baseline) # visualize aggregation_data = get_aggregation_data( - experiment_folderpath, + experiment_folder, self.experiment, + self.searchspace_statistics, self.strategies, self.results_descriptions, cutoff_percentile, cutoff_percentile_start, confidence_level, - self.minimization, time_resolution, use_strategy_as_baseline, ) - for gpu_name in self.experiment["GPUs"]: - for kernel_name in self.experiment["kernels"]: - print(f" | visualizing optimization of {kernel_name} for {gpu_name}") - title = f"{kernel_name} on {gpu_name}" + + # plot per searchspace + for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: + for 
application_name in self.experiment["experimental_groups_defaults"]["applications_names"]: + print(f" | visualizing optimization of {application_name} for {gpu_name}") + title = f"{application_name} on {gpu_name}" title = title.replace("_", " ") # unpack the aggregation data random_baseline, strategies_curves, searchspace_stats, time_range, fevals_range = aggregation_data[ - get_aggregation_data_key(gpu_name=gpu_name, kernel_name=kernel_name) + get_aggregation_data_key(gpu_name=gpu_name, application_name=application_name) ] # baseline_time_interpolated = np.linspace(mean_feval_time, cutoff_point_time, time_resolution) @@ -283,88 +358,742 @@ def __init__( # ) # ) - # visualize the results - for x_type in plot_x_value_types: - if x_type == "aggregated": + for plot in plots: + # get settings + scope: str = plot["scope"] + if scope != "searchspace": continue - elif x_type == "fevals": - x_axis_range = fevals_range - elif x_type == "time": - x_axis_range = time_range + style: str = plot["style"] + plot_x_value_types: list[str] = plot["x_axis_value_types"] + plot_y_value_types: list[str] = plot["y_axis_value_types"] + + # visualize the results + for x_type in plot_x_value_types: + if x_type == "fevals": + x_axis_range = fevals_range + elif x_type == "time": + x_axis_range = time_range + else: + raise NotImplementedError(f"X-axis type '{x_type}' not supported for scope '{plot}'") + + # create the figure and plots + fig, axs = plt.subplots( + nrows=len(plot_y_value_types), + ncols=1, + figsize=(8, 4.2 * len(plot_y_value_types)), + sharex=True, + dpi=300, + ) + if not hasattr( + axs, "__len__" + ): # if there is just one subplot, wrap it in a list so it can be passed to the plot functions + axs = [axs] + fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) + + # plot the subplots of individual searchspaces + for index, y_type in enumerate(plot_y_value_types): + self.plot_strategies( + style, + x_type, + y_type, + axs[index], + searchspace_stats, + strategies_curves, + x_axis_range, + self.experiment["visualization_settings"], + random_baseline, + baselines_extra=baselines_extra, + ) + if index == 0: + loc = "lower right" if y_type == "normalized" else "best" + axs[index].legend(loc=loc) + + # finalize the figure and save or display it + fig.supxlabel(self.get_x_axis_label(x_type, objective_time_keys)) + fig.tight_layout() + if save_figs: + filename_path = Path(self.plot_filename_prefix) / f"{title}_{x_type}".replace(" ", "_") + fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) + print(f"Figure saved to {filename_path}") + else: + plt.show() + + # plot per searchstrategy + for plot in plots: + # get settings + scope: str = plot["scope"] + style: str = plot["style"] + if scope != "search_strategy": + continue + if style != "heatmap" and style != "compare_heatmaps": + raise NotImplementedError( + f"Scope {scope} currently only supports 'heatmap' or 'compare_heatmaps' as a style, not {style}" + ) + plot_x_value_types: list[str] = plot["x_axis_value_types"] + plot_y_value_types: list[str] = plot["y_axis_value_types"] + annotate: bool = plot.get("annotate", True) + print_mean_of_columns: bool = plot.get("print_mean_of_columns", False) + print_mean_of_rows: bool = plot.get("print_mean_of_rows", False) + assert len(plot_x_value_types) == 1 + assert len(plot_y_value_types) == 1 + x_type = plot_x_value_types[0] + y_type = plot_y_value_types[0] + bins = plot.get("bins", 10) + vmin = plot.get("vmin", -15.0) # color range lower limit + vmax = 
plot.get("vmax", 1.0)  # color range upper limit
+            cmin = plot.get("cmin", vmin)  # colorbar lower limit
+            cmax = plot.get("cmax", vmax)  # colorbar upper limit
+            cnum = plot.get("cnum", 5)  # number of ticks on the colorbar
+            cap_to_vmin = plot.get("cap_to_vmin", False)  # whether to cap the values to vmin
+            divide_train_test_axis = plot.get(
+                "divide_train_test_axis", False
+            )  # whether to add visual indication for train/test split
+            divide_train_test_after_num = plot.get(
+                "divide_train_test_after_num", False
+            )  # where to add the visual indication for train/test split
+            include_y_labels = plot.get("include_y_labels", None)
+            include_colorbar = plot.get("include_colorbar", True)
+            if vmin != -15.0:
+                warnings.warn(
+                    f"Careful: VMin has been changed from -15.0 to {vmin}. This breaks visual comparison compatibility with plots that do not have the same VMin. Maybe use cmin instead?"
+                )
+            if vmax != 1.0:
+                warnings.warn(
+                    f"Careful: VMax has been changed from 1.0 to {vmax}. This breaks visual comparison compatibility with plots that do not have the same VMax. Maybe use cmax instead?"
+                )
+            if cmin < vmin:
+                raise ValueError(
+                    f"Colorbar minimum can't be lower than the minimum value of the heatmap: {cmin} < {vmin}"
+                )
+            if cmax > vmax:
+                raise ValueError(
+                    f"Colorbar maximum can't be higher than the maximum value of the heatmap: {cmax} > {vmax}"
+                )
+
+            # set the colormap
+            def norm_color_val(v):
+                """Normalize a color value to fit in the 0-1 range."""
+                return (v - vmin) / (vmax - vmin)
+
+            cmap = LinearSegmentedColormap.from_list(
+                "my_colormap",
+                [
+                    (norm_color_val(-15.0), "black"),
+                    (norm_color_val(-4.0), "red"),
+                    (norm_color_val(-1.0), "orange"),
+                    (norm_color_val(0.0), "yellow"),
+                    (norm_color_val(1.0), "green"),
+                ],
+            )
+
+            # collect and plot the data for each search strategy
+            data_collected: dict[str, list[tuple]] = defaultdict(list)
+            for strategy in self.strategies:
+                strategy_name = strategy["name"]
+                strategy_displayname = strategy["display_name"]
+                assert (
+                    sum([1 for s in self.strategies if s["name"] == strategy_name]) == 1
+                ), f"Strategy name '{strategy_name}' is not unique"
+
+                # get the data from the collected aggregated data
+                for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]:
+                    for application_name in self.experiment["experimental_groups_defaults"]["applications_names"]:
+                        # unpack the aggregation data
+                        random_baseline, strategies_curves, searchspace_stats, time_range, fevals_range = (
+                            aggregation_data[
+                                get_aggregation_data_key(gpu_name=gpu_name, application_name=application_name)
+                            ]
+                        )
+
+                        # get the data
+                        dist = searchspace_stats.objective_performances_total_sorted
+                        for _, strategy_curve in enumerate(strategies_curves):
+                            if strategy_name != strategy_curve.name:
+                                continue
+                            # get the real and fictional performance curves
+                            (
+                                _,
+                                x_axis_range_real,
+                                curve_real,
+                                _,
+                                _,
+                                x_axis_range_fictional,
+                                curve_fictional,
+                                _,
+                                _,
+                            ) = strategy_curve.get_curve_over_time(
+                                time_range, dist=dist, confidence_level=confidence_level
+                            )
+                            # combine the real and fictional parts to get the full curve
+                            combine = x_axis_range_fictional.ndim > 0
+                            x_axis_range = (
+                                np.concatenate([x_axis_range_real, x_axis_range_fictional])
+                                if combine
+                                else x_axis_range_real
+                            )
+                            assert np.array_equal(
+                                time_range, x_axis_range, equal_nan=True
+                            ), "time_range != x_axis_range"
+                            curve = np.concatenate([curve_real, curve_fictional]) if combine else curve_real
+                            # get the standardised curves and write them to the collector
+                            curve: np.ndarray = 
random_baseline.get_standardised_curves( + time_range, [curve], x_type="time" + )[0] + score = np.mean(curve, axis=0) + curve_binned = np.array_split(curve, bins) + score_binned = [np.mean(c, axis=0) for c in curve_binned] + + # set the data + gpu_display_name = str(gpu_name).replace("_", " ") + application_display_name = str(application_name).replace("_", " ").capitalize() + data_collected[strategy_name].append( + tuple([gpu_display_name, application_display_name, score, score_binned]) + ) + if style == "heatmap": + for strategy in self.strategies: + strategy_name = strategy["name"] + strategy_displayname = strategy["display_name"] + strategy_data = data_collected[strategy_name] + + # get the performance per selected type in an array + plot_data = np.stack(np.array([t[2] for t in strategy_data])) + cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1.0) + cutoff_percentile_start: float = self.experiment["statistics_settings"].get( + "cutoff_percentile_start", 0.01 + ) + label_data = { + "gpus": ( + list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), + "GPUs", + ), + "applications": ( + list( + dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data]) + ), + "Applications", + ), + "searchspaces": ( + list( + dict.fromkeys( + [ + f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") + for t in strategy_data + ] + ) + ), + "Searchspaces", + ), + "time": ( + np.round(np.linspace(0.0, 1.0, bins), 2), + f"Fraction of time between {cutoff_percentile_start * 100}% and {cutoff_percentile * 100}%", + ), + } + x_ticks = label_data[x_type][0] + y_ticks = label_data[y_type][0] + figsize = None + if (x_type == "time" and y_type == "searchspaces") or ( + x_type == "searchspaces" and y_type == "time" + ): + plot_data: np.ndarray = np.stack(np.array([t[3] for t in strategy_data])) + if x_type == "searchspaces": + plot_data = plot_data.transpose() + figsize = (9, 5) + elif (x_type == "gpus" and y_type == "applications") or ( + y_type == "gpus" and x_type == "applications" + ): + plot_data = np.reshape( + plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0])) + ) + if x_type == "gpus": + plot_data = np.transpose(plot_data) + figsize = (5, 3.5) else: - raise ValueError(f"Invalid {x_type=}") + raise NotImplementedError( + f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." + ) - # create the figure and plots - fig, axs = plt.subplots( - nrows=len(plot_y_value_types), - ncols=1, - figsize=(8, 3.4 * len(plot_y_value_types)), - sharex=True, - dpi=300, + # validate the data is within the vmin-vmax range and visible colorbar range + assert not ( + plot_data > 1.0 + ).any(), ( + "Plot data contains values greater than 1.0, which should not be possible. Please investigate." 
                    )
+                if cap_to_vmin:
+                    plot_data = np.clip(plot_data, vmin, 1.0)
+                outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax))
+                assert (
+                    len(outside_range[0]) == 0 and len(outside_range[1]) == 0
+                ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})"
+                outside_visible_range = np.where(np.logical_or(plot_data < cmin, plot_data > cmax))
+                if not (len(outside_visible_range[0]) == 0 and len(outside_visible_range[1]) == 0):
+                    warnings.warn(
+                        f"There are values outside of the visible colorbar range ({cmin}, {cmax}): {plot_data[outside_visible_range]} ({outside_visible_range})"
+                    )
+
+                # set up the plot
+                fig, axs = plt.subplots(
+                    ncols=1, figsize=figsize, dpi=300
+                )  # if multiple subplots, pass the axis to the plot function with axs[0] etc.
+                if not hasattr(axs, "__len__"):
                     axs = [axs]
+                title = f"Performance of {strategy_displayname} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}"
                 fig.canvas.manager.set_window_title(title)
                 if not save_figs:
                     fig.suptitle(title)
 
-                # plot the subplots of individual searchspaces
-                for index, y_type in enumerate(plot_y_value_types):
-                    self.plot_strategies(
-                        x_type,
-                        y_type,
-                        axs[index],
-                        searchspace_stats,
-                        strategies_curves,
-                        x_axis_range,
-                        plot_settings,
-                        random_baseline,
-                        baselines_extra=baselines_extra,
-                    )
-                    if index == 0:
-                        loc = "lower right" if y_type == "normalized" else "best"
-                        axs[index].legend(loc=loc)
+                # plot the heatmap
+                axs[0].set_xlabel(plot.get("xlabel", label_data[x_type][1]))
+                axs[0].set_xticks(ticks=np.arange(len(x_ticks)), labels=x_ticks, rotation=0)
+                if include_y_labels is True or include_y_labels is None:
+                    axs[0].set_ylabel(plot.get("ylabel", label_data[y_type][1]))
+                    axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks)
+                    if include_y_labels is True:
+                        # axs[0].yaxis.set_label_position("right")
+                        axs[0].yaxis.tick_right()
+                elif include_y_labels is False:
+                    axs[0].set_yticks(ticks=np.arange(len(y_ticks)))
+                    axs[0].tick_params(labelleft=False)
+                hm = axs[0].imshow(
+                    plot_data,
+                    vmin=vmin,
+                    vmax=vmax,
+                    cmap=cmap,
+                    interpolation="nearest",
+                    aspect="auto",
+                    # extent=[-0.5, plot_data.shape[1] + 0.5, -0.5, plot_data.shape[0] + 0.5],
+                )
+                if divide_train_test_axis is not False:
+                    # axs[0].set_ylim(plot_data.shape[0] - 0.5, -0.5)  # Ensure correct y-axis limits
+                    if x_type == divide_train_test_axis.lower():
+                        # add the vertical line to the x-axis
+                        axs[0].axvline(
+                            x=divide_train_test_after_num - 0.5, color="black", linestyle="--", linewidth=0.8
+                        )
+                        # add train and test texts to either side of the x-label
+                        axs[0].text(
+                            x=divide_train_test_after_num - 0.5,
+                            y=-0.5,
+                            s="train",
+                            ha="center",
+                            va="top",
+                            fontsize=10,
+                        )
+                        axs[0].text(
+                            x=divide_train_test_after_num - 0.5,
+                            y=plot_data.shape[0] - 0.5,
+                            s="test",
+                            ha="center",
+                            va="bottom",
+                            fontsize=10,
+                        )
+                    elif y_type == divide_train_test_axis.lower():
+                        # add the horizontal line to the y-axis
+                        axs[0].axhline(
+                            y=divide_train_test_after_num - 0.5, color="black", linestyle="--", linewidth=0.8
+                        )
+                        if include_y_labels is not False:
+                            # add train and test texts to either side of the y-label
+                            x_loc = -0.02
+                            y_center = 0.5
+                            text = "train"
+                            axs[0].text(
+                                x=x_loc,
+                                y=y_center + 0.25 + (len(text) * 0.01),
+                                s=text,
+                                color="grey",
+                                fontsize=8.5,
+                                ha="center",
+                                va="center",
+                                rotation=90,
transform=axs[0].transAxes, + ) + text = "test" + axs[0].text( + x=x_loc, + y=y_center - 0.25 - (len(text) * 0.01), + s=text, + color="grey", + fontsize=8.5, + ha="center", + va="center", + rotation=90, + transform=axs[0].transAxes, + ) + else: + raise ValueError(f"{divide_train_test_axis=} not in x ({x_type}) or y ({y_type}) axis") + + # plot the colorbar + if include_colorbar is True: + cbar = fig.colorbar(hm) + if cmin != vmin or cmax != vmax: + cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits + cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits + # cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") + cbar.ax.set_ylabel("Performance score", rotation=-90, va="bottom") + + # keep only non-overlapping ticks + max_ticks = 15 + if len(x_ticks) > max_ticks: + indices = np.linspace(0, len(x_ticks) - 1, max_ticks).round() + hide_tick = np.isin(np.arange(len(x_ticks)), indices, invert=True, assume_unique=True) + for i, t in enumerate(axs[0].xaxis.get_ticklabels()): + if hide_tick[i]: + t.set_visible(False) + if len(y_ticks) > max_ticks: + indices = np.linspace(0, len(y_ticks) - 1, max_ticks).round() + hide_tick = np.isin(np.arange(len(y_ticks)), indices, invert=True, assume_unique=True) + for i, t in enumerate(axs[0].yaxis.get_ticklabels()): + if hide_tick[i]: + t.set_visible(False) + + # loop over data dimensions and create text annotations + if annotate: + # replace with looping over plot_data instead + for i, j in np.ndindex(plot_data.shape): + number = plot_data[i, j] + if np.isnan(number): + continue + text = axs[0].text( + j, + i, + f"{round(number, 2) if number < -10 else round(number, 3)}", + ha="center", + va="center", + color="white" if (number > 0.5 or number < -2) else "black", + fontsize="small", + ) + + # print extra information if requested + if print_mean_of_columns: + mean_of_columns = np.nanmean(plot_data, axis=0) + print(f"Mean of columns for {strategy_displayname} ({x_type}): {mean_of_columns}") + if print_mean_of_rows: + mean_of_rows = np.nanmean(plot_data, axis=1) + print(f"Mean of rows for {strategy_displayname} ({y_type}): {mean_of_rows}") # finalize the figure and save or display it - fig.supxlabel(self.get_x_axis_label(x_type, objective_time_keys)) fig.tight_layout() if save_figs: - filename = f"{self.plot_filename_prefix}{title}_{x_type}" - filename = filename.replace(" ", "_") - fig.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") + suffix = "" + if include_colorbar and not (x_type == "time" or y_type == "time"): + suffix += "_colorbar" + if include_y_labels and not (x_type == "time" or y_type == "time"): + suffix += "_ylabels" + filename_path = ( + Path(self.plot_filename_prefix) + / f"{strategy_name}_heatmap_{'_'.join(plot_x_value_types)}_{'_'.join(plot_y_value_types)}{suffix}" + ) + fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) + print(f"Figure saved to {filename_path}") else: plt.show() + elif style == "compare_heatmaps": + + raise NotImplementedError("Still a work in progress") + + # comparisons = plot["comparison"] + # # set up the plot + # fig, axs = plt.subplots( + # ncols=1, figsize=(9, 6), dpi=300 + # ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
+ # if not hasattr(axs, "__len__"): + # axs = [axs] + # # title = f"Performance of {strategy_displayname} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" + # # fig.canvas.manager.set_window_title(title) + # # if not save_figs: + # # fig.suptitle(title) + + # for comparison in comparisons: + # strategy_names = comparisons["strategies"] + # strategies = [s for s in self.strategies if s["name"]] + # # for strategy in strategies: + # strategy_displayname = strategy["display_name"] + # strategy_data = data_collected[strategy_name] + + # # get the performance per selected type in an array + # plot_data = np.stack(np.array([t[2] for t in strategy_data])) + # cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) + # cutoff_percentile_start: float = self.experiment["statistics_settings"].get( + # "cutoff_percentile_start", 0.01 + # ) + # label_data = { + # "gpus": ( + # list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), + # "GPUs", + # ), + # "applications": ( + # list( + # dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data]) + # ), + # "Applications", + # ), + # "searchspaces": ( + # list( + # dict.fromkeys( + # [ + # f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") + # for t in strategy_data + # ] + # ) + # ), + # "Searchspaces", + # ), + # "time": ( + # np.round(np.linspace(0.0, 1.0, bins), 2), + # f"Fraction of time between {cutoff_percentile_start * 100}% and {cutoff_percentile * 100}%", + # ), + # } + # x_ticks = label_data[x_type][0] + # y_ticks = label_data[y_type][0] + # if (x_type == "time" and y_type == "searchspaces") or ( + # x_type == "searchspaces" and y_type == "time" + # ): + # plot_data: np.ndarray = np.stack(np.array([t[3] for t in strategy_data])) + # if x_type == "searchspaces": + # plot_data = plot_data.transpose() + # elif (x_type == "gpus" and y_type == "applications") or ( + # y_type == "gpus" and x_type == "applications" + # ): + # plot_data = np.reshape( + # plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0])) + # ) + # if x_type == "gpus": + # plot_data = np.transpose(plot_data) + # else: + # raise NotImplementedError( + # f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." + # ) + + # # validate the data + # outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) + # assert len(outside_range[0]) == 0 and len(outside_range[1]) == 0, ( + # f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" + # ) + else: + raise NotImplementedError(f"Invalid {style=}") # plot the aggregated searchspaces - if ( - "aggregated" in plot_x_value_types - and continue_after_comparison - or not (compare_baselines or compare_split_times) - ): - fig, axs = plt.subplots( - ncols=1, figsize=(9, 6), dpi=300 - ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
- if not hasattr(axs, "__len__"): - axs = [axs] - title = f"""Aggregated Data\nkernels: - {', '.join(self.experiment['kernels'])}\nGPUs: {', '.join(self.experiment['GPUs'])}""" - fig.canvas.manager.set_window_title(title) - if not save_figs: - fig.suptitle(title) - - # finalize the figure and save or display it - self.plot_strategies_aggregated(axs[0], aggregation_data, plot_settings=plot_settings) - fig.tight_layout() - if save_figs: - filename = f"{self.plot_filename_prefix}aggregated" - filename = filename.replace(" ", "_") - fig.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") - else: - plt.show() + for plot in plots: + # get settings + scope: str = plot["scope"] + style: str = plot["style"] + vmin: float = plot.get("vmin", None) # visual range lower limit + if scope != "aggregate": + continue + if style != "line" and style != "head2head": + raise NotImplementedError(f"{scope} does currently not support {style}, create an issue to request it.") + if style == "head2head": + compare_at_relative_time = plot["comparison"]["relative_time"] + comparison_unit = plot["comparison"]["unit"] + annotate = plot.get("annotate", True) + + # the comparison data will be a double nested dictionary of the strategy indices + comparison_data_raw = self.get_head2head_comparison_data( + aggregation_data, compare_at_relative_time, comparison_unit + ) + + # if more than half of the comparisons between two strategies are NaN, set all to NaN + for strategy1 in comparison_data_raw.keys(): + for strategy2 in comparison_data_raw[strategy1].keys(): + comparison = comparison_data_raw[strategy1][strategy2] + if len([v for v in comparison if np.isnan(v)]) > ceil(0.5 * len(comparison)): + comparison_data_raw[strategy1][strategy2] = [np.nan] * len(comparison) + + # convert the comparison data dictionary to a 2D numpy array of means + comparison_data = np.array( + [ + [ + np.nanmean(comparison_data_raw[strategy1][strategy2]) + for strategy2 in comparison_data_raw[strategy1].keys() + ] + for strategy1 in comparison_data_raw.keys() + ] + ).transpose() + + # set up the plot + fig, axs = plt.subplots(ncols=1, figsize=(8, 6), dpi=300) + if not hasattr(axs, "__len__"): + axs = [axs] + ax = axs[0] + title = f"Head-to-head comparison of strategies at {compare_at_relative_time} relative time" + fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) + + # # set the x and y labels + # if comparison_unit == "time": + # ax.set_xlabel("How much time do these strategies take...") + # elif comparison_unit == "objective": + # ax.set_xlabel("How much objective value do these strategies achieve...") + # ax.set_ylabel("...relative to these strategies?") + # ax.xaxis.set_label_position('top') + + # set the x and y ticks + x_ticks = list(comparison_data_raw.keys()) + y_ticks = list(comparison_data_raw.keys()) + # Show all ticks and label them with the respective list entries + ax.set_xticks(range(len(x_ticks)), labels=x_ticks, rotation=-15, ha="right", rotation_mode="anchor") + ax.set_yticks(range(len(y_ticks)), labels=y_ticks, rotation=-30, ha="right", rotation_mode="anchor") + ax.xaxis.tick_top() + + # set the color map + vmin = 0.0 + vmax = 1000.0 + + def norm_color_val(v): + """Normalize a color value to fit in the 0-1 range.""" + return (v - vmin) / (vmax - vmin) + + if comparison_unit == "time": + cmap = LinearSegmentedColormap.from_list( + "head2head_colormap", + [ + (norm_color_val(vmin), "darkgreen"), + (norm_color_val(100.0), "greenyellow"), + (norm_color_val(200.0), "orange"), + 
(norm_color_val(500.0), "red"), + (norm_color_val(800.0), "darkred"), + (norm_color_val(vmax), "black"), + ], + ) + elif comparison_unit == "objective": + cmap = LinearSegmentedColormap.from_list( + "head2head_colormap", + [ + (norm_color_val(vmin), "darkred"), + (norm_color_val(80.0), "yellow"), + (norm_color_val(100.0), "greenyellow"), + (norm_color_val(200.0), "green"), + (norm_color_val(vmax), "darkgreen"), + ], + ) + + # if there are any values above the vmax, warn + if np.any(comparison_data > vmax): + warnings.warn( + f"There are values above the vmax ({vmax}) in the comparison data: {comparison_data[comparison_data > vmax]}, these are clipped" + ) + # clip the comparison data to the vmin-vmax range + comparison_data_clipped = np.clip(comparison_data, vmin, vmax) + + # plot the comparison data + im = ax.imshow( + comparison_data_clipped, + vmin=vmin, + vmax=vmax, + aspect="auto", + cmap=cmap, + ) + + # set the colorbar + # cmin = np.nanmin(comparison_data_clipped) + cmin = vmin # always show 0.0 as the start + max_val = np.nanmax(comparison_data_clipped) + if np.isnan(max_val): + max_val = vmax + # round to the nearest 100 + cmax = round(ceil(max_val), -2) + if cmax < max_val: + cmax += 100 # ensure the colorbar max is above the max value + cnum = round(cmax / 100) + 1 + cbar = ax.figure.colorbar(im, ax=ax) + if cmin != vmin or cmax != vmax: + cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits + cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits + if comparison_unit == "time": + cbar.ax.set_ylabel( + "Time difference to same objective value (lower is better)", rotation=-90, va="bottom" + ) + elif comparison_unit == "objective": + cbar.ax.set_ylabel( + "Objective value difference at same time (higher is better)", rotation=-90, va="bottom" + ) + else: + raise NotImplementedError(f"Comparison unit '{comparison_unit}' not implemented") + + # loop over data dimensions and create text annotations + if annotate: + for i in range(len(x_ticks)): + for j in range(len(y_ticks)): + number = comparison_data[i, j] + if np.isnan(number): + continue + text = ax.text( + j, + i, + f"{round(number, 1) if number < 100 else round(number)}%", + ha="center", + va="center", + color="white" if (number > 200 or number < 50) else "black", + fontsize="small", + ) + + # plot the averages per strategy as labels under the heatmap + averages = np.nanmean(comparison_data, axis=0) + # add "mean" before the averages + ax.text(-0.5, len(y_ticks) - 0.2, "Mean:", ha="right", va="center", color="black", fontsize=10) + for i, avg in enumerate(averages): + ax.text( + i, + len(y_ticks) - 0.2, + f"{'NaN' if np.isnan(avg) else round(avg, 1) if avg < 100 else round(avg)}%", + ha="center", + va="center", + color="black", + fontsize="small", + ) + print( + f"Averages per strategy at {compare_at_relative_time} relative time: {[(s, a) for s, a in zip(x_ticks, averages)]}" + ) + + # finalize the figure and save or display it + fig.tight_layout() + if save_figs: + filename_path = Path(self.plot_filename_prefix) / f"head2head_comparison_{comparison_unit}" + fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) + print(f"Figure saved to {filename_path}") + else: + plt.show() + + # plot the aggregation + if style == "line" and (continue_after_comparison or not (compare_baselines or compare_split_times)): + # fig, axs = plt.subplots( + # ncols=1, figsize=(6.8, 4.0), dpi=300 + # ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
+ fig, axs = plt.subplots( + ncols=1, figsize=(8.5, 5.0), dpi=300 + ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. + if not hasattr(axs, "__len__"): + axs = [axs] + title = f"""Aggregated Data\napplications: + {", ".join(self.experiment["experimental_groups_defaults"]["applications_names"])}\nGPUs: {", ".join(self.experiment["experimental_groups_defaults"]["gpus"])}""" + fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) + + # finalize the figure and save or display it + lowest_real_y_value = self.plot_strategies_aggregated( + axs[0], + aggregation_data, + visualization_settings=self.experiment["visualization_settings"], + plot_settings=plot, + ) + if vmin is not None: + if isinstance(vmin, (int, float)): + axs[0].set_ylim(bottom=vmin) + elif vmin == "real": + axs[0].set_ylim(bottom=lowest_real_y_value - (abs(lowest_real_y_value) + 1.0) * 0.02) + else: + raise NotImplementedError(f"{vmin=} not implemented") + fig.tight_layout() + if save_figs: + filename_path = Path(self.plot_filename_prefix) / "aggregated" + fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) + print(f"Figure saved to {filename_path}") + else: + plt.show() def plot_baselines_comparison( self, @@ -390,7 +1119,7 @@ def plot_baselines_comparison( save_fig: whether to save the resulting figure to file. Defaults to False. """ dist = searchspace_stats.objective_performances_total_sorted - plt.figure(figsize=(8, 5), dpi=300) + plt.figure(figsize=(9, 7), dpi=300) # list the baselines to test baselines: list[Baseline] = list() @@ -446,10 +1175,9 @@ def plot_baselines_comparison( # write to file or show if save_fig: - filename = f"{self.plot_filename_prefix}{title}_baselines" - filename = filename.replace(" ", "_") - plt.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") + filename_path = Path(self.plot_filename_prefix) / f"{title}_baselines".replace(" ", "_") + plt.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) + print(f"Figure saved to {filename_path}") else: plt.show() @@ -533,10 +1261,9 @@ def plot_split_times_comparison( # write to file or show if save_fig: - filename = f"{self.plot_filename_prefix}{title}_split_times_{x_type}" - filename = filename.replace(" ", "_") - plt.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") + filename_path = Path(self.plot_filename_prefix) / f"{title}_split_times_{x_type}".replace(" ", "_") + plt.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) + print(f"Figure saved to {filename_path}") else: plt.show() @@ -581,7 +1308,7 @@ def plot_split_times_bar_comparison( for _ in range((len(strategies_curves) - len(self.plot_skip_strategies)) + 1) ) for objective_time_key in objective_time_keys: - data_dict[objective_time_key] = np.full((len(strategies_curves)), np.NaN) + data_dict[objective_time_key] = np.full((len(strategies_curves)), np.nan) for strategy_index, strategy_curve in enumerate(strategies_curves): if strategy_curve.name in self.plot_skip_strategies: continue @@ -641,15 +1368,164 @@ def plot_split_times_bar_comparison( # write to file or show if save_fig: - filename = f"{self.plot_filename_prefix}{title}_split_times_bar" - filename = filename.replace(" ", "_") - plt.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") + filename_path = Path(self.plot_filename_prefix) / f"{title}_split_times_bar".replace(" ", "_") + plt.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) + print(f"Figure saved to 
{filename_path}") else: plt.show() + def get_head2head_comparison_data( + self, aggregation_data: dict, compare_at_relative_time: float, comparison_unit: str + ) -> dict: + """Gets the data for a head-to-head comparison of strategies across all searchspaces.""" + # the comparison data will be a double nested dictionary of the strategy indices + comparison_data = dict() + for strategy_alpha in self.strategies: + comparison_data[strategy_alpha["display_name"]] = dict() + for strategy_beta in self.strategies: + comparison_data[strategy_alpha["display_name"]][strategy_beta["display_name"]] = list() + + # iterate over the searchspaces and strategies to get head2head data + for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: + for application_name in self.experiment["experimental_groups_defaults"]["applications_names"]: + print(f" | visualizing head2head of {application_name} for {gpu_name}") + + # unpack the aggregation data + _, strategies_curves, searchspace_stats, time_range, _ = aggregation_data[ + get_aggregation_data_key(gpu_name=gpu_name, application_name=application_name) + ] + + # get the head2head comparison data + comparison_data_ss = self.get_head2head_comparison_data_searchspace( + "time", + compare_at_relative_time, + comparison_unit, + searchspace_stats, + strategies_curves, + time_range, + ) + + # for this searchspace, append each strategy's data to the comparison data + for strategy_index_alpha, strategy_alpha in enumerate(self.strategies): + for strategy_index_beta, strategy_beta in enumerate(self.strategies): + comparison_data[strategy_alpha["display_name"]][strategy_beta["display_name"]].append( + comparison_data_ss[strategy_index_alpha][strategy_index_beta] + ) + + return comparison_data + + def get_head2head_comparison_data_searchspace( + self, + x_type: str, + compare_at_relative_time: float, + comparison_unit: str, + searchspace_stats: SearchspaceStatistics, + strategies_curves: list[Curve], + x_axis_range: np.ndarray, + ) -> dict: + """Gets the data for a head-to-head comparison of strategies on a specific searchspace. + + Args: + x_type: the type of ``x_axis_range``. + compare_at_relative_time: the relative point in time to compare at, between 0.0 and 1.0. + comparison_unit: the unit to compare with, 'time' or 'objective'. + searchspace_stats: the Searchspace statistics object. + strategies_curves: the strategy curves to draw in the plot. + x_axis_range: the time or function evaluations range to plot on. + + Returns: + A doubly-nested dictionary with strategy names as keys and how much better outer performs relative to inner. 
+ """ + comparison_point = x_axis_range[-1] * compare_at_relative_time + comparison_data = dict() + confidence_level = 0.95 # irrelevant because the confidence intervals are not used + minimization = searchspace_stats.minimization + dist = searchspace_stats.objective_performances_total_sorted + for strategy_index_alpha, strategy_alpha in enumerate(self.strategies): + inner_comparison_data = dict() + strategy_curve_alpha = strategies_curves[strategy_index_alpha] + _, time_range_alpha, curve_alpha, _, _ = strategy_curve_alpha.get_curve( + x_axis_range, x_type, dist=dist, confidence_level=confidence_level, return_split=False + ) + + # find the index of the closest time and performance to the comparison point + closest_index_alpha = np.argmin(np.abs(time_range_alpha - comparison_point)) + time_at_comparison_alpha = time_range_alpha[closest_index_alpha] + performance_at_comparison_alpha = curve_alpha[closest_index_alpha] + + absolute_optimum = searchspace_stats.total_performance_absolute_optimum() + median = searchspace_stats.total_performance_median() + + def normalize(val): + """Min-max normalization of the performance value.""" + if absolute_optimum == median: + return 0.0 + return (val - median) / (absolute_optimum - median) + + performance_at_comparison_alpha_norm = normalize(performance_at_comparison_alpha) + + # compare against all other strategies + for strategy_index_beta, strategy_beta in enumerate(self.strategies): + if strategy_index_alpha == strategy_index_beta: + inner_comparison_data[strategy_index_beta] = np.nan + continue + strategy_curve_beta = strategies_curves[strategy_index_beta] + _, time_range_beta, curve_beta, _, _ = strategy_curve_beta.get_curve( + x_axis_range, x_type, dist=dist, confidence_level=confidence_level, return_split=False + ) + + # calculate the relative difference between the two strategies at the comparison point + if comparison_unit == "time": + # given the performance at `compare_at_relative_time`, what is the index of the first time that strategy beta reaches at least the same performance? + index_matching = ( + np.argwhere(curve_beta <= performance_at_comparison_alpha) + if minimization + else np.argwhere(curve_beta >= performance_at_comparison_alpha) + ) + if index_matching.size == 0: + # if strategy beta never reaches the performance of strategy alpha, we cannot compare, instead we take the time at the end so we know what the minimal performance gain is + time_at_comparison_beta = time_range_beta[-1] + # another alternative: take the last time * fraction of inverse (e.g. if GA-nc doesn’t find the objective of GA, take end-of-time * 1/([GA-to-GAnc]/100)) + # inner_comparison_data[strategy_index_beta] = np.nan + # continue + else: + # get the time at which strategy beta reaches the performance of strategy alpha + closest_index_beta = index_matching[0][0] # take the first match + time_at_comparison_beta = time_range_beta[closest_index_beta] + + # given the performance at `compare_at_relative_time`, how much longer does strategy beta take to get to the same performance compared to strategy alpha? 
(lower is better) + # closest_index_beta = np.argmin(np.abs(curve_beta - performance_at_comparison_alpha)) + # time_at_comparison_beta = time_range_beta[closest_index_beta] + # outer takes X% of the time inner takes to reach the same performance (100%+percentage change) + percentage_change = ( + (time_at_comparison_alpha - time_at_comparison_beta) / abs(time_at_comparison_beta) * 100 + ) + inner_comparison_data[strategy_index_beta] = 100 + percentage_change + elif comparison_unit == "objective": + # given the time at `compare_at_relative_time`, how much worse is the objective value of strategy beta at that moment compared to strategy alpha? (higher is better) + closest_index_beta = np.argmin(np.abs(time_range_beta - time_at_comparison_alpha)) + performance_at_comparison_beta = curve_beta[closest_index_beta] + performance_at_comparison_beta_norm = normalize(performance_at_comparison_beta) + + # percentage_change = (performance_at_comparison_beta - performance_at_comparison_alpha) / abs(performance_at_comparison_beta) * 100 + # if not minimization: + # percentage_change = -percentage_change + + percentage_change_norm = ( + (performance_at_comparison_beta_norm - performance_at_comparison_alpha_norm) + / abs(performance_at_comparison_beta_norm) + * 100 + ) + inner_comparison_data[strategy_index_beta] = 100 + percentage_change_norm + else: + raise ValueError(f"Invalid comparison unit: {comparison_unit}. Expected 'time' or 'objective'.") + + comparison_data[strategy_index_alpha] = inner_comparison_data + return comparison_data + def plot_strategies( self, + style: str, x_type: str, y_type: str, ax: plt.Axes, @@ -665,6 +1541,7 @@ def plot_strategies( """Plots all optimization strategies for individual search spaces. Args: + style: the style of plot, either 'line' or 'scatter'. x_type: the type of ``x_axis_range``. y_type: the type of plot on the y-axis. ax: the axis to plot on. 
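For clarity, the 'time' head-to-head value computed above reduces to a simple ratio whenever both times are positive; a small illustrative check with made-up numbers (not part of the patch):

    # alpha reaches the reference performance at 30 s, beta needs 60 s to reach the same performance
    t_alpha, t_beta = 30.0, 60.0
    percentage_change = (t_alpha - t_beta) / abs(t_beta) * 100  # -50.0
    head2head_value = 100 + percentage_change  # 50.0: alpha needs 50% of the time beta needs (lower is better)
    assert head2head_value == (t_alpha / t_beta) * 100  # equivalent ratio form for positive times
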
@@ -681,20 +1558,19 @@ def plot_strategies( confidence_level: float = plot_settings.get("confidence_level", 0.95) absolute_optimum = searchspace_stats.total_performance_absolute_optimum() median = searchspace_stats.total_performance_median() - optimum_median_difference = absolute_optimum - median def normalize(curve): """Min-max normalization with median as min and absolute optimum as max.""" if curve is None: return None - return (curve - median) / optimum_median_difference + return (curve - median) / (absolute_optimum - median) def normalize_multiple(curves: list) -> tuple: """Normalize multiple curves at once.""" return tuple(normalize(curve) for curve in curves) # plot the absolute optimum - absolute_optimum_y_value = absolute_optimum if y_type == "absolute" or y_type == "scatter" else 1 + absolute_optimum_y_value = absolute_optimum if y_type == "absolute" or style == "scatter" else 1 absolute_optimum_label = ( "Absolute optimum ({})".format(round(absolute_optimum, 3)) if y_type == "absolute" else "Absolute optimum" ) @@ -704,8 +1580,15 @@ def normalize_multiple(curves: list) -> tuple: if baseline_curve is not None: if y_type == "baseline": ax.axhline(0, label="baseline trajectory", color="black", ls="--") - elif y_type == "normalized" or y_type == "baseline": + elif y_type == "normalized" or y_type == "baseline" or y_type == "absolute": baseline = baseline_curve.get_curve(x_axis_range, x_type) + if absolute_optimum in baseline: + raise ValueError( + f"The optimum {absolute_optimum} is in the baseline, this will cause zero division problems" + ) + # cut_at_index = np.argmax(baseline == absolute_optimum) + # baseline = baseline[:cut_at_index] + # x_axis_range = x_axis_range[:cut_at_index] if y_type == "normalized": baseline = normalize(baseline) ax.plot(x_axis_range, baseline, label="Calculated baseline", color="black", ls="--") @@ -740,7 +1623,7 @@ def normalize_multiple(curves: list) -> tuple: continue # get the plot data - if y_type == "scatter": + if style == "scatter": x_axis, y_axis = strategy_curve.get_scatter_data(x_type) ax.scatter(x_axis, y_axis, label=label, color=color) continue @@ -843,8 +1726,10 @@ def normalize_multiple(curves: list) -> tuple: ax.set_ylabel(self.y_metric_displayname[f"objective_{y_type}"], fontsize="large") normalized_ylim_margin = 0.02 if y_type == "absolute": - multiplier = 0.99 if self.minimization else 1.01 - ax.set_ylim(absolute_optimum * multiplier, median) + # multiplier = 0.99 if self.minimization else 1.01 + # ax.set_ylim(absolute_optimum * multiplier, median) + # ax.set_ylim(1.0) + pass # elif y_type == 'normalized': # ax.set_ylim((0.0, 1 + normalized_ylim_margin)) elif y_type == "baseline": @@ -854,21 +1739,26 @@ def plot_strategies_aggregated( self, ax: plt.Axes, aggregation_data, - plot_settings: dict, - ): + visualization_settings: dict = {}, + plot_settings: dict = {}, + ) -> float: """Plots all optimization strategies combined accross search spaces. Args: ax: the axis to plot on. aggregation_data: the aggregated data from the various searchspaces. - plot_settings: dictionary of additional plot settings. + visualization_settings: dictionary of additional visualization settings. + plot_settings: dictionary of additional visualization settings related to this particular plot. + + Returns: + The lowest performance value of the real stopping point for all strategies. 
""" # plot the random baseline and absolute optimum ax.axhline(0, label="Calculated baseline", c="black", ls=":") ax.axhline(1, label="Absolute optimum", c="black", ls="-.") # get the relative aggregated performance for each strategy - confidence_level: float = plot_settings.get("confidence_level", 0.95) + confidence_level: float = visualization_settings.get("confidence_level", 0.95) ( strategies_performance, strategies_lower_err, @@ -876,23 +1766,72 @@ def plot_strategies_aggregated( strategies_real_stopping_point_fraction, ) = get_strategies_aggregated_performance(list(aggregation_data.values()), confidence_level) - # plot each strategy + # get the relevant plot settings + cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) + cutoff_percentile_start: float = self.experiment["statistics_settings"].get("cutoff_percentile_start", 0.01) + xlabel = plot_settings.get( + "xlabel", + f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start * 100}% to {cutoff_percentile * 100}%)", + ) # noqa: E501 + ylabel = plot_settings.get("ylabel", self.y_metric_displayname["aggregate_objective"]) + tmin = plot_settings.get("tmin", 1.0) + + # setup the plot y_axis_size = strategies_performance[0].shape[0] time_range = np.arange(y_axis_size) plot_errors = True + lowest_real_y_value = 0.0 print("\n-------") print("Quantification of aggregate performance across all search spaces:") + + # get the highest real_stopping_point_index, adjust y_axis_size and time_range if necessary + real_stopping_point_indices = [ + min( + round(strategies_real_stopping_point_fraction[strategy_index] * time_range.shape[0]) + 1, + time_range.shape[0], + ) + for strategy_index in range(len(strategies_performance)) + ] # noqa: E501 + real_stopping_point_index_max = max(real_stopping_point_indices) + if tmin == "real": + # stop the time at the largest real stopping point + if real_stopping_point_index_max < y_axis_size: + y_axis_size = real_stopping_point_index_max + print(f" adjusted stopping point index: {real_stopping_point_index_max}/{y_axis_size}") + time_range = np.arange(y_axis_size) + elif tmin < 1.0: + # stop the time at the given tmin + y_axis_size = y_axis_size * tmin + time_range = np.arange(y_axis_size) + elif tmin > 1.0: + raise ValueError(f"Invalid {tmin=}, must be between 0.0 and 1.0 or 'real'") + + # adjust the xlabel if necessary + if tmin == "real" and "xlabel" not in plot_settings: + xlabel = "Relative time until the last strategy stopped" + + # plot each strategy for strategy_index, strategy_performance in enumerate(strategies_performance): if self.strategies[strategy_index]["name"] in self.plot_skip_strategies: continue displayname = self.strategies[strategy_index]["display_name"] color = self.colors[strategy_index] - real_stopping_point_fraction = strategies_real_stopping_point_fraction[strategy_index] - real_stopping_point_index = round(real_stopping_point_fraction * time_range.shape[0]) - if real_stopping_point_index <= 0: + real_stopping_point_index = real_stopping_point_indices[strategy_index] + if real_stopping_point_index <= 1: warnings.warn(f"Stopping point index for {displayname} is at {real_stopping_point_index}") continue + # calculate the lowest real_y_value + lowest_real_y_value = min( + lowest_real_y_value, + ( + strategy_performance[real_stopping_point_index] + if real_stopping_point_index < time_range.shape[0] + else strategy_performance[time_range.shape[0] - 1] + ), + ) + assert isinstance(lowest_real_y_value, (int, float)), f"Invalid 
{lowest_real_y_value=}" + # plot the errors if plot_errors: strategy_lower_err = strategies_lower_err[strategy_index] @@ -910,9 +1849,9 @@ def plot_strategies_aggregated( and real_stopping_point_index < len(strategy_lower_err) - 1 ): ax.fill_between( - time_range[real_stopping_point_index:], - strategy_lower_err[real_stopping_point_index:], - strategy_upper_err[real_stopping_point_index:], + time_range[real_stopping_point_index - 1 : y_axis_size], + strategy_lower_err[real_stopping_point_index - 1 : y_axis_size], + strategy_upper_err[real_stopping_point_index - 1 : y_axis_size], alpha=0.15, antialiased=True, color=color, @@ -931,8 +1870,8 @@ def plot_strategies_aggregated( and real_stopping_point_index < len(strategy_performance) - 1 ): ax.plot( - time_range[real_stopping_point_index:], - strategy_performance[real_stopping_point_index:], + time_range[real_stopping_point_index - 1 : y_axis_size], + strategy_performance[real_stopping_point_index - 1 : y_axis_size], color=color, ls="dashed", ) @@ -940,22 +1879,25 @@ def plot_strategies_aggregated( performance_score_std = round(np.std(strategy_performance), 3) print(f" | performance of {displayname}: {performance_score} (±{performance_score_std})") - # set the axis - cutoff_percentile: float = self.experiment.get("cutoff_percentile", 1) - cutoff_percentile_start: float = self.experiment.get("cutoff_percentile_start", 0.01) - ax.set_xlabel( - f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start*100}% to {cutoff_percentile*100}%)", # noqa: E501 - fontsize="large", - ) - ax.set_ylabel(self.y_metric_displayname["aggregate_objective"], fontsize="large") - num_ticks = 11 - ax.set_xticks( - np.linspace(0, y_axis_size, num_ticks), - np.round(np.linspace(0, 1, num_ticks), 2), - ) + # set the axis labels + ax.set_xlabel(xlabel, fontsize="large") + ax.set_ylabel(ylabel, fontsize="large") + + # set the ticks + if tmin == "real": + ax.set_xticks([], []) + else: + num_ticks = 11 + ax.set_xticks( + np.linspace(0, y_axis_size, num_ticks), + np.round(np.linspace(0, tmin, num_ticks), 2), + ) + + # set the limits and legend ax.set_ylim(top=1.02) - ax.set_xlim((0, y_axis_size)) + ax.set_xlim((0, y_axis_size - 1)) ax.legend() + return lowest_real_y_value def get_x_axis_label(self, x_type: str, objective_time_keys: list): """Formatter to get the appropriate x-axis label depending on the x-axis type. 
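The aggregated-plot hunks above convert each strategy's real stopping point from a fraction of the time budget to an index on the shared time axis, clamped so the dashed "after stopping" tail can still be drawn. Below is a minimal sketch of that conversion; the fractions 0.35, 1.0 and 0.72 and the resolution of 1000 are illustrative only, not values from the experiments:

```python
import numpy as np

# illustrative stopping-point fractions and time-axis resolution (not real data)
strategies_real_stopping_point_fraction = [0.35, 1.0, 0.72]
time_range = np.arange(1000)

# mirrors the index computation above: round the fraction to an index,
# add one so the dashed segment connects to the solid line,
# and clamp to the length of the time axis
real_stopping_point_indices = [
    min(round(fraction * time_range.shape[0]) + 1, time_range.shape[0])
    for fraction in strategies_real_stopping_point_fraction
]
print(real_stopping_point_indices)  # [351, 1000, 721]
```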
diff --git a/tests/autotuning_methodology/integration/mockfiles/convolution.cu b/tests/autotuning_methodology/integration/mockfiles/convolution.cu new file mode 100644 index 0000000..da84830 --- /dev/null +++ b/tests/autotuning_methodology/integration/mockfiles/convolution.cu @@ -0,0 +1,168 @@ +extern "C" { +#define image_height 4096 +#define image_width 4096 + +#ifndef filter_height + #define filter_height 17 +#endif +#ifndef filter_width + #define filter_width 17 +#endif + +#define border_height ((filter_height/2)*2) +#define border_width ((filter_width/2)*2) +#define input_height (image_height + border_height) +#define input_width (image_width + border_width) + +#ifndef block_size_x + #define block_size_x 16 +#endif +#ifndef block_size_y + #define block_size_y 16 +#endif +#ifndef block_size_z + #define block_size_z 1 +#endif +#ifndef tile_size_x + #define tile_size_x 1 +#endif +#ifndef tile_size_y + #define tile_size_y 1 +#endif + +#define i_end min(block_size_y*tile_size_y+border_height, input_height) +#define j_end min(block_size_x*tile_size_x+border_width, input_width) + +/* + * If requested, we can use the __ldg directive to load data through the + * read-only cache. + */ +#define USE_READ_ONLY_CACHE read_only +#if USE_READ_ONLY_CACHE == 1 +#define LDG(x, y) __ldg(x+y) +#elif USE_READ_ONLY_CACHE == 0 +#define LDG(x, y) x[y] +#endif + +__constant__ float d_filter[33*33]; //large enough for the largest filter + +/* + * If use_padding == 1, we introduce (only when necessary) a number of padding + * columns in shared memory to avoid shared memory bank conflicts + * + * padding columns are only inserted when block_size_x is not a multiple of 32 (the assumed number of memory banks) + * and when the width of the data needed is not a multiple of 32. The latter is because some filter_widths never + * cause bank conflicts. 
+ * + * If not passed as a tunable parameter, padding is on by default + */ +#define shared_mem_width (block_size_x*tile_size_x+border_width) +#ifndef use_padding + #define use_padding 1 +#endif +#if use_padding == 1 + #if (((block_size_x % 32)!=0) && (((shared_mem_width-block_size_x)%32) != 0)) + // next line uses &31 instead of %32, because % in C is remainder not modulo + #define padding_columns ((32 - (border_width + block_size_x*tile_size_x - block_size_x)) & 31) + #undef shared_mem_width + #define shared_mem_width (block_size_x*tile_size_x+border_width+padding_columns) + #endif +#endif + + +__global__ void convolution_kernel(float *output, float *input, float *filter) { + int ty = threadIdx.y; + int tx = threadIdx.x; + int by = blockIdx.y * block_size_y * tile_size_y; + int bx = blockIdx.x * block_size_x * tile_size_x; + + //shared memory to hold all input data need by this thread block + __shared__ float sh_input[block_size_y*tile_size_y+border_height][shared_mem_width]; + + //load all input data needed by this thread block into shared memory + #pragma unroll + for (int i=ty; i=64", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "block_size_x*block_size_y<=1024", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "tile_size_x*tile_size_y<30", + "Parameters": [ + "tile_size_x", + "tile_size_y" + ] + } + ] + }, + "KernelSpecification": { + "Language": "CUDA", + "CompilerOptions": [ + "-std=c++11" + ], + "BenchmarkName": "GEMM", + "KernelName": "convolution_kernel", + "KernelFile": "convolution.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 4096, + 4096 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [ + { + "Name": "output_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "WriteOnly", + "FillType": "Constant", + "Size": "ProblemSize[0]*ProblemSize[1]", + "FillValue": 0.0, + "Output": 1 + }, + { + "Name": "input_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "FillType": "Random", + "Size": "(ProblemSize[0]+max(filter_width)-1) * (ProblemSize[1]+max(filter_height)-1)", + "FillValue": 1.0 + }, + { + "Name": "d_filter", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "MemType": "Constant", + "FillType": "Random", + "Size": "max(filter_height) * max(filter_width)", + "FillValue": 1.0 + } + ] + } +} \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/mockfiles/mocktest_kernel_convolution.py b/tests/autotuning_methodology/integration/mockfiles/mocktest_kernel_convolution.py deleted file mode 100644 index f260be0..0000000 --- a/tests/autotuning_methodology/integration/mockfiles/mocktest_kernel_convolution.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Test kernel to run integration tests.""" - -#!/usr/bin/env python -import sys - -import numpy - -import kernel_tuner -from kernel_tuner.file_utils import store_metadata_file, store_output_file - -file_path_prefix = "../../../../cached_data_used" -file_path_results = file_path_prefix + "/last_run/_tune_configuration-results.json" -file_path_metadata = file_path_prefix + "/last_run/_tune_configuration-metadata.json" - - -def tune(device_name: str, 
strategy="mls", strategy_options=None, verbose=True, quiet=False, simulation_mode=True): - # input dimensions and data - image_width = 4096 - image_height = 4096 - filter_width = 15 - filter_height = 15 - problem_size = (image_width, image_height) - size = numpy.prod(problem_size) - - input_size = (problem_size[0] + filter_width - 1) * (problem_size[1] + filter_height - 1) - output_image = numpy.zeros(size).astype(numpy.float32) - input_image = numpy.random.randn(input_size).astype(numpy.float32) - filter_weights = numpy.random.randn(filter_width * filter_height).astype(numpy.float32) - - cmem_args = {"d_filter": filter_weights} - args = [output_image, input_image, filter_weights] - - metrics = dict() - metrics["GFLOP/s"] = lambda p: (image_width * image_height * filter_width * filter_height * 2 / 1e9) / ( - p["time"] / 1e3 - ) - - # setup tunable parameters - tune_params = dict() - tune_params["block_size_x"] = [1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128] - tune_params["block_size_y"] = [1, 2, 4, 8, 16, 32] - tune_params["filter_height"] = [filter_height] - tune_params["filter_width"] = [filter_width] - tune_params["read_only"] = [0, 1] - tune_params["tile_size_x"] = [1, 2, 3, 4, 5, 6, 7, 8] - tune_params["tile_size_y"] = [1, 2, 3, 4, 5, 6, 7, 8] - tune_params["use_padding"] = [0, 1] - - restrict = ["block_size_x*block_size_y>=64", "block_size_x*block_size_y<=1024", "tile_size_x*tile_size_y<30"] - - grid_div_x = ["block_size_x", "tile_size_x"] - grid_div_y = ["block_size_y", "tile_size_y"] - - # start tuning - results, env = kernel_tuner.tune_kernel( - "convolution_kernel", - "convolution.cu", - problem_size, - args, - tune_params, - grid_div_y=grid_div_y, - grid_div_x=grid_div_x, - cmem_args=cmem_args, - restrictions=restrict, - cache=file_path_prefix + "/cachefiles/mocktest_kernel_convolution/" + device_name.lower(), - metrics=metrics, - iterations=32, - device=0, - verbose=verbose, - quiet=quiet, - strategy=strategy, - strategy_options=strategy_options, - simulation_mode=simulation_mode, - ) - - store_output_file(file_path_results, results, tune_params) - store_metadata_file(file_path_metadata) - return results, env - - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: ./mocktest_kernel_convolution.py [device name]") - exit(1) - - device_name = sys.argv[1] - - tune(device_name) diff --git a/tests/autotuning_methodology/integration/mockfiles/test.json b/tests/autotuning_methodology/integration/mockfiles/test.json index 9b11d8b..b477359 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test.json +++ b/tests/autotuning_methodology/integration/mockfiles/test.json @@ -1,60 +1,114 @@ { - "version": "0.1.2", + "version": "1.2.0", "name": "Mock run test", - "folder_id": "test_run_experiment", - "kernels_path": "../mockfiles", - "bruteforced_caches_path": "../../../../cached_data_used/cachefiles", - "visualization_caches_path": "../../../../cached_data_used/visualizations", - "kernels": [ - "mocktest_kernel_convolution" - ], - "GPUs": [ - "mock_GPU" - ], - "minimization": true, - "resolution": 1e3, - "cutoff_percentile": 0.99, - "cutoff_percentile_start": 0.7, - "objective_time_keys": [ - "compilation", - "benchmark", - "framework", - "search_algorithm", - "validation" - ], - "objective_performance_keys": [ - "time" - ], - "cutoff_type": "fevals", - "plot": { - "plot_x_value_types": [ - "fevals", - "time", - "aggregated" + "parent_folder": "./test_run_experiment", + "experimental_groups_defaults": { + "applications": [ + { + "name": "mocktest_kernel_convolution", 
+ "input_file": "mocktest_kernel_convolution.json", + "folder": "./tests/autotuning_methodology/integration/mockfiles" + } ], - "plot_y_value_types": [ - "normalized", - "baseline" + "gpus": [ + "mock_GPU" ], - "confidence_level": 0.95, - "compare_baselines": true, - "compare_split_times": true - }, - "strategy_defaults": { - "repeats": 10, - "minimum_number_of_evaluations": 20, + "pattern_for_full_search_space_filenames": { + "regex": "./tests/autotuning_methodology/integration/mockfiles/mock_gpu_T4.json" + }, "stochastic": true, - "record_data": [ - "time", - "GFLOP/s" - ] + "repeats": 10, + "samples": 3, + "minimum_fraction_of_budget_valid": 0.5, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false }, - "strategies": [ + "search_strategies": [ { "name": "random_sample_10_iter", - "strategy": "random_sample", + "search_method": "random_sample", "display_name": "Random sampling 10 iters", - "repeats": 3 + "autotuner": "KernelTuner" } - ] + ], + "statistics_settings": { + "cutoff_percentile": 0.99, + "cutoff_percentile_start": 0.7, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -2.5 + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "searchspaces" + ], + "bins": 10 + }, + { + "scope": "aggregate", + "style": "head2head", + "comparison": { + "unit": "time", + "relative_time": 0.5 + }, + "annotate": true + }, + { + "scope": "aggregate", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": true, + "compare_split_times": true + } } \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json index dcf3554..d1c9fe6 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json @@ -1,58 +1,60 @@ { - "version": "0.1.2", + "version": "1.1.0", "name": "Test output file writer", - "folder_id": "test_output_file_writer", - "kernels_path": "cached_data_used/bogus_kernels_path", - "bruteforced_caches_path": "../../../../cached_data_used/cachefiles", - "visualization_caches_path": "../../../../cached_data_used/visualizations", - "kernels": [ - "convolution" - ], - "GPUs": [ - "RTX_2080_Ti" - ], - "minimization": true, - "resolution": 1e3, - "cutoff_percentile": 0.99, - "cutoff_percentile_start": 0.7, - "objective_time_keys": [ - "compilation", - "benchmark", - "framework", - "search_algorithm", - "validation" - ], - "objective_performance_keys": [ - "time" - ], - "cutoff_type": "fevals", - "plot": { - "plot_x_value_types": [ - "aggregated" + "parent_folder": "./test_output_file_writer", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "input_file": 
"mocktest_kernel_convolution.json", + "folder": "./tests/autotuning_methodology/bogus_kernel_path/integration/mockfiles" + } ], - "plot_y_value_types": [ - "normalized", - "baseline" + "gpus": [ + "RTX_2080_Ti" ], - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - }, - "strategy_defaults": { - "repeats": 100, - "minimum_number_of_evaluations": 20, + "pattern_for_full_search_space_filenames": { + "regex": "./tests/autotuning_methodology/integration/mockfiles/mock_gpu_T4.json" + }, "stochastic": true, - "record_data": [ - "time", - "GFLOP/s" - ] + "repeats": 100, + "samples": 5, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false }, - "strategies": [ + "search_strategies": [ { "name": "random_sample_100_iter", - "strategy": "random_sample", + "search_method": "random_sample", "display_name": "Random sampling 100 iters", - "repeats": 5 + "autotuner": "KernelTuner" } - ] + ], + "statistics_settings": { + "cutoff_percentile": 0.99, + "cutoff_percentile_start": 0.7, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } } \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/mockfiles/test_cached.json b/tests/autotuning_methodology/integration/mockfiles/test_cached.json index 1821090..86412fa 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_cached.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_cached.json @@ -1,58 +1,60 @@ { - "version": "0.1.2", + "version": "1.1.0", "name": "Test output file writer", - "folder_id": "test_output_file_writer", - "kernels_path": "cached_data_used/kernels", - "bruteforced_caches_path": "../../../../cached_data_used/cachefiles", - "visualization_caches_path": "../../../../cached_data_used/visualizations", - "kernels": [ - "convolution" - ], - "GPUs": [ - "RTX_2080_Ti" - ], - "minimization": true, - "resolution": 1e3, - "cutoff_percentile": 0.99, - "cutoff_percentile_start": 0.7, - "objective_time_keys": [ - "compilation", - "benchmark", - "framework", - "search_algorithm", - "validation" - ], - "objective_performance_keys": [ - "time" - ], - "cutoff_type": "fevals", - "plot": { - "plot_x_value_types": [ - "aggregated" + "parent_folder": "./test_output_file_writer", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "input_file": "mocktest_kernel_convolution.json", + "folder": "./tests/autotuning_methodology/integration/mockfiles" + } ], - "plot_y_value_types": [ - "normalized", - "baseline" + "gpus": [ + "RTX_2080_Ti" ], - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - }, - "strategy_defaults": { - "repeats": 100, - "minimum_number_of_evaluations": 20, + "pattern_for_full_search_space_filenames": { + "regex": "./tests/autotuning_methodology/integration/mockfiles/mock_gpu_T4.json" + }, "stochastic": true, - "record_data": [ - "time", - "GFLOP/s" - ] + "repeats": 100, + "samples": 5, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false }, - "strategies": [ + "search_strategies": [ { "name": "random_sample_100_iter", - "strategy": "random_sample", + "search_method": "random_sample", "display_name": "Random sampling 
100 iters", - "repeats": 5 + "autotuner": "KernelTuner" } - ] + ], + "statistics_settings": { + "cutoff_percentile": 0.99, + "cutoff_percentile_start": 0.7, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } } \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json index 02fcc88..4cdaabe 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json @@ -1,58 +1,60 @@ { - "version": "0.1.2", + "version": "1.1.0", "name": "Test import runs", - "folder_id": "test_output_file_writer", - "kernels_path": "../mockfiles", - "bruteforced_caches_path": "../../../../cached_data_used/cachefiles", - "visualization_caches_path": "../../../../cached_data_used/visualizations", - "kernels": [ - "mocktest_kernel_convolution" - ], - "GPUs": [ - "mock_GPU" - ], - "minimization": true, - "resolution": 1e3, - "cutoff_percentile": 0.99, - "cutoff_percentile_start": 0.7, - "objective_time_keys": [ - "compilation", - "benchmark", - "framework", - "search_algorithm", - "validation" - ], - "objective_performance_keys": [ - "time" - ], - "cutoff_type": "fevals", - "plot": { - "plot_x_value_types": [ - "aggregated" + "parent_folder": "./test_output_file_writer", + "experimental_groups_defaults": { + "applications": [ + { + "name": "mocktest_kernel_convolution", + "input_file": "mocktest_kernel_convolution.json", + "folder": "./tests/autotuning_methodology/integration/mockfiles" + } ], - "plot_y_value_types": [ - "normalized", - "baseline" + "gpus": [ + "mock_GPU" ], - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - }, - "strategy_defaults": { - "iterations": 32, - "repeats": 2, - "minimum_number_of_evaluations": 20, + "pattern_for_full_search_space_filenames": { + "regex": "./tests/autotuning_methodology/integration/mockfiles/mock_gpu_T4.json" + }, "stochastic": true, - "record_data": [ - "time", - "GFLOP/s" - ] + "repeats": 2, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false }, - "strategies": [ + "search_strategies": [ { "name": "ktt_profile_searcher", - "strategy": "profile_searcher", - "display_name": "KTT Profile Searcher" + "search_method": "profile_searcher", + "display_name": "KTT Profile Searcher", + "autotuner": "KTT" } - ] + ], + "statistics_settings": { + "cutoff_percentile": 0.99, + "cutoff_percentile_start": 0.7, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } } \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/test_report.py b/tests/autotuning_methodology/integration/test_report.py index 521d56f..a0240e4 100644 --- a/tests/autotuning_methodology/integration/test_report.py +++ 
b/tests/autotuning_methodology/integration/test_report.py @@ -10,7 +10,7 @@ normal_cachefiles_path, ) -from autotuning_methodology.experiments import get_experiment, get_strategies +from autotuning_methodology.experiments import get_experiment, get_experimental_groups from autotuning_methodology.report_experiments import get_strategy_scores # setup file paths @@ -44,7 +44,7 @@ def test_visualize_experiment(): # get the experiment details experiment_filepath = str(experiment_filepath_test) experiment = get_experiment(experiment_filepath) - strategies = get_strategies(experiment) + strategies = get_experimental_groups(experiment) # TODO fix this test that used to use get_strategies # get the scores strategies_scores = get_strategy_scores(experiment_filepath) diff --git a/tests/autotuning_methodology/integration/test_run_experiment.py b/tests/autotuning_methodology/integration/test_run_experiment.py index 35b7220..8db7d78 100644 --- a/tests/autotuning_methodology/integration/test_run_experiment.py +++ b/tests/autotuning_methodology/integration/test_run_experiment.py @@ -1,40 +1,36 @@ """Integration test for running and fetching an experiment from cache.""" -import json from importlib.resources import files from pathlib import Path from shutil import copyfile +from warnings import warn import numpy as np import pytest -from jsonschema import validate from autotuning_methodology.curves import StochasticOptimizationAlgorithm -from autotuning_methodology.experiments import ( - ResultsDescription, - execute_experiment, - get_args_from_cli, - get_experiment_schema_filepath, -) +from autotuning_methodology.experiments import ResultsDescription, execute_experiment, get_args_from_cli +from autotuning_methodology.validators import validate_experimentsfile # get the path to the package package_path = Path(files("autotuning_methodology")).parent.parent -# package_path = "" # setup file paths +strategy = "random_sample_10_iter" mockfiles_path_root = package_path / Path("tests/autotuning_methodology/integration/mockfiles/") mockfiles_path_source = mockfiles_path_root / "mock_gpu.json" mockfiles_path = mockfiles_path_root experiment_filepath_test = mockfiles_path / "test.json" assert experiment_filepath_test.exists() kernel_id = "mocktest_kernel_convolution" -cached_visualization_path = package_path / Path(f"cached_data_used/visualizations/test_run_experiment/{kernel_id}") -cached_visualization_file = cached_visualization_path / "mock_GPU_random_sample_10_iter.npz" -cached_visualization_imported_path = package_path / Path( - f"cached_data_used/visualizations/test_output_file_writer/{kernel_id}" -) -cached_visualization_imported_file = cached_visualization_imported_path / "mock_GPU_ktt_profile_searcher.npz" -normal_cachefiles_path = package_path / Path(f"cached_data_used/cachefiles/{kernel_id}") +experiment_path = package_path / Path("test_run_experiment") +experiment_path_run = experiment_path / "run" +experiment_path_setup = experiment_path / "setup" + +cached_visualization_path = experiment_path_run +plot_path = cached_visualization_path / "generated_graphs" +cached_visualization_file = experiment_path_run / strategy / "mock_GPU_mocktest_kernel_convolution.npz" +normal_cachefiles_path = package_path / Path(f"benchmark_hub/cachefiles/{kernel_id}") normal_cachefile_destination = normal_cachefiles_path / "mock_gpu.json" experiment_import_filepath_test = mockfiles_path / "test_import_runs.json" assert experiment_import_filepath_test.exists() @@ -43,12 +39,24 @@ import_runs_filepaths: list[Path] = list() -def 
_remove_dir(path: Path): +def _remove_dir(path: Path, ignore_permission_error=False): """Utility function for removing a directory and the contained files.""" - assert path.exists() + assert path.exists(), f"Path to directory does not exist: {path.resolve()}" + permission_errors = [] for sub in path.iterdir(): - sub.unlink() - path.rmdir() + try: + if sub.is_dir(): + _remove_dir(sub) + else: + sub.unlink() + except PermissionError as e: + if ignore_permission_error: + warn(e) + permission_errors.append(e) + else: + raise e + if not (ignore_permission_error and len(permission_errors) > 0): + path.rmdir() def setup_module(): @@ -58,9 +66,6 @@ def setup_module(): assert normal_cachefiles_path.exists() normal_cachefile_destination.write_text(mockfiles_path_source.read_text()) assert normal_cachefile_destination.exists() - # cached_visualization_path.mkdir(parents=True, exist_ok=True) - # assert cached_visualization_path.exists() - # copy the import run test files to the import run folder assert import_runs_source_path.exists() import_runs_path.mkdir(parents=True, exist_ok=True) assert import_runs_path.exists() @@ -79,15 +84,10 @@ def teardown_module(): if normal_cachefile_destination.exists(): normal_cachefile_destination.unlink() _remove_dir(normal_cachefiles_path) - if cached_visualization_file.exists(): - cached_visualization_file.unlink() - _remove_dir(cached_visualization_path) - if cached_visualization_imported_file.exists(): - cached_visualization_imported_file.unlink() - _remove_dir(cached_visualization_imported_path) # delete the import run test files from the import run folder for import_run_file in import_runs_filepaths: import_run_file.unlink() + _remove_dir(experiment_path) def test_CLI_input(): @@ -96,11 +96,11 @@ def test_CLI_input(): with pytest.raises(SystemExit) as e: dummy_args = ["-dummy_arg=option"] get_args_from_cli(dummy_args) - assert e.type == SystemExit + assert e.type == SystemExit # noqa: E721 assert e.value.code == 2 # improper input 2 - with pytest.raises(ValueError, match="Invalid '-experiment' option"): + with pytest.raises(ValueError, match="Invalid '--experiment' option"): get_args_from_cli([""]) # proper input @@ -122,7 +122,7 @@ def test_bad_experiment(): def test_run_experiment_bad_kernel_path(): """Run an experiment with a bad kernel path.""" experiment_filepath = str(mockfiles_path / "test_bad_kernel_path.json") - with pytest.raises(FileNotFoundError, match="No such path"): + with pytest.raises(FileNotFoundError, match="does not exist"): execute_experiment(experiment_filepath, profiling=False) @@ -133,8 +133,10 @@ def test_run_experiment(): if cached_visualization_file.exists(): cached_visualization_file.unlink() assert not cached_visualization_file.exists() - (experiment, strategies, results_descriptions) = execute_experiment(str(experiment_filepath_test), profiling=False) - validate_experiment_results(experiment, strategies, results_descriptions) + (experiment, all_experimental_groups, searchspace_statistics, results_descriptions) = execute_experiment( + str(experiment_filepath_test), profiling=False + ) + validate_experiment_results(experiment, all_experimental_groups, searchspace_statistics, results_descriptions) @pytest.mark.usefixtures("test_run_experiment") @@ -144,29 +146,23 @@ def test_cached_experiment(): assert normal_cachefile_destination.exists() assert cached_visualization_path.exists() assert cached_visualization_file.exists() - (experiment, strategies, results_descriptions) = execute_experiment(str(experiment_filepath_test), 
profiling=False) - validate_experiment_results(experiment, strategies, results_descriptions) - - -def test_import_run_experiment(): - """Import runs from an experiment.""" - assert import_runs_path.exists() - (experiment, strategies, results_descriptions) = execute_experiment( - str(experiment_import_filepath_test), profiling=False + (experiment, all_experimental_groups, searchspace_statistics, results_descriptions) = execute_experiment( + str(experiment_filepath_test), profiling=False ) - assert cached_visualization_imported_path.exists() - assert cached_visualization_imported_file.exists() - validate_experiment_results(experiment, strategies, results_descriptions) + validate_experiment_results(experiment, all_experimental_groups, searchspace_statistics, results_descriptions) @pytest.mark.usefixtures("test_run_experiment") def test_curve_instance(): """Test a Curve instance.""" # setup the test - (experiment, strategies, results_descriptions) = execute_experiment(str(experiment_filepath_test), profiling=False) - kernel_name = experiment["kernels"][0] - gpu_name = experiment["GPUs"][0] - strategy_name = strategies[0]["name"] + (experiment, all_experimental_groups, _, results_descriptions) = execute_experiment( + str(experiment_filepath_test), profiling=False + ) + experimental_groups: dict = experiment["experimental_groups_defaults"] + kernel_name = experimental_groups["applications"][0]["name"] + gpu_name = experimental_groups["gpus"][0] + strategy_name = all_experimental_groups[0]["name"] results_description = results_descriptions[gpu_name][kernel_name][strategy_name] curve = StochasticOptimizationAlgorithm(results_description) @@ -194,21 +190,31 @@ def test_curve_instance(): def validate_experiment_results( experiment, - strategies, + all_experimental_groups, + searchspace_statistics, results_descriptions, ): """Validate the types and contents returned from an experiment.""" - assert isinstance(experiment, dict) - assert isinstance(strategies, list) - assert isinstance(results_descriptions, dict) + assert isinstance(experiment, dict), f"should be dict, is {type(experiment)} ({experiment})" + assert isinstance( + searchspace_statistics, dict + ), f"should be dict, is {type(searchspace_statistics)} ({searchspace_statistics})" + assert isinstance( + all_experimental_groups, list + ), f"should be list, is {type(all_experimental_groups)} ({all_experimental_groups})" + assert isinstance( + results_descriptions, dict + ), f"should be dict, is {type(results_descriptions)} ({results_descriptions})" # validate the contents - schemafilepath = get_experiment_schema_filepath() - with open(schemafilepath, "r", encoding="utf-8") as schemafile: - schema = json.load(schemafile) - validate(instance=experiment, schema=schema) - kernel_name = experiment["kernels"][0] - gpu_name = experiment["GPUs"][0] - assert len(strategies) == 1 - strategy_name = strategies[0]["name"] + validate_experimentsfile(experiment) + experimental_groups: dict = experiment["experimental_groups_defaults"] + assert isinstance(experimental_groups, dict) + kernel_name = experimental_groups["applications"][0]["name"] + assert kernel_name == "mocktest_kernel_convolution" + gpu_name = experimental_groups["gpus"][0] + assert gpu_name == "mock_GPU" + assert len(all_experimental_groups) == 1 + strategy_name = all_experimental_groups[0]["name"] + assert strategy_name == "random_sample_10_iter" assert isinstance(results_descriptions[gpu_name][kernel_name][strategy_name], ResultsDescription) diff --git 
a/tests/autotuning_methodology/integration/test_visualization.py b/tests/autotuning_methodology/integration/test_visualization.py index 97e34dc..1fc682e 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -2,33 +2,40 @@ from pathlib import Path +import pytest from test_run_experiment import ( _remove_dir, cached_visualization_file, - cached_visualization_path, experiment_filepath_test, + experiment_path, kernel_id, mockfiles_path_source, normal_cachefile_destination, normal_cachefiles_path, + plot_path, ) from autotuning_methodology.visualize_experiments import Visualize # setup file paths experiment_title = f"{kernel_id}_on_mock_GPU" -plot_path = Path("generated_plots/test_run_experiment") plot_path_fevals = plot_path / f"{experiment_title}_fevals.png" plot_path_time = plot_path / f"{experiment_title}_time.png" +plot_path_heatmap = plot_path / "random_sample_10_iter_heatmap_applications_gpus_colorbar.png" +plot_path_heatmap_time = plot_path / "random_sample_10_iter_heatmap_time_searchspaces.png" +plot_path_aggregated_head2head = plot_path / "head2head_comparison_time.png" plot_path_aggregated = plot_path / "aggregated.png" plot_path_split_times_fevals = plot_path / f"{experiment_title}_split_times_fevals.png" plot_path_split_times_time = plot_path / f"{experiment_title}_split_times_time.png" plot_path_split_times_bar = plot_path / f"{experiment_title}_split_times_bar.png" plot_path_baselines_comparison = plot_path / f"{experiment_title}_baselines.png" -plot_filepaths = [ +plot_filepaths: list[Path] = [ plot_path_fevals, plot_path_time, + plot_path_heatmap, + plot_path_heatmap_time, plot_path_aggregated, + plot_path_aggregated_head2head, plot_path_split_times_fevals, plot_path_split_times_time, plot_path_split_times_bar, @@ -43,10 +50,6 @@ def setup_module(): assert normal_cachefiles_path.exists() normal_cachefile_destination.write_text(mockfiles_path_source.read_text()) assert normal_cachefile_destination.exists() - # cached_visualization_path.mkdir(parents=True, exist_ok=True) - # assert cached_visualization_path.exists() - # plot_path.mkdir(parents=True, exist_ok=True) - # assert plot_path.exists() def teardown_module(): @@ -54,15 +57,14 @@ def teardown_module(): if normal_cachefile_destination.exists(): normal_cachefile_destination.unlink() _remove_dir(normal_cachefiles_path) - if cached_visualization_file.exists(): - cached_visualization_file.unlink() - _remove_dir(cached_visualization_path) if plot_path.exists(): for plot_filepath in plot_filepaths: plot_filepath.unlink(missing_ok=True) plot_path.rmdir() + _remove_dir(experiment_path) +@pytest.mark.dependency() def test_visualize_experiment(): """Visualize a dummy experiment.""" assert normal_cachefile_destination.exists() @@ -77,5 +79,14 @@ def test_visualize_experiment(): continue_after_comparison=True, compare_extra_baselines=True, ) + + +@pytest.mark.dependency(depends=["test_visualize_experiment"]) +@pytest.mark.parametrize("plot_filepath", plot_filepaths) +def test_visualized_plot(plot_filepath: Path): + """Test whether valid plots have been produced.""" + assert plot_path.exists() for plot_filepath in plot_filepaths: - assert plot_filepath.exists(), f"{plot_filepath} does not exist" + assert ( + plot_filepath.exists() + ), f"{plot_filepath} does not exist, files in folder: {[f.name for f in plot_filepath.parent.iterdir() if f.is_file()]}" diff --git a/tests/autotuning_methodology/unit/test_curves.py 
b/tests/autotuning_methodology/unit/test_curves.py index bc0b4fe..0fcd7cb 100644 --- a/tests/autotuning_methodology/unit/test_curves.py +++ b/tests/autotuning_methodology/unit/test_curves.py @@ -8,9 +8,9 @@ def test_get_indices_in_distribution(): """Each draw should have the same value as the associated value in the distribution.""" - draws = np.array([[4, np.NaN, 5], [1, 2, 4.5]]) + draws = np.array([[4, np.nan, 5], [1, 2, 4.5]]) dist = np.array([1, 2, 4, 4, 4.5, 5]) - expected_indices = np.array([[2, np.NaN, 5], [0, 1, 4]]) + expected_indices = np.array([[2, np.nan, 5], [0, 1, 4]]) indices_found = get_indices_in_distribution(draws=draws, dist=dist) @@ -31,16 +31,16 @@ def test_get_indices_in_distribution(): def test_get_indices_in_distribution_check_dist(): """Dist order should be checked by default and dist should not contain NaN.""" - draws = np.array([[4, np.NaN, 5], [1, 2, 4.5]]) + draws = np.array([[4, np.nan, 5], [1, 2, 4.5]]) with pytest.raises(AssertionError, match="2 violations in 5 values"): - get_indices_in_distribution(draws=draws, dist=np.array([1, 2, np.NaN, 4, 4.5])) + get_indices_in_distribution(draws=draws, dist=np.array([1, 2, np.nan, 4, 4.5])) with pytest.raises(AssertionError, match="1 violations in 4 values"): get_indices_in_distribution(draws=draws, dist=np.array([5, 4, 6, 7])) def test_get_indices_in_distribution_check_draws(): """Values in draw (with the exception of NaN) that are not in dist should throw an exception.""" - draws = np.array([[4, np.NaN, 3], [1, 2, 4.5]]) + draws = np.array([[4, np.nan, 3], [1, 2, 4.5]]) dist = np.array([1, 2, 4, 4, 4.5, 5]) with pytest.raises(AssertionError, match="Each value in draws should be in dist"): get_indices_in_distribution(draws=draws, dist=dist) @@ -48,9 +48,9 @@ def test_get_indices_in_distribution_check_draws(): def test_get_indices_in_array(): """Each value should have the same value as the associated value in the unsorted array.""" - draws = np.array([[4, np.NaN, 5], [1, 2, 4.5]]) + draws = np.array([[4, np.nan, 5], [1, 2, 4.5]]) dist = np.array([4, 2, 1, 4, 5, 4.5]) - expected_indices = np.array([[0, np.NaN, 4], [2, 1, 5]]) + expected_indices = np.array([[0, np.nan, 4], [2, 1, 5]]) indices_found = get_indices_in_array(values=draws, array=dist)
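A brief note on the `np.NaN` to `np.nan` change in the test hunks above: NumPy 2.0 removed the `np.NaN` alias, so only the lowercase spelling works across the NumPy versions supported here. A minimal sketch, reusing the same illustrative draws as in the test:

```python
import numpy as np

# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
draws = np.array([[4, np.nan, 5], [1, 2, 4.5]])

# NaN entries mark missing draws and are detected with np.isnan, not equality
print(np.isnan(draws))
```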