Introduce Function Context Feature to TaskVineExecutor #3724
```diff
@@ -20,7 +20,7 @@
 import uuid
 from concurrent.futures import Future
 from datetime import datetime
-from typing import List, Literal, Optional, Union
+from typing import Dict, List, Literal, Optional, Union

 # Import other libraries
 import typeguard
```
```diff
@@ -84,8 +84,12 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         pre-warmed forked python process.
         Default is 'regular'.

+    use_tmp_dir_for_staging: bool
+        Whether to use tmp dir for staging functions, arguments, and results.
+        Default is False.
+
     manager_config: TaskVineManagerConfig
-        Configuration for the TaskVine manager. Default
+        Configuration for the TaskVine manager.

     factory_config: TaskVineFactoryConfig
         Configuration for the TaskVine factory.
```
```diff
@@ -105,6 +109,7 @@ def __init__(self,
                  label: str = "TaskVineExecutor",
                  worker_launch_method: Union[Literal['provider'], Literal['factory'], Literal['manual']] = 'factory',
                  function_exec_mode: Union[Literal['regular'], Literal['serverless']] = 'regular',
+                 use_tmp_dir_for_staging: bool = False,
                  manager_config: TaskVineManagerConfig = TaskVineManagerConfig(),
                  factory_config: TaskVineFactoryConfig = TaskVineFactoryConfig(),
                  provider: Optional[ExecutionProvider] = None,
```
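For orientation, a minimal sketch of how the new flag would be passed. Only `use_tmp_dir_for_staging` comes from this diff; the surrounding configuration (including the port value) follows the usual Parsl setup pattern and is assumed, not taken from the PR:

```python
# Illustrative sketch: enable tmp-dir staging on a TaskVineExecutor.
# `use_tmp_dir_for_staging` is the flag added in this PR; everything
# else is standard Parsl configuration boilerplate.
import parsl
from parsl.config import Config
from parsl.executors.taskvine import TaskVineExecutor, TaskVineManagerConfig

config = Config(executors=[
    TaskVineExecutor(
        label="TaskVineExecutor",
        use_tmp_dir_for_staging=True,  # stage functions/arguments/results in a tmp dir
        manager_config=TaskVineManagerConfig(port=9123),
    )
])
parsl.load(config)
```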
```diff
@@ -135,6 +140,7 @@
         self.label = label
         self.worker_launch_method = worker_launch_method
         self.function_exec_mode = function_exec_mode
+        self.use_tmp_dir_for_staging = use_tmp_dir_for_staging
         self.manager_config = manager_config
         self.factory_config = factory_config
         self.storage_access = storage_access
```
```diff
@@ -183,6 +189,13 @@
         # Path to directory that holds all tasks' data and results.
         self._function_data_dir = ""

+        # Mapping of function names to function details.
+        # Currently the values include function objects, path to serialized functions,
+        # path to serialized function contexts, and whether functions are serialized.
+        # Helpful to detect inconsistencies in serverless functions.
+        # Helpful to deduplicate the same function.
+        self._map_func_names_to_func_details: Dict[str, Dict] = {}
+
         # Helper scripts to prepare package tarballs for Parsl apps
         self._package_analyze_script = shutil.which("poncho_package_analyze")
         self._package_create_script = shutil.which("poncho_package_create")
```
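As a reading aid (not part of the diff), an entry in this map for a serverless function ends up shaped roughly like the following; the function name and paths are invented for illustration:

```python
def my_func(x):
    return x + 1

# Hypothetical shape of one entry in _map_func_names_to_func_details
# after a serverless submission of my_func:
func_details = {
    'my_func': {
        'func_obj': my_func,  # live function object, used for the consistency check
        'function_file': '/run_dir/function/my_func/function',
        'function_context_file': '/run_dir/function/my_func/function_context',
        'is_serialized': True,  # set once the function has been written to disk
    }
}
```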
```diff
@@ -229,8 +242,13 @@ def __create_data_and_logging_dirs(self):
         # Create directories for data and results
         log_dir = os.path.join(run_dir, self.label)
         os.makedirs(log_dir)
-        tmp_prefix = f'{self.label}-{getpass.getuser()}-{datetime.now().strftime("%Y%m%d%H%M%S%f")}-'
-        self._function_data_dir = tempfile.TemporaryDirectory(prefix=tmp_prefix)
+
+        if self.use_tmp_dir_for_staging:
+            tmp_prefix = f'{self.label}-{getpass.getuser()}-{datetime.now().strftime("%Y%m%d%H%M%S%f")}-'
+            self._function_data_dir = tempfile.TemporaryDirectory(prefix=tmp_prefix).name
+        else:
+            self._function_data_dir = os.path.join(log_dir, 'function')
+        os.makedirs(self._function_data_dir, exist_ok=True)

         # put TaskVine logs outside of a Parsl run as TaskVine caches between runs while
         # Parsl does not.
```
```diff
@@ -240,7 +258,7 @@

         # factory logs go with manager logs regardless
         self.factory_config.scratch_dir = self.manager_config.vine_log_dir
-        logger.debug(f"Function data directory: {self._function_data_dir.name}, log directory: {log_dir}")
+        logger.debug(f"Function data directory: {self._function_data_dir}, log directory: {log_dir}")
         logger.debug(
             f"TaskVine manager log directory: {self.manager_config.vine_log_dir}, "
             f"factory log directory: {self.factory_config.scratch_dir}")
```
```diff
@@ -307,7 +325,7 @@ def _path_in_task(self, executor_task_id, *path_components):
         'map': Pickled file with a dict between local parsl names, and remote taskvine names.
         """
         task_dir = "{:04d}".format(executor_task_id)
-        return os.path.join(self._function_data_dir.name, task_dir, *path_components)
+        return os.path.join(self._function_data_dir, task_dir, *path_components)

     def submit(self, func, resource_specification, *args, **kwargs):
         """Processes the Parsl app by its arguments and submits the function
```
```diff
@@ -330,11 +348,30 @@
             Keyword arguments to the Parsl app
         """

+        # a Parsl function must have a name
+        if func.__name__ is None:
+            raise ValueError('A Parsl function must have a name')
+
         logger.debug(f'Got resource specification: {resource_specification}')

         # Default execution mode of apps is regular
         exec_mode = resource_specification.get('exec_mode', self.function_exec_mode)

+        if exec_mode == 'serverless':
+            if func.__name__ not in self._map_func_names_to_func_details:
+                self._map_func_names_to_func_details[func.__name__] = {'func_obj': func}
+            else:
+                if func is not self._map_func_names_to_func_details[func.__name__]['func_obj']:
+                    logger.error(
+                        ('Inconsistency in a serverless function call detected. '
+                         'A function name cannot point to two different function objects.')
+                    )
+                    raise ExecutorError(
+                        self,
+                        ('In the serverless mode, a function name cannot '
+                         'point to two different function objects.')
+                    )
+
         # Detect resources and features of a submitted Parsl app
         cores = None
         memory = None
```
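To make the guarded failure mode concrete, here is a hypothetical snippet (not from the PR) showing how two distinct function objects can share one `__name__`, which is exactly the situation this check rejects:

```python
# Two distinct function objects with the same __name__.
def make_adder(n):
    def adder(x):  # each call creates a *new* function object named 'adder'
        return x + n
    return adder

add1 = make_adder(1)
add2 = make_adder(2)
assert add1.__name__ == add2.__name__ == 'adder'
assert add1 is not add2
# Submitting add1 and later add2 in serverless mode would trip the check
# above, because the executor keys its per-function state by __name__.
```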
```diff
@@ -365,7 +402,7 @@
         self._executor_task_counter += 1

         # Create a per task directory for the function, argument, map, and result files
-        os.mkdir(self._path_in_task(executor_task_id))
+        os.makedirs(self._path_in_task(executor_task_id), exist_ok=True)

         input_files = []
         output_files = []
```
```diff
@@ -398,22 +435,71 @@
         argument_file = None
         result_file = None
         map_file = None
+        function_context_file = None
+        function_context_input_files = {}

         # Get path to files that will contain the pickled function,
         # arguments, result, and map of input and output files
-        function_file = self._path_in_task(executor_task_id, "function")
+        if exec_mode == 'serverless':
+            if 'function_file' not in self._map_func_names_to_func_details[func.__name__]:
+                function_file = os.path.join(self._function_data_dir, func.__name__, 'function')
+                os.makedirs(os.path.join(self._function_data_dir, func.__name__))
+                self._map_func_names_to_func_details[func.__name__].update({'function_file': function_file, 'is_serialized': False})
+            else:
+                function_file = self._map_func_names_to_func_details[func.__name__]['function_file']
+        else:
+            function_file = self._path_in_task(executor_task_id, "function")
         argument_file = self._path_in_task(executor_task_id, "argument")
         result_file = self._path_in_task(executor_task_id, "result")
         map_file = self._path_in_task(executor_task_id, "map")

-        logger.debug("Creating executor task {} with function at: {}, argument at: {}, \
-            and result to be found at: {}".format(executor_task_id, function_file, argument_file, result_file))
+        if exec_mode == 'serverless':
+            if 'function_context' in resource_specification:
+                if 'function_context_file' not in self._map_func_names_to_func_details[func.__name__]:
+                    function_context = resource_specification.get('function_context')
+                    function_context_args = resource_specification.get('function_context_args', [])
+                    function_context_kwargs = resource_specification.get('function_context_kwargs', {})
+                    function_context_file = os.path.join(self._function_data_dir, func.__name__, 'function_context')
+
+                    self._cloudpickle_serialize_object_to_file(function_context_file,
+                                                               [function_context,
+                                                                function_context_args,
+                                                                function_context_kwargs])
+                    self._map_func_names_to_func_details[func.__name__].update({'function_context_file': function_context_file})
+                else:
+                    function_context_file = self._map_func_names_to_func_details[func.__name__]['function_context_file']
+                function_context_input_files = resource_specification.get('function_context_input_files', {})
+
+        logger.debug("Creating executor task {} with function at: {}, argument at: {}, and result to be found at: {}".format(executor_task_id,
+                                                                                                                             function_file,
+                                                                                                                             argument_file,
+                                                                                                                             result_file))

         # Serialize function object and arguments, separately
-        self._serialize_object_to_file(function_file, func)
+        if exec_mode == 'regular' or not self._map_func_names_to_func_details[func.__name__]['is_serialized']:
+            self._serialize_object_to_file(function_file, func)
+            if exec_mode == 'serverless':
+                self._map_func_names_to_func_details[func.__name__]['is_serialized'] = True
+
+        # Delete references of function context information from resource_specification
+        # as they are not needed to be transferred to remote nodes.
+        # They are restored when the kwargs serialization is done.
+        if exec_mode == 'serverless':
+            function_context = kwargs['parsl_resource_specification'].pop('function_context', None)
+            function_context_args = kwargs['parsl_resource_specification'].pop('function_context_args', [])
+            function_context_kwargs = kwargs['parsl_resource_specification'].pop('function_context_kwargs', {})
+            function_context_input_files = kwargs['parsl_resource_specification'].pop('function_context_input_files', {})

         args_dict = {'args': args, 'kwargs': kwargs}
         self._serialize_object_to_file(argument_file, args_dict)

+        if exec_mode == 'serverless':
+            if function_context:
+                kwargs['parsl_resource_specification']['function_context'] = function_context
+                kwargs['parsl_resource_specification']['function_context_args'] = function_context_args
+                kwargs['parsl_resource_specification']['function_context_kwargs'] = function_context_kwargs
+                kwargs['parsl_resource_specification']['function_context_input_files'] = function_context_input_files
+
         # Construct the map file of local filenames at worker
         self._construct_map_file(map_file, input_files, output_files)
```
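Putting the pieces together, a hedged usage sketch of the function-context keys that `submit` reads from the resource specification. The key names (`exec_mode`, `function_context`, `function_context_args`, `function_context_kwargs`, `function_context_input_files`) come from this diff; the app body, context function, and file name are invented for illustration, and a TaskVine config is assumed to be loaded already:

```python
# Illustrative sketch of submitting a serverless app with a function context.
from parsl import python_app

def load_model(path):
    # Hypothetical context function: its result is prepared via the
    # function-context machinery rather than being rebuilt per task.
    import json
    with open(path) as f:
        return json.load(f)

@python_app
def infer(sample, parsl_resource_specification={}):
    return sample  # placeholder; a real app would use the prepared context

spec = {
    'exec_mode': 'serverless',
    'function_context': load_model,
    'function_context_args': ['model.json'],  # invented file name
    'function_context_kwargs': {},
    'function_context_input_files': {},       # local-to-remote file mapping, per the diff
}
future = infer(42, parsl_resource_specification=spec)
```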
```diff
@@ -431,6 +517,7 @@
         category = func.__name__ if self.manager_config.autocategory else 'parsl-default'

         task_info = ParslTaskToVine(executor_id=executor_task_id,
+                                    func_name=func.__name__,
                                     exec_mode=exec_mode,
                                     category=category,
                                     input_files=input_files,
```
```diff
@@ -439,6 +526,8 @@
                                     function_file=function_file,
                                     argument_file=argument_file,
                                     result_file=result_file,
+                                    function_context_file=function_context_file,
+                                    function_context_input_files=function_context_input_files,
                                     cores=cores,
                                     memory=memory,
                                     disk=disk,
```
```diff
@@ -493,6 +582,12 @@ def _serialize_object_to_file(self, path, obj):
         while written < len(serialized_obj):
             written += f_out.write(serialized_obj[written:])

+    def _cloudpickle_serialize_object_to_file(self, path, obj):
+        """Takes any object and serializes it to the file path."""
+        import cloudpickle  # type: ignore[import-not-found]
+        with open(path, 'wb') as f:
+            cloudpickle.dump(obj, f)
+
     def _construct_map_file(self, map_file, input_files, output_files):
         """ Map local filepath of parsl files to the filenames at the execution worker.
         If using a shared filesystem, the filepath is mapped to its absolute filename.
```
Review thread on `_cloudpickle_serialize_object_to_file`:

Collaborator: we talked about this somewhere before but I can't remember where: you should be using the parsl serialization libraries, not cloudpickle, unless you have a specific reason that needs different serialization.

Contributor (Author): The object I serialize is a list containing a function and other Python objects (https://github.com/Parsl/parsl/pull/3724/files#diff-c5ce2bce42f707d31639e986d8fea5c00d31b5eead8fa510f7fe7e3181e67ccfR458-R461). Because it is a list, Parsl treats it as data rather than as a function, and the function inside ends up serialized by pickle.

Collaborator: what is meant to happen is that parsl.serialize tries pickle first and falls back to the next serializer if pickle fails. If you're seeing instances where this doesn't work, that's a problem with parsl serialization in general that I'm interested in, distinct from taskvine.

Contributor (Author): That makes sense to me. From my point of view, the problem is that pickle doesn't fail at the process of serialization (the serialization doesn't return an error), but the output of this serialization process is unusable for TaskVine. That is, the serialization call doesn't raise an exception, yet in my case the function was serialized "successfully" by pickle and could not be used remotely.

Contributor (Author): Just to brainstorm, would adding a parameter to select the serializer be an option here?

Contributor (Author): I think I found the root of the problem, which is pickle and the importability of functions. I tried using parsl.serialize/deserialize instead of cloudpickle and ran the tests. I inspected the content of the serialization output and confirmed that pickle did the serialization. As you know, pickle serializes functions by reference, i.e. by "fully qualified name" (see the "Note that functions" part of the pickle documentation). When pickle deserializes these functions, it tries to import them by those names. The names of the test functions are fully importable because they live in an importable module, so the tests pass. On another local test setup of mine, where test functions are defined in a Python script (so they have names like "__main__.f_context"), they are serialized with the "__main__" prefix in their names, so when they are reconstructed elsewhere, the other process can't find the functions in its own main module, causing an import error. So the moral of the story is: for pickle to work, functions need importable names. cloudpickle (and maybe dill) sidesteps this requirement by serializing by value.
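A self-contained demonstration of the by-reference vs. by-value behavior described in this comment (illustrative, not code from the PR; assumes `dill` is installed):

```python
# Why pickling a __main__ function "succeeds" locally but cannot be
# deserialized in a process that lacks the same definition.
import pickle

def f_context():
    return 42

# pickle does not error: it records only the reference __main__.f_context.
payload = pickle.dumps(f_context)
assert b'f_context' in payload
# A different process calling pickle.loads(payload) must be able to import
# __main__.f_context, which fails when this file is an ad-hoc script.

import dill  # serializes __main__ functions by value instead
payload_by_value = dill.dumps(f_context)
rebuilt = dill.loads(payload_by_value)
assert rebuilt() == 42  # the function body travels with the payload
```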
Collaborator: yeah, I'm kinda inclined to change the parsl.serialize behaviour to not do pickle first - because behaviour like you report above, where pickle appears to succeed but produces a result that can't be deserialized remotely, is confusing.

Contributor (Author): Putting dill in front of pickle works on my other local test setup (so cloudpickle is not needed in the TaskVineExecutor). Should I include it here or make a separate PR?

Collaborator: I'll make a separate PR, because it is a parsl-wide serialization change that I want to keep nicely isolated in history.