WIP: Profile data mirroring #6723

Draft: wants to merge 21 commits into base: main

Commits (21)
8851e48  Move dumping test fixtures to `conftest.py` (GeigerJ2, Jan 23, 2025)
98ea050  First working version with `DataDumper` and `CollectionDumper` (GeigerJ2, Jan 23, 2025)
65214e8  Mirroring of workflows and calculations works (GeigerJ2, Jan 23, 2025)
17f2730  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 23, 2025)
0d37e59  Major code refactor (GeigerJ2, Jan 27, 2025)
0105c08  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 27, 2025)
64b715e  Symlinking of workflows between groups works. (GeigerJ2, Jan 27, 2025)
a8c5aac  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 27, 2025)
fbdf478  Fix `verdi process dump` tests (GeigerJ2, Jan 28, 2025)
b98c61a  Fix mypy complaints (GeigerJ2, Jan 28, 2025)
2dfe2ca  Start to work on group testing (GeigerJ2, Jan 28, 2025)
492f87f  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 28, 2025)
3887130  Add ArithmeticAdd CJ Node fixture without `run` (GeigerJ2, Jan 28, 2025)
f452ab2  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 28, 2025)
abbfaff  First tests for node collection dumping (GeigerJ2, Jan 31, 2025)
e09e078  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
48acce7  Improve logging and add dry-run feature. (GeigerJ2, Feb 5, 2025)
b2aba2f  BaseDumper dataclass. get_processes return dict. Extend tests. (GeigerJ2, Feb 6, 2025)
7c905e6  Add `ProcessesToDump` NamedTuple (GeigerJ2, Feb 6, 2025)
7dba485  Use `compare_tree` utility function for dumping tests (GeigerJ2, Feb 6, 2025)
8ddaa15  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Feb 6, 2025)

1 change: 1 addition & 0 deletions docs/source/reference/command_line.rst
@@ -398,6 +398,7 @@ Below is a list with all available subcommands.
configure-rabbitmq Configure RabbitMQ for a profile.
delete Delete one or more profiles.
list Display a list of all available profiles.
mirror Dump all data in an AiiDA profile's storage to disk.
set-default Set a profile as the default profile.
setdefault (Deprecated) Set a profile as the default profile.
setup Set up a new profile.
63 changes: 24 additions & 39 deletions src/aiida/cmdline/commands/cmd_process.py
@@ -562,38 +562,11 @@ def process_repair(manager, broker, dry_run):
@arguments.PROCESS()
@options.PATH()
@options.OVERWRITE()
@click.option(
'--include-inputs/--exclude-inputs',
default=True,
show_default=True,
help='Include the linked input nodes of the `CalculationNode`(s).',
)
@click.option(
'--include-outputs/--exclude-outputs',
default=False,
show_default=True,
help='Include the linked output nodes of the `CalculationNode`(s).',
)
@click.option(
'--include-attributes/--exclude-attributes',
default=True,
show_default=True,
help='Include attributes in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.',
)
@click.option(
'--include-extras/--exclude-extras',
default=True,
show_default=True,
help='Include extras in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.',
)
@click.option(
'-f',
'--flat',
is_flag=True,
default=False,
show_default=True,
help='Dump files in a flat directory for every step of the workflow.',
)
@options.FLAT()
@options.INCLUDE_INPUTS()
@options.INCLUDE_OUTPUTS()
@options.INCLUDE_ATTRIBUTES()
@options.INCLUDE_EXTRAS()
@click.option(
'--dump-unsealed',
is_flag=True,
@@ -602,15 +575,17 @@ def process_repair(manager, broker, dry_run):
help='Also allow the dumping of unsealed process nodes.',
)
@options.INCREMENTAL()
# TODO: Also add CONFIG_FILE option here
# TODO: Currently, setting rich options is not supported here directly
def process_dump(
process,
path,
overwrite,
flat,
include_inputs,
include_outputs,
include_attributes,
include_extras,
flat,
dump_unsealed,
incremental,
) -> None:
@@ -631,21 +606,33 @@ def process_dump(
"""

from aiida.tools.archive.exceptions import ExportValidationError
from aiida.tools.dumping.processes import ProcessDumper
from aiida.tools.dumping.base import BaseDumper
from aiida.tools.dumping.process import ProcessDumper

base_dumper = BaseDumper(
dump_parent_path=path,
overwrite=overwrite,
incremental=incremental,
)

process_dumper = ProcessDumper(
base_dumper=base_dumper,
include_inputs=include_inputs,
include_outputs=include_outputs,
include_attributes=include_attributes,
include_extras=include_extras,
overwrite=overwrite,
flat=flat,
dump_unsealed=dump_unsealed,
incremental=incremental,
)

try:
dump_path = process_dumper.dump(process_node=process, output_path=path)
dump_path = process_dumper.dump(
process_node=process,
output_path=path,
)
echo.echo_success(
f'Raw files for {process.__class__.__name__} <{process.pk}> dumped into folder `{dump_path}`.'
)
except FileExistsError:
echo.echo_critical(
'Dumping directory exists and overwrite is False. Set overwrite to True, or delete directory manually.'
@@ -654,5 +641,3 @@ def process_dump(
echo.echo_critical(f'{e!s}')
except Exception as e:
echo.echo_critical(f'Unexpected error while dumping {process.__class__.__name__} <{process.pk}>:\n ({e!s}).')

echo.echo_success(f'Raw files for {process.__class__.__name__} <{process.pk}> dumped into folder `{dump_path}`.')
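
For reference, the refactored `verdi process dump` wiring boils down to composing a `BaseDumper` (target path, overwrite/incremental behaviour) with a `ProcessDumper` (what to include and how to lay out the files). Below is a minimal sketch of the equivalent Python usage, assuming the constructor arguments and the `dump(process_node=..., output_path=...)` call shown in this diff; the PK and directory name are hypothetical, and the API may still change in this WIP.

```python
from pathlib import Path

from aiida import load_profile, orm
from aiida.tools.dumping.base import BaseDumper
from aiida.tools.dumping.process import ProcessDumper

load_profile()

dump_path = Path.cwd() / 'my-calc-dump'  # hypothetical target directory

# Shared settings: where to dump, and how to treat an existing directory
base_dumper = BaseDumper(dump_parent_path=dump_path, overwrite=False, incremental=True)

# Process-specific settings mirroring the CLI flags of `verdi process dump`
process_dumper = ProcessDumper(
    base_dumper=base_dumper,
    include_inputs=True,
    include_outputs=False,
    include_attributes=True,
    include_extras=True,
    flat=False,
    dump_unsealed=False,
)

node = orm.load_node(1234)  # hypothetical PK of a sealed ProcessNode
final_path = process_dumper.dump(process_node=node, output_path=dump_path)
print(f'Raw files dumped into {final_path}')
```
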
129 changes: 129 additions & 0 deletions src/aiida/cmdline/commands/cmd_profile.py
@@ -269,3 +269,132 @@ def profile_delete(force, delete_data, profiles):

get_config().delete_profile(profile.name, delete_storage=delete_data)
echo.echo_success(f'Profile `{profile.name}` was deleted.')


@verdi_profile.command('mirror')
@options.PATH()
@options.OVERWRITE()
# @options.INCREMENTAL()
@options.DUMP_PROCESSES()
@options.DEDUPLICATE()
@options.INCLUDE_INPUTS()
@options.INCLUDE_OUTPUTS()
@options.INCLUDE_ATTRIBUTES()
@options.INCLUDE_EXTRAS()
@options.FLAT()
@options.DUMP_CONFIG_FILE()
@options.GROUPS()
@options.ORGANIZE_BY_GROUPS()
@options.DRY_RUN()
@click.pass_context
def profile_mirror(
ctx,
path,
overwrite,
organize_by_groups,
dry_run,
dump_processes,
deduplicate,
include_inputs,
include_outputs,
include_attributes,
include_extras,
flat,
dump_config_file,
groups,
):
"""Dump all data in an AiiDA profile's storage to disk."""

import json
from datetime import datetime
from pathlib import Path

from aiida.tools.dumping import ProcessDumper, ProfileDumper
from aiida.tools.dumping.base import BaseDumper
from aiida.tools.dumping.logger import DumpLogger
from aiida.tools.dumping.utils import prepare_dump_path

profile = ctx.obj['profile']

incremental = not overwrite

if path is None:
path = Path.cwd() / f'{profile.name}-mirror'

echo.echo_report(f'Mirroring data of profile `{profile.name}` at path: `{path}`.')

SAFEGUARD_FILE: str = '.verdi_profile_mirror' # noqa: N806
safeguard_file_path: Path = path / SAFEGUARD_FILE

try:
prepare_dump_path(
path_to_validate=path,
overwrite=overwrite,
incremental=incremental,
safeguard_file=SAFEGUARD_FILE,
)
except FileExistsError as exc:
echo.echo_critical(str(exc))

try:
with safeguard_file_path.open('r') as fhandle:
last_dump_time = datetime.fromisoformat(fhandle.readlines()[-1].strip().split()[-1]).astimezone()
except IndexError:
last_dump_time = None

if dry_run:
node_counts = ProfileDumper._get_number_of_nodes_to_dump(last_dump_time)
node_counts_str = ' & '.join(f'{count} {node_type}' for node_type, count in node_counts.items())
dry_run_message = f'Dry run for mirroring of profile `{profile.name}`: {node_counts_str} to dump.\n'
echo.echo_report(dry_run_message)
return

if incremental:
msg = 'Incremental mirroring selected. Will update directory.'
echo.echo_report(msg)

try:
dump_logger = DumpLogger.from_file(dump_parent_path=path)
except (json.JSONDecodeError, OSError):
dump_logger = DumpLogger(dump_parent_path=path)

base_dumper = BaseDumper(
dump_parent_path=path,
overwrite=overwrite,
incremental=incremental,
last_dump_time=last_dump_time,
)

process_dumper = ProcessDumper(
base_dumper=base_dumper,
include_inputs=include_inputs,
include_outputs=include_outputs,
include_attributes=include_attributes,
include_extras=include_extras,
flat=flat,
)

profile_dumper = ProfileDumper(
base_dumper=base_dumper,
process_dumper=process_dumper,
dump_logger=dump_logger,
groups=groups,
organize_by_groups=organize_by_groups,
deduplicate=deduplicate,
profile=profile,
dump_processes=dump_processes,
)

profile_dumper.dump()

# Append the current time to the file
last_dump_time = datetime.now().astimezone()
with safeguard_file_path.open('a') as fhandle:
fhandle.write(f'Last profile mirror time: {last_dump_time.isoformat()}\n')

# Write the logging json file to disk
dump_logger.save_log()

echo.echo_success(f'Dumped {dump_logger.counter} new nodes.')
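
The incremental bookkeeping of `verdi profile mirror` hinges on the `.verdi_profile_mirror` safeguard file: after every run the command appends a line ending in an ISO-8601 timestamp, and the next run parses the last line back to decide which nodes are new. The standalone sketch below reconstructs that round trip, assuming only the line format used in the diff (last whitespace-separated token of the final line is the timestamp); the function names are illustrative, not part of the PR.

```python
from __future__ import annotations

from datetime import datetime
from pathlib import Path

SAFEGUARD_FILE = '.verdi_profile_mirror'


def read_last_dump_time(dump_path: Path) -> datetime | None:
    """Return the timestamp of the previous mirror run, or None on the first run."""
    safeguard = dump_path / SAFEGUARD_FILE
    try:
        last_line = safeguard.read_text().strip().splitlines()[-1]
        # Each line ends with an ISO-8601 timestamp, e.g.
        # "Last profile mirror time: 2025-02-06T12:00:00+01:00"
        return datetime.fromisoformat(last_line.split()[-1]).astimezone()
    except (FileNotFoundError, IndexError, ValueError):
        return None


def write_dump_time(dump_path: Path) -> None:
    """Append the current time so the next run can mirror incrementally."""
    with (dump_path / SAFEGUARD_FILE).open('a') as fhandle:
        fhandle.write(f'Last profile mirror time: {datetime.now().astimezone().isoformat()}\n')
```
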
77 changes: 77 additions & 0 deletions src/aiida/cmdline/params/options/main.py
@@ -53,13 +53,17 @@
'DB_PORT',
'DB_USERNAME',
'DEBUG',
'DEDUPLICATE',
'DESCRIPTION',
'DICT_FORMAT',
'DICT_KEYS',
'DRY_RUN',
'DUMP_CONFIG_FILE',
'DUMP_PROCESSES',
'EXIT_STATUS',
'EXPORT_FORMAT',
'FAILED',
'FLAT',
'FORCE',
'FORMULA_MODE',
'FREQUENCY',
@@ -68,6 +72,10 @@
'GROUP_CLEAR',
'HOSTNAME',
'IDENTIFIER',
'INCLUDE_ATTRIBUTES',
'INCLUDE_EXTRAS',
'INCLUDE_INPUTS',
'INCLUDE_OUTPUTS',
'INCREMENTAL',
'INPUT_FORMAT',
'INPUT_PLUGIN',
@@ -80,6 +88,7 @@
'OLDER_THAN',
'ORDER_BY',
'ORDER_DIRECTION',
'ORGANIZE_BY_GROUPS',
'OVERWRITE',
'PAST_DAYS',
'PATH',
@@ -783,6 +792,74 @@ def set_log_level(ctx, _param, value):
show_default=True,
)

DEDUPLICATE = OverridableOption(
'--deduplicate/--no-deduplicate',
is_flag=True,
default=True,
show_default=True,
help='Deduplicate nodes that appear in multiple groups by dumping them only once and linking to the existing dump.',
)

DUMP_PROCESSES = OverridableOption(
'--dump-processes/--no-dump-processes',
is_flag=True,
default=True,
show_default=True,
help='Dump process data.',
)

DUMP_CONFIG_FILE = OverridableOption(
'--dump-config-file',
default=None,
type=types.FileOrUrl(),
help='Provide dumping options via a config file in YAML format.',
)

ORGANIZE_BY_GROUPS = OverridableOption(
'--organize-by-groups/--no-organize-by-groups',
default=True,
is_flag=True,
type=bool,
show_default=True,
help='If the nodes to be dumped are organized in groups, reproduce the group hierarchy in the dump directory.',
)

INCLUDE_INPUTS = OverridableOption(
'--include-inputs/--exclude-inputs',
default=True,
show_default=True,
help='Include linked input nodes of `CalculationNode`(s).',
)

INCLUDE_OUTPUTS = OverridableOption(
'--include-outputs/--exclude-outputs',
default=False,
show_default=True,
help='Include linked output nodes of `CalculationNode`(s).',
)

INCLUDE_ATTRIBUTES = OverridableOption(
'--include-attributes/--exclude-attributes',
default=True,
show_default=True,
help='Include attributes in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.',
)

INCLUDE_EXTRAS = OverridableOption(
'--include-extras/--exclude-extras',
default=True,
show_default=True,
help='Include extras in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.',
)

FLAT = OverridableOption(
'-f',
'--flat',
is_flag=True,
default=False,
help='Dump files in a flat directory for every step of a workflow.',
)

INCREMENTAL = OverridableOption(
'--incremental/--no-incremental',
is_flag=True,
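
These shared `OverridableOption` definitions let `verdi process dump` and `verdi profile mirror` declare the same flags once. An `OverridableOption` is used as a decorator factory, and keyword arguments passed when it is called override its stored defaults. The toy command below sketches that reuse, assuming the definitions added in this diff and the usual `OverridableOption` call-time override semantics already used elsewhere in `aiida.cmdline.params.options`.

```python
import click

from aiida.cmdline.params import options


@click.command()
@options.INCLUDE_INPUTS()  # use the shared default (--include-inputs/--exclude-inputs, default True)
@options.FLAT(help='Dump all repository files into a single flat directory.')  # override only the help text
def my_dump_command(include_inputs, flat):
    """Toy command reusing the shared dump options."""
    click.echo(f'include_inputs={include_inputs}, flat={flat}')
```
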
9 changes: 8 additions & 1 deletion src/aiida/tools/dumping/__init__.py
@@ -8,4 +8,11 @@
###########################################################################
"""Modules related to the dumping of AiiDA data."""

__all__ = ('processes',)
from .base import BaseDumper
from .collection import CollectionDumper
from .process import ProcessDumper
from .profile import ProfileDumper

__all__ = ('BaseDumper', 'CollectionDumper', 'ProcessDumper', 'ProfileDumper')
24 changes: 24 additions & 0 deletions src/aiida/tools/dumping/base.py
@@ -0,0 +1,24 @@
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################

from dataclasses import dataclass
from datetime import datetime
from pathlib import Path


@dataclass
class BaseDumper:
dump_parent_path: Path | None = None
overwrite: bool = False
incremental: bool = True
last_dump_time: datetime | None = None

def __post_init__(self):
if self.dump_parent_path is None:
self.dump_parent_path = Path.cwd()
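
The `BaseDumper` dataclass centralises the settings every dumper shares (target directory, overwrite/incremental behaviour, and the time of the last dump), so a single instance can be passed to both `ProcessDumper` and `ProfileDumper`. A minimal usage sketch under the definitions shown above; the target directory is hypothetical.

```python
from datetime import datetime, timedelta
from pathlib import Path

from aiida.tools.dumping.base import BaseDumper

# No path given: __post_init__ falls back to the current working directory
default_dumper = BaseDumper()
assert default_dumper.dump_parent_path == Path.cwd()

# Explicit settings for an incremental dump of everything newer than one day
incremental_dumper = BaseDumper(
    dump_parent_path=Path('/tmp/profile-mirror'),  # hypothetical target directory
    overwrite=False,
    incremental=True,
    last_dump_time=datetime.now().astimezone() - timedelta(days=1),
)
```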