diff --git a/neuracore/ml/config/config.yaml b/neuracore/ml/config/config.yaml
index 4837ff00..77591883 100644
--- a/neuracore/ml/config/config.yaml
+++ b/neuracore/ml/config/config.yaml
@@ -24,7 +24,6 @@ max_prefetch_workers: 4
 
 # Dataset synchronization
 dataset_name: null
-dataset_id: null
 frequency: 10
 
 # You can either specify input_data_types/output_data_types or
diff --git a/neuracore/ml/train.py b/neuracore/ml/train.py
index 88c3bb0c..ecb840e2 100644
--- a/neuracore/ml/train.py
+++ b/neuracore/ml/train.py
@@ -225,7 +225,6 @@ def _save_local_training_metadata(
         "status": "RUNNING",
         "algorithm": algorithm_name,
         "algorithm_id": getattr(cfg, "algorithm_id", None),
-        "dataset_id": getattr(cfg, "dataset_id", None),
         "dataset_name": getattr(cfg, "dataset_name", None),
         "launch_time": time.time(),
         "local_output_dir": str(output_dir),
@@ -587,25 +586,15 @@ def main(cfg: DictConfig) -> None:
             "Neither 'algorithm' nor 'algorithm_id' is provided. "
             "Please specify one."
         )
-    if cfg.dataset_id is None and cfg.dataset_name is None:
-        raise ValueError("Either 'dataset_id' or 'dataset_name' must be provided.")
-    if cfg.dataset_id is not None and cfg.dataset_name is not None:
-        raise ValueError(
-            "Both 'dataset_id' and 'dataset_name' are provided. "
-            "Please specify only one."
-        )
+    if cfg.dataset_name is None:
+        raise ValueError("'dataset_name' must be provided.")
 
     # Login and get dataset
     nc.login()
     if cfg.org_id is not None:
         nc.set_organization(cfg.org_id)
 
-    if cfg.dataset_id is not None:
-        dataset = nc.get_dataset(id=cfg.dataset_id)
-    elif cfg.dataset_name is not None:
-        dataset = nc.get_dataset(name=cfg.dataset_name)
-    else:
-        raise ValueError("Either 'dataset_id' or 'dataset_name' must be provided.")
+    dataset = nc.get_dataset(name=cfg.dataset_name)
 
     dataset.cache_dir = _resolve_recording_cache_dir(cfg)
     dataset.cache_dir.mkdir(parents=True, exist_ok=True)
diff --git a/tests/unit/ml/test_train.py b/tests/unit/ml/test_train.py
index 80450ae4..b0c11abf 100644
--- a/tests/unit/ml/test_train.py
+++ b/tests/unit/ml/test_train.py
@@ -1257,18 +1257,9 @@ class TestMain:
             (
                 {
                     "algorithm_id": "test-algorithm-id",
-                    "dataset_id": None,
                     "dataset_name": None,
                 },
-                "Either 'dataset_id' or 'dataset_name' must be provided",
-            ),
-            (
-                {
-                    "algorithm_id": "test-algorithm-id",
-                    "dataset_id": "test-dataset-id",
-                    "dataset_name": "test-dataset-name",
-                },
-                "Both 'dataset_id' and 'dataset_name' are provided",
+                "'dataset_name' must be provided",
             ),
         ],
     )
@@ -1277,7 +1268,7 @@ def test_main_raises_validation_errors_for_invalid_configurations(
     ):
         base_cfg = {
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
+            "dataset_name": "test-dataset-name",
             "local_output_dir": "/tmp/test",
             "batch_size": 8,
             "input_robot_data_spec": INPUT_ROBOT_DATA_SPEC,
@@ -1299,7 +1290,6 @@ def test_main_loads_dataset_by_name_when_dataset_name_provided(
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": None,
             "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
@@ -1326,8 +1316,7 @@ def test_main_sets_organization_when_org_id_provided(
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": "test-org-id",
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1346,7 +1335,7 @@ def test_main_sets_organization_when_org_id_provided(
         main(cfg)
 
         setup.mock_set_organization.assert_called_once_with("test-org-id")
-        setup.mock_get_dataset.assert_called_once_with(id="test-dataset-id")
+        setup.mock_get_dataset.assert_called_once_with(name="test-dataset-name")
 
     def test_main_uses_algorithm_config_when_algorithm_provided_instead_of_algorithm_id(
         self, monkeypatch, temp_output_dir
@@ -1356,8 +1345,7 @@ def test_main_uses_algorithm_config_when_algorithm_provided_instead_of_algorithm
                 "_target_": "tests.unit.ml.test_train.mock_model_class",
             },
             "algorithm_id": None,
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1381,8 +1369,7 @@ def test_main_uses_default_device_when_device_is_none(
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1408,8 +1395,7 @@ def test_main_uses_explicit_device_when_device_is_provided(
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": "cuda:1",
             "local_output_dir": str(temp_output_dir),
@@ -1437,8 +1423,7 @@ def test_main_uses_provided_batch_size_when_not_auto(
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1464,8 +1449,7 @@ def test_main_loads_algorithm_by_id_when_algorithm_not_in_cfg_but_algorithm_id_p
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1492,39 +1476,12 @@ def test_main_loads_algorithm_by_id_when_algorithm_not_in_cfg_but_algorithm_id_p
             extract_dir=expected_extract_dir
         )
 
-    def test_main_loads_dataset_by_id_when_dataset_id_provided_but_dataset_name_none(
-        self, monkeypatch, temp_output_dir
-    ):
-        cfg = OmegaConf.create({
-            "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
-            "org_id": None,
-            "device": None,
-            "local_output_dir": str(temp_output_dir),
-            "batch_size": 8,
-            "input_robot_data_spec": INPUT_ROBOT_DATA_SPEC,
-            "output_robot_data_spec": OUTPUT_ROBOT_DATA_SPEC,
-            "output_prediction_horizon": 5,
-            "frequency": 30,
-            "algorithm_params": None,
-            "max_prefetch_workers": 4,
-        })
-
-        setup = MainTestSetup(monkeypatch)
-        setup.setup_mocks()
-
-        main(cfg)
-
-        setup.mock_get_dataset.assert_called_once_with(id="test-dataset-id")
-
     def test_main_converts_string_batch_size_to_int_when_not_auto(
         self, monkeypatch, temp_output_dir
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1557,8 +1514,7 @@ def test_main_uses_mp_spawn_for_distributed_training_when_world_size_greater_tha
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1597,8 +1553,7 @@ def test_main_uses_mp_spawn_for_distributed_training_when_world_size_greater_tha
     def test_main_calls_setup_logging(self, monkeypatch, temp_output_dir):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
            "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1626,8 +1581,7 @@ def test_main_saves_local_metadata_for_local_runs(
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": "test-org",
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1649,7 +1603,7 @@ def test_main_saves_local_metadata_for_local_runs(
         assert metadata_path.exists()
         metadata = json.loads(metadata_path.read_text())
         assert metadata["algorithm"] == "test-algorithm"
-        assert metadata["dataset_id"] == "test-dataset-id"
+        assert metadata["dataset_name"] == "test-dataset-name"
         assert metadata["status"] == "RUNNING"
         assert "JOINT_POSITIONS" in metadata["input_robot_data_spec"]["robot-id-1"]
 
@@ -1658,8 +1612,7 @@ def test_main_calls_dataset_synchronize_with_correct_parameters(
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1694,8 +1647,7 @@ def test_main_calls_dataset_synchronize_with_correct_parameters(
     def test_main_uses_default_recording_cache_dir(self, monkeypatch, temp_output_dir):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1720,8 +1672,7 @@ def test_main_uses_custom_recording_cache_dir(self, monkeypatch, temp_output_dir
         custom_cache_dir = temp_output_dir / "custom-recording-cache"
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
@@ -1747,8 +1698,7 @@ def test_main_uses_autotuning_when_batch_size_is_auto(
     ):
         cfg = OmegaConf.create({
             "algorithm_id": "test-algorithm-id",
-            "dataset_id": "test-dataset-id",
-            "dataset_name": None,
+            "dataset_name": "test-dataset-name",
             "org_id": None,
             "device": None,
             "local_output_dir": str(temp_output_dir),
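For reference, a minimal sketch of the simplified dataset-loading flow introduced above. The helper name `load_dataset`, the example dataset name, and the `import neuracore as nc` alias are assumptions for illustration; `nc.login`, `nc.set_organization`, and `nc.get_dataset(name=...)` are the calls shown in the train.py hunk.

from omegaconf import DictConfig, OmegaConf

import neuracore as nc  # assumed import alias; train.py refers to the client as `nc`


def load_dataset(cfg: DictConfig):
    # The dataset is now selected by name only; dataset_id is no longer accepted.
    if cfg.dataset_name is None:
        raise ValueError("'dataset_name' must be provided.")

    nc.login()
    if cfg.org_id is not None:
        nc.set_organization(cfg.org_id)

    return nc.get_dataset(name=cfg.dataset_name)


# Hypothetical usage with an OmegaConf config mirroring config.yaml:
cfg = OmegaConf.create({"dataset_name": "my-robot-demos", "org_id": None})
dataset = load_dataset(cfg)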