From 0905dd1d8d42bfd23c8de44a0c3dcc2b54f620fd Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Wed, 9 Jul 2025 16:46:46 -0700 Subject: [PATCH] kpd: add optional mirror support We always clone a full repository. This is counterproductive and wasteful. Allow users to specify that they are using kpd to help test patches for a git tree for which we should expect a mirror under a target mirror path. We simply check for a mirror of your git tree and, if present, we use it. So for example, all kdevops enterprise deployments can easily profit from this, as kdevops has support to mirror all target git trees it supports under /mirror/ through an NFS export for clients. And so small, thin guests can be used for kpd instances, which can leverage this NFS export. This allows kpd to be run on smaller guests with less storage needs. This should allow more than one kpd instance to run on small guests too. Generated-by: Claude AI Signed-off-by: Luis Chamberlain Signed-off-by: Daniel Gomez --- configs/kpd.json | 9 ++-- kernel_patches_daemon/branch_worker.py | 24 +++++++++- kernel_patches_daemon/config.py | 4 ++ kernel_patches_daemon/github_sync.py | 2 + tests/test_branch_worker.py | 66 ++++++++++++++++++++++++++ tests/test_config.py | 11 +++++ tests/test_github_sync.py | 14 ++++++ 7 files changed, 126 insertions(+), 4 deletions(-) diff --git a/configs/kpd.json b/configs/kpd.json index 13fcb92..3c39ed7 100644 --- a/configs/kpd.json +++ b/configs/kpd.json @@ -30,7 +30,8 @@ "upstream_branch": "master", "ci_repo": "https://github.com/kernel-patches/vmtest.git", "ci_branch": "master", - "github_oauth_token": "" + "github_oauth_token": "", + "mirror_fallback_repo": "linux.git" }, "bpf": { "repo": "https://github.com/kernel-patches/bpf", @@ -38,8 +39,10 @@ "upstream_branch": "master", "ci_repo": "https://github.com/kernel-patches/vmtest.git", "ci_branch": "master", - "github_oauth_token": "" + "github_oauth_token": "", + "mirror_fallback_repo": "linux.git" } }, - "base_directory": "/tmp/repos" +
"base_directory": "/tmp/repos", + "mirror_dir": "/mirror/" } diff --git a/kernel_patches_daemon/branch_worker.py b/kernel_patches_daemon/branch_worker.py index 6073046..5f237b4 100644 --- a/kernel_patches_daemon/branch_worker.py +++ b/kernel_patches_daemon/branch_worker.py @@ -664,6 +664,8 @@ def __init__( app_auth: Optional[Auth.AppInstallationAuth] = None, email: Optional[EmailConfig] = None, http_retries: Optional[int] = None, + mirror_fallback_repo: Optional[str] = None, + mirror_dir: Optional[str] = None, ) -> None: super().__init__( repo_url=repo_url, @@ -676,6 +678,8 @@ def __init__( self.email_config = email self.log_extractor = log_extractor + self.mirror_dir = mirror_dir + self.mirror_fallback_repo = mirror_fallback_repo self.ci_repo_url = ci_repo_url self.ci_repo_dir = _uniq_tmp_folder(ci_repo_url, ci_branch, base_directory) self.ci_branch = ci_branch @@ -810,9 +814,27 @@ def do_sync(self) -> None: def full_sync(self, path: str, url: str, branch: str) -> git.Repo: logging.info(f"Doing full clone from {redact_url(url)}, branch: {branch}") + multi_opts: Optional[List[str]] = None + if self.mirror_dir: + upstream_name = os.path.basename(self.upstream_url) + reference_path = os.path.join(self.mirror_dir, upstream_name) + + # If primary mirror doesn't exist, try fallback path + if not os.path.exists(reference_path) and self.mirror_fallback_repo: + fallback_path = os.path.join(self.mirror_dir, self.mirror_fallback_repo) + if os.path.exists(fallback_path): + reference_path = fallback_path + + # Use --reference-if-able when mirror path exists + if os.path.exists(reference_path): + multi_opts = ["--reference-if-able", reference_path] + with HistogramMetricTimer(git_clone_duration, {"branch": branch}): shutil.rmtree(path, ignore_errors=True) - repo = git.Repo.clone_from(url, path) + if multi_opts: + repo = git.Repo.clone_from(url, path, multi_options=multi_opts) + else: + repo = git.Repo.clone_from(url, path) _reset_repo(repo, f"origin/{branch}") 
git_clone_counter.add(1, {"branch": branch}) diff --git a/kernel_patches_daemon/config.py b/kernel_patches_daemon/config.py index 2a70ad1..6a7b4d2 100644 --- a/kernel_patches_daemon/config.py +++ b/kernel_patches_daemon/config.py @@ -83,6 +83,7 @@ class BranchConfig: ci_branch: str github_oauth_token: Optional[str] github_app_auth: Optional[GithubAppAuthConfig] + mirror_fallback_repo: Optional[str] = None @classmethod def from_json(cls, json: Dict) -> "BranchConfig": @@ -101,6 +102,7 @@ def from_json(cls, json: Dict) -> "BranchConfig": ci_branch=json["ci_branch"], github_oauth_token=json.get("github_oauth_token", None), github_app_auth=github_app_auth_config, + mirror_fallback_repo=json.get("mirror_fallback_repo", None), ) @@ -200,6 +202,7 @@ class KPDConfig: branches: Dict[str, BranchConfig] tag_to_branch_mapping: Dict[str, List[str]] base_directory: str + mirror_dir: Optional[str] = None @classmethod def from_json(cls, json: Dict) -> "KPDConfig": @@ -232,6 +235,7 @@ def from_json(cls, json: Dict) -> "KPDConfig": for name, json_config in json["branches"].items() }, base_directory=json["base_directory"], + mirror_dir=json.get("mirror_dir"), ) @classmethod diff --git a/kernel_patches_daemon/github_sync.py b/kernel_patches_daemon/github_sync.py index f0fe471..fa2e22b 100644 --- a/kernel_patches_daemon/github_sync.py +++ b/kernel_patches_daemon/github_sync.py @@ -111,6 +111,8 @@ def __init__( ci_branch=branch_config.ci_branch, log_extractor=_log_extractor_from_project(kpd_config.patchwork.project), base_directory=kpd_config.base_directory, + mirror_dir=kpd_config.mirror_dir, + mirror_fallback_repo=branch_config.mirror_fallback_repo, http_retries=http_retries, github_oauth_token=branch_config.github_oauth_token, app_auth=github_app_auth_from_branch_config(branch_config), diff --git a/tests/test_branch_worker.py b/tests/test_branch_worker.py index 2c8886e..5c1b36e 100644 --- a/tests/test_branch_worker.py +++ b/tests/test_branch_worker.py @@ -79,6 +79,7 @@ 
TEST_CI_REPO_URL = f"https://user:pass@127.0.0.1/ci-org/{TEST_CI_REPO}" TEST_CI_BRANCH = "test_ci_branch" TEST_BASE_DIRECTORY = "/repos" +TEST_MIRROR_DIRECTORY = "/mirror" TEST_BRANCH = "test-branch" TEST_CONFIG: Dict[str, Any] = { "version": 2, @@ -135,6 +136,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: "ci_branch": TEST_CI_BRANCH, "log_extractor": DefaultGithubLogExtractor(), "base_directory": TEST_BASE_DIRECTORY, + "mirror_dir": None, + "mirror_fallback_repo": None, } presets.update(kwargs) @@ -482,6 +485,69 @@ def test_fetch_repo_path_exists_git_exception(self) -> None: self._bw.fetch_repo(*fetch_params) fr.assert_called_once_with(*fetch_params) + def test_full_sync_with_mirror_dir(self) -> None: + bw = BranchWorkerMock(mirror_dir=TEST_MIRROR_DIRECTORY) + mirror_path = os.path.join( + TEST_MIRROR_DIRECTORY, os.path.basename(TEST_UPSTREAM_REPO_URL) + ) + with ( + patch("kernel_patches_daemon.branch_worker.os.path.exists") as exists, + patch("kernel_patches_daemon.branch_worker.shutil.rmtree") as rm, + ): + exists.side_effect = lambda p: p == mirror_path + bw.upstream_url = TEST_UPSTREAM_REPO_URL + bw.full_sync("somepath", "giturl", "branch") + self._git_repo_mock.clone_from.assert_called_once_with( + "giturl", + "somepath", + multi_options=["--reference-if-able", mirror_path], + ) + + def test_full_sync_with_mirror_fallback_repo(self) -> None: + fallback_repo = "linux.git" + bw = BranchWorkerMock( + mirror_dir=TEST_MIRROR_DIRECTORY, mirror_fallback_repo=fallback_repo + ) + primary_mirror_path = os.path.join( + TEST_MIRROR_DIRECTORY, os.path.basename(TEST_UPSTREAM_REPO_URL) + ) + fallback_mirror_path = os.path.join(TEST_MIRROR_DIRECTORY, fallback_repo) + with ( + patch("kernel_patches_daemon.branch_worker.os.path.exists") as exists, + patch("kernel_patches_daemon.branch_worker.shutil.rmtree") as rm, + ): + # Primary mirror doesn't exist, but fallback does + exists.side_effect = lambda p: p == fallback_mirror_path + bw.upstream_url = 
TEST_UPSTREAM_REPO_URL + bw.full_sync("somepath", "giturl", "branch") + # Should use fallback mirror path for reference + self._git_repo_mock.clone_from.assert_called_once_with( + "giturl", + "somepath", + multi_options=["--reference-if-able", fallback_mirror_path], + ) + + def test_full_sync_without_mirror_fallback_repo(self) -> None: + bw = BranchWorkerMock( + mirror_dir=TEST_MIRROR_DIRECTORY, mirror_fallback_repo=None + ) + mirror_path = os.path.join( + TEST_MIRROR_DIRECTORY, os.path.basename(TEST_UPSTREAM_REPO_URL) + ) + with ( + patch("kernel_patches_daemon.branch_worker.os.path.exists") as exists, + patch("kernel_patches_daemon.branch_worker.shutil.rmtree") as rm, + ): + # Mirror doesn't exist, and no fallback repo specified - should not use reference + exists.side_effect = lambda p: False + bw.upstream_url = TEST_UPSTREAM_REPO_URL + bw.full_sync("somepath", "giturl", "branch") + # Should clone without reference + self._git_repo_mock.clone_from.assert_called_once_with( + "giturl", + "somepath", + ) + def test_expire_branches(self) -> None: """Only the branch that matches pattern and is expired should be deleted""" not_expired_time = datetime.fromtimestamp(3 * BRANCH_TTL) diff --git a/tests/test_config.py b/tests/test_config.py index 7b28ae3..5795d84 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -215,8 +215,19 @@ def test_valid(self) -> None: ci_branch="ci_branch", github_app_auth=None, github_oauth_token="TEST_OAUTH_TOKEN", + mirror_fallback_repo=None, ), }, base_directory="/repos", + mirror_dir=None, ) self.assertEqual(config, expected_config) + + def test_branch_mirror_fallback_repo(self) -> None: + kpd_config_json = read_fixture("fixtures/kpd_config.json") + kpd_config_json["branches"]["oauth"]["mirror_fallback_repo"] = "linux.git" + + with patch("builtins.open", mock_open(read_data="TEST_KEY_FILE_CONTENT")): + config = KPDConfig.from_json(kpd_config_json) + + self.assertEqual(config.branches["oauth"].mirror_fallback_repo, "linux.git") 
diff --git a/tests/test_github_sync.py b/tests/test_github_sync.py index a9f36d0..640323a 100644 --- a/tests/test_github_sync.py +++ b/tests/test_github_sync.py @@ -127,6 +127,20 @@ class TestCase: gh.workers[TEST_BRANCH].ci_repo_dir.startswith(case.prefix), ) + def test_init_with_mirror_dir(self) -> None: + config = copy.copy(TEST_CONFIG) + config["mirror_dir"] = "/mirror" + kpd_config = KPDConfig.from_json(config) + gh = GithubSyncMock(kpd_config=kpd_config) + self.assertEqual("/mirror", gh.workers[TEST_BRANCH].mirror_dir) + + def test_init_with_mirror_fallback_repo(self) -> None: + config = copy.copy(TEST_CONFIG) + config["branches"][TEST_BRANCH]["mirror_fallback_repo"] = "linux.git" + kpd_config = KPDConfig.from_json(config) + gh = GithubSyncMock(kpd_config=kpd_config) + self.assertEqual("linux.git", gh.workers[TEST_BRANCH].mirror_fallback_repo) + def test_close_existing_prs_for_series(self) -> None: matching_pr_mock = MagicMock() matching_pr_mock.title = "matching"