diff --git a/Cargo.lock b/Cargo.lock index d64089ef8..4bde45310 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1183,6 +1183,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.27" @@ -1290,6 +1299,8 @@ dependencies = [ "reqwest", "serde", "serde_json", + "tempfile", + "walkdir", ] [[package]] @@ -1623,6 +1634,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -1725,6 +1746,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "windows-core" version = "0.52.0" diff --git a/ansible/roles/dev-desktop/tasks/cleanup.yml b/ansible/roles/dev-desktop/tasks/cleanup.yml index 5d77eff5f..b6fe63dbe 100644 --- a/ansible/roles/dev-desktop/tasks/cleanup.yml +++ b/ansible/roles/dev-desktop/tasks/cleanup.yml @@ -1,11 +1,12 @@ --- -- name: Copy cleanup script - template: - src: clean-unused-checkouts.sh - dest: /etc/cron.cleanup_disk_space + +- name: Install clean-unused-checkouts binary + copy: + src: "{{ playbook_dir }}/../../../../setup-deploy-keys/target/release/clean-unused-checkouts" + dest: /usr/local/bin/clean-unused-checkouts owner: root group: root - mode: 0744 + mode: 0755 - name: Set up the cleanup cron job template: diff --git a/ansible/roles/dev-desktop/templates/clean-unused-checkouts.sh b/ansible/roles/dev-desktop/templates/clean-unused-checkouts.sh deleted file mode 100644 index 4e7927ca6..000000000 --- a/ansible/roles/dev-desktop/templates/clean-unused-checkouts.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env bash - -# -# {{ ansible_managed }} -# - -# Clean up unused checkouts -# -# This script is used to find old checkouts that are no longer in use. Given the -# size of the build directory, regularly cleaning them up can save significant -# amounts of disk space. - -# Enable strict mode for Bash -# http://redsymbol.net/articles/unofficial-bash-strict-mode/ -set -euo pipefail -IFS=$'\n\t' - -# Print directories and their size instead of deleting them -dry_run=false - -# Default to search for checkouts older than 60 days -time="60" - -while [[ $# -gt 0 ]]; do - case $1 in - --dry-run) - dry_run=true - shift # past argument - ;; - -t|--time) - time="${2}" - shift # past argument - shift # past value - ;; - -*) - echo "Unknown option $1" - exit 1 - ;; - esac -done - -# Find all build or target directories created by users -# -# This command combines (`-o`) two different conditions to find all build and -# target directories that users have created. Within each home directory, we -# recursively look for directories that either have a file named `x.py` and a -# directory named `build`, or a file named `Cargo.toml` and a directory named -# `target`. -all_cache_directories=$(find /home -type d \( -name build -execdir test -f "x.py" \; -o -name target -execdir test -f "Cargo.toml" \; \) -print | sort | uniq) - -# For each checkout, we want to determine if the user has been working on it -# within the `$time` number of days. -unused_cache_directories=$(for directory in $all_cache_directories; do - project=$(dirname "${directory}") - - # Find all directories with files that have been modified less than $time days ago - modified=$(find "${project}" -type f -mtime -"${time}" -printf '%h\n' | xargs -r dirname | sort | uniq) - - # If no files have been modified in the last 90 days, then the project is - # considered old. - if [[ -z "${modified}" ]]; then - echo "${directory}" - fi -done) - -# Delete the build directories in the unused checkouts -for directory in $unused_cache_directories; do - if [[ "${dry_run}" == true ]]; then - du -sh "${directory}" - else - echo "Deleting ${directory}" - rm -rf "${directory}" - fi -done diff --git a/ansible/roles/dev-desktop/templates/cron_cleanup_disk_space.j2 b/ansible/roles/dev-desktop/templates/cron_cleanup_disk_space.j2 index e17b44264..7669dbd7f 100644 --- a/ansible/roles/dev-desktop/templates/cron_cleanup_disk_space.j2 +++ b/ansible/roles/dev-desktop/templates/cron_cleanup_disk_space.j2 @@ -1,2 +1,2 @@ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin -0 0 * * * root /etc/cron.cleanup_disk_space +0 0 * * * root /usr/local/bin/clean-unused-checkouts diff --git a/setup-deploy-keys/Cargo.toml b/setup-deploy-keys/Cargo.toml index 5d60e99e4..01054e009 100644 --- a/setup-deploy-keys/Cargo.toml +++ b/setup-deploy-keys/Cargo.toml @@ -8,6 +8,10 @@ edition = "2021" name = "deploy" path = "src/deploy.rs" +[[bin]] +name = "clean-unused-checkouts" +path = "src/clean_unused_checkouts.rs" + [dependencies] clap = { version = "4", features = ["derive", "env"] } chrono = "0.4" @@ -15,3 +19,8 @@ reqwest = { version = "0.11", features = ["blocking", "json"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" base64 = "0.13" +walkdir = "2.4" + +[dev-dependencies] +tempfile = "3.10" + diff --git a/setup-deploy-keys/src/clean_unused_checkouts.rs b/setup-deploy-keys/src/clean_unused_checkouts.rs new file mode 100644 index 000000000..be8cfd101 --- /dev/null +++ b/setup-deploy-keys/src/clean_unused_checkouts.rs @@ -0,0 +1,235 @@ +use clap::Parser; +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; +use std::time::{Duration, SystemTime}; +use walkdir::WalkDir; + +/// Clean up unused projects +/// +/// This CLI finds all projects that users have checked out on the dev-desktops and deletes +/// temporary files if the project has not been modified in a certain number of days. +/// +/// Specifically, the CLI will look for checkouts of `rust-lang/rust` and delete the `build` +/// directory. And it will find unused crates and delete the `target` directory. +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Cli { + /// Only print directories and their size, do not delete + #[arg(long)] + dry_run: bool, + + /// The root directory to search for projects + #[arg(short, long = "root-directory", default_value = "/home")] + root_directory: PathBuf, + + /// The maximum age of a project in days + /// + /// The CLI will only clean projects that have not been updated in the last `max-age` days. + #[arg(short, long = "max-age", default_value_t = 60)] + max_age: u32, +} + +fn find_cache_dirs(home: &Path) -> io::Result> { + // Use WalkDir to perform a safe recursive traversal. By default WalkDir does + // not follow symlinks which prevents accidental symlink loops. + let mut result = Vec::new(); + + for entry in WalkDir::new(home).follow_links(false) { + let entry = match entry { + Ok(e) => e, + Err(_) => continue, // skip entries we can't read + }; + + let path = entry.path(); + if !entry.file_type().is_dir() { + continue; + } + + // We're interested in artifact dirs named `build` (python) or + // `target` (Rust). When we find one, check the parent directory for the + // expected marker files (`x.py` for python projects, `Cargo.toml` for + // Rust) before including the artifact directory. + if let Some(name) = path.file_name().and_then(|n| n.to_str()) { + match name { + "build" => { + if path + .parent() + .map(|p| p.join("x.py").is_file()) + .unwrap_or(false) + { + result.push(path.to_path_buf()); + } + } + "target" => { + if path + .parent() + .map(|p| p.join("Cargo.toml").is_file()) + .unwrap_or(false) + { + result.push(path.to_path_buf()); + } + } + _ => {} + } + } + } + + result.sort(); + result.dedup(); + Ok(result) +} + +fn is_unused(dir: &Path, days: u64) -> io::Result { + let cutoff = SystemTime::now() - Duration::from_secs(days * 24 * 60 * 60); + let mut recent = false; + for entry in walkdir::WalkDir::new(dir.parent().unwrap_or(dir)) { + let entry = entry?; + if let Ok(meta) = entry.metadata() { + if let Ok(modified) = meta.modified() { + if modified > cutoff { + recent = true; + break; + } + } + } + } + Ok(!recent) +} + +fn print_or_delete(dir: &Path, dry_run: bool) { + if dry_run { + let size = get_dir_size(dir); + match size { + Ok(bytes) => { + println!( + "{:.2} MiB\t{}", + bytes as f64 / 1024.0 / 1024.0, + dir.display() + ); + } + Err(_) => { + println!("{}", dir.display()); + } + } + } else { + println!("Deleting {}", dir.display()); + let _ = fs::remove_dir_all(dir); + } +} + +fn get_dir_size(path: &Path) -> io::Result { + let mut size = 0u64; + for entry in walkdir::WalkDir::new(path) { + let entry = entry?; + if entry.file_type().is_file() { + size += entry.metadata()?.len(); + } + } + Ok(size) +} + +fn main() -> io::Result<()> { + let cli = Cli::parse(); + let cache_dirs = find_cache_dirs(&cli.root_directory)?; + for dir in cache_dirs { + if is_unused(&dir, cli.max_age as u64)? { + print_or_delete(&dir, cli.dry_run); + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::{self, File}; + use std::io::Write; + use tempfile::tempdir; + + #[test] + fn test_get_dir_size_empty() { + let dir = tempdir().unwrap(); + assert_eq!(get_dir_size(dir.path()).unwrap(), 0); + } + + #[test] + fn test_get_dir_size_with_files() { + let dir = tempdir().unwrap(); + let file_path = dir.path().join("file1"); + let mut file = File::create(&file_path).unwrap(); + file.write_all(&[1u8; 1024]).unwrap(); + assert_eq!(get_dir_size(dir.path()).unwrap(), 1024); + } + + #[test] + fn test_is_project_dir_xpy_build() { + let root = tempdir().unwrap(); + let proj = root.path().join("proj_python"); + fs::create_dir_all(&proj).unwrap(); + File::create(proj.join("x.py")).unwrap(); + fs::create_dir(proj.join("build")).unwrap(); + + let found = find_cache_dirs(root.path()).unwrap(); + assert!(found.iter().any(|p| p.ends_with("proj_python/build"))); + } + + #[test] + fn test_is_project_dir_cargo_target() { + let root = tempdir().unwrap(); + let proj = root.path().join("proj_rust"); + fs::create_dir_all(&proj).unwrap(); + File::create(proj.join("Cargo.toml")).unwrap(); + fs::create_dir(proj.join("target")).unwrap(); + + let found = find_cache_dirs(root.path()).unwrap(); + assert!(found.iter().any(|p| p.ends_with("proj_rust/target"))); + } + + #[test] + fn test_is_project_dir_false() { + let root = tempdir().unwrap(); + let proj = root.path().join("proj_none"); + fs::create_dir_all(&proj).unwrap(); + // No marker files or artifact dirs + let found = find_cache_dirs(root.path()).unwrap(); + assert!(found.is_empty()); + } + + #[test] + fn test_print_or_delete_flow() { + let root = tempdir().unwrap(); + let proj = root.path().join("proj_print"); + let build = proj.join("build"); + fs::create_dir_all(&build).unwrap(); + let mut file = File::create(build.join("file.bin")).unwrap(); + file.write_all(&[0u8; 512]).unwrap(); + + // dry-run should not remove + print_or_delete(&build, true); + assert!(build.exists()); + + // actual delete should remove + print_or_delete(&build, false); + assert!(!build.exists()); + } + + #[cfg(unix)] + #[test] + fn test_find_cache_dirs_symlink_loop() { + use std::os::unix::fs::symlink; + + let root = tempdir().unwrap(); + let proj = root.path().join("proj_rust_loop"); + fs::create_dir_all(&proj).unwrap(); + File::create(proj.join("Cargo.toml")).unwrap(); + fs::create_dir(proj.join("target")).unwrap(); + + // create a symlink that points back to root (possible loop) + let loop_link = root.path().join("loop"); + let _ = symlink(root.path(), &loop_link); + + let found = find_cache_dirs(root.path()).unwrap(); + assert!(found.iter().any(|p| p.ends_with("proj_rust_loop/target"))); + } +}