password extraction

domai-tb · domai-tb · commit 0074fe714533 · 2024-08-15T21:35:17.000+02:00
diff --git a/Cargo.toml b/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "leakh"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+serde = "1.0"
+serde_derive = "1.0"
+toml = "0.5"
+regex = "1.8"
+clap =  "4.3.10"
+crossbeam = "0.8.4"
diff --git a/README.md b/README.md
@@ -1,2 +1,52 @@
 # leakh
-Small utility and helper command to handle password leakage files.
+
+leakh is a multi-threaded command line utility and helper tool to handle password leakage files.
+
+It uses regular expressions to extract passwords from `.txt` or `.csv` files. Each file inside the given `directory` is read by a separate thread that returns the list of all passwords together with a count of how often each one appeared. After all passwords have been extracted, the resulting list is sorted by count and duplicates are removed. Each password is also written with its count to a separate `$(output).stats.csv` file.
+
+## Usage
+
+```bash
+Extracts passwords from files
+
+Usage: leakh [OPTIONS] --config <FILE> --directory <DIR> --output <FILE>
+
+Options:
+  -c, --config <FILE>    Specifies the config file
+  -d, --directory <DIR>  Specifies the directory to scan for files
+  -o, --output <FILE>    Specifies the output file location
+  -v, --verbose          Enables verbose output
+  -h, --help             Print help
+  -V, --version          Print version
+```
+
+## Configuration
+
+leakh uses a configuration file that follows the `.toml` syntax.
+
+```toml
+# Default configuration for all files
+[default]
+# Regex pattern whose first capture group is the password (here: everything after the first ":")
+pattern = "^[^:]+:(\\S[^\n]*)"
+# Minimum length for passwords to be considered valid
+min_length = 6
+# List of unwanted strings to filter out using regular expressions
+unwanted_strings = [
+    "imap\\.[^\\s]+",
+    "smtp\\.[^\\s]+",
+    "NULL",
+    "^#file_links.*",
+    "^lUCKY&quot;=&quot;=STEVEN.*"
+]
+
+# Optional specific configurations for individual files
+[files]
+
+# Custom configuration for "special_file.txt"
+[files."special_file.txt"]
+pattern = "\\|\\s*(\\S+)"
+unwanted_strings = ["domain\\.com", "test\\.com"]
+min_length = 8
+
+```
diff --git a/leakh.config.toml b/leakh.config.toml
@@ -0,0 +1,23 @@
+# Default configuration for all files
+[default]
+# Regex pattern whose first capture group is the password (here: everything after the first ":")
+pattern = "^[^:]+:(\\S[^\n]*)"
+# Minimum length for passwords to be considered valid
+min_length = 6
+# List of unwanted strings to filter out using regular expressions
+unwanted_strings = [
+    "imap\\.[^\\s]+", 
+    "smtp\\.[^\\s]+", 
+    "NULL", 
+    "^#file_links.*",
+    "^lUCKY&quot;=&quot;=STEVEN.*"
+]
+
+# Optional specific configurations for individual files
+[files]
+
+# Custom configuration for "special_file.txt"
+# [files."special_file.txt"]
+# pattern = "\\|\\s*(\\S+)"
+# unwanted_strings = ["domain\\.com", "test\\.com"]
+# min_length = 8
diff --git a/src/main.rs b/src/main.rs
@@ -0,0 +1,190 @@
+use clap::{Arg, Command};
+use std::fs::{self, File};
+use std::io::{BufReader, BufRead, Write};
+use std::path::Path;
+use std::collections::HashMap;
+use serde_derive::Deserialize;
+use regex::Regex;
+use crossbeam::channel;
+use std::thread;
+
+/// Top-level layout of the TOML configuration file.
+#[derive(Debug, Clone, Deserialize)]
+struct Config {
+    /// Settings used for every file that has no entry of its own in `files`.
+    default: FileConfig,
+    /// Optional per-file overrides, keyed by file name.
+    files: Option<HashMap<String, FileConfig>>,
+}
+
+/// Extraction rules applied to a single input file.
+#[derive(Debug, Clone, Deserialize)]
+struct FileConfig {
+    /// Regex pattern whose first capture group yields the password.
+    pattern: String,
+    /// Regex patterns; a password matching any of them is discarded.
+    unwanted_strings: Vec<String>,
+    /// Minimum length a password must have to be kept.
+    min_length: usize,
+}
+
+/// Entry point of the password extractor.
+///
+/// Parses the command line, loads the TOML configuration, spawns one worker thread
+/// per `.txt`/`.csv` file found in the given directory, merges the per-file password
+/// counts and writes them out sorted by descending count.
+///
+/// # Panics
+///
+/// Panics if the configuration cannot be loaded, the directory cannot be read, or
+/// the output files cannot be written.
+fn main() {
+    // Define command-line arguments using clap
+    // NOTE(review): version and author are hard-coded; Cargo.toml declares 0.1.0.
+    let matches = Command::new("Password Extractor")
+        .version("1.0")
+        .author("Your Name <your.email@example.com>")
+        .about("Extracts passwords from files")
+        .arg(
+            Arg::new("config")
+                .short('c')
+                .long("config")
+                .value_name("FILE")
+                .help("Specifies the config file")
+                .required(true),
+        )
+        .arg(
+            Arg::new("directory")
+                .short('d')
+                .long("directory")
+                .value_name("DIR")
+                .help("Specifies the directory to scan for files")
+                .required(true),
+        )
+        .arg(
+            Arg::new("output")
+                .short('o')
+                .long("output")
+                .value_name("FILE")
+                .help("Specifies the output file location")
+                .required(true),
+        )
+        .arg(
+            Arg::new("verbose")
+                .short('v')
+                .long("verbose")
+                .help("Enables verbose output")
+                .action(clap::ArgAction::SetTrue),
+        )
+        .get_matches();
+
+    // The three path arguments are declared `required`, and `SetTrue` always stores
+    // a value for the flag, so these lookups cannot fail after `get_matches`.
+    let config_path = matches.get_one::<String>("config").unwrap();
+    let directory_path = matches.get_one::<String>("directory").unwrap();
+    let output_path = matches.get_one::<String>("output").unwrap();
+    let verbose = *matches.get_one::<bool>("verbose").unwrap();
+
+    // Load and parse the configuration file
+    if verbose {
+        println!("Loading configuration from: {}", config_path);
+    }
+    let config: Config = load_config(config_path);
+
+    // Set up a channel to communicate between threads
+    let (sender, receiver) = channel::unbounded();
+
+    // Handles are kept so that crashed workers can be detected after the merge.
+    let mut workers = Vec::new();
+
+    // Process the directory
+    for entry in fs::read_dir(directory_path).expect("Unable to read directory") {
+        let entry = entry.expect("Failed to read directory entry");
+        let path = entry.path();
+        if let Some(extension) = path.extension() {
+            match extension.to_str() {
+                // Only regular files are read; a directory called `x.txt` is ignored.
+                Some("txt") | Some("csv") if path.is_file() => {
+                    println!("Processing file: {}", path.display());
+
+                    let c_config = config.clone();
+                    let c_sender = sender.clone();
+
+                    workers.push(thread::spawn(move || {
+                        let mut local_password_counts: HashMap<String, usize> = HashMap::new();
+                        process_file(&path, &c_config, &mut local_password_counts, verbose);
+                        c_sender
+                            .send(local_password_counts)
+                            .expect("Failed to send results from thread");
+                    }));
+                }
+                _ => {
+                    println!("Ignore file: {}", path.display());
+                }
+            }
+        }
+    }
+
+    // Close the sending side of the channel so the receiver will know when to stop
+    drop(sender);
+
+    // Collect all the results from the threads
+    let mut password_counts: HashMap<String, usize> = HashMap::new();
+    for local_counts in receiver {
+        for (password, count) in local_counts {
+            *password_counts.entry(password).or_insert(0) += count;
+        }
+    }
+
+    // A worker that panicked never sent its counts; report that instead of silently
+    // presenting an incomplete result as a success.
+    let failed_workers = workers
+        .into_iter()
+        .filter_map(|worker| worker.join().err())
+        .count();
+    if failed_workers > 0 {
+        eprintln!(
+            "Warning: {} file(s) could not be processed; their passwords are missing from the output",
+            failed_workers
+        );
+    }
+
+    // Sort passwords by count (descending) and write output. Ties are broken by the
+    // password itself so the output does not depend on HashMap iteration order.
+    let mut sorted_passwords: Vec<(String, usize)> = password_counts.into_iter().collect();
+    sorted_passwords.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
+    write_output(output_path, &sorted_passwords);
+
+    println!("Password extraction complete. Output written to: {}", output_path);
+}
+
+/// Reads the TOML file at `config_path` and deserializes it into a [`Config`].
+///
+/// # Panics
+///
+/// Panics if the file cannot be read or does not deserialize into a `Config`.
+fn load_config(config_path: &str) -> Config {
+    let raw_toml = match fs::read_to_string(config_path) {
+        Ok(contents) => contents,
+        Err(err) => panic!("Unable to read config file: {:?}", err),
+    };
+
+    match toml::from_str::<Config>(&raw_toml) {
+        Ok(parsed) => parsed,
+        Err(err) => panic!("Invalid TOML format: {:?}", err),
+    }
+}
+
+/// Extracts the passwords of one file and adds their occurrence counts to
+/// `password_counts`.
+///
+/// The file-specific entry of `config.files` is used when one exists for the file's
+/// name; otherwise `config.default` applies. Bytes that are not valid UTF-8 are
+/// replaced with U+FFFD, so one badly encoded line does not abort the whole file.
+///
+/// # Panics
+///
+/// Panics if a configured regex is invalid, if the extraction pattern has no capture
+/// group, or if the file cannot be opened or read.
+fn process_file(path: &Path, config: &Config, password_counts: &mut HashMap<String, usize>, verbose: bool) {
+    // Overrides are keyed by file name. A name that is not valid UTF-8 cannot equal
+    // any TOML key, so it falls back to the defaults.
+    let file_config = path
+        .file_name()
+        .and_then(|name| name.to_str())
+        .and_then(|name| config.files.as_ref()?.get(name))
+        .unwrap_or(&config.default);
+
+    let pattern = Regex::new(&file_config.pattern).expect("Invalid regex pattern");
+    // Group 0 is the whole match, so a usable pattern reports at least two groups.
+    assert!(
+        pattern.captures_len() > 1,
+        "Regex pattern must contain a capture group for the password"
+    );
+
+    // Compile the filters once per file instead of once per matching line.
+    let unwanted: Vec<Regex> = file_config
+        .unwanted_strings
+        .iter()
+        .map(|raw| Regex::new(raw).expect("Invalid unwanted string pattern"))
+        .collect();
+
+    let file = File::open(path).expect("Unable to open file");
+    let mut reader = BufReader::new(file);
+
+    // Lines are read as raw bytes and decoded lossily; the buffer is reused.
+    let mut raw_line = Vec::new();
+    loop {
+        raw_line.clear();
+        let bytes_read = reader
+            .read_until(b'\n', &mut raw_line)
+            .expect("Unable to read line");
+        if bytes_read == 0 {
+            break;
+        }
+
+        // Strip the line terminator ("\n" or "\r\n"), as `BufRead::lines` does.
+        if raw_line.last() == Some(&b'\n') {
+            raw_line.pop();
+            if raw_line.last() == Some(&b'\r') {
+                raw_line.pop();
+            }
+        }
+
+        let line = String::from_utf8_lossy(&raw_line);
+        if let Some(password) = extract_password(&line, &pattern, &unwanted, file_config.min_length, verbose) {
+            *password_counts.entry(password).or_insert(0) += 1;
+        }
+    }
+}
+
+/// Extracts the password from `line` and applies the filters.
+///
+/// Returns `None` when `pattern` does not match, when its first capture group did
+/// not take part in the match, when the password matches any regex in `unwanted`,
+/// or when it has fewer than `min_length` characters.
+fn extract_password(
+    line: &str,
+    pattern: &Regex,
+    unwanted: &[Regex],
+    min_length: usize,
+    verbose: bool,
+) -> Option<String> {
+    let password = pattern.captures(line)?.get(1)?.as_str();
+
+    // Filter out passwords that match one of the unwanted patterns
+    if unwanted.iter().any(|filter| filter.is_match(password)) {
+        if verbose {
+            println!("Filtered out unwanted password: {}", password);
+        }
+        return None;
+    }
+
+    // Filter out passwords that are shorter than the minimum length. Characters are
+    // counted rather than bytes so non-ASCII passwords are measured correctly.
+    if password.chars().count() < min_length {
+        if verbose {
+            println!("Filtered out short password: {}", password);
+        }
+        return None;
+    }
+
+    Some(password.to_string())
+}
+
+/// Writes the passwords, in the given order, to `output_path` (one per line) and
+/// writes a `Password,Count` table to `<output_path>.stats.csv`.
+///
+/// # Panics
+///
+/// Panics if either file cannot be created or written.
+fn write_output(output_path: &str, sorted_passwords: &[(String, usize)]) {
+    // Buffer both files so that each password does not cost a separate write syscall.
+    let mut file = std::io::BufWriter::new(
+        File::create(output_path).expect("Unable to create output file"),
+    );
+    let mut stats_file = std::io::BufWriter::new(
+        File::create(format!("{}.stats.csv", output_path)).expect("Unable to create stats file"),
+    );
+
+    // stats file header
+    writeln!(stats_file, "Password,Count").expect("Unable to write to stats file");
+
+    for (password, count) in sorted_passwords {
+        writeln!(file, "{}", password).expect("Unable to write to output file");
+        writeln!(stats_file, "{},{}", csv_field(password), count)
+            .expect("Unable to write to stats file");
+    }
+
+    // Flush explicitly: errors during the implicit flush on drop would be swallowed.
+    file.flush().expect("Unable to write to output file");
+    stats_file.flush().expect("Unable to write to stats file");
+}
+
+/// Returns `value` as a single CSV field, quoting it (and doubling embedded quotes)
+/// when it contains a comma, a double quote or a line break.
+fn csv_field(value: &str) -> std::borrow::Cow<'_, str> {
+    if value.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r')) {
+        std::borrow::Cow::Owned(format!("\"{}\"", value.replace('"', "\"\"")))
+    } else {
+        std::borrow::Cow::Borrowed(value)
+    }
+}