Skip to content

Commit 0074fe7

Browse files
committed
password extraction
1 parent 87b0dbc commit 0074fe7

File tree

4 files changed

+276
-1
lines changed

4 files changed

+276
-1
lines changed

Cargo.toml

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "leakh"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
serde = "1.0"
8+
serde_derive = "1.0"
9+
toml = "0.5"
10+
regex = "1.8"
11+
clap = "4.3.10"
12+
crossbeam = "0.8.4"

README.md

+51-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,52 @@
11
# leakh
2-
Small utility and helper command to handle password leakage files.
2+
3+
leakh is a multi-threaded command line utility and helper tool to handle password leakage files.
4+
5+
It uses regular expressions to extract passwords from `.txt` or `.csv` files. Each file inside the given `directory` is read by a separate thread, which returns the list of all passwords together with a count of how often each one appeared in that file. After all passwords have been extracted, the resulting list is sorted by count and duplicates are removed. Each password is written together with its count to a separate `$(output).stats.csv` file.
6+
7+
## Usage
8+
9+
```bash
10+
Extracts passwords from files
11+
12+
Usage: leakh [OPTIONS] --config <FILE> --directory <DIR> --output <FILE>
13+
14+
Options:
15+
-c, --config <FILE> Specifies the config file
16+
-d, --directory <DIR> Specifies the directory to scan for files
17+
-o, --output <FILE> Specifies the output file location
18+
-v, --verbose Enables verbose output
19+
-h, --help Print help
20+
-V, --version Print version
21+
```
22+
23+
## Configuration
24+
25+
leakh uses a configuration file that follows the `.toml` syntax.
26+
27+
```toml
28+
# Default configuration for all files
29+
[default]
30+
# Regex pattern to extract password (e.g., password is everything after the first ":")
31+
pattern = "^[^:]+:(\\S[^\n]*)"
32+
# Minimum length for passwords to be considered valid
33+
min_length = 6
34+
# List of unwanted strings to filter out using regular expressions
35+
unwanted_strings = [
36+
"imap\\.[^\\s]+",
37+
"smtp\\.[^\\s]+",
38+
"NULL",
39+
"^#file_links.*",
40+
"^lUCKY&quot;=&quot;=STEVEN.*"
41+
]
42+
43+
# Optional specific configurations for individual files
44+
[files]
45+
46+
# Custom configuration for "special_file.txt"
47+
[files."special_file.txt"]
48+
pattern = "\\|\\s*(\\S+)"
49+
unwanted_strings = ["domain\\.com", "test\\.com"]
50+
min_length = 8
51+
52+
```

leakh.config.toml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Default configuration for all files
2+
[default]
3+
# Regex pattern to extract password (e.g., password is everything after the first ":")
4+
pattern = "^[^:]+:(\\S[^\n]*)"
5+
# Minimum length for passwords to be considered valid
6+
min_length = 6
7+
# List of unwanted strings to filter out using regular expressions
8+
unwanted_strings = [
9+
"imap\\.[^\\s]+",
10+
"smtp\\.[^\\s]+",
11+
"NULL",
12+
"^#file_links.*",
13+
"^lUCKY&quot;=&quot;=STEVEN.*"
14+
]
15+
16+
# Optional specific configurations for individual files
17+
[files]
18+
19+
# Custom configuration for "special_file.txt"
20+
# [files."special_file.txt"]
21+
# pattern = "\\|\\s*(\\S+)"
22+
# unwanted_strings = ["domain\\.com", "test\\.com"]
23+
# min_length = 8

src/main.rs

+190
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
use clap::{Arg, Command};
2+
use std::fs::{self, File};
3+
use std::io::{BufReader, BufRead, Write};
4+
use std::path::Path;
5+
use std::collections::HashMap;
6+
use serde_derive::Deserialize;
7+
use regex::Regex;
8+
use crossbeam::channel;
9+
use std::thread;
10+
11+
// Struct for configuration from TOML file
12+
#[derive(Clone, Debug, Deserialize)]
13+
struct Config {
14+
default: FileConfig,
15+
files: Option<HashMap<String, FileConfig>>,
16+
}
17+
18+
#[derive(Clone, Debug, Deserialize)]
19+
struct FileConfig {
20+
pattern: String, // regex pattern to extract password
21+
unwanted_strings: Vec<String>, // list of unwanted strings (as regex patterns)
22+
min_length: usize, // minimum length for passwords
23+
}
24+
25+
fn main() {
26+
// Define command-line arguments using clap
27+
let matches = Command::new("Password Extractor")
28+
.version("1.0")
29+
.author("Your Name <[email protected]>")
30+
.about("Extracts passwords from files")
31+
.arg(
32+
Arg::new("config")
33+
.short('c')
34+
.long("config")
35+
.value_name("FILE")
36+
.help("Specifies the config file")
37+
.required(true),
38+
)
39+
.arg(
40+
Arg::new("directory")
41+
.short('d')
42+
.long("directory")
43+
.value_name("DIR")
44+
.help("Specifies the directory to scan for files")
45+
.required(true),
46+
)
47+
.arg(
48+
Arg::new("output")
49+
.short('o')
50+
.long("output")
51+
.value_name("FILE")
52+
.help("Specifies the output file location")
53+
.required(true),
54+
)
55+
.arg(
56+
Arg::new("verbose")
57+
.short('v')
58+
.long("verbose")
59+
.help("Enables verbose output")
60+
.action(clap::ArgAction::SetTrue),
61+
)
62+
.get_matches();
63+
64+
let config_path = matches.get_one::<String>("config").unwrap();
65+
let directory_path = matches.get_one::<String>("directory").unwrap();
66+
let output_path = matches.get_one::<String>("output").unwrap();
67+
let verbose = *matches.get_one::<bool>("verbose").unwrap();
68+
69+
// Load and parse the configuration file
70+
if verbose {
71+
println!("Loading configuration from: {}", config_path);
72+
}
73+
let config: Config = load_config(config_path);
74+
75+
// Set up a channel to communicate between threads
76+
let (sender, receiver) = channel::unbounded();
77+
78+
// Process the directory
79+
for entry in fs::read_dir(directory_path).expect("Unable to read directory") {
80+
let entry = entry.expect("Failed to read directory entry");
81+
let path = entry.path();
82+
if let Some(extension) = path.extension() {
83+
match extension.to_str() {
84+
Some("txt") | Some("csv") => {
85+
println!("Processing file: {}", path.display());
86+
87+
let c_config = config.clone();
88+
let c_path = path.clone();
89+
let c_sender = sender.clone();
90+
91+
thread::spawn(move || {
92+
let mut local_password_counts: HashMap<String, usize> = HashMap::new();
93+
process_file(&c_path, &c_config, &mut local_password_counts, verbose);
94+
c_sender.send(local_password_counts).expect("Failed to send results from thread");
95+
});
96+
}
97+
_ => {
98+
println!("Ignore file: {}", path.display());
99+
}
100+
}
101+
}
102+
}
103+
104+
// Close the sending side of the channel so the receiver will know when to stop
105+
drop(sender);
106+
107+
// Collect all the results from the threads
108+
let mut password_counts: HashMap<String, usize> = HashMap::new();
109+
for local_counts in receiver {
110+
for (password, count) in local_counts {
111+
*password_counts.entry(password).or_insert(0) += count;
112+
}
113+
}
114+
115+
// Sort passwords by count and write output
116+
let mut sorted_passwords: Vec<(String, usize)> = password_counts.into_iter().collect();
117+
sorted_passwords.sort_by(|a, b| b.1.cmp(&a.1));
118+
write_output(output_path, &sorted_passwords);
119+
120+
println!("Password extraction complete. Output written to: {}", output_path);
121+
}
122+
123+
// Load and parse the configuration file
124+
fn load_config(config_path: &str) -> Config {
125+
let config_data = fs::read_to_string(config_path).expect("Unable to read config file");
126+
toml::from_str(&config_data).expect("Invalid TOML format")
127+
}
128+
129+
// Process a single file based on the configuration
130+
fn process_file(path: &Path, config: &Config, password_counts: &mut HashMap<String, usize>, verbose: bool) {
131+
let file_name = path.file_name().unwrap().to_str().unwrap();
132+
let file_config = config.files.as_ref()
133+
.and_then(|files| files.get(file_name))
134+
.unwrap_or(&config.default);
135+
136+
let pattern = Regex::new(&file_config.pattern).expect("Invalid regex pattern");
137+
138+
let file = File::open(path).expect("Unable to open file");
139+
let reader = BufReader::new(file);
140+
141+
for line in reader.lines() {
142+
let line = line.expect("Unable to read line");
143+
if let Some(password) = extract_password(&line, &pattern, file_config, verbose) {
144+
*password_counts.entry(password).or_insert(0) += 1;
145+
}
146+
}
147+
}
148+
149+
// Extract the password from a line using the given pattern and filters
150+
fn extract_password(line: &str, pattern: &Regex, config: &FileConfig, verbose: bool) -> Option<String> {
151+
if let Some(caps) = pattern.captures(line) {
152+
let password = caps[1].to_string();
153+
154+
// Compile the unwanted strings into regex patterns
155+
for unwanted in &config.unwanted_strings {
156+
let unwanted_pattern = Regex::new(unwanted).expect("Invalid unwanted string pattern");
157+
if unwanted_pattern.is_match(&password) {
158+
if verbose {
159+
println!("Filtered out unwanted password: {}", password);
160+
}
161+
return None;
162+
}
163+
}
164+
165+
// Filter out passwords that are shorter than the minimum length
166+
if password.len() < config.min_length {
167+
if verbose {
168+
println!("Filtered out short password: {}", password);
169+
}
170+
return None;
171+
}
172+
173+
return Some(password);
174+
}
175+
None
176+
}
177+
178+
/// Writes the sorted passwords to `output_path` (one password per line) and
/// writes each password with its count to `<output_path>.stats.csv`.
///
/// Passwords in the stats file are quoted when they contain a comma, a double
/// quote or a line break, so each row always has exactly two CSV fields. The
/// plain password list is written unmodified.
///
/// # Panics
///
/// Panics if either file cannot be created or written.
fn write_output(output_path: &str, sorted_passwords: &[(String, usize)]) {
    // Buffer both files so each line does not cost its own write call.
    let mut file = std::io::BufWriter::new(
        File::create(output_path).expect("Unable to create output file"),
    );
    let mut stats_file = std::io::BufWriter::new(
        File::create(format!("{}.stats.csv", output_path)).expect("Unable to create stats file"),
    );

    // stats file header
    writeln!(stats_file, "Password,Count").expect("Unable to write to stats file");

    for (password, count) in sorted_passwords {
        writeln!(file, "{}", password).expect("Unable to write to output file");
        writeln!(stats_file, "{},{}", csv_field(password), count)
            .expect("Unable to write to stats file");
    }

    // Flush explicitly: a flush performed on drop would swallow write errors.
    file.flush().expect("Unable to write to output file");
    stats_file.flush().expect("Unable to write to stats file");
}

/// Returns `value` as a single CSV field, quoting it and doubling embedded
/// double quotes when it contains a comma, a double quote or a line break.
fn csv_field(value: &str) -> std::borrow::Cow<'_, str> {
    let needs_quoting = value.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));
    if needs_quoting {
        std::borrow::Cow::Owned(format!("\"{}\"", value.replace('"', "\"\"")))
    } else {
        std::borrow::Cow::Borrowed(value)
    }
}

0 commit comments

Comments
 (0)