diff --git a/data/testcorpus.txt b/data/testcorpus.txt
index 7864444..bec2adb 100644
--- a/data/testcorpus.txt
+++ b/data/testcorpus.txt
@@ -78,7 +78,7 @@
     M-1206   .   cama(.)a
     M-403   .   a(.)tara
 023-as-spider:
-    M-955   j(..)a
+    M-955  .  j(..)a
     .   .   r(..)na
     .   .   (..)na
     L-106   23   (..)a
diff --git a/scripts/prove.pl b/scripts/prove.pl
index 1d7ec90..405bf71 100755
--- a/scripts/prove.pl
+++ b/scripts/prove.pl
@@ -1,46 +1,161 @@
-#!/usr/bin/perl -l
-use List::Util 'first';
-use Class::Struct;
+#!/usr/bin/env perl
 
-open(my $fh, '<', "mw.txt")
-   or die("Can't open mw.txt: $!\n");
+# Enable important safety and quality of life features
+use strict;      # Requires all variables to be declared
+use warnings;    # Enables helpful warning messages
+use utf8;        # Enable UTF-8 in source code
+use open qw(:std :utf8);  # Enable UTF-8 for input/output
+use feature 'say';  # Enables 'say' which is like print but adds newline automatically
 
-my @dict = <$fh>;
-chomp @dict;
-close $fh;
+# Import required modules
+use List::Util 'first'; # Provides useful list manipulation functions 
+use Getopt::Long;    # For command line argument processing
+use Pod::Usage;      # For documentation handling
+use Data::Dumper;  # Helpful for debugging - prints data structures
 
-$which = $ARGV[0];
+=head1 NAME
 
+ corpus_processor.pl - Process textual corpus data with named pattern matching
+
+=head1 SYNOPSIS
+
+    ./corpus_processor.pl [options] [target_character]
+
+    Options:
+        --dict-file=FILE    Dictionary file path (default: mw.txt)
+        --corpus-file=FILE  Corpus file path (default: ./data/testcorpus.txt)
+        --help             Show this help message
+        --man              Show full documentation
+
+    Examples:
+        ./corpus_processor.pl                    # Use default files and Process all characters
+        ./corpus_processor.pl A                  # Only process entries starting with 'A'
+        ./corpus_processor.pl --dict-file=custom.txt   # Use custom dictionary
+        ./corpus_processor.pl --corpus-file=new.txt    # Use custom corpus file
+        ./corpus_processor.pl --help             # Show help
+        ./corpus_processor.pl --man              # Show full manual
+
+
+=head1 DESCRIPTION
+
+    This script reads inscription data from a corpus file and matches patterns against 
+    a dictionary. It supports regex patterns and can filter processing by character.
+
+=cut
+
+# Global variables
 my %VALUES = ();
+my $target_char;    # Optional filter to only process entries starting with specific character
+my $dict_file;      # Dictionary file path
+my $corpus_file;    # Corpus file path
+
+# Process command line arguments
+sub process_args {
+    # Set defaults
+    $dict_file = "./mw.txt";
+    $corpus_file = "./data/testcorpus.txt";
+    
+    # Get options
+    GetOptions(
+        "dict-file=s"   => \$dict_file,
+        "corpus-file=s" => \$corpus_file,
+        "help"          => sub { pod2usage(1) },
+        "man"           => sub { pod2usage(-exitstatus => 0, -verbose => 2) }
+    ) or pod2usage(2);
+
+    # Get optional target character
+    $target_char = shift @ARGV if @ARGV;
+}
+
+# Main execution block
+{
+    process_args();
+    my $dictionary = load_dictionary($dict_file);
+    my $readtexts = &read_corpus($corpus_file);
+    my @texts = @$readtexts;
+
+    for my $text (@texts) {
+        &process_text($text, $dictionary, $target_char);
+    }
+
+    # Print results in sorted order
+    print_results();
+}
+
+
+=head2 load_dictionary
 
-my $readtexts = &readtexts();
-my @texts = @$readtexts;
+Loads the dictionary file containing words to match against.
 
-for my $text (@texts) {
-    &resolve($text);
+Parameters:
+    $dict_filename - Path to the dictionary file
+
+Returns:
+    Array reference containing dictionary words
+
+Dies with error message if file cannot be opened.
+
+=cut
+
+sub load_dictionary {
+    my ($dict_filename) = @_;
+    open(my $fh, '<', $dict_filename) or die "Cannot open $dict_filename: $!\n";
+    my @dict = <$fh>;
+    chomp(@dict);
+    close $fh;
+    return \@dict;
 }
 
-for(sort keys %VALUES) {
-    print "$_ = @{$VALUES{$_}}"
+
+=head2 print_results
+
+Prints the final results in sorted order by character.
+
+Format:
+    character = option1 option2 option3 ...
+
+=cut
+
+sub print_results {
+    for my $char (sort keys %VALUES) {
+        say "$char = @{$VALUES{$char}}";
+    }
 }
 
-sub resolve {
-    my ($textset) = @_;
-    $char = shift @$textset;
-    if($which) { next unless($which && $char =~/^$which/);}
+=head2 process_text
+
+Processes a single text entry, matching patterns and storing results.
+
+Parameters:
+    $text_set   - Reference to array containing glyph data
+    $dictionary - Reference to array of dictionary words
+    target_char - Process only those entries starting with specific character
+
+The function matches patterns against dictionary words and stores
+results in the global %values hash.
+
+=cut
+
+
+sub process_text {
+    my ($textset, $dictionary, $target_char) = @_;
+    my $char = shift @$textset;
+    if($target_char) { next unless($target_char && $char =~/^$target_char/);}
     my @options = ();
     for my $lines (@$textset) {
       for my $text (@$lines) {
         my ($line) = @$text; 
         my ($cisi, $inscr, $re) = @$text;
-        print "$char $cisi INSCR $inscr REGEX $re";
+        say "$char $cisi INSCR $inscr REGEX $re";
 
         if(scalar @options > 0) {
             my $optset = "(". join("|", @options) . ")";
             $re =~ s/\(\..*?\)/$optset/;
         }
-        my @matches = grep { /^$re$/ } @dict;
-        print "$inscr $re matches @matches[0..9]";
+        my @matches = grep { /^$re$/ } @$dictionary;
+        # Get up to 10 matches safely
+        my @display_matches = $#matches > 9 ? @matches[0..9] : @matches;
+        say "$inscr $re matches @display_matches" . (" " x (9 - $#display_matches));  # Padding to match original ;
         @options = ();
         for(@matches) {
             if (/^$re$/) { push @options, "$1" }
@@ -62,9 +177,30 @@ sub resolve {
     $VALUES{$char} = \@options;
 }
 
-sub readtexts {
-    print "Reading corpus...";
-    open(my $fh, '<', "./data/testcorpus.txt") || die "Cannot open data/testcorpus.txt";
+
+=head2 read_corpus
+
+Reads and parses the corpus file containing inscription data.
+
+Parameters:
+    $corpus_filename - Path to the corpus file
+
+Returns:
+    Array reference containing parsed corpus data in the format:
+    [
+        [glyph_name, [[cisi, inscription, regex], ...]], 
+        [glyph_name, [[cisi, inscription, regex], ...]], 
+        ...
+    ]
+
+Dies with error message if file cannot be opened.
+
+=cut
+
+sub read_corpus {
+    my ($corpus_file) = @_;
+    say "Reading corpus...";
+    open(my $fh, '<', $corpus_file) || die "Cannot open data/testcorpus.txt";
     my @corpus = <$fh>;
     close $fh;
     chomp(@corpus);
@@ -83,13 +219,111 @@ sub readtexts {
         }
         else {
             s/^\s+|\s+$//g;
+            # Debug the line before split
+            # say "DEBUG: Processing line: '$_'\n";
             my ($cisi, $inscr, $regex) = split(/\s+/, $_);
+            
+            # Validate we got all three fields
+            if (!defined $cisi || !defined $inscr || !defined $regex) {
+                say "WARNING: Invalid line format: '$_'\n";
+                say "  Got: cisi='", ($cisi//'<undef>'), 
+                      "' inscr='", ($inscr//'<undef>'), 
+                      "' regex='", ($regex//'<undef>'), "'\n";
+                next;  # Skip this line
+            }
+            
             push @$glyphs, [$cisi, $inscr, $regex];
-            print "CISI $cisi INSCR $inscr REGEX $regex";
+            say "CISI $cisi INSCR $inscr REGEX $regex";
+            # s/^\s+|\s+$//g;
+            # my ($cisi, $inscr, $regex) = split(/\s+/, $_);
+            # push @$glyphs, [$cisi, $inscr, $regex];
+            # say "CISI $cisi INSCR $inscr REGEX $regex";
         }
     }
     push(@$texts, [$glyph, $glyphs]) if $glyph;
-    print "Reading corpus...done";
+    say "Reading corpus...done";
 
     return $texts;
-}
\ No newline at end of file
+}
+
+
+=head1 DESCRIPTION
+
+Enhanced version of the corpus processing script that adds:
+- Documentation
+- Command line argument handling
+- Flexible file path configuration
+
+=head1 AUTHOR
+
+Original script by [@yajnadevam]
+
+=head1 CONTRIBUTORS
+
+Enhanced version with docs, comments and CLI  by [@performance]
+
+=head1 VERSION HISTORY
+
+=over 4
+
+=item v1.0 (Original)
+
+* Initial implementation
+
+=item v2.0 (Current)
+
+* Made file paths and target char configurable via command-line args
+* Added documentation
+* Improved error handling
+* Better code structure
+* Added UTF-8 support
+* Added a few debug logs
+
+=back
+
+=head1 LICENSE
+
+Same license as the original.
+
+=head1 CREDITS
+
+Based on the original corpus processing script developed by @yajnadevam.
+
+=cut
+
+__END__
+
+# Additional documentation:
+
+=head1 FILE FORMATS
+
+=head2 Dictionary File (mw.txt)
+
+One word per line, UTF-8 encoded.
+
+Example:
+    word1
+    word2
+    word3
+
+=head2 Corpus File (testcorpus.txt)
+
+Format:
+    CharacterName:
+    cisi inscription regex
+    cisi inscription regex
+    # comments are allowed
+    
+    NextCharacter:
+    cisi inscription regex
+    ...
+
+=head1 DEBUGGING
+
+To enable detailed debugging, uncomment the following line near the top of the script:
+# use Data::Dumper;
+
+Then add debug statements like:
+print Dumper($variable);
+
+=cut
\ No newline at end of file
diff --git a/scripts/test_harness.pl b/scripts/test_harness.pl
new file mode 100644
index 0000000..805ce77
--- /dev/null
+++ b/scripts/test_harness.pl
@@ -0,0 +1,151 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw(tempdir);
+use File::Path qw(make_path);
+use Capture::Tiny qw(capture);
+use File::Copy;
+use Cwd;
+
+# Store original directory
+my $original_dir = getcwd();
+my $test_dir = tempdir(CLEANUP => 1);
+
+sub setup_test_env {
+    my ($dict_file, $corpus_file) = @_;
+    make_path("$test_dir/data");
+    
+    my $abs_dict = File::Spec->rel2abs($dict_file);
+    my $abs_corpus = File::Spec->rel2abs($corpus_file);
+    
+    copy($abs_dict, "$test_dir/mw.txt") 
+        or die "Failed to copy dictionary file: $!";
+    copy($abs_corpus, "$test_dir/data/testcorpus.txt") 
+        or die "Failed to copy corpus file: $!";
+}
+
+# Function to count and analyze sections
+sub analyze_output {
+    my ($output) = @_;
+    my @sections;
+    my $current_section = '';
+    my $current_content = '';
+    
+    for my $line (split /\n/, $output) {
+        if ($line =~ /^(\d{3}-[^:]+):/) {
+            if ($current_section) {
+                push @sections, [$current_section, $current_content];
+            }
+            $current_section = $1;
+            $current_content = "$line\n";
+        } elsif ($current_section) {
+            $current_content .= "$line\n";
+        }
+    }
+    
+    # Add last section
+    if ($current_section) {
+        push @sections, [$current_section, $current_content];
+    }
+    
+    return \@sections;
+}
+
+# Function to run a script with given arguments
+sub run_script {
+    my ($script, $args) = @_;
+    my $abs_script = File::Spec->rel2abs($script, $original_dir);
+    
+    chdir $test_dir or die "Cannot change to test directory: $!";
+    
+    my ($stdout, $stderr, $exit) = capture {
+        system($^X, $abs_script, split(/\s+/, $args));
+    };
+    
+    chdir $original_dir or die "Cannot change back to original directory: $!";
+    
+    return ($stdout, $stderr, $exit);
+}
+
+# Main test execution
+sub main {
+    my ($dict_file, $corpus_file) = @_;
+    
+    die "Dictionary file not found: $dict_file" unless -f $dict_file;
+    die "Corpus file not found: $corpus_file" unless -f $corpus_file;
+
+    print "Setting up test environment...\n";
+    setup_test_env($dict_file, $corpus_file);
+
+    print "Running original script...\n";
+    my ($orig_stdout, $orig_stderr, $orig_exit) = run_script('orig_prove.pl', '');
+    
+    print "Running new script...\n";
+    my ($new_stdout, $new_stderr, $new_exit) = run_script('prove.pl', '');
+
+    # Analyze outputs
+    my $orig_sections = analyze_output($orig_stdout);
+    my $new_sections = analyze_output($new_stdout);
+    
+    print "Original sections: " . scalar(@$orig_sections) . "\n";
+    print "New sections: " . scalar(@$new_sections) . "\n\n";
+
+    # Compare sections
+    my $max_sections = scalar(@$orig_sections) > scalar(@$new_sections) ? 
+                      scalar(@$orig_sections) : scalar(@$new_sections);
+    
+    for my $i (0 .. $max_sections - 1) {
+        if ($i < @$orig_sections && $i < @$new_sections) {
+            my $orig_section = $orig_sections->[$i];
+            my $new_section = $new_sections->[$i];
+            
+            if ($orig_section->[0] ne $new_section->[0]) {
+                print "Section name mismatch at position $i:\n";
+                print "Original: $orig_section->[0]\n";
+                print "New: $new_section->[0]\n\n";
+            }
+            
+            if ($orig_section->[1] ne $new_section->[1]) {
+                print "Content mismatch in section $orig_section->[0]:\n";
+                print "Original:\n$orig_section->[1]\n";
+                print "New:\n$new_section->[1]\n";
+                print "-" x 80 . "\n";
+            }
+        } else {
+            print "Missing section at position $i\n";
+            if ($i < @$orig_sections) {
+                print "Original has: $orig_sections->[$i]->[0]\n";
+            }
+            if ($i < @$new_sections) {
+                print "New has: $new_sections->[$i]->[0]\n";
+            }
+            print "\n";
+        }
+    }
+
+    # Save outputs for inspection
+    open(my $orig_fh, '>', "original_output.txt");
+    print $orig_fh $orig_stdout;
+    close($orig_fh);
+    
+    open(my $new_fh, '>', "new_output.txt");
+    print $new_fh $new_stdout;
+    close($new_fh);
+
+    # Test output
+
+    is($new_stdout, $orig_stdout, "stderr matches");
+    is($new_stderr, $orig_stderr, "stderr matches");
+    is($new_exit, $orig_exit, "exit code matches");
+
+    done_testing();
+}
+
+# Check command line arguments
+unless (@ARGV == 2) {
+    die "Usage: $0 dictionary_file corpus_file\n";
+}
+
+main(@ARGV);
\ No newline at end of file