Initial Commit

neheb · Aug 7, 2013 · 9be4f49 · 9be4f49
commit 9be4f49
Show file tree

Hide file tree

Showing 4 changed files with 244 additions and 0 deletions.
diff --git a/README b/README
@@ -0,0 +1,20 @@
+Just a collection of utilities. Mostly bash scripts. Currently the tools are:
+
+jpegrescan - modified version of http://akuvian.org/src/jpgcrush.tar.gz
+It keeps EXIF data by default and supports arithmetic coding as well
+
+jpegfolder - script that uses jpegrescan on a given folder. keeps all EXIF data.
+Great for losslessly slimming down family photos.
+
+zipcrush - script that uses several utilities to create an extremely small zip file.
+Even smaller than zopfli I might add. The script has several dependencies:
+
+KZIP: http://advsys.net/ken/utils.htm or http://www.jonof.id.au/kenutils
+
+kzip2gz: http://encode.ru/threads/1630-kzip2gz-a-zip-(single-file-amp-deflate-compression)-to-gz-converter
+
+huffmix: http://encode.ru/threads/1313-Huffmix-a-PNGOUT-r-catalyst
+
+deflopt: http://www.walbeehm.com/download/
+
+defluff: http://encode.ru/threads/1214-defluff-a-deflate-huffman-optimizer
diff --git a/jpegfolder b/jpegfolder
@@ -0,0 +1,4 @@
+find -name "*.jpg" -exec jpegrescan '{}' \;
+find -name "*.JPG" -exec jpegrescan '{}' \;
+find -name "*.jpeg" -exec jpegrescan '{}' \;
+find -name "*.JPEG" -exec jpegrescan '{}' \;
diff --git a/jpegrescan b/jpegrescan
@@ -0,0 +1,142 @@
+#!/usr/bin/perl -ws
+# jpegrescan by Loren Merritt
+# Last updated: 2008-11-29
+# This code is public domain.
+
+use File::Slurp;
+@ARGV==1 or die "usage: jpegrescan in.jpg
+tries various progressive scan orders
+switches:
+  -s strip from all extra markers (`jpegtran -copy none` otherwise `jpegtran -copy all`)
+  -v verbose output
+  -q supress all output
+  -a use arithmetic coding (unsupported by most software)
+";
+$fin = $ARGV[0];
+$ftmp = "/tmp/$$.scan";
+$jtmp = "/tmp/$$.jpg";
+$verbose = $v;
+$quiet = $q;
+@strip = $s ? ("-copy","none") : ("-copy","all");
+@arith = $a ? ("-arithmetic") : ();
+undef $_ for $v,$q,$s,$a;
+undef $/;
+$|=1;
+
+# convert the input to baseline, just to make all the other conversions faster
+# FIXME there's still a bunch of redundant computation in separate calls to jpegtran
+open $OLDERR, ">&", STDERR;
+open STDERR, ">", $ftmp;
+open TRAN, "-|", "jpegtran", "-v", @strip, "-optimize", $fin or die;
+write_file($jtmp, <TRAN>);
+close TRAN;
+open STDERR, ">&", $OLDERR;
+
+$type = read_file($ftmp);
+$type =~ /components=(\d+)/ or die;
+$rgb = $1==3 ? 1 : $1==1 ? 0 : die "not RGB nor gray\n";
+
+# FIXME optimize order for either progressive transfer or decoding speed
+sub canonize {
+    my $txt = $prefix.$suffix.shift;
+    $txt =~ s/\s*;\s*/;\n/g;
+    $txt =~ s/^\s*//;
+    $txt =~ s/ +/ /g;
+    $txt =~ s/: (\d+) (\d+)/sprintf ": %2d %2d", $1, $2/ge;
+    # treat u and v identically. I shouldn't need to do this, but with jpegtran overhead it saves 9% speed. cost: .008% bitrate.
+    $txt =~ s/^2:.*\n//gm;
+    $txt =~ s/^1:(.+)\n/1:$1\n2:$1\n/gm;
+    # dc before ac, coarse before fine
+    my @txt = sort {"$a\n$b" =~ /: *(\d+) .* (\d);\n.*: *(\d+) .* (\d);/ or die; !$3 <=> !$1 or $4 <=> $2 or $a cmp $b;} split /\n/, $txt;
+    return join "\n", @txt;
+}
+
+sub try {
+    my $txt = canonize(shift);
+    return $memo{$txt} if $memo{$txt};
+    write_file($ftmp, $txt);
+    open TRAN, "-|", "jpegtran", @arith, @strip, "-scans", $ftmp, $jtmp or die;
+    $data = <TRAN>;
+    close TRAN;
+    my $s = length $data;
+    $s or die;
+    $memo{$txt} = $s;
+    !$quiet && print $verbose ? "$txt\n$s\n\n" : ".";
+    return $s;
+}
+
+sub triesn {
+    my($bmode, $bsize);
+    my ($limit, @modes) = @_;
+    my $overshoot = 0;
+    for(@modes) {
+        my $s = try($_);
+        if(!$bsize || $s < $bsize) {
+            $bsize = $s;
+            $bmode = $_;
+            $overshoot = 0;
+        } elsif(++$overshoot >= $limit) {
+            last;
+        }
+    }
+    return $bmode;
+}
+
+sub tries { triesn(99, @_); }
+
+$prefix = "";
+$suffix = "";
+
+if($rgb) {
+    # 012 helps very little
+    # 0/12 and 0/1/2 are pretty evenly matched in frequency, but 0/12 wins in total size if every image had to use the same mode
+    # dc refinement passes never help
+    $dc = tries("0: 0 0 0 0; 1 2: 0 0 0 0;",
+                "0: 0 0 0 0; 1: 0 0 0 0; 2: 0 0 0 0;");
+    # jpegtran won't let me omit dc entirely, but I can at least quantize it away to make the rest of the tests faster.
+    $prefix = "0 1 2: 0 0 0 9;";
+} else {
+    $dc = "0: 0 0 0 0;";
+    $prefix = "0: 0 0 0 9;";
+}
+
+# luma can make use of up to 3 refinement passes.
+# chroma can make use of up to 2 refinement passes.
+# refinement passes have some chance of being split (luma: 4%,4%,4%. chroma: 20%,8%) but the total bit gain is negligible.
+# msb pass should almost always be split (luma: 87%, chroma: 81%).
+# I have no theoretical reason for this list of split positions, they're just the most common in practice.
+# splitting into 3 ections is often slightly better, but the total number of bits saved is negligible.
+# FIXME: penalize lots of refinement passes because it's slower to decode. if so, then also force overwrite if bigger than the input.
+sub try_splits {
+    my $str = shift;
+    my %n = map {$_ => sprintf "$c: 1 %d $str; $c: %d 63 $str;", $_, $_+1} 2,5,8,12,18;
+    my $mode = triesn(2, "$c: 1 63 $str;", @n{2,8,5});
+    return $mode if $mode ne $n{8};
+    return triesn(1, $mode, @n{12,18});
+}
+
+foreach $c (0..$rgb) {
+    my @modes;
+    my $ml = "";
+    for(0..($c?2:3)) {
+        push @modes, "$c: 1 8 0 $_; $c: 9 63 0 $_;".$ml;
+        $ml .= sprintf("$c: 1 63 %d %d;", $_+1, $_);
+    }
+    my $refine = triesn(1, @modes);
+    $refine =~ s/.* (0 \d);//;
+    $ac .= $refine . try_splits($1);
+}
+
+$prefix = "";
+undef %memo;
+$mode = canonize($dc.$ac);
+try($mode);
+$size = $memo{$mode};
+!$quiet && print "\n$mode\n$size\n";
+$old_size = -s $fin;
+!$quiet && printf "%+.2f%%\n", ($size/$old_size-1)*100;
+if($size > $old_size) {
+    $data = read_file($fin);
+}
+write_file($fin, $data);
+unlink $ftmp, $jtmp;
diff --git a/zipcrush b/zipcrush
@@ -0,0 +1,78 @@
+kzip /b1024 1.zip "$1"
+kzip2gz 1.zip 1.gz
+rm 1.zip
+
+COUNTER=0
+while [	$COUNTER -lt 128 ]; do
+	kzip /rn /b1024 2.zip "$1"
+	kzip2gz 2.zip 2.gz
+	huffmix 1.gz 2.gz 1.gz
+	rm 2.gz
+	rm 2.zip
+	let COUNTER=COUNTER+1
+done
+
+COUNTER=0
+while [ $COUNTER -lt 128 ]; do
+        kzip /rn /b512 2.zip "$1"
+        kzip2gz 2.zip 2.gz
+        huffmix 1.gz 2.gz 1.gz
+        rm 2.gz
+        rm 2.zip
+        let COUNTER=COUNTER+1
+done
+
+COUNTER=0
+while [ $COUNTER -lt 128 ]; do
+        kzip /rn /b256 2.zip "$1"
+        kzip2gz 2.zip 2.gz
+        huffmix 1.gz 2.gz 1.gz
+        rm 2.gz
+        rm 2.zip
+        let COUNTER=COUNTER+1
+done
+
+COUNTER=0
+while [ $COUNTER -lt 128 ]; do
+        kzip /rn /b128 2.zip "$1"
+        kzip2gz 2.zip 2.gz
+        huffmix 1.gz 2.gz 1.gz
+        rm 2.gz
+        rm 2.zip
+        let COUNTER=COUNTER+1
+done
+
+COUNTER=0
+while [ $COUNTER -lt 128 ]; do
+        kzip /rn /b64 2.zip "$1"
+        kzip2gz 2.zip 2.gz
+        huffmix 1.gz 2.gz 1.gz
+        rm 2.gz
+        rm 2.zip
+        let COUNTER=COUNTER+1
+done
+
+COUNTER=0
+while [ $COUNTER -lt 128 ]; do
+        kzip /rn /b32 2.zip "$1"
+        kzip2gz 2.zip 2.gz
+        huffmix 1.gz 2.gz 1.gz
+        rm 2.gz
+        rm 2.zip
+        let COUNTER=COUNTER+1
+done
+
+COUNTER=0
+while [ $COUNTER -lt 128 ]; do
+        kzip /rn /b16 2.zip "$1"
+        kzip2gz 2.zip 2.gz
+        huffmix 1.gz 2.gz 1.gz
+        rm 2.gz
+        rm 2.zip
+        let COUNTER=COUNTER+1
+done
+
+deflopt 1.gz
+defluff < 1.gz > 11.gz
+deflopt 11.gz
+mv 11.gz "$1"