#!/usr/bin/env python
'''Mass filtering of page scans to improve text quality using ImageMagick.'''
import argparse
import glob
import os
import shutil
import subprocess
import sys
import tempfile
def main(args):
    '''Filter all matching input scans and merge them into one output file.

    args is the parsed command line namespace with these attributes:
      input  -- list of file names or glob patterns of the scans to process
      output -- name of the file that ImageMagick's convert should write
      force  -- if true, an already existing output file is replaced

    Exits via sys.exit() with a message if the output file exists and
    args.force is not set, if no input file matches, if no filtered page
    was produced, or if the final convert call fails.
    '''
    # Fail fast: refuse to overwrite before any (slow) filtering is done.
    if os.path.exists(args.output) and not args.force:
        sys.exit('Output file "%s" exists. Use -f or --force to overwrite.'
                 % args.output)

    files = []
    for pattern in args.input:
        files.extend(glob.glob(pattern))
    if not files:
        sys.exit('No input files match the given pattern(s).')

    tempdir = tempfile.mkdtemp()
    try:
        for name in sorted(files):
            convert(name, tempdir)

        # Collect the filtered pages ourselves instead of relying on a shell
        # glob, so that no shell is involved in building the command.
        pages = sorted(glob.glob(os.path.join(tempdir, '*.png')))
        if not pages:
            sys.exit('No filtered pages were produced, nothing to merge.')

        # Only remove the old output once there is something to replace it.
        if os.path.exists(args.output):
            os.remove(args.output)

        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact.
        command = ['convert'] + pages + ['-compress', 'zip', args.output]
        try:
            status = subprocess.call(command)
        except OSError as err:
            sys.exit('Cannot run "convert": %s' % err)
        if status != 0:
            sys.exit('convert failed with exit status %d.' % status)
    finally:
        # Runs on sys.exit() as well, so the temporary pages never linger.
        shutil.rmtree(tempdir)
def convert(filename, tempdir):
    '''Run ./localthresh on one scan and store the result in tempdir.

    filename -- path of the scan to filter
    tempdir  -- directory that receives the result, written as
                "out-<base name of filename without extension>.png"

    ./localthresh is looked up relative to the current working directory.
    Its exit status is not checked; a page that fails to filter simply
    leaves no output file behind. If the program cannot be started at
    all, the script exits via sys.exit() with a message.
    '''
    stem = os.path.splitext(os.path.basename(filename))[0]
    outf = os.path.join(tempdir, 'out-' + stem + '.png')
    # Pass the real input path (not just its stem) and use an argument
    # list so that paths with spaces or shell metacharacters work.
    command = ['./localthresh', '-n', 'yes', '-m', '3', '-r', '35',
               '-b', '20', filename, outf]
    try:
        subprocess.call(command)
    except OSError as err:
        sys.exit('Cannot run "%s": %s' % (command[0], err))
if __name__ == '__main__':
    # Command line interface: one or more inputs, an optional output name
    # and an optional switch that allows replacing an existing output.
    cli = argparse.ArgumentParser(
        description='Mass filtering of page scans using ImageMagick.')
    cli.add_argument(
        'input',
        nargs='+',
        help='Files or glob pattern(s) for input file, e.g. /home/foo/scan-*.jpg')
    cli.add_argument(
        '-o', '--output',
        default='output.pdf',
        help='Name of output file, default "%(default)s"')
    cli.add_argument(
        '-f', '--force',
        action='store_true',
        help='Overwrite if output file already exists.')
    main(cli.parse_args())