#!/bin/bash
# Copyright (C) 2009 Jonathan Ernst
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# This script will:
# Scan an image and convert it to an A4-sized pdf, using jpeg compression,
# then send an e-mail with the pdf attached and the ocr'ed text in the body.
#
# Note: tesseract, mutt, sane-utils and imagemagick are needed for this script to work.
# The script also calls zenity, unpaper, wget and pdftotext.
#
# Ubuntu users should be fine with (LANG=fre,eng,ita,etc.):
# sudo aptitude install tesseract-ocr tesseract-ocr-LANG mutt sane-utils imagemagick
#
# If you don't receive e-mail, check that your MTA is correctly configured; again in Ubuntu:
#
# sudo aptitude install exim4
# sudo dpkg-reconfigure exim4-config
#
# Version: 20090914

###################
#  CONFIGURATION  #
###################
# Override the configuration below in $0.conf (e.g. scan2mail.conf); don't change anything here
# as your changes would be deleted by an update.
# Simply copy and paste the configuration entries you need to override (and only those) into $0.conf
# and make your changes there.
###################

# prefix for generated pdf files
C_PREFIX=$(date +%F)
# temporary directory where scanned files are stored
# NOTE(review): this is a predictable path under /tmp that is removed and
# re-created on every run; point it at a private directory on shared machines.
C_SCANDIR=/tmp/scantmp
# device name (in case you have more than one device), use scanimage -L to find your device URI
C_DEVICE="hpaio:/net/Officejet_J6400_series?ip=192.168.0.141"
# resolution
C_RESOLUTION=170
# jpeg quality
C_QUALITY=30
# language passed to tesseract (-l) for the OCR
C_OCR_LANG=fra
# Addressbook, increment the number in brackets for each pair of entry
# (quote names that contain spaces)
C_ADDRESSBOOK_NAME[0]=Dummy1
C_ADDRESSBOOK_EMAIL[0]=dummy@dummy.com
C_ADDRESSBOOK_NAME[1]=Dummy2
C_ADDRESSBOOK_EMAIL[1]=dummy2@dummy.com
# update url, comment or leave empty for no update
C_UPDATE_URL=http://ernstfamily.ch/jonathan/uploads/scan2mail

# user interface strings
case "${LANG:-}" in
  *"fr_"*)
    S_UPDATED="Script mis à jour, rechargement..."
    S_USAGE="Usage :"
    S_TITLE="scan2mail"
    S_CHOICE="Choix"
    S_NAME="Nom"
    S_EMAIL="E-mail"
    S_SUBJECT="Sujet"
    S_SCANNING="Numérisation..."
    S_SCANVERSO="Voulez-vous scanner le verso ?"
    S_PUTPAPER="Mettez une feuille dans le scanner et recommencez !"
    S_TURNSTACK="Retournez le tas de feuilles scanné et remettez le dans le scanner"
    S_GETTINGRECIPIENT="Obtention du destinataire"
    S_GETTINGSUBJECT="Obtention du sujet du mail"
    S_PREPARINGPAGE="Préparation de la page"
    S_GETTINGFILE="Récupération du fichier"
    S_SENDINGMAIL="Envoi de l'e-mail..."
    ;;
  *)
    S_UPDATED="Script updated, reloading..."
    S_USAGE="Usage:"
    S_TITLE="scan2mail"
    S_CHOICE="Choice"
    S_NAME="Name"
    S_EMAIL="E-mail"
    S_SUBJECT="Subject"
    S_SCANNING="Scanning..."
    S_SCANVERSO="Would you like to do a duplex scan?"
    S_PUTPAPER="Please put some pages in the scanner and start again!"
    S_TURNSTACK="Please turn the stack of scanned pages and put them back in the scanner"
    S_GETTINGRECIPIENT="Getting recipient"
    S_GETTINGSUBJECT="Getting mail subject"
    S_PREPARINGPAGE="Preparing page"
    S_GETTINGFILE="Getting file"
    S_SENDINGMAIL="Sending mail..."
    ;;
esac

# actual override, don't copy this line in $0.conf
if [[ -r "$0.conf" ]]; then
  # shellcheck source=/dev/null
  source "$0.conf" > /dev/null 2>&1
fi

##################
#   FUNCTIONS    #
##################

# Print the number that follows the first "Version" marker comment of file $1.
# Prints nothing when the file is unreadable or carries no such marker.
# sed is used (rather than grep + awk field 3) so the number is found even
# when the marker is not at the start of its line.
_script_version() {
  sed -n 's/^.*# Version: \([0-9][0-9]*\).*$/\1/p' "$1" 2> /dev/null | head -n 1
}

# Append the running version to the window title and, when C_UPDATE_URL is
# set, download the published script; if its version number is higher,
# replace this script with it and re-execute.
# Globals:   C_UPDATE_URL, S_UPDATED (read); S_TITLE, CURRENTVERSION,
#            ONLINEVERSION (written)
# Arguments: the script's own arguments, forwarded to the reloaded script
# NOTE(review): the update is fetched from C_UPDATE_URL and executed without
# any integrity check; leave C_UPDATE_URL empty if that is not acceptable.
check_update() {
  CURRENTVERSION=$(_script_version "$0")
  S_TITLE="$S_TITLE - $CURRENTVERSION"
  if [[ -z "$C_UPDATE_URL" ]]; then
    return 0
  fi

  local new_script="$0.new"
  if ! wget "$C_UPDATE_URL" --timeout=10 -a "$0.log" -O "$new_script"; then
    # download failed (no network, no wget, ...): carry on with this version
    rm -f -- "$new_script"
    return 0
  fi

  ONLINEVERSION=$(_script_version "$new_script")
  # only compare when both sides really are numbers; 10# avoids octal parsing
  if [[ "$CURRENTVERSION" =~ ^[0-9]+$ && "$ONLINEVERSION" =~ ^[0-9]+$ ]] \
    && ((10#$ONLINEVERSION > 10#$CURRENTVERSION)); then
    if mv -- "$new_script" "$0" && chmod +x -- "$0"; then
      printf '%s\n' "$S_UPDATED"
      # reload the new version with the arguments the user gave
      exec "$0" "$@"
    fi
  fi
  rm -f -- "$new_script"
  return 0
}

# Print the usage text.
# Arguments: $1 - name the script was invoked as
help() {
  echo
  printf '%s\n' "$S_USAGE"
  printf '%s\n' "$1"
  printf '%s\n' "$1 <file.pdf> [recipient] [subject] [date]"
  printf '%s\n' "$1 <side> [recipient] [subject] [date]"
  echo
  echo "<side> = recto|verso"
  echo
}

# clean up the temporary directory
# Also removes ~/sent; presumably that is the copy mutt keeps of outgoing
# mail (mutt's default record file) -- confirm before changing.
clean() {
  echo Cleanup working directory
  if [[ -n "$C_SCANDIR" ]]; then
    rm -rf -- "$C_SCANDIR" > /dev/null 2>&1
  fi
  rm -f -- ~/sent > /dev/null 2>&1
}

# scan documents
# Batch-scans A4 pages in gray from the ADF into $C_SCANDIR/<n>$1 (pnm) while
# a pulsating zenity progress dialog is shown.
# Arguments: $1 - suffix appended to each page number ("" or "-verso")
scan() {
  scanimage --device-name="$C_DEVICE" --mode Gray --source=ADF \
    -x 210 -l 0 -y 297 --resolution="$C_RESOLUTION" -t 0 \
    --batch="$C_SCANDIR/%d$1" --format=pnm \
    | tee >(zenity --progress --title "$S_TITLE" --text "$S_SCANNING" --pulsate --auto-close)
}

# Use an existing pdf instead of scanning: copy it to the attachment path and
# extract its text into ocr.txt for the mail body.
# Arguments: $1 - path of the pdf file
getfile() {
  printf '%s %s\n' "$S_GETTINGFILE" "$1"
  cp -- "$1" "$C_SCANDIR/$C_PREFIX-$FILENAME.pdf"
  pdftotext "$1" "$C_SCANDIR/ocr.txt" > /dev/null 2>&1
}

# Scan the back sides when wanted.
# Arguments: $1 - "" to ask the user, "verso" to scan after a 10 s pause,
#                 anything else to skip
verso() {
  if [[ -z "$1" ]]; then
    if zenity --question --text "$S_SCANVERSO"; then
      scan -verso
    fi
  elif [[ "$1" == "verso" ]]; then
    printf '%s\n' "$S_TURNSTACK"
    sleep 10
    scan -verso
  fi
}

# define recipient
# Arguments: $1 - recipient address; when empty a zenity radio list built
#                 from the address book is shown
# Globals:   RECIPIENT, RETURNVALUE (written)
recipient() {
  printf '%s\n' "$S_GETTINGRECIPIENT"
  RECIPIENT=$1
  RETURNVALUE=0
  if [[ -n "$RECIPIENT" ]]; then
    return 0
  fi

  # One (FALSE, e-mail, name) triple per entry, kept in an array so names
  # containing spaces stay in one column. Entries are listed last-first,
  # the order the dialog has always shown.
  local -a indices=("${!C_ADDRESSBOOK_EMAIL[@]}")
  local -a rows=()
  local i idx
  for ((i = ${#indices[@]} - 1; i >= 0; i--)); do
    idx=${indices[i]}
    rows+=(FALSE "${C_ADDRESSBOOK_EMAIL[idx]}" "${C_ADDRESSBOOK_NAME[idx]}")
  done

  RECIPIENT=$(zenity --width='400' --height='300' --list --title "$S_TITLE" \
    --radiolist --separator " + " \
    --column "$S_CHOICE" --column "$S_EMAIL" --column "$S_NAME" \
    "${rows[@]}")
  RETURNVALUE=$?
}

# mail's subject and filename
# Arguments: $1 - subject; when empty the user is asked through zenity
# Globals:   SUBJECT, FILENAME, RETURNVALUE (written)
subject() {
  printf '%s\n' "$S_GETTINGSUBJECT"
  SUBJECT=$1
  RETURNVALUE=0
  if [[ -z "$SUBJECT" ]]; then
    SUBJECT=$(zenity --entry --title "$S_TITLE" --text "$S_SUBJECT")
    RETURNVALUE=$?
  fi
  # file name = subject with whitespace and path separators made harmless
  FILENAME=${SUBJECT//[[:space:]]/_}
  FILENAME=${FILENAME//\//_}
}

# Prepare one scanned page: clean it, convert it to tiff and OCR it.
# Arguments: $1 - path of the scanned pnm file
# Outputs:   $1.tif (image) and $1.txt (recognized text)
page() {
  printf '%s %s\n' "$S_PREPARINGPAGE" "$1..."
  # clean up the scan (unpaper needs pnm files)
  unpaper --overwrite "$1" "$1" > /dev/null 2>&1
  # convert the scan in tiff as tesseract only reads tiff
  convert "$1" "$1.tif" > /dev/null 2>&1
  # OCR using tesseract (will be used for the mail body)
  tesseract "$1.tif" "$1" -l "${C_OCR_LANG:-fra}" > /dev/null 2>&1
  # Cleanup the OCR (TODO: to be improved): drop lines of one or two
  # characters. The result goes through a temporary file because redirecting
  # straight onto the input would truncate it before grep reads it.
  if [[ -f "$1.txt" ]]; then
    local cleaned="$1.txt.clean"
    local status=0
    grep -v -E '^..?$' "$1.txt" > "$cleaned" || status=$?
    # grep exits 1 when every line was dropped, which is still a valid result
    if ((status <= 1)) && [[ -f "$cleaned" ]]; then
      mv -f -- "$cleaned" "$1.txt"
    else
      rm -f -- "$cleaned"
    fi
  fi
}

# if zenity execution returns 1 (cancel), exit
cancel() {
  if [[ "$RETURNVALUE" == 1 ]]; then
    echo Exiting...
    exit 0
  fi
}

##################
#      CODE      #
##################

# Whole workflow: update check, recipient and subject, scan (or take an
# existing pdf), build the pdf and the OCR text, mail them, clean up.
# Arguments: $1 - pdf file to send, or side selector (recto|verso), or empty
#            $2 - recipient (asked when empty)
#            $3 - subject (asked when empty)
#            $4 - value for the mail's Date header (optional)
main() {
  check_update "$@"
  # the usage text is printed on every run
  help "$0"
  clean
  # from here on, remove the working directory on every way out
  trap clean EXIT
  mkdir -p -- "$C_SCANDIR" > /dev/null 2>&1

  recipient "$2"
  cancel
  subject "$3"
  cancel

  local pdf="$C_SCANDIR/$C_PREFIX-$FILENAME.pdf"
  local ocr="$C_SCANDIR/ocr.txt"

  if [[ -f "$1" ]]; then
    getfile "$1"
  else
    scan
    verso "$1"

    local scanfile
    for scanfile in "$C_SCANDIR"/*; do
      [[ -f "$scanfile" ]] || continue
      page "$scanfile"
    done

    if [[ -f "$C_SCANDIR/1" ]]; then
      # count the front pages (scanimage numbers them 1, 2, 3, ...)
      local -i total=0 p v
      while [[ -f "$C_SCANDIR/$((total + 1))" ]]; do
        total=$((total + 1))
      done

      # The stack is turned over before the back sides are scanned, so the
      # back of front page p is verso scan number (total - p + 1).
      local -a scanlist=() txtlist=()
      local base
      for ((p = 1; p <= total; p++)); do
        v=$((total - p + 1))
        for base in "$C_SCANDIR/$p" "$C_SCANDIR/$v-verso"; do
          if [[ -f "$base.tif" ]]; then
            scanlist+=("$base.tif")
          fi
          if [[ -f "$base.txt" ]]; then
            txtlist+=("$base.txt")
          fi
        done
      done

      # create the pdf
      if ((${#scanlist[@]} > 0)); then
        convert -compress jpeg -quality "$C_QUALITY" -page A4 "${scanlist[@]}" "$pdf"
      fi
      # let's put everything in a file
      if ((${#txtlist[@]} > 0)); then
        cat -- "${txtlist[@]}" > "$ocr"
      else
        # cat without operands would wait on stdin
        : > "$ocr"
      fi
    else
      zenity --error --text "$S_PUTPAPER"
    fi
  fi

  # let's see if we have something to send
  if [[ -f "$pdf" ]]; then
    # a recipient given on the command line may hold several addresses
    local -a recipients=()
    read -r -a recipients <<< "$RECIPIENT"
    if ((${#recipients[@]} == 0)); then
      echo "No recipient selected, mail not sent." >&2
      return 0
    fi

    # send it using mutt
    printf '%s\n' "$S_SENDINGMAIL"
    local -a mutt_args=(-s "$SUBJECT")
    if [[ -n "$4" ]]; then
      # only override the Date header when a date was actually given
      mutt_args+=(-e "my_hdr Date: $4")
    fi
    if [[ ! -f "$ocr" ]]; then
      # no text could be extracted: send an empty body rather than nothing
      : > "$ocr"
    fi
    # "--" ends the attachment list so the addresses are not taken as files
    mutt "${mutt_args[@]}" -a "$pdf" -- "${recipients[@]}" < "$ocr"
  fi
}

main "$@"