#!/bin/bash
#
# Usage: sa-education-ham
#
# Purpose: feed to 'sa-learn --ham' every mail file that:
#   * was last modified less than $THRESHOLD_DAYS days ago (find -mtime)
#   * is located in any subfolder of $MAIL_ROOT_DIR
#   * has been replied to (Maildir "R" flag present in the file name)
#
# Background: any Bayesian anti-spam filter, in order to be efficient, has to
# be trained with as much ham as spam. Our assertion is: a spam is never
# replied to => an email that has been replied to is ham.
#
# Exit status: 1 if the temporary directory cannot be created, otherwise the
# exit status of sa-learn.
#
# NOTE(review): the selection uses the file's modification time. Setting the
# "R" flag renames the file, which usually updates the status-change time
# (ctime) rather than mtime -- confirm that -mtime is the intended criterion.

# Fail loudly on a misspelled/unset variable instead of expanding it to ''.
# 'set -e' is deliberately not enabled: a partially failing scan (for example
# one unreadable mailbox) must not prevent learning from the other messages.
set -u

##
### Configuration variables
MAIL_ROOT_DIR='/var/mail'
THRESHOLD_DAYS=7
# Not referenced anywhere in this script; kept so the configuration block
# stays unchanged for anyone who relies on it.
# shellcheck disable=SC2034
DEBUG=1

##
### Main

# We copy the interesting emails to a temporary directory, since it is more
# effective to run sa-learn once on a directory than once per email.

# Create a temporary directory, exit if it fails.
TMP_DIR=$(mktemp -d -t sa-education-ham.XXXXXX) || exit 1

# Remove the temporary directory on every exit path (normal end, error,
# interruption) so that copies of private mail are never left behind.
# ${TMP_DIR:?} aborts the rm if the variable is ever empty.
cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT

# Copy the interesting emails to the temporary directory.
# Maildir flags follow ':2,' and are stored in ASCII order, so "R" may be
# preceded by D, F or P (e.g. ':2,FRS'); hence '*:2,*R*' and not '*,R*'.
find "$MAIL_ROOT_DIR" \
  -type f \
  -mtime "-${THRESHOLD_DAYS}" \
  -name '*:2,*R*' \
  -exec cp -- {} "$TMP_DIR" \; \
  || printf '%s\n' \
    "sa-education-ham: warning: find reported errors while scanning ${MAIL_ROOT_DIR}" >&2

# Inject these emails into sa-learn and remember its status, so that a
# learning failure is reported to the caller (the EXIT trap does the cleanup).
sa-learn --ham "$TMP_DIR"
status=$?

exit "$status"