#!/usr/bin/perl -w # Dynamic expire daemon for INN 2.0 and timehash storage # by Olaf Titz , June 1998. Public domain. # This is a daemon, to be started along with innd, which periodically # (as given by the -t option) looks if spool space is getting tight, # and frees space by removing articles until enough (as given by the # -f and -i options) is free. # To set up: # 1. Make sure that innd creates its article files with mode 0600. # (by now: set ARTFILE_MODE in include/config.h.in) # thdexpire uses the group read/write flags for itself. # 2. Configure your storage classes carefully. Let the default go in # class 100 and choose the storage classes as relative (percent) # retention times. E.g. if you want to give alt.binaries.* a fifth # of the default time, put them in class 20. Storage classes above # 200 are ignored by this program, you can use them for # never-expire groups. 0 expires immediately. # 3. Set up your expire.ctl in a way that it puts only a maximum cap # on retention times. Run expire from news.daily as usual. However, # this usually will apply only to articles which have an Expires # line. Example: # *:A:7:never:never # 4. Ensure to start this daemon along with innd. # 5. To get information and statistics, run "thdexpire -r" (in # parallel to a running daemon). This will show you the current # actual retention times. # Modus operandi: # thdexpire works directly on the spool. It assumes the layout # described in the timehash section of storage.ctl(5) as of INN 2.0. # For every storage class associated with timehash, thdexpire keeps a # "work time" which is the time of the oldest article in this class # (not counting articles with Expires lines). This time is choosen so # that the difference of the work time of class N to now (i.e. the # retention time for class N) will be N/100 of the retention time of # class 100. The work time of all classes is continuously adjusted as # time goes by. Articles which are older than the work time are # deleted _if_ they don't have an explicit Expires line _or_ this line # is rogue. A "rogue Expires line" is defined as an Expires line which # is more than C days (as of the -c option) in the future. After each # pass which deletes articles, thdexpire calls expireindex to delete # the articles from the overview too (optionally). thdexpire keeps # some state in a DBM file in the db directory. These files are not # vital and can be deleted, it then just takes longer for the next # startup (search for oldest files). # Known bugs and issues: # - Currently assumes that all timehash spool directories are on one # filesystem. Extension to arbitrary FSs will be rather # straightforward. # - The handling of explicit Expires lines is everything but nice and # efficient. We use the "date" program to parse them, and only if we # have the GNU version. Otherwise, no check for rogue Expires is # made (i.e. Expires is unconditionally honored). It were nice if # this could be handled by putting them in their own storage class # (INN 2.1 perhaps?) # - The way we find the Xref data (needed for expiring overview) looks # suboptimal (reading the article), but I think this is the only # possibility, given that we can't access history data (there is no # way to make a token from the article number alone). # - Leaves empty directories. # - The code is ugly and uses too many global variables. Hey, I hacked # this together in two days :-) Should rewrite it in C. $ID='$Id: thdexpire,v 1.4 1998/06/28 14:11:43 olaf Exp $$'; use integer; use POSIX ":fcntl_h"; use SDBM_File; use Getopt::Std; require "/usr/news/etc/innshellvars.pl"; $df="/bin/df -P"; # which df - GNU preferred $dff=3; # n'th field of df output is the free amount chdir $inn::spool || die "chdir $inn::spool: $!"; $opt_r=0; # make a report $opt_t=30; # check interval in minutes $opt_f=50000; # required space in kilobytes $opt_i=5000; # required space in inodes $opt_c=90; # rogue Expires cutoff in days $opt_N=0; # dont actually delete articles $opt_v=0; # verbosity level $opt_O=0; # expire overview getopts("rt:f:i:c:Nv:O"); $_=$inn::pathdb; # shut up warning $sfile="$inn::pathdb/thdexpstat"; $ID=~/ ([^,]+,v [^ ]+)/; $ID=$1; if ($opt_r) { tie(%S, SDBM_File, $sfile, O_RDONLY, 0664) || die "open $sfile: $!"; &report; untie %S; exit 0; } (system "shlock", "-p", $$, "-f", "$inn::innddir/thdexpire.pid")>>8==0 || die "Already running"; tie(%S, SDBM_File, $sfile, O_RDWR|O_CREAT, 0664) || die "open $sfile: $!"; $SIG{'TERM'}=$SIG{'INT'}='finish'; $_=`date --help 2>&1`; $havegdate=m/gnu\.ai\.mit\.edu/; if ($opt_v>0) { printf "%s starting at %s\n", $ID, &wtime(time); if ($havegdate) { print "Using GNU 'date' for analyzing Expires headers\n"; } else { print "Won't analyze Expires headers\n"; } } $|=1; undef @c; $NOW=time; $ac=0; open(C, $inn::path_storagectl) || &err("open $inn::path_storagectl: $!"); while () { /^timehash:[^:]+:(\d+):/i && do { $c{$1}=1 unless ($1>200); }; } close C; opendir(CD, ".") || &err("opendir $inn::spool: $!"); while ($cd=readdir(CD), defined($cd)) { $cd=~/^time-([0-9a-f][0-9a-f])$/i || next; $c{hex($1)}=1 unless hex($1)>200; } closedir CD; @classes=sort {$a<=>$b} keys %c; foreach $c (@classes) { &initclass($c); $S{"work$;$c"}=$S{"oldest$;$c"}&0xFFFFFF00; } $S{"classes"}=join(",", @classes); $S{"inittime"}=time; $S{"ID"}=$ID; printf "Checked %d articles in %d seconds\n", $ac, time-$NOW if ($ac>0 && $opt_v>0); while (1) { $S{"lastrun"}=$NOW=time; printf "%s\n", &wtime($NOW) if ($opt_v>0); $nt=0; foreach $c (@classes) { $t=($NOW-$S{"work$;$c"})*100/$c; $nt=$t if ($nt<$t); } printf "Normal time (class 100): %s\n", &xtime($NOW-$nt) if ($opt_v>0); $S{"normaltime"}=$nt; open(F, "$df -k $inn::spool|") || &err("spawn df: $!"); $/="\n"; $_=; @F=split(/\s+/, ); close F; printf " free kb: %d", $F[$dff] if ($opt_v>2); $needk=$opt_f-$F[$dff]; open(F, "$df -i $inn::spool|") || &err("spawn df: $!"); $_=; @F=split(/\s+/, ); close F; printf " inodes: %d\n", $F[$dff] if ($opt_v>2); $needi=$opt_i-$F[$dff]; if ($needk<0&&$needi<0) { print " (nothing to do)\n" if ($opt_v>0); $tt=0; } else { printf " need to free %d kb, %d inodes\n", ($needk>0?$needk:0), ($needi>0?$needi:0) if ($opt_v>0); $decrement=$opt_t*60; $pass=0; @EI=() if ($opt_O); foreach $c (@classes) { $Dart{$c}=$Dkb{$c}=0; } $error=0; while (!$error && ($needk>0||$needi>0)) { $S{"normaltime"}-=$decrement; printf " normal time (100) becomes %ld\n", $S{"normaltime"} if ($opt_v>2); last if ($S{"normaltime"}<6*60*60); # sanity ++$pass; $Dart=$Dkb=0; foreach $c (@classes) { &worktime($c, $NOW-($S{"normaltime"}*$c/100)); $Dart+=$dart; $Dkb+=$dbb>>10; $Dart{$c}+=$dart; $Dkb{$c}+=$dbb>>10; last if ($error); } if ($Dart) { printf " pass %d deleted %d arts, %d kb\n", $pass, $Dart, $Dkb if ($opt_v>1); $decrement-=$decrement>>2 if ($decrement>10*60); } else { $decrement+=$decrement>>1 if ($decrement<4*60*60); } $needk-=$Dkb; $needi-=$Dart; } $Dkb=$Dart=0; foreach $c (@classes) { printf " class %3d: deleted %6d arts %10d kb\n", $c, $Dart{$c}, $Dkb{$c} if ($opt_v>1); $Dkb+=$Dkb{$c}; $Dart+=$Dart{$c}; } if ($opt_O) { if ($opt_N) { print join("\n", sort @EI), "\n" if ($opt_v>2); } else { if (open(E, "|expireindex -z -")) { print E join("\n", sort @EI), "\n"; close E; } } } $tt=time-$NOW; printf " deleted %d articles, %d kb in %d seconds\n", $Dart, $Dkb, time-$NOW if ($opt_v>0); if ($tt>$opt_t*60) { printf STDERR "Round needed %d seconds, interval is %d\n", $tt, $opt_t*60; $tt=$opt_t*60; } } sleep $opt_t*60-$tt; } &finish(0); sub initclass { my $C=shift; return if ($S{"oldest$;$C"}); my $oldest=time; $S{"oldest$;$C"}=$oldest; my $base=sprintf("%s/time-%02x", $inn::spool, $C); chdir $base || return; printf "Finding oldest in class %s\n", $base if ($opt_v>0); my $count=0; opendir(D0, "."); while ($d1=readdir(D0), defined($d1)) { $d1=~/^[0-9a-f][0-9a-f]$/ || next; chdir $d1; opendir(D1, ".") || next; while ($d2=readdir(D1), defined($d2)) { $d2=~/^[0-9a-f][0-9a-f]$/ || next; chdir $d2; opendir(D2, ".") || next; while ($a=readdir(D2), defined($a)) { $a=~/\./ && next; @S=stat($a); $oldest=$S[9] if ($S[9]<$oldest); ++$count; } closedir D2; chdir ".."; } closedir D1; chdir ".."; } closedir D0; $S{"count$;$C"}=$count; $S{"oldest$;$C"}=$oldest; $ac+=$count; } sub worktime { my $C=shift; my $goal=shift; $goal&=0xFFFFFF00; printf " goal for class %d becomes %s\n", $C, &xtime($goal) if ($opt_v>2); if ($goal>$NOW-10*60) { printf " goal for class %d is in the future\n", $C if ($opt_v>1); $error=1; return; } $cdir=sprintf("%s/time-%02x", $inn::spool, $C); chdir $cdir || &err("chdir $cdir: $!"); $dart=$dbb=0; while (($_=$S{"work$;$C"})<$goal) { printf " running: %08x\n", $_ if ($opt_v>2); ($aa,$bb,$cc) = (($_>>24)&0xFF, ($_>>16)&0xFF, ($_>>8)&0xFF); $dir=sprintf("%02x/%02x", $bb, $cc); $pat=sprintf("[0-9a-f]{4}-%02x[0-9a-f]{2}", $aa); if (opendir(D, $dir)) { while ($_=readdir(D), defined($_)) { /^$pat$/ || next; $art="$dir/$_"; @S=stat($art); next if ($#S<0); if (&checkart($art)) { if ($opt_O && $H=~/^Xref:\s+\S+\s+(.*)\r$/im) { $_=$1; tr+.+/+; push(@EI, split(/ /, $_)); } if ($opt_N) { print " would delete $art\n" if ($opt_v>2); } else { print " deleting $art\n" if ($opt_v>2); unlink $art; } ++$dart; $dbb+=$S[7]; } else { print " leaving alone $art\n" if ($opt_v>2); } } } else { printf " (no dir %s)\n", $dir if ($opt_v>2); } $S{"work$;$C"}+=0x100; $S{"oldest$;$c"}=$S{"work$;$c"} unless ($opt_N); } } sub checkart { return 0 if ($S[2]&0060 == 0040); return 1 if (!$opt_O && $S[2]&0060 == 0060); my $a=shift; # uses global variable @S for stat buffer, returns header in $H # group read bit (0040) set if status known # group write bit (0020) set if may delete open(A, $a) || return 0; $/=""; $H=; # We need to read the header in any case to get at the Xref line # for expireindex. Except when we know that this article is left alone. close A; return 1 if ($S[2]&0060 == 0060); if ($H=~/^Expires:[\s\t]+([^\r\n]+)\r?$/im && ($e=&parsedate($1), $e>$NOW && $e-$NOW<$opt_c*60*60*24)) { chmod 0640, $art; return 0; } else { chmod 0660, $art; return 1; } } sub parsedate { return $NOW+1 unless ($havegdate); my $d=shift; $d=~tr/ A-Za-z0-9:,.+-//cd; # sanitize $d=~s/\s+[A-Z]+$//; # remove zone names - likely bogus my $x=`date -d '$d' +%s 2>/dev/null`; return $x>0?$x:0; } sub report { $NOW=time; my $cc=$S{"classes"}; my $nt=$S{"normaltime"}; unless ($cc && $nt) { print "Not initialized.\n"; return; } printf "Version: %s (this: %s)\n", $S{"ID"}, $ID; printf "Started at: %s\n", &xtime($S{"inittime"}) if ($S{"inittime"}); printf "Last run: %s\n", &xtime($S{"lastrun"}) if ($S{"lastrun"}); printf "Classes: %s\n", $cc; foreach $c (split(/,/, $cc)) { printf "Class %d:\n", $c; #printf " Initial count %d articles\n", $S{"count$;$c"}; printf " Oldest article: %s\n", &xtime($S{"oldest$;$c"}); printf " Expiring at: %s\n", &xtime($S{"work$;$c"}); printf " Normal time: %s\n", &xtime($NOW-$nt*$c/100); } } sub wtime { my $t=shift; my @T=localtime($t); sprintf("%04d-%02d-%02d %02d:%02d", $T[5]+1900, $T[4]+1, $T[3], $T[2], $T[1]); } sub xtime { my $t=shift; if ($NOW-$t<0 || $NOW-$t>300*24*60*60) { return &wtime($t); } my @T=localtime($t); my @D=gmtime($NOW-$t); sprintf("%04d-%02d-%02d %02d:%02d (%dd %dh %dm)", $T[5]+1900, $T[4]+1, $T[3], $T[2], $T[1], $D[7], $D[2], $D[1]); } sub err { printf STDERR "%s\n", shift; &finish(0); } sub finish { untie(%S); unlink "$inn::innddir/thdexpire.pid"; exit 0; }