#!/usr/bin/perl -w # --------------------------------------------------------------------------------------------- # # corpusage.pl --- 'age' the words in matrix based on 'lastseen' date # # This program authored by Scott W Leighton ([email protected]) # for use with POPFile and it's components, which are Copyrighted # by John Graham-Cumming. The author hereby contributes this code # to the POPFile project under the terms of the POPFile License # Agreement. /Scott W Leighton/ March 5, 2004 # # Revised Mar 18, 2004 - Made Age Columns more flexible # # POPFile # Copyright (c) 2001-2004 John Graham-Cumming # # --------------------------------------------------------------------------------------------- use strict; use warnings; use DBI; use Date::Calc qw ( Delta_Days Today check_date Date_to_Text ); # # Main # my $user = $ARGV[0] || '1'; my $root = $ENV{POPFILE_ROOT} || './'; my $userroot = $ENV{POPFILE_USER} || './'; $root =~ s/[\/\\]$//; $userroot =~ s/[\/\\]$//; $root .= '/'; $userroot .= '/'; my %config; if ( open CONFIG, '<' . $root .'popfile.cfg' ) { while ( <CONFIG> ) { s/(\015|\012)//g; if ( /(\S+) (.+)/ ) { $config{$1}=$2; } } close CONFIG; } else { die "Unable to get POPFile's configuration from ${root}popfile.cfg : $!"; } # # Open the SQL database # my $dbname = $userroot . $config{bayes_database}; my $dbconnect = $config{bayes_dbconnect}; $dbconnect =~ s/\$dbname/$dbname/g; my $dbh = DBI->connect($dbconnect, $config{bayes_dbuser}, $config{bayes_dbauth}) || die "$0 requires version 0.21.0 or higher of POPFile\n"; # # Define some global work areas # my %aging = (); my $today = sprintf("%4d-%2d-%2d",Today()); # Get the buckets for this installation my %buckets; my @buckets = get_buckets(); my @days = qw ( 15 30 45 60 75 90 ); my @flds = (); foreach my $day (@days) { push @flds, "under$day"; } push @flds, ('over' . $days[-1], 'none'); # # Go thru each bucket, grab the words and calc age # foreach my $bucket (@buckets) { foreach my $fld (@flds) { $aging{$bucket}{$fld}=0; } if ($buckets{$bucket}{wordcount} > 0) { my $sth=$dbh->prepare("select words.word as word, matrix.lastseen as lastseen from matrix left join words on words.id = matrix.wordid where matrix.bucketid = ?;") || die $dbh->errstr; $sth->execute($buckets{$bucket}{id}) || die $dbh->errstr; while (my $row = $sth->fetchrow_hashref) { my @lastseen=split('-',$row->{lastseen}||''); if ($row->{lastseen} && check_date(@lastseen)) { my $wdays = Delta_Days(@lastseen,Today()); my $i; foreach my $day (@days) { $i++; last if $wdays < $day; next if $wdays > $day and $i < 6; $i = 7 if $i == 6; } $aging{$bucket}{$flds[$i-1]}++; } else { $aging{$bucket}{none}++; } } } } # # Print the report # print "Corpus Aging Prepared " . Date_to_Text(Today()) . "\n"; my $string = swrite(<<"END",$days[0],$days[0],$days[1]-1,$days[1],$days[2]-1,$days[2],$days[3]-1,$days[3],$days[4]-1,$days[4],$days[5]-1,$days[5] ); Under Over No Bucket @# @#-@# @#-@# @#-@# @#-@# @#-@# @# Date ------------ ----- ----- ----- ----- ----- ----- ----- ------- END print $string; my @tots=(); my @perc=(); my $gt = 0; foreach my $bucket (sort keys %aging) { for (my $i=0;$i<scalar(@flds);$i++) { $tots[$i]+=$aging{$bucket}{$flds[$i]}; $gt+=$aging{$bucket}{$flds[$i]}; } $string = swrite(<<"END",$bucket,$aging{$bucket}{$flds[0]},$aging{$bucket}{$flds[1]},$aging{$bucket}{$flds[2]},$aging{$bucket}{$flds[3]},$aging{$bucket}{$flds[4]},$aging{$bucket}{$flds[5]},$aging{$bucket}{$flds[6]},$aging{$bucket}{$flds[7]}); @<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@####### END print $string; } for (my $i=0;$i<scalar(@flds);$i++) { if ($gt) { $perc[$i]=($tots[$i] / $gt) * 100; } else { $perc[$i]=0; } } $string = swrite(<<"END","Totals",@tots,@perc ); ------------ ----- ----- ----- ----- ----- ----- ----- ------- @<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@####### @###.#@###.#@###.#@###.#@###.#@###.#@###.#@#####.# END print $string; # All Done exit(0); sub get_buckets { my $sth=$dbh->prepare('select name, id, pseudo from buckets where buckets.userid = ?;') || die $dbh->errstr; $sth->execute($user) || die $dbh->errstr; while (my $row = $sth->fetchrow_hashref) { $buckets{$row->{name}}{id}=$row->{id}; $buckets{$row->{name}}{psuedo}=$row->{psuedo}; # # get the wordcount for the bucket # my $sth2=$dbh->prepare('select sum(matrix.times) as btot from matrix where matrix.bucketid = ?;') || die $dbh->errstr; $sth2->execute($row->{id}) || die $dbh->errstr; while (my $row2 = $sth2->fetchrow_hashref) { $buckets{$row->{name}}{wordcount}=$row2->{btot}; } # # get the color of the bucket # $sth2=$dbh->prepare("select bucket_params.val as color from bucket_params left join bucket_template on bucket_params.btid = bucket_template.id where bucket_params.bucketid = ? and bucket_template.name = 'color' ;") || die $dbh->errstr; $sth2->execute($row->{id}) || die $dbh->errstr; while (my $row2 = $sth2->fetchrow_hashref) { $buckets{$row->{name}}{color}=$row2->{color}; } } return keys %buckets; } sub swrite { die "usage: swrite PICTURE ARGS" unless @_; my $format = shift; $^A = ""; formline($format,@_); return $^A; }
Should you find anything in the documentation that is incomplete, unclear, outdated or just plain wrong, please let us know and leave a note in the Documentation Forum.