Example Script - CorpusAge

#!/usr/bin/perl -w
# ---------------------------------------------------------------------------------------------
#
# corpusage.pl --- 'age' the words in matrix based on 'lastseen' date
#
# This program authored by Scott W Leighton (helphand@pacbell.net)
# for use with POPFile and it's components, which are Copyrighted
# by John Graham-Cumming. The author hereby contributes this code
# to the POPFile project under the terms of the POPFile License
# Agreement.    /Scott W Leighton/  March 5, 2004
#
# Revised Mar 18, 2004 - Made Age Columns more flexible
#
# POPFile
# Copyright (c) 2001-2004 John Graham-Cumming
#
# ---------------------------------------------------------------------------------------------
 
use strict;
use warnings;
use DBI;
use Date::Calc qw ( Delta_Days Today check_date Date_to_Text );
 
  #
  # Main
  #
 
  my $user = $ARGV[0] || '1';
  my $root = $ENV{POPFILE_ROOT} || './';
  my $userroot = $ENV{POPFILE_USER} || './';
  $root =~ s/[\/\\]$//;
  $userroot =~ s/[\/\\]$//;
  $root .= '/';
  $userroot .= '/';
 
  my %config;
 
  if ( open CONFIG, '<' . $root .'popfile.cfg'  ) {
        while ( <CONFIG> ) {
            s/(\015|\012)//g;
            if ( /(\S+) (.+)/ ) {
                $config{$1}=$2;
            }
        }
        close CONFIG;
  } else {
     die "Unable to get POPFile's configuration from ${root}popfile.cfg : $!";
  }
 
 
  #
  #  Open the SQL database
  #
 
  my $dbname = $userroot . $config{bayes_database};
  my $dbconnect = $config{bayes_dbconnect};
 
  $dbconnect =~ s/\$dbname/$dbname/g;
 
  my $dbh = DBI->connect($dbconnect,
                         $config{bayes_dbuser},
                         $config{bayes_dbauth}) ||
                         die "$0 requires version 0.21.0 or higher of POPFile\n";
 
 
  #
  #  Define some global work areas
  #
 
  my %aging = ();
  my $today = sprintf("%4d-%2d-%2d",Today());
 
  # Get the buckets for this installation
  my %buckets;
  my @buckets = get_buckets();
 
 
  my @days = qw ( 15 30 45 60 75 90 );
  my @flds = ();
  foreach my $day (@days) {
     push @flds, "under$day";
  }
  push @flds, ('over' . $days[-1], 'none');
 
 
  #
  # Go thru each bucket, grab the words and calc age
  #
 
  foreach my $bucket (@buckets) {
     foreach my $fld (@flds) {
         $aging{$bucket}{$fld}=0;
     }
     if ($buckets{$bucket}{wordcount} > 0) {
        my $sth=$dbh->prepare("select words.word as word,
                                      matrix.lastseen as lastseen
                               from matrix
                               left join words
                                  on words.id = matrix.wordid
                               where matrix.bucketid = ?;") || die $dbh->errstr;
        $sth->execute($buckets{$bucket}{id}) || die $dbh->errstr;
        while (my $row = $sth->fetchrow_hashref) {
            my @lastseen=split('-',$row->{lastseen}||'');
            if ($row->{lastseen} && check_date(@lastseen)) {
               my $wdays = Delta_Days(@lastseen,Today());
               my $i;
               foreach my $day (@days) {
                  $i++;
                  last if $wdays < $day;
                  next if $wdays > $day and $i < 6;
                  $i = 7 if $i == 6;
               }
               $aging{$bucket}{$flds[$i-1]}++;
            } else {
               $aging{$bucket}{none}++;
            }
        }
     }
  }
 
 
  #
  # Print the report
  #
 
  print "Corpus Aging Prepared " . Date_to_Text(Today()) . "\n";
  my $string = swrite(<<"END",$days[0],$days[0],$days[1]-1,$days[1],$days[2]-1,$days[2],$days[3]-1,$days[3],$days[4]-1,$days[4],$days[5]-1,$days[5] );
             Under                               Over    No
Bucket        @#   @#-@# @#-@# @#-@# @#-@# @#-@#  @#    Date
------------ ----- ----- ----- ----- ----- ----- ----- ------- 
END
  print $string;
 
  my @tots=();
  my @perc=();
  my $gt = 0;
 
  foreach my $bucket (sort keys %aging) {
     for (my $i=0;$i<scalar(@flds);$i++) {
         $tots[$i]+=$aging{$bucket}{$flds[$i]};
         $gt+=$aging{$bucket}{$flds[$i]};
     }
         $string = swrite(<<"END",$bucket,$aging{$bucket}{$flds[0]},$aging{$bucket}{$flds[1]},$aging{$bucket}{$flds[2]},$aging{$bucket}{$flds[3]},$aging{$bucket}{$flds[4]},$aging{$bucket}{$flds[5]},$aging{$bucket}{$flds[6]},$aging{$bucket}{$flds[7]});
@<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@#######
END
         print $string;
  }
 
 
  for (my $i=0;$i<scalar(@flds);$i++) {
     if ($gt) {
         $perc[$i]=($tots[$i] / $gt) * 100;
     } else {
         $perc[$i]=0;
     }
  }
 
  $string = swrite(<<"END","Totals",@tots,@perc );
------------ ----- ----- ----- ----- ----- ----- ----- -------
@<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@#######
            @###.#@###.#@###.#@###.#@###.#@###.#@###.#@#####.#
END
  print $string;
 
  # All Done
 
  exit(0);
 
 
sub get_buckets {
 
    my $sth=$dbh->prepare('select name, id, pseudo from buckets
             where buckets.userid = ?;') || die $dbh->errstr;
    $sth->execute($user) || die $dbh->errstr;
    while (my $row = $sth->fetchrow_hashref) {
         $buckets{$row->{name}}{id}=$row->{id};
         $buckets{$row->{name}}{psuedo}=$row->{psuedo};
         #
         # get the wordcount for the bucket
         #
         my $sth2=$dbh->prepare('select sum(matrix.times) as btot
             from matrix where matrix.bucketid = ?;') || die $dbh->errstr;
         $sth2->execute($row->{id}) || die $dbh->errstr;
         while (my $row2 = $sth2->fetchrow_hashref) {
             $buckets{$row->{name}}{wordcount}=$row2->{btot};
         }
         #
         # get the color of the bucket
         #
         $sth2=$dbh->prepare("select bucket_params.val as color
             from bucket_params
             left join bucket_template on bucket_params.btid
                = bucket_template.id
             where bucket_params.bucketid = ?
             and bucket_template.name = 'color' ;") || die $dbh->errstr;
         $sth2->execute($row->{id}) || die $dbh->errstr;
         while (my $row2 = $sth2->fetchrow_hashref) {
             $buckets{$row->{name}}{color}=$row2->{color};
         }
    }
    return keys %buckets;
}
 
 
sub swrite {
  die "usage: swrite PICTURE ARGS" unless @_;
  my $format = shift;
  $^A = "";
  formline($format,@_);
  return $^A;
}
 
corpusage.txt · Last modified: 2007/03/02 13:45 (external edit)

Should you find anything in the documentation that is incomplete, unclear, outdated or just plain wrong, please let us know and leave a note in the Documentation Forum.

Recent changes RSS feed Donate Driven by DokuWiki
The content of this wiki is protected by the GNU Fee Documentation License