This is an old revision of the document!


Example Script - CorpusAge

  -!/usr/bin/perl -w
  - ---------------------------------------------------------------------------------------------
  -
  - corpusage.pl --- 'age' the words in matrix based on 'lastseen' date
  -
  - This program authored by Scott W Leighton ([email protected])
  - for use with POPFile and it's components, which are Copyrighted
  - by John Graham-Cumming. The author hereby contributes this code
  - to the POPFile project under the terms of the POPFile License
  - Agreement.    /Scott W Leighton/  March 5, 2004
  -
  - Revised Mar 18, 2004 - Made Age Columns more flexible
  -
  - POPFile
  - Copyright (c) 2001-2004 John Graham-Cumming
  -
  - ---------------------------------------------------------------------------------------------

use strict;
use warnings;
use DBI;
use Date::Calc qw ( Delta_Days Today check_date Date_to_Text );

<code> #
 # Main
 #

 my $user = $ARGV[0] || '1';
 my $root = $ENV{POPFILE_ROOT} || './';
 my $userroot = $ENV{POPFILE_USER} || './';
 $root =~ s/[\/\\]$//;
 $userroot =~ s/[\/\\]$//;
 $root .= '/';
 $userroot .= '/';

 my %config;

 if ( open CONFIG, '<' . $root .'popfile.cfg'  ) {
       while ( <CONFIG> ) {
           s/(\015|\012)//g;
           if ( /(\S+) (.+)/ ) {
               $config{$1}=$2;
           }
       }
       close CONFIG;
 } else {
    die "Unable to get POPFile's configuration from ${root}popfile.cfg : $!";
 }


 #
 #  Open the SQL database
 #

 my $dbname = $userroot . $config{bayes_database};
 my $dbconnect = $config{bayes_dbconnect};

 $dbconnect =~ s/\$dbname/$dbname/g;

 my $dbh = DBI->connect($dbconnect,
                        $config{bayes_dbuser},
                        $config{bayes_dbauth}) ||
                        die "$0 requires version 0.21.0 or higher of POPFile\n";


 #
 #  Define some global work areas
 #

 my %aging = ();
 my $today = sprintf("%4d-%2d-%2d",Today());

 # Get the buckets for this installation
 my %buckets;
 my @buckets = get_buckets();


 my @days = qw ( 15 30 45 60 75 90 );
 my @flds = ();
 foreach my $day (@days) {
    push @flds, "under$day";
 }
 push @flds, ('over' . $days[-1], 'none');


 #
 # Go thru each bucket, grab the words and calc age
 #

 foreach my $bucket (@buckets) {
    foreach my $fld (@flds) {
        $aging{$bucket}{$fld}=0;
    }
    if ($buckets{$bucket}{wordcount} > 0) {
       my $sth=$dbh->prepare("select words.word as word,
                                     matrix.lastseen as lastseen
                              from matrix
                              left join words
                                 on words.id = matrix.wordid
                              where matrix.bucketid = ?;") || die $dbh->errstr;
       $sth->execute($buckets{$bucket}{id}) || die $dbh->errstr;
       while (my $row = $sth->fetchrow_hashref) {
           my @lastseen=split('-',$row->{lastseen}||//);
           if ($row->{lastseen} && check_date(@lastseen)) {
              my $wdays = Delta_Days(@lastseen,Today());
              my $i;
              foreach my $day (@days) {
                 $i++;
                 last if $wdays < $day;
                 next if $wdays > $day and $i < 6;
                 $i = 7 if $i == 6;
              }
              $aging{$bucket}{$flds[$i-1]}++;
           } else {
              $aging{$bucket}{none}++;
           }
       }
    }
 }


 #
 # Print the report
 #
 
 print "Corpus Aging Prepared " . Date_to_Text(Today()) . "\n";
 my $string = swrite(<<"END",$days[0],$days[0],$days[1]-1,$days[1],$days[2]-1,$days[2],$days[3]-1,$days[3],$days[4]-1,$days[4],$days[5]-1,$days[5] );
            Under                               Over    No

Bucket @# @#-@# @#-@# @#-@# @#-@# @#-@# @# Date ———— —– —– —– —– —– —– —– ——- END

 print $string;

 my @tots=();
 my @perc=();
 my $gt = 0;

 foreach my $bucket (sort keys %aging) {
    for (my $i=0;$i<scalar(@flds);$i++) {
        $tots[$i]+=$aging{$bucket}{$flds[$i]};
        $gt+=$aging{$bucket}{$flds[$i]};
    }
        $string = swrite(<<"END",$bucket,$aging{$bucket}{$flds[0]},$aging{$bucket}{$flds[1]},$aging{$bucket}{$flds[2]},$aging{$bucket}{$flds[3]},$aging{$bucket}{$flds[4]},$aging{$bucket}{$flds[5]},$aging{$bucket}{$flds[6]},$aging{$bucket}{$flds[7]});

@«««««<@#####@#####@#####@#####@#####@#####@#####@####### END

        print $string;
 }


 for (my $i=0;$i<scalar(@flds);$i++) {
    if ($gt) {
        $perc[$i]=($tots[$i] / $gt) * 100;
    } else {
        $perc[$i]=0;
    }
 }

 $string = swrite(<<"END","Totals",@tots,@perc );

———— —– —– —– —– —– —– —– ——- @«««««<@#####@#####@#####@#####@#####@#####@#####@#######

           @###.#@###.#@###.#@###.#@###.#@###.#@###.#@#####.#

END

 print $string;

 # All Done

 exit(0);

sub get_buckets {

   my $sth=$dbh->prepare('select name, id, pseudo from buckets
            where buckets.userid = ?;') || die $dbh->errstr;
   $sth->execute($user) || die $dbh->errstr;
   while (my $row = $sth->fetchrow_hashref) {
        $buckets{$row->{name}}{id}=$row->{id};
        $buckets{$row->{name}}{psuedo}=$row->{psuedo};
        #
        # get the wordcount for the bucket
        #
        my $sth2=$dbh->prepare('select sum(matrix.times) as btot
            from matrix where matrix.bucketid = ?;') || die $dbh->errstr;
        $sth2->execute($row->{id}) || die $dbh->errstr;
        while (my $row2 = $sth2->fetchrow_hashref) {
            $buckets{$row->{name}}{wordcount}=$row2->{btot};
        }
        #
        # get the color of the bucket
        #
        $sth2=$dbh->prepare("select bucket_params.val as color
            from bucket_params
            left join bucket_template on bucket_params.btid
               = bucket_template.id
            where bucket_params.bucketid = ?
            and bucket_template.name = 'color' ;") || die $dbh->errstr;
        $sth2->execute($row->{id}) || die $dbh->errstr;
        while (my $row2 = $sth2->fetchrow_hashref) {
            $buckets{$row->{name}}{color}=$row2->{color};
        }
   }
   return keys %buckets;

}

sub swrite {

 die "usage: swrite PICTURE ARGS" unless @_;
 my $format = shift;
 $^A = "";
 formline($format,@_);
 return $^A;

}

</code>

 
corpusage.1169631172.txt.gz · Last modified: 2008/02/08 19:49 (external edit)

Should you find anything in the documentation that is incomplete, unclear, outdated or just plain wrong, please let us know and leave a note in the Documentation Forum.

Recent changes RSS feed Donate Driven by DokuWiki
The content of this wiki is protected by the GNU Fee Documentation License