Differences

This shows you the differences between two versions of the page.

corpusage [2007/01/24 10:32]
manni
corpusage [2007/03/02 13:45] (current)
Line 1: Line 1:
===== Example Script - CorpusAge ===== ===== Example Script - CorpusAge =====
-<code> +<code perl
-  -!/usr/bin/perl -w +#!/usr/bin/perl -w 
-  - --------------------------------------------------------------------------------------------- +# --------------------------------------------------------------------------------------------- 
-  - +# 
-  - corpusage.pl --- 'age' the words in matrix based on 'lastseen' date +# corpusage.pl --- 'age' the words in matrix based on 'lastseen' date 
-  - +# 
-  - This program authored by Scott W Leighton ([email protected]+# This program authored by Scott W Leighton ([email protected]
-  - for use with POPFile and it's components, which are Copyrighted +# for use with POPFile and it's components, which are Copyrighted 
-  - by John Graham-Cumming. The author hereby contributes this code +# by John Graham-Cumming. The author hereby contributes this code 
-  - to the POPFile project under the terms of the POPFile License +# to the POPFile project under the terms of the POPFile License 
-  - Agreement.    /Scott W Leighton/  March 5, 2004 +# Agreement.    /Scott W Leighton/  March 5, 2004 
-  - +# 
-  - Revised Mar 18, 2004 - Made Age Columns more flexible +# Revised Mar 18, 2004 - Made Age Columns more flexible 
-  - +# 
-  - POPFile +# POPFile 
-  - Copyright (c) 2001-2004 John Graham-Cumming +# Copyright (c) 2001-2004 John Graham-Cumming 
-  - +# 
-  - ---------------------------------------------------------------------------------------------+# ---------------------------------------------------------------------------------------------
use strict; use strict;
Line 24: Line 24:
use Date::Calc qw ( Delta_Days Today check_date Date_to_Text ); use Date::Calc qw ( Delta_Days Today check_date Date_to_Text );
-<code> +  
- # Main +  # Main 
- #+  #
- my $user = $ARGV[0] || '1'; +  my $user = $ARGV[0] || '1'; 
- my $root = $ENV{POPFILE_ROOT} || './'; +  my $root = $ENV{POPFILE_ROOT} || './'; 
- my $userroot = $ENV{POPFILE_USER} || './'; +  my $userroot = $ENV{POPFILE_USER} || './'; 
- $root =~ s/[\/\\]$//; +  $root =~ s/[\/\\]$//; 
- $userroot =~ s/[\/\\]$//; +  $userroot =~ s/[\/\\]$//; 
- $root .= '/'; +  $root .= '/'; 
- $userroot .= '/';+  $userroot .= '/';
- my %config;+  my %config;
- if ( open CONFIG, '<' . $root .'popfile.cfg'  ) { +  if ( open CONFIG, '<' . $root .'popfile.cfg'  ) { 
-       while ( <CONFIG> ) { +        while ( <CONFIG> ) { 
-           s/(\015|\012)//g; +            s/(\015|\012)//g; 
-           if ( /(\S+) (.+)/ ) { +            if ( /(\S+) (.+)/ ) { 
-               $config{$1}=$2; +                $config{$1}=$2; 
-           +           
-       +       
-       close CONFIG; +        close CONFIG; 
- } else { +  } else { 
-    die "Unable to get POPFile's configuration from ${root}popfile.cfg : $!"; +     die "Unable to get POPFile's configuration from ${root}popfile.cfg : $!"; 
- }+  }
- +  
- #  Open the SQL database +  #  Open the SQL database 
- #+  #
- my $dbname = $userroot . $config{bayes_database}; +  my $dbname = $userroot . $config{bayes_database}; 
- my $dbconnect = $config{bayes_dbconnect};+  my $dbconnect = $config{bayes_dbconnect};
- $dbconnect =~ s/\$dbname/$dbname/g;+  $dbconnect =~ s/\$dbname/$dbname/g;
- my $dbh = DBI->connect($dbconnect, +  my $dbh = DBI->connect($dbconnect, 
-                        $config{bayes_dbuser}, +                         $config{bayes_dbuser}, 
-                        $config{bayes_dbauth}) || +                         $config{bayes_dbauth}) || 
-                        die "$0 requires version 0.21.0 or higher of POPFile\n";+                         die "$0 requires version 0.21.0 or higher of POPFile\n";
- +  
- #  Define some global work areas +  #  Define some global work areas 
- #+  #
- my %aging = (); +  my %aging = (); 
- my $today = sprintf("%4d-%2d-%2d",Today());+  my $today = sprintf("%4d-%2d-%2d",Today());
- # Get the buckets for this installation +  # Get the buckets for this installation 
- my %buckets; +  my %buckets; 
- my @buckets = get_buckets();+  my @buckets = get_buckets();
- my @days = qw ( 15 30 45 60 75 90 ); +  my @days = qw ( 15 30 45 60 75 90 ); 
- my @flds = (); +  my @flds = (); 
- foreach my $day (@days) { +  foreach my $day (@days) { 
-    push @flds, "under$day"; +     push @flds, "under$day"; 
- } + 
- push @flds, ('over' . $days[-1], 'none');+  push @flds, ('over' . $days[-1], 'none');
- +  
- # Go thru each bucket, grab the words and calc age +  # Go thru each bucket, grab the words and calc age 
- #+  #
- foreach my $bucket (@buckets) { +  foreach my $bucket (@buckets) { 
-    foreach my $fld (@flds) { +     foreach my $fld (@flds) { 
-        $aging{$bucket}{$fld}=0; +         $aging{$bucket}{$fld}=0; 
-    +     
-    if ($buckets{$bucket}{wordcount} > 0) { +     if ($buckets{$bucket}{wordcount} > 0) { 
-       my $sth=$dbh->prepare("select words.word as word, +        my $sth=$dbh->prepare("select words.word as word, 
-                                     matrix.lastseen as lastseen +                                      matrix.lastseen as lastseen 
-                              from matrix +                               from matrix 
-                              left join words +                               left join words 
-                                 on words.id = matrix.wordid +                                  on words.id = matrix.wordid 
-                              where matrix.bucketid = ?;") || die $dbh->errstr; +                               where matrix.bucketid = ?;") || die $dbh->errstr; 
-       $sth->execute($buckets{$bucket}{id}) || die $dbh->errstr; +        $sth->execute($buckets{$bucket}{id}) || die $dbh->errstr; 
-       while (my $row = $sth->fetchrow_hashref) { +        while (my $row = $sth->fetchrow_hashref) { 
-           my @lastseen=split('-',$row->{lastseen}||//); +            my @lastseen=split('-',$row->{lastseen}||''); 
-           if ($row->{lastseen} && check_date(@lastseen)) { +            if ($row->{lastseen} && check_date(@lastseen)) { 
-              my $wdays = Delta_Days(@lastseen,Today()); +               my $wdays = Delta_Days(@lastseen,Today()); 
-              my $i; +               my $i; 
-              foreach my $day (@days) { +               foreach my $day (@days) { 
-                 $i++; +                  $i++; 
-                 last if $wdays < $day; +                  last if $wdays < $day; 
-                 next if $wdays > $day and $i < 6; +                  next if $wdays > $day and $i < 6; 
-                 $i = 7 if $i == 6; +                  $i = 7 if $i == 6; 
-              +               
-              $aging{$bucket}{$flds[$i-1]}++; +               $aging{$bucket}{$flds[$i-1]}++; 
-           } else { +            } else { 
-              $aging{$bucket}{none}++; +               $aging{$bucket}{none}++; 
-           +           
-       +       
-    +     
- }+  }
- +  
- # Print the report +  # Print the report 
- # + 
-  +   
- print "Corpus Aging Prepared " . Date_to_Text(Today()) . "\n"; +  print "Corpus Aging Prepared " . Date_to_Text(Today()) . "\n"; 
- my $string = swrite(<<"END",$days[0],$days[0],$days[1]-1,$days[1],$days[2]-1,$days[2],$days[3]-1,$days[3],$days[4]-1,$days[4],$days[5]-1,$days[5] ); +  my $string = swrite(<<"END",$days[0],$days[0],$days[1]-1,$days[1],$days[2]-1,$days[2],$days[3]-1,$days[3],$days[4]-1,$days[4],$days[5]-1,$days[5] ); 
-            Under                               Over    No</code>+             Under                               Over    No
Bucket        @#   @#[email protected]# @#[email protected]# @#[email protected]# @#[email protected]# @#[email protected]#  @#    Date Bucket        @#   @#[email protected]# @#[email protected]# @#[email protected]# @#[email protected]# @#[email protected]#  @#    Date
------------ ----- ----- ----- ----- ----- ----- ----- ------- ------------ ----- ----- ----- ----- ----- ----- ----- -------
END END
-<code> print $string;+  print $string;
- my @tots=(); +  my @tots=(); 
- my @perc=(); +  my @perc=(); 
- my $gt = 0;+  my $gt = 0;
- foreach my $bucket (sort keys %aging) { +  foreach my $bucket (sort keys %aging) { 
-    for (my $i=0;$i<scalar(@flds);$i++) { +     for (my $i=0;$i<scalar(@flds);$i++) { 
-        $tots[$i]+=$aging{$bucket}{$flds[$i]}; +         $tots[$i]+=$aging{$bucket}{$flds[$i]}; 
-        $gt+=$aging{$bucket}{$flds[$i]}; +         $gt+=$aging{$bucket}{$flds[$i]}; 
-    +     
-        $string = swrite(<<"END",$bucket,$aging{$bucket}{$flds[0]},$aging{$bucket}{$flds[1]},$aging{$bucket}{$flds[2]},$aging{$bucket}{$flds[3]},$aging{$bucket}{$flds[4]},$aging{$bucket}{$flds[5]},$aging{$bucket}{$flds[6]},$aging{$bucket}{$flds[7]});</code>;+         $string = swrite(<<"END",$bucket,$aging{$bucket}{$flds[0]},$aging{$bucket}{$flds[1]},$aging{$bucket}{$flds[2]},$aging{$bucket}{$flds[3]},$aging{$bucket}{$flds[4]},$aging{$bucket}{$flds[5]},$aging{$bucket}{$flds[6]},$aging{$bucket}{$flds[7]});
@<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@####### @<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@#######
END END
-<code>        print $string; +         print $string; 
- }+  }
- for (my $i=0;$i<scalar(@flds);$i++) { +  for (my $i=0;$i<scalar(@flds);$i++) { 
-    if ($gt) { +     if ($gt) { 
-        $perc[$i]=($tots[$i] / $gt) * 100; +         $perc[$i]=($tots[$i] / $gt) * 100; 
-    } else { +     } else { 
-        $perc[$i]=0; +         $perc[$i]=0; 
-    +     
- }+  }
- $string = swrite(<<"END","Totals",@tots,@perc );</code>;+  $string = swrite(<<"END","Totals",@tots,@perc );
------------ ----- ----- ----- ----- ----- ----- ----- ------- ------------ ----- ----- ----- ----- ----- ----- ----- -------
@<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@####### @<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@#######
-<code>           @###.#@###.#@###.#@###.#@###.#@###.#@###.#@#####.#</code>+            @###.#@###.#@###.#@###.#@###.#@###.#@###.#@#####.#
END END
-<code> print $string;+  print $string;
- # All Done+  # All Done
- exit(0);</code>;+  exit(0);
sub get_buckets { sub get_buckets {
-<code>   my $sth=$dbh->prepare('select name, id, pseudo from buckets +    my $sth=$dbh->prepare('select name, id, pseudo from buckets 
-            where buckets.userid = ?;') || die $dbh->errstr; +             where buckets.userid = ?;') || die $dbh->errstr; 
-   $sth->execute($user) || die $dbh->errstr; +    $sth->execute($user) || die $dbh->errstr; 
-   while (my $row = $sth->fetchrow_hashref) { +    while (my $row = $sth->fetchrow_hashref) { 
-        $buckets{$row->{name}}{id}=$row->{id}; +         $buckets{$row->{name}}{id}=$row->{id}; 
-        $buckets{$row->{name}}{psuedo}=$row->{psuedo}; +         $buckets{$row->{name}}{psuedo}=$row->{psuedo}; 
-        +         
-        # get the wordcount for the bucket +         # get the wordcount for the bucket 
-        +         
-        my $sth2=$dbh->prepare('select sum(matrix.times) as btot +         my $sth2=$dbh->prepare('select sum(matrix.times) as btot 
-            from matrix where matrix.bucketid = ?;') || die $dbh->errstr; +             from matrix where matrix.bucketid = ?;') || die $dbh->errstr; 
-        $sth2->execute($row->{id}) || die $dbh->errstr; +         $sth2->execute($row->{id}) || die $dbh->errstr; 
-        while (my $row2 = $sth2->fetchrow_hashref) { +         while (my $row2 = $sth2->fetchrow_hashref) { 
-            $buckets{$row->{name}}{wordcount}=$row2->{btot}; +             $buckets{$row->{name}}{wordcount}=$row2->{btot}; 
-        +         
-        +         
-        # get the color of the bucket +         # get the color of the bucket 
-        +         
-        $sth2=$dbh->prepare("select bucket_params.val as color +         $sth2=$dbh->prepare("select bucket_params.val as color 
-            from bucket_params +             from bucket_params 
-            left join bucket_template on bucket_params.btid +             left join bucket_template on bucket_params.btid 
-               = bucket_template.id +                = bucket_template.id 
-            where bucket_params.bucketid = ? +             where bucket_params.bucketid = ? 
-            and bucket_template.name = 'color' ;") || die $dbh->errstr; +             and bucket_template.name = 'color' ;") || die $dbh->errstr; 
-        $sth2->execute($row->{id}) || die $dbh->errstr; +         $sth2->execute($row->{id}) || die $dbh->errstr; 
-        while (my $row2 = $sth2->fetchrow_hashref) { +         while (my $row2 = $sth2->fetchrow_hashref) { 
-            $buckets{$row->{name}}{color}=$row2->{color}; +             $buckets{$row->{name}}{color}=$row2->{color}; 
-        +         
-   +   
-   return keys %buckets;</code>;+    return keys %buckets;
} }
sub swrite { sub swrite {
-<code> die "usage: swrite PICTURE ARGS" unless @_; +  die "usage: swrite PICTURE ARGS" unless @_; 
- my $format = shift; +  my $format = shift; 
- $^A = ""; +  $^A = ""; 
- formline($format,@_); +  formline($format,@_); 
- return $^A;</code>;+  return $^A;
} }
- 
- 
</code> </code>
 
corpusage.1169631172.txt.gz · Last modified: 2008/02/08 19:49 (external edit)

Should you find anything in the documentation that is incomplete, unclear, outdated or just plain wrong, please let us know and leave a note in the Documentation Forum.

Recent changes RSS feed Donate Driven by DokuWiki
The content of this wiki is protected by the GNU Fee Documentation License