Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
corpusage [2007/01/24 09:32] mannicorpusage [2007/03/02 13:45] (current) – external edit 127.0.0.1
Line 1: Line 1:
 ===== Example Script - CorpusAge ===== ===== Example Script - CorpusAge =====
-<code> +<code perl
-  -!/usr/bin/perl -w +#!/usr/bin/perl -w 
-  --------------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------------- 
-  - +# 
-  corpusage.pl --- 'age' the words in matrix based on 'lastseen' date +corpusage.pl --- 'age' the words in matrix based on 'lastseen' date 
-  - +# 
-  This program authored by Scott W Leighton ([email protected]+This program authored by Scott W Leighton ([email protected]
-  for use with POPFile and it's components, which are Copyrighted +for use with POPFile and it's components, which are Copyrighted 
-  by John Graham-Cumming. The author hereby contributes this code +by John Graham-Cumming. The author hereby contributes this code 
-  to the POPFile project under the terms of the POPFile License +to the POPFile project under the terms of the POPFile License 
-  Agreement.    /Scott W Leighton/  March 5, 2004 +Agreement.    /Scott W Leighton/  March 5, 2004 
-  - +# 
-  Revised Mar 18, 2004 - Made Age Columns more flexible +Revised Mar 18, 2004 - Made Age Columns more flexible 
-  - +# 
-  POPFile +POPFile 
-  Copyright (c) 2001-2004 John Graham-Cumming +Copyright (c) 2001-2004 John Graham-Cumming 
-  - +# 
-  ---------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------
  
 use strict; use strict;
Line 24: Line 24:
 use Date::Calc qw ( Delta_Days Today check_date Date_to_Text ); use Date::Calc qw ( Delta_Days Today check_date Date_to_Text );
  
-<code> +  
- # Main +  # Main 
- #+  #
  
- my $user = $ARGV[0] || '1'; +  my $user = $ARGV[0] || '1'; 
- my $root = $ENV{POPFILE_ROOT} || './'; +  my $root = $ENV{POPFILE_ROOT} || './'; 
- my $userroot = $ENV{POPFILE_USER} || './'; +  my $userroot = $ENV{POPFILE_USER} || './'; 
- $root =~ s/[\/\\]$//; +  $root =~ s/[\/\\]$//; 
- $userroot =~ s/[\/\\]$//; +  $userroot =~ s/[\/\\]$//; 
- $root .= '/'; +  $root .= '/'; 
- $userroot .= '/';+  $userroot .= '/';
  
- my %config;+  my %config;
  
- if ( open CONFIG, '<' . $root .'popfile.cfg'  ) { +  if ( open CONFIG, '<' . $root .'popfile.cfg'  ) { 
-       while ( <CONFIG> ) { +        while ( <CONFIG> ) { 
-           s/(\015|\012)//g; +            s/(\015|\012)//g; 
-           if ( /(\S+) (.+)/ ) { +            if ( /(\S+) (.+)/ ) { 
-               $config{$1}=$2; +                $config{$1}=$2; 
-           +            
-       +        
-       close CONFIG; +        close CONFIG; 
- } else { +  } else { 
-    die "Unable to get POPFile's configuration from ${root}popfile.cfg : $!"; +     die "Unable to get POPFile's configuration from ${root}popfile.cfg : $!"; 
- }+  }
  
  
- +  
- #  Open the SQL database +  #  Open the SQL database 
- #+  #
  
- my $dbname = $userroot . $config{bayes_database}; +  my $dbname = $userroot . $config{bayes_database}; 
- my $dbconnect = $config{bayes_dbconnect};+  my $dbconnect = $config{bayes_dbconnect};
  
- $dbconnect =~ s/\$dbname/$dbname/g;+  $dbconnect =~ s/\$dbname/$dbname/g;
  
- my $dbh = DBI->connect($dbconnect, +  my $dbh = DBI->connect($dbconnect, 
-                        $config{bayes_dbuser}, +                         $config{bayes_dbuser}, 
-                        $config{bayes_dbauth}) || +                         $config{bayes_dbauth}) || 
-                        die "$0 requires version 0.21.0 or higher of POPFile\n";+                         die "$0 requires version 0.21.0 or higher of POPFile\n";
  
  
- +  
- #  Define some global work areas +  #  Define some global work areas 
- #+  #
  
- my %aging = (); +  my %aging = (); 
- my $today = sprintf("%4d-%2d-%2d",Today());+  my $today = sprintf("%4d-%2d-%2d",Today());
  
- # Get the buckets for this installation +  # Get the buckets for this installation 
- my %buckets; +  my %buckets; 
- my @buckets = get_buckets();+  my @buckets = get_buckets();
  
  
- my @days = qw ( 15 30 45 60 75 90 ); +  my @days = qw ( 15 30 45 60 75 90 ); 
- my @flds = (); +  my @flds = (); 
- foreach my $day (@days) { +  foreach my $day (@days) { 
-    push @flds, "under$day"; +     push @flds, "under$day"; 
- } +  
- push @flds, ('over' . $days[-1], 'none');+  push @flds, ('over' . $days[-1], 'none');
  
  
- +  
- # Go thru each bucket, grab the words and calc age +  # Go thru each bucket, grab the words and calc age 
- #+  #
  
- foreach my $bucket (@buckets) { +  foreach my $bucket (@buckets) { 
-    foreach my $fld (@flds) { +     foreach my $fld (@flds) { 
-        $aging{$bucket}{$fld}=0; +         $aging{$bucket}{$fld}=0; 
-    +     
-    if ($buckets{$bucket}{wordcount} > 0) { +     if ($buckets{$bucket}{wordcount} > 0) { 
-       my $sth=$dbh->prepare("select words.word as word, +        my $sth=$dbh->prepare("select words.word as word, 
-                                     matrix.lastseen as lastseen +                                      matrix.lastseen as lastseen 
-                              from matrix +                               from matrix 
-                              left join words +                               left join words 
-                                 on words.id = matrix.wordid +                                  on words.id = matrix.wordid 
-                              where matrix.bucketid = ?;") || die $dbh->errstr; +                               where matrix.bucketid = ?;") || die $dbh->errstr; 
-       $sth->execute($buckets{$bucket}{id}) || die $dbh->errstr; +        $sth->execute($buckets{$bucket}{id}) || die $dbh->errstr; 
-       while (my $row = $sth->fetchrow_hashref) { +        while (my $row = $sth->fetchrow_hashref) { 
-           my @lastseen=split('-',$row->{lastseen}||//); +            my @lastseen=split('-',$row->{lastseen}||''); 
-           if ($row->{lastseen} && check_date(@lastseen)) { +            if ($row->{lastseen} && check_date(@lastseen)) { 
-              my $wdays = Delta_Days(@lastseen,Today()); +               my $wdays = Delta_Days(@lastseen,Today()); 
-              my $i; +               my $i; 
-              foreach my $day (@days) { +               foreach my $day (@days) { 
-                 $i++; +                  $i++; 
-                 last if $wdays < $day; +                  last if $wdays < $day; 
-                 next if $wdays > $day and $i < 6; +                  next if $wdays > $day and $i < 6; 
-                 $i = 7 if $i == 6; +                  $i = 7 if $i == 6; 
-              +               
-              $aging{$bucket}{$flds[$i-1]}++; +               $aging{$bucket}{$flds[$i-1]}++; 
-           } else { +            } else { 
-              $aging{$bucket}{none}++; +               $aging{$bucket}{none}++; 
-           +            
-       +        
-    +     
- }+  }
  
  
- +  
- # Print the report +  # Print the report 
- # +  
-  +   
- print "Corpus Aging Prepared " . Date_to_Text(Today()) . "\n"; +  print "Corpus Aging Prepared " . Date_to_Text(Today()) . "\n"; 
- my $string = swrite(<<"END",$days[0],$days[0],$days[1]-1,$days[1],$days[2]-1,$days[2],$days[3]-1,$days[3],$days[4]-1,$days[4],$days[5]-1,$days[5] ); +  my $string = swrite(<<"END",$days[0],$days[0],$days[1]-1,$days[1],$days[2]-1,$days[2],$days[3]-1,$days[3],$days[4]-1,$days[4],$days[5]-1,$days[5] ); 
-            Under                               Over    No</code>+             Under                               Over    No
 Bucket        @#   @#-@# @#-@# @#-@# @#-@# @#-@#  @#    Date Bucket        @#   @#-@# @#-@# @#-@# @#-@# @#-@#  @#    Date
 ------------ ----- ----- ----- ----- ----- ----- ----- -------  ------------ ----- ----- ----- ----- ----- ----- ----- ------- 
 END END
-<code> print $string;+  print $string;
  
- my @tots=(); +  my @tots=(); 
- my @perc=(); +  my @perc=(); 
- my $gt = 0;+  my $gt = 0;
  
- foreach my $bucket (sort keys %aging) { +  foreach my $bucket (sort keys %aging) { 
-    for (my $i=0;$i<scalar(@flds);$i++) { +     for (my $i=0;$i<scalar(@flds);$i++) { 
-        $tots[$i]+=$aging{$bucket}{$flds[$i]}; +         $tots[$i]+=$aging{$bucket}{$flds[$i]}; 
-        $gt+=$aging{$bucket}{$flds[$i]}; +         $gt+=$aging{$bucket}{$flds[$i]}; 
-    +     
-        $string = swrite(<<"END",$bucket,$aging{$bucket}{$flds[0]},$aging{$bucket}{$flds[1]},$aging{$bucket}{$flds[2]},$aging{$bucket}{$flds[3]},$aging{$bucket}{$flds[4]},$aging{$bucket}{$flds[5]},$aging{$bucket}{$flds[6]},$aging{$bucket}{$flds[7]});</code>+         $string = swrite(<<"END",$bucket,$aging{$bucket}{$flds[0]},$aging{$bucket}{$flds[1]},$aging{$bucket}{$flds[2]},$aging{$bucket}{$flds[3]},$aging{$bucket}{$flds[4]},$aging{$bucket}{$flds[5]},$aging{$bucket}{$flds[6]},$aging{$bucket}{$flds[7]});
 @<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@####### @<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@#######
 END END
-<code>        print $string; +         print $string; 
- }+  }
  
  
- for (my $i=0;$i<scalar(@flds);$i++) { +  for (my $i=0;$i<scalar(@flds);$i++) { 
-    if ($gt) { +     if ($gt) { 
-        $perc[$i]=($tots[$i] / $gt) * 100; +         $perc[$i]=($tots[$i] / $gt) * 100; 
-    } else { +     } else { 
-        $perc[$i]=0; +         $perc[$i]=0; 
-    +     
- }+  }
  
- $string = swrite(<<"END","Totals",@tots,@perc );</code>+  $string = swrite(<<"END","Totals",@tots,@perc );
 ------------ ----- ----- ----- ----- ----- ----- ----- ------- ------------ ----- ----- ----- ----- ----- ----- ----- -------
 @<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@####### @<<<<<<<<<<<@#####@#####@#####@#####@#####@#####@#####@#######
-<code>           @###.#@###.#@###.#@###.#@###.#@###.#@###.#@#####.#</code>+            @###.#@###.#@###.#@###.#@###.#@###.#@###.#@#####.#
 END END
-<code> print $string;+  print $string;
  
- # All Done+  # All Done
  
- exit(0);</code>+  exit(0);
  
  
 sub get_buckets { sub get_buckets {
  
-<code>   my $sth=$dbh->prepare('select name, id, pseudo from buckets +    my $sth=$dbh->prepare('select name, id, pseudo from buckets 
-            where buckets.userid = ?;') || die $dbh->errstr; +             where buckets.userid = ?;') || die $dbh->errstr; 
-   $sth->execute($user) || die $dbh->errstr; +    $sth->execute($user) || die $dbh->errstr; 
-   while (my $row = $sth->fetchrow_hashref) { +    while (my $row = $sth->fetchrow_hashref) { 
-        $buckets{$row->{name}}{id}=$row->{id}; +         $buckets{$row->{name}}{id}=$row->{id}; 
-        $buckets{$row->{name}}{psuedo}=$row->{psuedo}; +         $buckets{$row->{name}}{psuedo}=$row->{psuedo}; 
-        +         
-        # get the wordcount for the bucket +         # get the wordcount for the bucket 
-        +         
-        my $sth2=$dbh->prepare('select sum(matrix.times) as btot +         my $sth2=$dbh->prepare('select sum(matrix.times) as btot 
-            from matrix where matrix.bucketid = ?;') || die $dbh->errstr; +             from matrix where matrix.bucketid = ?;') || die $dbh->errstr; 
-        $sth2->execute($row->{id}) || die $dbh->errstr; +         $sth2->execute($row->{id}) || die $dbh->errstr; 
-        while (my $row2 = $sth2->fetchrow_hashref) { +         while (my $row2 = $sth2->fetchrow_hashref) { 
-            $buckets{$row->{name}}{wordcount}=$row2->{btot}; +             $buckets{$row->{name}}{wordcount}=$row2->{btot}; 
-        +         
-        +         
-        # get the color of the bucket +         # get the color of the bucket 
-        +         
-        $sth2=$dbh->prepare("select bucket_params.val as color +         $sth2=$dbh->prepare("select bucket_params.val as color 
-            from bucket_params +             from bucket_params 
-            left join bucket_template on bucket_params.btid +             left join bucket_template on bucket_params.btid 
-               = bucket_template.id +                = bucket_template.id 
-            where bucket_params.bucketid = ? +             where bucket_params.bucketid = ? 
-            and bucket_template.name = 'color' ;") || die $dbh->errstr; +             and bucket_template.name = 'color' ;") || die $dbh->errstr; 
-        $sth2->execute($row->{id}) || die $dbh->errstr; +         $sth2->execute($row->{id}) || die $dbh->errstr; 
-        while (my $row2 = $sth2->fetchrow_hashref) { +         while (my $row2 = $sth2->fetchrow_hashref) { 
-            $buckets{$row->{name}}{color}=$row2->{color}; +             $buckets{$row->{name}}{color}=$row2->{color}; 
-        +         
-   +    
-   return keys %buckets;</code>+    return keys %buckets;
 } }
  
  
 sub swrite { sub swrite {
-<code> die "usage: swrite PICTURE ARGS" unless @_; +  die "usage: swrite PICTURE ARGS" unless @_; 
- my $format = shift; +  my $format = shift; 
- $^A = ""; +  $^A = ""; 
- formline($format,@_); +  formline($format,@_); 
- return $^A;</code>+  return $^A;
 } }
- 
- 
 </code> </code>
  
 
corpusage.1169631172.txt.gz · Last modified: 2008/02/08 19:49 (external edit)
Old revisions

Should you find anything in the documentation that is incomplete, unclear, outdated or just plain wrong, please let us know and leave a note in the Documentation Forum.

Recent changes RSS feed Donate Driven by DokuWiki
The content of this wiki is protected by the GNU Fee Documentation License