This shows you the differences between two versions of the page.
Next revision | Previous revision | ||
corpusage [2007/01/24 09:32] – manni | corpusage [2007/03/02 13:45] (current) – external edit 127.0.0.1 | ||
---|---|---|---|
Line 1: | Line 1: | ||
===== Example Script - CorpusAge ===== | ===== Example Script - CorpusAge ===== | ||
- | < | + | < |
- | -!/ | + | #!/ |
- | | + | # --------------------------------------------------------------------------------------------- |
- | - | + | # |
- | | + | # corpusage.pl --- ' |
- | - | + | # |
- | | + | # This program authored by Scott W Leighton ([email protected]) |
- | | + | # for use with POPFile and it's components, which are Copyrighted |
- | | + | # by John Graham-Cumming. The author hereby contributes this code |
- | | + | # to the POPFile project under the terms of the POPFile License |
- | | + | # Agreement. |
- | - | + | # |
- | | + | # Revised Mar 18, 2004 - Made Age Columns more flexible |
- | - | + | # |
- | | + | # POPFile |
- | | + | # Copyright (c) 2001-2004 John Graham-Cumming |
- | - | + | # |
- | | + | # --------------------------------------------------------------------------------------------- |
use strict; | use strict; | ||
Line 24: | Line 24: | ||
use Date::Calc qw ( Delta_Days Today check_date Date_to_Text ); | use Date::Calc qw ( Delta_Days Today check_date Date_to_Text ); | ||
- | < | + | |
- | # Main | + | # Main |
- | # | + | # |
- | my $user = $ARGV[0] || ' | + | |
- | my $root = $ENV{POPFILE_ROOT} || ' | + | my $root = $ENV{POPFILE_ROOT} || ' |
- | my $userroot = $ENV{POPFILE_USER} || ' | + | my $userroot = $ENV{POPFILE_USER} || ' |
- | $root =~ s/ | + | $root =~ s/ |
- | | + | $userroot =~ s/ |
- | $root .= '/'; | + | $root .= '/'; |
- | | + | $userroot .= '/'; |
- | my %config; | + | |
- | if ( open CONFIG, '<' | + | |
- | | + | while ( < |
- | | + | s/ |
- | | + | if ( /(\S+) (.+)/ ) { |
- | | + | $config{$1}=$2; |
- | | + | } |
- | | + | } |
- | | + | close CONFIG; |
- | } else { | + | } else { |
- | die " | + | |
- | } | + | } |
- | # | + | |
- | # | + | # Open the SQL database |
- | # | + | # |
- | my $dbname = $userroot . $config{bayes_database}; | + | |
- | my $dbconnect = $config{bayes_dbconnect}; | + | my $dbconnect = $config{bayes_dbconnect}; |
- | $dbconnect =~ s/ | + | |
- | my $dbh = DBI-> | + | |
- | $config{bayes_dbuser}, | + | |
- | $config{bayes_dbauth}) || | + | |
- | die "$0 requires version 0.21.0 or higher of POPFile\n"; | + | |
- | # | + | |
- | # | + | # Define some global work areas |
- | # | + | # |
- | my %aging = (); | + | |
- | my $today = sprintf(" | + | my $today = sprintf(" |
- | # Get the buckets for this installation | + | |
- | my %buckets; | + | my %buckets; |
- | my @buckets = get_buckets(); | + | my @buckets = get_buckets(); |
- | my @days = qw ( 15 30 45 60 75 90 ); | + | |
- | my @flds = (); | + | my @flds = (); |
- | | + | foreach my $day (@days) { |
- | push @flds, " | + | |
- | } | + | } |
- | push @flds, (' | + | push @flds, (' |
- | # | + | |
- | # Go thru each bucket, grab the words and calc age | + | # Go thru each bucket, grab the words and calc age |
- | # | + | # |
- | foreach my $bucket (@buckets) { | + | |
- | foreach my $fld (@flds) { | + | |
- | $aging{$bucket}{$fld}=0; | + | |
- | } | + | |
- | if ($buckets{$bucket}{wordcount} > 0) { | + | |
- | | + | my $sth=$dbh-> |
- | | + | matrix.lastseen as lastseen |
- | from matrix | + | |
- | left join words | + | |
- | | + | on words.id = matrix.wordid |
- | where matrix.bucketid = ?;") || die $dbh-> | + | |
- | | + | $sth-> |
- | | + | while (my $row = $sth-> |
- | | + | my @lastseen=split(' |
- | | + | if ($row-> |
- | my $wdays = Delta_Days(@lastseen, | + | |
- | my $i; | + | |
- | foreach my $day (@days) { | + | |
- | | + | $i++; |
- | | + | last if $wdays < $day; |
- | | + | next if $wdays > $day and $i < 6; |
- | | + | $i = 7 if $i == 6; |
- | } | + | |
- | $aging{$bucket}{$flds[$i-1]}++; | + | |
- | | + | } else { |
- | $aging{$bucket}{none}++; | + | |
- | | + | } |
- | | + | } |
- | } | + | |
- | } | + | } |
- | # | + | |
- | # Print the report | + | # Print the report |
- | # | + | # |
- | + | ||
- | print " | + | print " |
- | my $string = swrite(<<" | + | my $string = swrite(<<" |
- | Under | + | |
Bucket | Bucket | ||
------------ ----- ----- ----- ----- ----- ----- ----- ------- | ------------ ----- ----- ----- ----- ----- ----- ----- ------- | ||
END | END | ||
- | < | + | |
- | my @tots=(); | + | |
- | my @perc=(); | + | my @perc=(); |
- | my $gt = 0; | + | my $gt = 0; |
- | foreach my $bucket (sort keys %aging) { | + | |
- | for (my $i=0; | + | |
- | $tots[$i]+=$aging{$bucket}{$flds[$i]}; | + | |
- | $gt+=$aging{$bucket}{$flds[$i]}; | + | |
- | } | + | |
- | $string = swrite(<<" | + | |
@<<<<<<<<<<< | @<<<<<<<<<<< | ||
END | END | ||
- | < | + | print $string; |
- | } | + | } |
- | for (my $i=0; | + | |
- | if ($gt) { | + | |
- | $perc[$i]=($tots[$i] / $gt) * 100; | + | |
- | } else { | + | |
- | $perc[$i]=0; | + | |
- | } | + | |
- | } | + | } |
- | $string = swrite(<<" | + | |
------------ ----- ----- ----- ----- ----- ----- ----- ------- | ------------ ----- ----- ----- ----- ----- ----- ----- ------- | ||
@<<<<<<<<<<< | @<<<<<<<<<<< | ||
- | < | + | |
END | END | ||
- | < | + | |
- | # All Done | + | |
- | exit(0);</ | + | |
sub get_buckets { | sub get_buckets { | ||
- | < | + | |
- | where buckets.userid = ?;') || die $dbh-> | + | |
- | | + | $sth-> |
- | | + | while (my $row = $sth-> |
- | $buckets{$row-> | + | |
- | $buckets{$row-> | + | |
- | # | + | |
- | # get the wordcount for the bucket | + | |
- | # | + | |
- | my $sth2=$dbh-> | + | |
- | from matrix where matrix.bucketid = ?;') || die $dbh-> | + | |
- | $sth2-> | + | |
- | while (my $row2 = $sth2-> | + | |
- | $buckets{$row-> | + | |
- | } | + | |
- | # | + | |
- | # get the color of the bucket | + | |
- | # | + | |
- | $sth2=$dbh-> | + | |
- | from bucket_params | + | |
- | left join bucket_template on bucket_params.btid | + | |
- | | + | = bucket_template.id |
- | where bucket_params.bucketid = ? | + | |
- | and bucket_template.name = ' | + | |
- | $sth2-> | + | |
- | while (my $row2 = $sth2-> | + | |
- | $buckets{$row-> | + | |
- | } | + | |
- | | + | } |
- | | + | return keys %buckets; |
} | } | ||
sub swrite { | sub swrite { | ||
- | < | + | |
- | my $format = shift; | + | my $format = shift; |
- | $^A = ""; | + | $^A = ""; |
- | | + | formline($format, |
- | | + | return $^A; |
} | } | ||
- | |||
- | |||
</ | </ | ||
Should you find anything in the documentation that is incomplete, unclear, outdated or just plain wrong, please let us know and leave a note in the Documentation Forum.