# Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/ # # Copyright (C) 1999-2007 Peter Thoeny, peter@thoeny.org # and TWiki Contributors. All Rights Reserved. TWiki Contributors # are listed in the AUTHORS file in the root of this distribution. # NOTE: Please extend that file, not this notice. # # Additional copyrights apply to some or all of the code in this # file as follows: # Copyright (C) 2002 Richard Donkin, rdonkin@bigfoot.com # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. For # more details read LICENSE in the root of this distribution. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # As per the GPL, removal of this notice is prohibited. =begin twiki ---+ package TWiki::UI::Statistics Statistics extraction and presentation =cut package TWiki::UI::Statistics; use strict; use Assert; use File::Copy qw(copy); use IO::File; use Error qw( :try ); my $debug = 0; BEGIN { # Do a dynamic 'use locale' for this module if( $TWiki::cfg{UseLocale} ) { require locale; import locale(); } } =pod ---++ StaticMethod statistics( $session ) =statistics= command handler. This method is designed to be invoked via the =TWiki::UI::run= method. Generate statistics topic. If a web is specified in the session object, generate WebStatistics topic update for that web. Otherwise do it for all webs =cut sub statistics { my $session = shift; my $webName = $session->{webName}; my $tmp = ''; # web to redirect to after finishing my $destWeb = $TWiki::cfg{UsersWebName}; my $logDate = $session->{cgiQuery}->param( 'logdate' ) || ''; $logDate =~ s/[^0-9]//g; # remove all non numerals $debug = $session->{cgiQuery}->param( 'debug' ); unless( $session->inContext( 'command_line' )) { # running from CGI $session->writePageHeader(); print CGI::start_html(-title=>'TWiki: Create Usage Statistics'); } # Initial messages _printMsg( $session, 'TWiki: Create Usage Statistics' ); _printMsg( $session, '!Do not interrupt this script!' ); _printMsg( $session, '(Please wait until page download has finished)' ); unless( $logDate ) { $logDate = TWiki::Time::formatTime( time(), '$year$mo', 'servertime' ); } my $logMonth; my $logYear; if ( $logDate =~ /^(\d\d\d\d)(\d\d)$/ ) { $logYear = $1; $logMonth = $TWiki::Time::ISOMONTH[ ( $2 % 12 ) - 1 ]; } else { _printMsg( $session, "!Error in date $logDate - must be YYYYMM" ); return; } my $logMonthYear = "$logMonth $logYear"; _printMsg( $session, "* Statistics for $logMonthYear" ); my $logFile = $TWiki::cfg{LogFileName}; $logFile =~ s/%DATE%/$logDate/g; unless( -e $logFile ) { _printMsg( $session, "!Log file $logFile does not exist; aborting" ); return; } # Copy the log file to temp file, since analysis could take some time # FIXME move the temp dir stuff to TWiki.cfg my $tmpDir; if ( $TWiki::cfg{OS} eq 'UNIX' ) { $tmpDir = $ENV{'TEMP'} || "/tmp"; } elsif ( $TWiki::cfg{OS} eq 'WINDOWS' ) { $tmpDir = $ENV{'TEMP'} || "c:/"; } else { # FIXME handle other OSs properly - assume Unix for now. $tmpDir = "/tmp"; } my $randNo = int ( rand 1000); # For mod_perl with threading... my $tmpFilename = TWiki::Sandbox::untaintUnchecked( "$tmpDir/twiki-stats.$$.$randNo" ); File::Copy::copy ($logFile, $tmpFilename) or throw Error::Simple( 'Cannot backup log file: '.$! ); my $TMPFILE = new IO::File; open $TMPFILE, $tmpFilename or throw Error::Simple( 'Cannot open backup file: '.$! ); # Do a single data collection pass on the temporary copy of logfile, # then process each web once. my ($viewRef, $contribRef, $statViewsRef, $statSavesRef, $statUploadsRef) = _collectLogData( $session, $TMPFILE, $logMonthYear ); my @weblist; my $webSet = TWiki::Sandbox::untaintUnchecked($session->{cgiQuery}->param( 'webs' )) || $session->{requestedWebName}; if( $webSet) { # do specific webs push( @weblist, split( /,\s*/, $webSet )); } else { # otherwise do all user webs: @weblist = $session->{store}->getListOfWebs( 'user' ); } my $firstTime = 1; foreach my $web ( @weblist ) { try { $destWeb = _processWeb( $session, $web, $logMonthYear, $viewRef, $contribRef, $statViewsRef, $statSavesRef, $statUploadsRef, $firstTime ); } catch TWiki::AccessControlException with { _printMsg( $session, ' - ERROR: no permission to CHANGE statistics topic in '.$web); } $firstTime = 0; } close $TMPFILE; # Shouldn't be necessary with 'my' unlink $tmpFilename;# FIXME: works on Windows??? Unlink before # usage to ensure deleted on crash? if( !$session->inContext( 'command_line' ) ) { $tmp = $TWiki::cfg{Stats}{TopicName}; my $url = $session->getScriptUrl( 0, 'view', $destWeb, $tmp ); _printMsg( $session, '* Go back to ' . CGI::a( { href => $url, rel => 'nofollow' }, $tmp) ); } _printMsg( $session, 'End creating usage statistics' ); print CGI::end_html() unless( $session->inContext( 'command_line' ) ); } # Debug only # Print all entries in a view or contrib hash, sorted by web and item name sub _debugPrintHash { my ($statsRef) = @_; # print "Main.WebHome views = " . ${$statsRef}{'Main'}{'WebHome'}."\n"; # print "Main web, TWikiGuest contribs = " . ${$statsRef}{'Main'}{'Main.TWikiGuest'}."\n"; foreach my $web ( sort keys %$statsRef) { my $count = 0; print $web,' web:',"\n"; # Get reference to the sub-hash for this web my $webhashref = ${$statsRef}{$web}; # print 'webhashref is ' . ref ($webhashref) ."\n"; # Items can be topics (for view hash) or users (for contrib hash) foreach my $item ( sort keys %$webhashref ) { print " $item = ",( ${$webhashref}{$item} || 0 ),"\n"; $count += ${$webhashref}{$item}; } print " WEB TOTAL = $count\n"; } } # Process the whole log file and collect information in hash tables. # Must build stats for all webs, to handle case of renames into web # requested for a single-web statistics run. # # Main hash tables are divided by web: # # $view{$web}{$TopicName} == number of views, by topic # $contrib{$web}{"Main.".$WikiName} == number of saves/uploads, by user sub _collectLogData { my( $session, $TMPFILE, $theLogMonthYear ) = @_; # Log file format: # | date | user | op | web.topic | notes | ip | # date = e.g. 03 Feb 2000 - 02:43 # user = e.g. Main.PeterThoeny # user = e.g. PeterThoeny # user = e.g. peter (intranet login) # web.topic = e.g MyWeb.MyTopic # notes = e.g. minor # notes = e.g. not on thursdays # ip = e.g. 127.0.0.5 my %view; # Hash of hashes, counts topic views by (web, topic) my %contrib; # Hash of hashes, counts uploads/saves by (web, user) # Hashes for each type of statistic, one hash entry per web my %statViews; my %statSaves; my %statUploads; binmode $TMPFILE; while ( my $line = <$TMPFILE> ) { my @fields = split( /\s*\|\s*/, $line ); my( $date, $logFileUserName ); while( !$date && scalar( @fields )) { $date = shift @fields; } while( !$logFileUserName && scalar( @fields )) { $logFileUserName = shift @fields; } my $userObj = $session->{users}->findUser($logFileUserName); my( $opName, $webTopic, $notes, $ip ) = @fields; # ignore minor changes - not statistically helpful next if( $notes && $notes =~ /(minor|dontNotify)/ ); # ignore searches for now - idea: make a "top search phrase list" next if( $opName && $opName =~ /(search)/ ); # ignore "renamed web" log lines next if( $opName && $opName =~ /(renameweb)/ ); # ignore "change password" log lines next if( $opName && $opName =~ /(changepasswd)/ ); # .+ is used because topics name can contain stuff like !, (, ), =, -, _ and they should have stats anyway if( $opName && $webTopic =~ /(^$TWiki::regex{webNameRegex})\.($TWiki::regex{wikiWordRegex}$|$TWiki::regex{abbrevRegex}|.+)/ ) { my $webName = $1; my $topicName = $2; if( $opName eq 'view' ) { next if ($topicName eq 'WebRss'); next if ($topicName eq 'WebAtom'); $statViews{$webName}++; unless( $notes && $notes =~ /\(not exist\)/ ) { $view{$webName}{$topicName}++; } } elsif( $opName eq 'save' ) { $statSaves{$webName}++; $contrib{$webName}{$userObj->webDotWikiName()}++; } elsif( $opName eq 'upload' ) { $statUploads{$webName}++; $contrib{$webName}{$userObj->webDotWikiName()}++; } elsif( $opName eq 'rename' ) { # Pick up the old and new topic names $notes =~/moved to ($TWiki::regex{webNameRegex})\.($TWiki::regex{wikiWordRegex}|$TWiki::regex{abbrevRegex}|\w+)/o; my $newTopicWeb = $1; my $newTopicName = $2; # Get number of views for old topic this month (may be zero) my $oldViews = $view{$webName}{$topicName} || 0; # Transfer views from old to new topic $view{$newTopicWeb}{$newTopicName} = $oldViews; delete $view{$webName}{$topicName}; # Transfer views from old to new web if ( $newTopicWeb ne $webName ) { $statViews{$webName} -= $oldViews; $statViews{$newTopicWeb} += $oldViews; } } } else { $session->writeDebug('WebStatistics: Bad logfile line '.$line); } } return \%view, \%contrib, \%statViews, \%statSaves, \%statUploads; } sub _processWeb { my( $session, $web, $theLogMonthYear, $viewRef, $contribRef, $statViewsRef, $statSavesRef, $statUploadsRef, $isFirstTime ) = @_; my( $topic, $user ) = ( $session->{topicName}, $session->{user} ); if( $isFirstTime ) { _printMsg( $session, '* Executed by '.$user->wikiName() ); } _printMsg( $session, "* Reporting on TWiki.$web web" ); # Handle null values, print summary message to browser/stdout my $statViews = $statViewsRef->{$web}; my $statSaves = $statSavesRef->{$web}; my $statUploads = $statUploadsRef->{$web}; $statViews ||= 0; $statSaves ||= 0; $statUploads ||= 0; _printMsg( $session, " - view: $statViews, save: $statSaves, upload: $statUploads" ); # Get the top N views and contribs in this web my (@topViews) = _getTopList( $TWiki::cfg{Stats}{TopViews}, $web, $viewRef ); my (@topContribs) = _getTopList( $TWiki::cfg{Stats}{TopContrib}, $web, $contribRef ); # Print information to stdout my $statTopViews = ''; my $statTopContributors = ''; if( @topViews ) { $statTopViews = join( CGI::br(), @topViews ); $topViews[0] =~ s/[\[\]]*//g; _printMsg( $session, ' - top view: '.$topViews[0] ); } if( @topContribs ) { $statTopContributors = join( CGI::br(), @topContribs ); _printMsg( $session, ' - top contributor: '.$topContribs[0] ); } # Update the WebStatistics topic my $tmp; my $statsTopic = $TWiki::cfg{Stats}{TopicName}; # DEBUG # $statsTopic = 'TestStatistics'; # Create this by hand if( $session->{store}->topicExists( $web, $statsTopic ) ) { my( $meta, $text ) = $session->{store}->readTopic( undef, $web, $statsTopic, undef ); my @lines = split( /\r?\n/, $text ); my $statLine; my $idxStat = -1; my $idxTmpl = -1; for( my $x = 0; $x < @lines; $x++ ) { $tmp = $lines[$x]; # Check for existing line for this month+year if( $tmp =~ /$theLogMonthYear/ ) { $idxStat = $x; } elsif( $tmp =~ /<\!\-\-statDate\-\->/ ) { $statLine = $tmp; $idxTmpl = $x; } } if( ! $statLine ) { $statLine = '| | | | | | |'; } $statLine =~ s/<\!\-\-statDate\-\->/$theLogMonthYear/; $statLine =~ s/<\!\-\-statViews\-\->/ $statViews/; $statLine =~ s/<\!\-\-statSaves\-\->/ $statSaves/; $statLine =~ s/<\!\-\-statUploads\-\->/ $statUploads/; $statLine =~ s/<\!\-\-statTopViews\-\->/$statTopViews/; $statLine =~ s/<\!\-\-statTopContributors\-\->/$statTopContributors/; if( $idxStat >= 0 ) { # entry already exists, need to update $lines[$idxStat] = $statLine; } elsif( $idxTmpl >= 0 ) { # entry does not exist, add after line $lines[$idxTmpl] = "$lines[$idxTmpl]\n$statLine"; } else { # entry does not exist, add at the end $lines[@lines] = $statLine; } $text = join( "\n", @lines ); $text .= "\n"; $session->{store}->saveTopic( $user, $web, $statsTopic, $text, $meta, { minor => 1, dontlog => 1 } ); _printMsg( $session, " - Topic $statsTopic updated" ); } else { _printMsg( $session, "! Warning: No updates done, topic $web.$statsTopic does not exist" ); } return $web; } # Get the items with top N frequency counts # Items can be topics (for view hash) or users (for contrib hash) sub _getTopList { my( $theMaxNum, $webName, $statsRef ) = @_; # Get reference to the sub-hash for this web my $webhashref = $statsRef->{$webName}; # print "Main.WebHome views = " . $statsRef->{$webName}{'WebHome'}."\n"; # print "Main web, TWikiGuest contribs = " . ${$statsRef}{$webName}{'Main.TWikiGuest'}."\n"; my @list = (); my $topicName; my $statValue; # Convert sub hash of item=>statsvalue pairs into an array, @list, # of '$statValue $topicName', ready for sorting. while( ( $topicName, $statValue ) = each( %$webhashref ) ) { # Right-align statistic value for sorting $statValue = sprintf '%7d', $statValue; # Add new array item at end of array if( $topicName =~ /\./ ) { $list[@list] = "$statValue $topicName"; } else { $list[@list] = "$statValue [[$topicName]]"; } } # DEBUG # print " top N list for $webName\n"; # print join "\n", @list; # Sort @list by frequency and pick the top N entries if( @list ) { # Strip initial spaces @list = map{ s/^\s*//; $_ } @list; @list = # Prepend spaces depending on no. of digits map{ s/^([0-9][0-9][^0-9])/\ \;$1/; $_ } map{ s/^([0-9][^0-9])/\ \;\ \;$1/; $_ } # Sort numerically, descending order sort { (split / /, $b)[0] <=> (split / /, $a)[0] } @list; if( $theMaxNum >= @list ) { $theMaxNum = @list - 1; } return @list[0..$theMaxNum]; } return @list; } sub _printMsg { my( $session, $msg ) = @_; if( $session->inContext('command_line') ) { $msg =~ s/ / /go; } else { if( $msg =~ s/^\!// ) { $msg = CGI::h4( CGI::span( { class=>'twikiAlert' }, $msg )); } elsif( $msg =~ /^[A-Z]/ ) { # SMELL: does not support internationalised script messages $msg =~ s/^([A-Z].*)/CGI::h3($1)/ge; } else { $msg =~ s/(\*\*\*.*)/CGI::span( { class=>'twikiAlert' }, $1 )/ge; $msg =~ s/^\s\s/  /go; $msg =~ s/^\s/ /go; $msg .= CGI::br(); } $msg =~ s/==([A-Z]*)==/'=='.CGI::span( { class=>'twikiAlert' }, $1 ).'=='/ge; } print $msg,"\n"; } 1;