wiki-archive/twiki/lib/TWiki/Search.pm

1126 lines
42 KiB
Perl

# Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/
#
# Copyright (C) 2000-2007 Peter Thoeny, peter@thoeny.org
# and TWiki Contributors. All Rights Reserved. TWiki Contributors
# are listed in the AUTHORS file in the root of this distribution.
# NOTE: Please extend that file, not this notice.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version. For
# more details read LICENSE in the root of this distribution.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# As per the GPL, removal of this notice is prohibited.
=pod
---+ package TWiki::Search
This module implements all the search functionality.
=cut
package TWiki::Search;
use strict;
use Assert;
use TWiki::Sandbox;
use TWiki::User;
use TWiki::Time;
my $emptySearch = 'something.Very/unLikelyTo+search-for;-)';
BEGIN {
# 'Use locale' for internationalisation of Perl sorting and searching -
# main locale settings are done in TWiki::setupLocale
# Do a dynamic 'use locale' for this module
if( $TWiki::cfg{UseLocale} ) {
require locale;
import locale();
}
}
=pod
---++ ClassMethod new ($session)
Constructor for the singleton Search engine object.
=cut
sub new {
my ( $class, $session ) = @_;
my $this = bless( {}, $class );
ASSERT($session->isa( 'TWiki')) if DEBUG;
$this->{session} = $session;
return $this;
}
# Untaints the search value (text string, regex or search expression) by
# 'filtering in' valid characters only.
sub _filterSearchString {
my $this = shift;
my $searchString = shift;
my $type = shift;
# Use filtering-out of regexes only if (1) on a safe sandbox platform
# OR (2) administrator has explicitly configured $forceUnsafeRegexes == 1.
#
# Only well-secured intranet sites, authenticated for all access
# (view, edit, attach, search, etc), AND forced to use unsafe
# platforms, should use the $forceUnsafeRegexes flag.
my $unsafePlatform = ( not ($this->{session}->{sandbox}->{SAFE} ) );
# FIXME: Use of new global
my $useFilterIn = ($unsafePlatform and not $TWiki::cfg{ForceUnsafeRegexes});
########################################################################
# SMELL: commented out useless condition; $langAlphabetic was always 1,#
# and is now removed from TWiki.pm. What was this supposed to do? #
########################################################################
# Non-alphabetic language sites (e.g. Japanese and Chinese) cannot use
# filtering-in and must use safe pipes, since TWiki does not currently
# support Unicode, required for filtering-in. Alphabetic languages such
# as English, French, Russian, Greek and most European languages are
# handled by filtering-in.
#if ( not $TWiki::langAlphabetic and $unsafePlatform ) {
# # Best option is to upgrade Perl.
# die "You are using a non-alphabetic language on a non-safe-pipes platform. This is a serious SECURITY RISK,\nso TWiki cannot be used as it is currently installed - please\nread TWiki:Codev/SafePipes for options to avoid or remove this risk.";
#}
my $mixedAlphaNum = $TWiki::regex{mixedAlphaNum};
my $validChars; # String of valid characters or POSIX
# regex elements (e.g. [:alpha:] from
# _setupRegexes) - designed to
# be used within a character class.
if( $type eq 'regex' ) {
# Regular expression search - example: soap;wsdl;web service;!shampoo;[Ff]red
if ( $useFilterIn ) {
# Filter in
# TWiki search syntax and limited regex syntax
$validChars = ${mixedAlphaNum}.' !;.[]\\*\\+';
} else {
# Filter out - only for use on safe pipe platform or
# if forced by admin
# FIXME: Review and test since first versions were broken
# SMELL: CC commented out next two lines as they escape
# escape chars in REs
#$searchString =~ s/(^|[^\\])(['"`\\])/$1\\$2/g; # Escape all types of quotes and backslashes
#$searchString =~ s/([\@\$])\(/$1\\\(/g; # Escape @( ... ) and $( ... )
}
} elsif( $type eq 'literal' ) {
# Filter in
# Literal search - search for exactly what was typed in (old style
# TWiki non-regex search)
# Alphanumeric, spaces, selected punctuation
$validChars = ${mixedAlphaNum}.' \.';
} else {
# FIXME: spaces not working - url encoded in search pattern
# Filter in
# Keyword search (new style, Google-like).
# Example: soap +wsdl +"web service" -shampoo
$validChars = ${mixedAlphaNum}.' +"-';
}
if ( $useFilterIn ) {
# Clean up - delete all invalid characters
# FIXME: be sure to escape special characters in literal
$searchString =~ s/[^${validChars}]+//go;
}
# Untaint - same for filtering in and out since already sanitised
$searchString =~ /^(.*)$/;
$searchString = $1;
# Limit string length
$searchString = substr($searchString, 0, 1500);
}
=pod
---++ StaticMethod getTextPattern ( $text, $pattern )
Sanitise search pattern - currently used for FormattedSearch only
=cut
sub getTextPattern {
my( $text, $pattern ) = @_;
$pattern =~ s/([^\\])([\$\@\%\&\#\'\`\/])/$1\\$2/go; # escape some special chars
$pattern = TWiki::Sandbox::untaintUnchecked( $pattern );
my $OK = 0;
eval {
$OK = ( $text =~ s/$pattern/$1/is );
};
$text = '' unless( $OK );
return $text;
}
# Split the search string into tokens depending on type of search.
# Search is an 'AND' of all tokens - various syntaxes implemented
# by this routine.
sub _tokensFromSearchString {
my( $this, $searchString, $type ) = @_;
my @tokens = ();
if( $type eq 'regex' ) {
# Regular expression search Example: soap;wsdl;web service;!shampoo
@tokens = split( /;/, $searchString );
} elsif( $type eq 'literal' ) {
# Literal search (old style)
$tokens[0] = $searchString;
} else {
# Keyword search (Google-style) - implemented by converting
# to regex format. Example: soap +wsdl +"web service" -shampoo
# Prevent tokenizing on spaces in "literal string"
$searchString =~ s/(\".*?)\"/&_translateSpace($1)/geo;
$searchString =~ s/[\+\-]\s+//go;
# Build pattern of stop words
my $prefs = $this->{session}->{prefs};
my $stopWords = $prefs->getPreferencesValue( 'SEARCHSTOPWORDS' ) || '';
$stopWords =~ s/[\s\,]+/\|/go;
$stopWords =~ s/[\(\)]//go;
# Tokenize string taking account of literal strings, then remove
# stop words and convert '+' and '-' syntax.
@tokens =
map { s/^\+//o; s/^\-/\!/o; s/^"//o; $_ } # remove +, change - to !, remove "
grep { ! /^($stopWords)$/i } # remove stopwords
map { s/$TWiki::TranslationToken/ /go; $_ } # restore space
split( /[\s]+/, $searchString ); # split on spaces
}
return @tokens;
}
# Convert spaces into translation token characters (typically NULs),
# preventing tokenization.
#
# FIXME: Terminology confusing here!
sub _translateSpace {
my $text = shift;
$text =~ s/\s+/$TWiki::TranslationToken/go;
return $text;
}
# Search a single web based on parameters - @theTokens is a list of
# search terms to be ANDed together, $topic is list of one or more topics.
#
sub _searchTopicsInWeb {
my( $this, $web, $topic, $scope, $type,
$caseSensitive, @theTokens ) = @_;
my @topicList = ();
return @topicList unless( @theTokens ); # bail out if no search string
my $store = $this->{session}->{store};
if( $topic ) {
# limit search to topic list
if( $topic =~ /^\^\([\_\-\+$TWiki::regex{mixedAlphaNum}\|]+\)\$$/ ) {
# topic list without wildcards
# for speed, do not get all topics in web
# but convert topic pattern into topic list
my $topics = $topic;
$topics =~ s/^\^\(//o;
$topics =~ s/\)\$//o;
# build list from topic pattern
@topicList = split( /\|/, $topics );
} else {
# topic list with wildcards
@topicList = $store->getTopicNames( $web );
if( $caseSensitive ) {
# limit by topic name,
@topicList = grep( /$topic/, @topicList );
} else {
# Codev.SearchTopicNameAndTopicText
@topicList = grep( /$topic/i, @topicList );
}
}
} else {
@topicList = $store->getTopicNames( $web );
}
# default scope is 'text'
$scope = 'text' unless( $scope =~ /^(topic|all)$/ );
# AND search - search once for each token, ANDing result together
foreach my $token ( @theTokens ) {
# search on each token
my $invertSearch = ( $token =~ s/^\!//o );
# flag for AND NOT search
my @scopeTextList = ();
my @scopeTopicList = ();
return @topicList unless( @topicList );
# scope can be 'topic' (default), 'text' or "all"
# scope='text', e.g. Perl search on topic name:
unless( $scope eq 'text' ) {
my $qtoken = $token;
# FIXME I18N
$qtoken = quotemeta( $qtoken ) if( $type ne 'regex' );
if( $caseSensitive ) {
# fix for Codev.SearchWithNoPipe
@scopeTopicList = grep( /$qtoken/, @topicList );
} else {
@scopeTopicList = grep( /$qtoken/i, @topicList );
}
}
# scope='text', e.g. grep search on topic text:
unless( $scope eq 'topic' ) {
# search only for the topic name, ignoring matching lines.
# We will make a mess of reporting the matches later on.
my $matches = $store->searchInWebContent
( $token, $web, \@topicList,
{ type => $type, casesensitive => $caseSensitive,
files_without_match => 1 } );
@scopeTextList = keys %$matches;
}
if( @scopeTextList && @scopeTopicList ) {
# join 'topic' and 'text' lists
push( @scopeTextList, @scopeTopicList );
my %seen = ();
# make topics unique
@scopeTextList = sort grep { ! $seen{$_} ++ } @scopeTextList;
} elsif( @scopeTopicList ) {
@scopeTextList = @scopeTopicList;
}
if( $invertSearch ) {
# do AND NOT search
my %seen = ();
foreach my $topic ( @scopeTextList ) {
$seen{$topic} = 1;
}
@scopeTextList = ();
foreach my $topic ( @topicList ) {
push( @scopeTextList, $topic ) unless( $seen{$topic} );
}
}
# reduced topic list for next token
@topicList = @scopeTextList;
}
return @topicList;
}
sub _makeTopicPattern {
my( $topic ) = @_ ;
return '' unless( $topic );
# 'Web*, FooBar' ==> ( 'Web*', 'FooBar' ) ==> ( 'Web.*', "FooBar" )
my @arr = map { s/[^\*\_\-\+$TWiki::regex{mixedAlphaNum}]//go; s/\*/\.\*/go; $_ }
split( /,\s*/, $topic );
return '' unless( @arr );
# ( 'Web.*', 'FooBar' ) ==> "^(Web.*|FooBar)$"
return '^(' . join( '|', @arr ) . ')$';
}
=pod
---++ ObjectMethod searchWeb (...)
Search one or more webs according to the parameters.
If =_callback= is set, that means the caller wants results as
soon as they are ready. =_callback_ should be set to a reference
to a function which takes =_cbdata= as the first parameter and
remaining parameters the same as 'print'.
If =_callback= is set, the result is always undef. Otherwise the
result is a string containing the rendered search results.
If =inline= is set, then the results are *not* decorated with
the search template head and tail blocks.
Note: If =format= is set, =template= will be ignored.
Note: For legacy, if =regex= is defined, it will force type='regex'
SMELL: If =template= is defined =bookview= will not work
SMELL: it seems that if you define =_callback= or =inline= then you are
responsible for converting the TML to HTML yourself!
FIXME: =callback= cannot work with format parameter (consider format='| $topic |'
=cut
sub searchWeb {
my $this = shift;
ASSERT($this->isa( 'TWiki::Search')) if DEBUG;
my %params = @_;
my $callback = $params{_callback};
my $cbdata = $params{_cbdata};
my $baseTopic = $params{basetopic} || $this->{session}->{topicName};
my $baseWeb = $params{baseweb} || $this->{session}->{webName};
my $doBookView = TWiki::isTrue( $params{bookview} );
my $caseSensitive = TWiki::isTrue( $params{casesensitive} );
my $excludeTopic = $params{excludetopic} || '';
my $doExpandVars = TWiki::isTrue( $params{expandvariables} );
my $format = $params{format} || '';
my $header = $params{header};
my $inline = $params{inline};
my $limit = $params{limit} || '';
my $doMultiple = TWiki::isTrue( $params{multiple} );
my $nonoise = TWiki::isTrue( $params{nonoise} );
my $noEmpty = TWiki::isTrue( $params{noempty}, $nonoise );
# Note: a defined header overrides noheader
my $noHeader =
!defined($header) && TWiki::isTrue( $params{noheader}, $nonoise)
# Note: This is done for Cairo compatibility
|| (!$header && $format && $inline);
my $noSearch = TWiki::isTrue( $params{nosearch}, $nonoise );
my $noSummary = TWiki::isTrue( $params{nosummary}, $nonoise );
my $zeroResults = 1 - TWiki::isTrue( ($params{zeroresults} || 'on'), $nonoise );
my $noTotal = TWiki::isTrue( $params{nototal}, $nonoise );
my $newLine = $params{newline} || '';
my $sortOrder = $params{order} || '';
my $revSort = TWiki::isTrue( $params{reverse} );
my $scope = $params{scope} || '';
my $searchString = $params{search} || $emptySearch;
my $separator = $params{separator};
my $template = $params{template} || '';
my $topic = $params{topic} || '';
my $type = $params{type} || '';
my $webName = $params{web} || '';
my $date = $params{date} || '';
my $recurse = $params{'recurse'} || '';
my $finalTerm = $inline ? ( $params{nofinalnewline} || 0 ) : 0;
$baseWeb =~ s/\./\//go;
my $session = $this->{session};
my $renderer = $session->{renderer};
# Limit search results
if ($limit =~ /(^\d+$)/o) {
# only digits, all else is the same as
# an empty string. "+10" won't work.
$limit = $1;
} else {
# change 'all' to 0, then to big number
$limit = 0;
}
$limit = 32000 unless( $limit );
$type = 'regex' if( $params{regex} );
# Filter the search string for security and untaint it
$searchString = $this->_filterSearchString( $searchString, $type );
my $mixedAlpha = $TWiki::regex{mixedAlpha};
if( defined( $separator )) {
$separator =~ s/\$n\(\)/\n/gos; # expand "$n()" to new line
$separator =~ s/\$n([^$mixedAlpha]|$)/\n$1/gos;
}
if( $newLine ) {
$newLine =~ s/\$n\(\)/\n/gos; # expand "$n()" to new line
$newLine =~ s/\$n([^$mixedAlpha]|$)/\n$1/gos;
}
my $searchResult = '';
my $homeWeb = $session->{webName};
my $homeTopic = $TWiki::cfg{HomeTopicName};
my $store = $session->{store};
my %excludeWeb;
my @tmpWebs;
# A value of 'all' or 'on' by itself gets all webs,
# otherwise ignored (unless there is a web called 'All'.)
my $searchAllFlag = ( $webName =~ /(^|[\,\s])(all|on)([\,\s]|$)/i );
if( $webName ) {
foreach my $web ( split( /[\,\s]+/, $webName ) ) {
$web =~ s#\.#/#go;
# the web processing loop filters for valid web names,
# so don't do it here.
if ( $web =~ s/^-// ) {
$excludeWeb{$web} = 1;
} else {
push( @tmpWebs, $web );
if( TWiki::isTrue( $recurse ) || $web =~ /^(all|on)$/i ) {
my $webarg = ($web =~/^(all|on)$/i) ? undef : $web;
push( @tmpWebs,
$store->getListOfWebs( 'user,allowed', $webarg ));
}
}
}
} else {
# default to current web
push( @tmpWebs, $session->{webName} );
if ( TWiki::isTrue( $recurse )) {
push( @tmpWebs, $store->getListOfWebs( 'user,allowed',
$session->{webName} ));
}
}
my @webs;
foreach my $web ( @tmpWebs ) {
push( @webs, $web ) unless $excludeWeb{$web};
$excludeWeb{$web} = 1;
}
# E.g. "Bug*, *Patch" ==> "^(Bug.*|.*Patch)$"
$topic = _makeTopicPattern( $topic );
# E.g. "Web*, FooBar" ==> "^(Web.*|FooBar)$"
$excludeTopic = _makeTopicPattern( $excludeTopic );
my $output = '';
my $tmpl = '';
my $originalSearch = $searchString;
my $spacedTopic;
if( $format ) {
$template = 'searchformat';
} elsif( $template ) {
# template definition overrides book and rename views
} elsif( $doBookView ) {
$template = 'searchbookview';
} else {
$template = 'search';
}
$tmpl = $session->{templates}->readTemplate( $template );
# SMELL: the only META tags in a template will be METASEARCH
# Why the heck are they being filtered????
$tmpl =~ s/\%META{.*?}\%//go; # remove %META{'parent'}%
# Split template into 5 sections
my( $tmplHead, $tmplSearch, $tmplTable, $tmplNumber, $tmplTail ) =
split( /%SPLIT%/, $tmpl );
# Invalid template?
if( ! $tmplTail ) {
my $mess =
CGI::h1('TWiki Installation Error') .
'Incorrect format of '.$template.' template (missing sections? There should be 4 %SPLIT% tags)';
if ( defined $callback ) {
&$callback( $cbdata, $mess );
return undef;
} else {
return $mess;
}
}
# Expand tags in template sections
$tmplSearch = $session->handleCommonTags( $tmplSearch,
$homeWeb,
$homeTopic );
$tmplNumber = $session->handleCommonTags( $tmplNumber,
$homeWeb,
$homeTopic );
# If not inline search, also expand tags in head and tail sections
unless( $inline ) {
$tmplHead = $session->handleCommonTags( $tmplHead,
$homeWeb,
$homeTopic );
if( defined $callback ) {
$tmplHead = $renderer->getRenderedVersion( $tmplHead,
$homeWeb,
$homeTopic );
$tmplHead =~ s|</*nop/*>||goi; # remove <nop> tags
&$callback( $cbdata, $tmplHead );
} else {
# don't getRenderedVersion; this will be done by a single
# call at the end.
$searchResult .= $tmplHead;
}
}
# Generate 'Search:' part showing actual search string used
unless( $noSearch ) {
my $searchStr = $searchString;
$searchStr = '' if( $searchString eq $emptySearch );
$searchStr =~ s/&/&amp;/go;
$searchStr =~ s/</&lt;/go;
$searchStr =~ s/>/&gt;/go;
$searchStr =~ s/^\.\*$/Index/go;
$tmplSearch =~ s/%SEARCHSTRING%/$searchStr/go;
if( defined $callback ) {
$tmplSearch = $renderer->getRenderedVersion( $tmplSearch,
$homeWeb,
$homeTopic );
$tmplSearch =~ s|</*nop/*>||goi; # remove <nop> tag
&$callback( $cbdata, $tmplSearch );
} else {
# don't getRenderedVersion; will be done later
$searchResult .= $tmplSearch;
}
}
# Split the search string into tokens depending on type of search -
# each token is ANDed together by actual search
my @tokens = $this->_tokensFromSearchString( $searchString, $type );
# Write log entry
# FIXME: Move log entry further down to log actual webs searched
if( ( $TWiki::cfg{Log}{search} ) && ( ! $inline ) ) {
my $t = join( ' ', @webs );
$session->writeLog( 'search', $t, $searchString );
}
# Loop through webs
my $isAdmin = $session->{user}->isAdmin();
my $ttopics = 0;
foreach my $web ( @webs ) {
$web =~ s/$TWiki::cfg{NameFilter}//go;
$web = TWiki::Sandbox::untaintUnchecked( $web );
next unless $store->webExists( $web ); # can't process what ain't thar
my $prefs = $session->{prefs};
my $thisWebNoSearchAll = $prefs->getWebPreferencesValue( 'NOSEARCHALL', $web ) || '';
# make sure we can report this web on an 'all' search
# DON'T filter out unless it's part of an 'all' search.
next if ( $searchAllFlag
&& ! $isAdmin
&& ( $thisWebNoSearchAll =~ /on/i || $web =~ /^[\.\_]/ )
&& $web ne $session->{webName} );
# Run the search on topics in this web
my @topicList = $this->_searchTopicsInWeb(
$web, $topic, $scope, $type, $caseSensitive, @tokens );
# exclude topics, Codev.ExcludeWebTopicsFromSearch
if( $caseSensitive && $excludeTopic ) {
@topicList = grep( !/$excludeTopic/, @topicList );
} elsif( $excludeTopic ) {
@topicList = grep( !/$excludeTopic/i, @topicList );
}
next if( $noEmpty && ! @topicList ); # Nothing to show for this web
my $topicInfo = {};
# sort the topic list by date, author or topic name, and cache the
# info extracted to do the sorting
if( $sortOrder eq 'modified' ) {
# For performance:
# * sort by approx time (to get a rough list)
# * shorten list to the limit + some slack
# * sort by rev date on shortened list to get the accurate list
# SMELL: Ciaro had efficient two stage handling of modified sort.
# SMELL: In Dakar this seems to be pointless since latest rev
# time is taken from topic instead of dir list.
my $slack = 10;
if( $limit + 2 * $slack < scalar( @topicList ) ) {
# sort by approx latest rev time
my @tmpList =
map { $_->[1] }
sort {$a->[0] <=> $b->[0] }
map { [ $store->getTopicLatestRevTime( $web, $_ ), $_ ] }
@topicList;
@tmpList = reverse( @tmpList ) if( $revSort );
# then shorten list and build the hashes for date and author
my $idx = $limit + $slack;
@topicList = ();
foreach( @tmpList ) {
push( @topicList, $_ );
$idx -= 1;
last if $idx <= 0;
}
}
$topicInfo = $this->_sortTopics( $web, \@topicList,
$sortOrder, !$revSort );
} elsif( $sortOrder =~ /^creat/ || # topic creation time
$sortOrder eq 'editby' || # author
$sortOrder =~ s/^formfield\((.*)\)$/$1/ # form field
) {
$topicInfo = $this->_sortTopics( $web, \@topicList,
$sortOrder, !$revSort );
} else {
# simple sort, see Codev.SchwartzianTransformMisused
# note no extraction of topic info here, as not needed
# for the sort. Instead it will be read lazily, later on.
if( $revSort ) {
@topicList = sort {$b cmp $a} @topicList;
} else {
@topicList = sort {$a cmp $b} @topicList;
}
}
if( $date ){
use TWiki::Time;
my @ends = &TWiki::Time::parseInterval( $date );
my @resultList = ();
foreach my $topic ( @topicList ) {
# if date falls out of interval: exclude topic from result
my $topicdate = $store->getTopicLatestRevTime( $web, $topic );
push( @resultList, $topic )
unless ( $topicdate < $ends[0] || $topicdate > $ends[1] );
}
@topicList = @resultList;
}
# header and footer of $web
my( $beforeText, $repeatText, $afterText ) =
split( /%REPEAT%/, $tmplTable );
if( defined $header ) {
$beforeText = TWiki::expandStandardEscapes($header);
$beforeText =~ s/\$web/$web/gos; # expand name of web
if( defined( $separator )) {
$beforeText .= $separator;
} else {
$beforeText =~ s/([^\n])$/$1\n/os; # add new line at end if needed
}
}
# output the list of topics in $web
my $ntopics = 0;
my $headerDone = $noHeader;
foreach my $topic ( @topicList ) {
my $forceRendering = 0;
unless( exists( $topicInfo->{$topic} ) ) {
# not previously cached
$topicInfo->{$topic} =
$this->_extractTopicInfo( $web, $topic, 0, undef );
}
my $epochSecs = $topicInfo->{$topic}->{modified};
my $revDate = TWiki::Time::formatTime( $epochSecs );
my $isoDate = TWiki::Time::formatTime( $epochSecs, '$iso', 'gmtime');
my $revUser = $topicInfo->{$topic}->{editby} || 'UnknownUser';
my $ru = $session->{users}->findUser( $revUser );
my $revNum = $topicInfo->{$topic}->{revNum} || 0;
# Check security
# FIXME - how do we deal with user login not being available if
# coming from search script?
my $allowView = $topicInfo->{$topic}->{allowView};
next unless $allowView;
my ( $meta, $text );
# Special handling for format='...'
if( $format ) {
( $meta, $text ) = $this->_getTextAndMeta( $topicInfo, $web, $topic );
if( $doExpandVars ) {
if( $web eq $baseWeb && $topic eq $baseTopic ) {
# primitive way to prevent recursion
$text =~ s/%SEARCH/%<nop>SEARCH/g;
}
$text = $session->handleCommonTags( $text, $web, $topic );
}
}
my @multipleHitLines = ();
if( $doMultiple ) {
my $pattern = $tokens[$#tokens]; # last token in an AND search
$pattern = quotemeta( $pattern ) if( $type ne 'regex' );
( $meta, $text ) = $this->_getTextAndMeta( $topicInfo, $web, $topic ) unless $text;
if( $caseSensitive ) {
@multipleHitLines = reverse grep { /$pattern/ } split( /[\n\r]+/, $text );
} else {
@multipleHitLines = reverse grep { /$pattern/i } split( /[\n\r]+/, $text );
}
}
# SMELL: this loop is a rather hairy; why not do it thus:
# while(scalar(@multipleHitLines))?
# presumably you are relying on the fact that text will be set
# when doMultiple is off, even though @multipleHitLines will
# be empty? I can't work it out.
do { # multiple=on loop
my $out = '';
$text = pop( @multipleHitLines ) if( scalar( @multipleHitLines ) );
if( $format ) {
$out = $format;
$out = TWiki::expandStandardEscapes( $out );
$out =~ s/\$web/$web/gs;
$out =~ s/\$topic\(([^\)]*)\)/TWiki::Render::breakName( $topic, $1 )/ges;
$out =~ s/\$topic/$topic/gs;
$out =~ s/\$date/$revDate/gs;
$out =~ s/\$isodate/$isoDate/gs;
$out =~ s/\$rev/$revNum/gs;
$out =~ s/\$wikiusername/$ru->webDotWikiName()/ges;
$out =~ s/\$wikiname/$ru->wikiName()/ges;
$out =~ s/\$username/$ru->login()/ges;
my $r1info = {};
$out =~ s/\$createdate/$this->_getRev1Info( $web, $topic, 'date', $r1info )/ges;
$out =~ s/\$createusername/$this->_getRev1Info( $web, $topic, 'username', $r1info )/ges;
$out =~ s/\$createwikiname/$this->_getRev1Info( $web, $topic, 'wikiname', $r1info )/ges;
$out =~ s/\$createwikiusername/$this->_getRev1Info( $web, $topic, 'wikiusername', $r1info )/ges;
if( $out =~ m/\$text/ ) {
( $meta, $text ) = $this->_getTextAndMeta( $topicInfo, $web, $topic ) unless $text;
if( $topic eq $session->{topicName} ) {
# defuse SEARCH in current topic to prevent loop
$text =~ s/%SEARCH{.*?}%/SEARCH{...}/go;
}
$out =~ s/\$text/$text/gos;
$forceRendering = 1 unless( $doMultiple );
}
} else {
$out = $repeatText;
}
$out =~ s/%WEB%/$web/go;
$out =~ s/%TOPICNAME%/$topic/go;
$out =~ s/%TIME%/$revDate/o;
my $srev = 'r' . $revNum;
if( $revNum eq '0' || $revNum eq '1' ) {
$srev = CGI::span( { class => 'twikiNew' }, ($this->{session}->{i18n}->maketext('NEW')) );
}
$out =~ s/%REVISION%/$srev/o;
$out =~ s/%AUTHOR%/$revUser/o;
if( ( $inline || $format ) && ( ! ( $forceRendering ) ) ) {
# do nothing
} else {
# don't callback yet because of table
# rendering
#$out = $session->handleCommonTags( $out, $web, $topic );
#$out = $renderer->getRenderedVersion( $out, $web, $topic );
}
if( $doBookView ) {
# BookView
( $meta, $text ) = $this->_getTextAndMeta( $topicInfo, $web, $topic ) unless $text;
if( $web eq $baseWeb && $topic eq $baseTopic ) {
# primitive way to prevent recursion
$text =~ s/%SEARCH/%<nop>SEARCH/g;
}
$text = $session->handleCommonTags( $text, $web, $topic );
$text = $session->{renderer}->getRenderedVersion
( $text, $web, $topic );
# FIXME: What about meta data rendering?
$out =~ s/%TEXTHEAD%/$text/go;
} elsif( $format ) {
$out =~ s/\$summary(?:\(([^\)]*)\))?/$renderer->makeTopicSummary( $text, $topic, $web, $1 )/ges;
$out =~ s/\$changes(?:\(([^\)]*)\))?/$renderer->summariseChanges($ru,$web,$topic,$1,$revNum)/ges;
$out =~ s/\$formfield\(\s*([^\)]*)\s*\)/TWiki::Render::renderFormFieldArg( $meta, $1 )/ges;
$out =~ s/\$parent\(([^\)]*)\)/TWiki::Render::breakName( $meta->getParent(), $1 )/ges;
$out =~ s/\$parent/$meta->getParent()/ges;
$out =~ s/\$formname/$meta->getFormName()/ges;
$out =~ s/\$count\((.*?\s*\.\*)\)/_countPattern( $text, $1 )/ges;
# FIXME: Allow all regex characters but escape them
# Note: The RE requires a .* at the end of a pattern to avoid false positives
# in pattern matching
$out =~ s/\$pattern\((.*?\s*\.\*)\)/getTextPattern( $text, $1 )/ges;
$out =~ s/\r?\n/$newLine/gos if( $newLine );
if( defined( $separator )) {
$out .= $separator;
} else {
# add new line at end if needed
# SMELL: why?
$out =~ s/([^\n])$/$1\n/s;
}
} elsif( $noSummary ) {
$out =~ s/%TEXTHEAD%//go;
$out =~ s/&nbsp;//go;
} else {
# regular search view
( $meta, $text ) = $this->_getTextAndMeta(
$topicInfo, $web, $topic ) unless $text;
$text = $renderer->makeTopicSummary( $text, $topic, $web );
$out =~ s/%TEXTHEAD%/$text/go;
}
# lazy output of header (only if needed for the first time)
unless( $headerDone ) {
$headerDone = 1;
my $prefs = $session->{prefs};
my $thisWebBGColor =
$prefs->getWebPreferencesValue( 'WEBBGCOLOR', $web ) ||
'\#FF00FF';
$beforeText =~ s/%WEBBGCOLOR%/$thisWebBGColor/go;
$beforeText =~ s/%WEB%/$web/go;
$beforeText = $session->handleCommonTags
( $beforeText, $web, $topic );
if ( defined $callback ) {
$beforeText =
$renderer->getRenderedVersion(
$beforeText, $web, $topic );
$beforeText =~ s|</*nop/*>||goi; # remove <nop> tag
&$callback( $cbdata, $beforeText );
} else {
$searchResult .= $beforeText;
}
}
#don't expand if a format is specified - it breaks tables and stuff
unless( $format ) {
$out =
$renderer->getRenderedVersion( $out, $web, $topic );
}
# output topic (or line if multiple=on)
if ( defined $callback ) {
$out =~ s|</*nop/*>||goi; # remove <nop> tag
&$callback( $cbdata, $out );
} else {
$searchResult .= $out;
}
} while( @multipleHitLines ); # multiple=on loop
$ntopics += 1;
$ttopics += 1;
# delete topic info to clear any cached data
undef $topicInfo->{$topic};
last if( $ntopics >= $limit );
} # end topic loop
# output footer only if hits in web
if( $ntopics ) {
# output footer of $web
$afterText = $session->handleCommonTags( $afterText,
$web,
$homeTopic );
if( $inline || $format ) {
$afterText =~ s/\n$//os; # remove trailing new line
}
if ( defined $callback ) {
$afterText =
$renderer->getRenderedVersion( $afterText,
$web,
$homeTopic );
$afterText =~ s|</*nop/*>||goi; # remove <nop> tag
&$callback( $cbdata, $afterText );
} else {
$searchResult .= $afterText;
}
}
# output number of topics (only if hits in web or if
# only searching one web)
if( $ntopics || scalar( @webs ) < 2 ) {
unless( $noTotal ) {
my $thisNumber = $tmplNumber;
$thisNumber =~ s/%NTOPICS%/$ntopics/go;
if ( defined $callback ) {
$thisNumber =
$renderer->getRenderedVersion( $thisNumber,
$web,
$homeTopic );
$thisNumber =~ s|</*nop/*>||goi; # remove <nop> tag
&$callback( $cbdata, $thisNumber );
} else {
$searchResult .= $thisNumber;
}
}
}
} # end of: foreach my $web ( @webs )
return '' if ( $ttopics == 0 && $zeroResults );
if( $format && ! $finalTerm ) {
if( $separator ) {
$searchResult =~ s/$separator$//s; # remove separator at end
} else {
$searchResult =~ s/\n$//os; # remove trailing new line
}
}
unless( $inline ) {
$tmplTail = $session->handleCommonTags( $tmplTail,
$homeWeb,
$homeTopic );
if( defined $callback ) {
$tmplTail = $renderer->getRenderedVersion( $tmplTail,
$homeWeb,
$homeTopic );
$tmplTail =~ s|</*nop/*>||goi; # remove <nop> tag
&$callback( $cbdata, $tmplTail );
} else {
$searchResult .= $tmplTail;
}
}
return undef if ( defined $callback );
return $searchResult if $inline;
$searchResult = $session->handleCommonTags( $searchResult,
$homeWeb,
$homeTopic );
$searchResult = $renderer->getRenderedVersion( $searchResult,
$homeWeb,
$homeTopic );
return $searchResult;
}
# extract topic info required for sorting and sort.
sub _sortTopics{
my ( $this, $web, $topics, $sortfield, $revSort ) = @_;
my $topicInfo = {};
foreach my $topic ( @$topics ) {
$topicInfo->{$topic} = $this->_extractTopicInfo( $web, $topic, $sortfield );
}
if( $revSort ) {
@$topics = map { $_->[1] }
sort { _compare( $b->[0], $a->[0] ) }
map { [ $topicInfo->{$_}->{$sortfield}, $_ ] }
@$topics;
} else {
@$topics = map { $_->[1] }
sort { _compare( $a->[0], $b->[0] ) }
map { [ $topicInfo->{$_}->{$sortfield}, $_ ] }
@$topics;
}
return $topicInfo;
}
# RE for a full-spec floating-point number
my $number = qr/^[-+]?[0-9]+(\.[0-9]*)?([Ee][-+]?[0-9]+)?$/s;
sub _compare {
if( $_[0] =~ /$number/o && $_[1] =~ /$number/o ) {
# when sorting numbers do it largest first; this is just because
# this is what date comparisons need.
return $_[1] <=> $_[0];
} else {
return $_[1] cmp $_[0];
}
}
# extract topic info
sub _extractTopicInfo {
my ( $this, $web, $topic, $sortfield ) = @_;
my $info = {};
my $session = $this->{session};
my $store = $session->{store};
my ( $meta, $text ) = $this->_getTextAndMeta( undef, $web, $topic );
$info->{text} = $text;
$info->{meta} = $meta;
my ( $revdate, $revuser, $revnum ) = $meta->getRevisionInfo();
$info->{editby} = $revuser ? $revuser->webDotWikiName() : '';
$info->{modified} = $revdate;
$info->{revNum} = $revnum;
$info->{allowView} =
$session->{security}->
checkAccessPermission( 'view', $session->{user},
$text, $meta,
$topic, $web );
return $info unless $sortfield;
if ( $sortfield =~ /^creat/ ) {
( $info->{$sortfield} ) = $meta->getRevisionInfo( 1 );
} elsif ( !defined( $info->{$sortfield} )) {
$info->{$sortfield} = TWiki::Render::renderFormFieldArg( $meta, $sortfield );
}
return $info;
}
# get the text and meta for a topic
sub _getTextAndMeta {
my( $this, $topicInfo, $web, $topic ) = @_;
my ( $meta, $text );
my $store = $this->{session}->{store};
# read from cache if it's there
if ( $topicInfo ) {
$text = $topicInfo->{$topic}->{text};
$meta = $topicInfo->{$topic}->{meta};
}
unless( defined $text ) {
( $meta, $text ) =
$store->readTopic( undef, $web, $topic, undef );
$text =~ s/%WEB%/$web/gos;
$text =~ s/%TOPIC%/$topic/gos;
}
return ( $meta, $text );
}
# Returns the topic revision info of the base version,
# attributes are 'date', 'username', 'wikiname',
# 'wikiusername'. Revision info is cached in the search
# object for speed.
sub _getRev1Info {
my( $this, $web, $topic, $attr, $info ) = @_;
my $key = $web.'.'.$topic;
my $store = $this->{session}->{store};
unless( $info->{webTopic} && $info->{webTopic} eq $key ) {
my $meta = new TWiki::Meta( $this->{session}, $web, $topic );
my ( $d, $u ) = $meta->getRevisionInfo( 1 );
$info->{date} = $d;
$info->{user} = $u;
$info->{webTopic} = $key;
}
if( $attr eq 'username' ) {
return $info->{user}->login();
}
if( $attr eq 'wikiname' ) {
return $info->{user}->wikiName();
}
if( $attr eq 'wikiusername' ) {
return $info->{user}->webDotWikiName();
}
if( $attr eq 'date' ) {
return TWiki::Time::formatTime( $info->{date} );
}
return 1;
}
# With the same argument as $pattern, returns a number which is the count of
# occurences of the pattern argument.
sub _countPattern {
my( $theText, $thePattern ) = @_;
$thePattern =~ s/([^\\])([\$\@\%\&\#\'\`\/])/$1\\$2/go; # escape some special chars
$thePattern =~ /(.*)/; # untaint
$thePattern = $1;
my $OK = 0;
eval {
# counting hack, see: http://dev.perl.org/perl6/rfc/110.html
$OK = () = $theText =~ /$thePattern/g;
};
return $OK;
}
1;