# Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/ # # Copyright (C) 2000-2007 Peter Thoeny, peter@thoeny.org # and TWiki Contributors. All Rights Reserved. TWiki Contributors # are listed in the AUTHORS file in the root of this distribution. # NOTE: Please extend that file, not this notice. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. For # more details read LICENSE in the root of this distribution. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # As per the GPL, removal of this notice is prohibited. =pod ---+ package TWiki::Search This module implements all the search functionality. =cut package TWiki::Search; use strict; use Assert; use TWiki::Sandbox; use TWiki::User; use TWiki::Time; my $emptySearch = 'something.Very/unLikelyTo+search-for;-)'; BEGIN { # 'Use locale' for internationalisation of Perl sorting and searching - # main locale settings are done in TWiki::setupLocale # Do a dynamic 'use locale' for this module if( $TWiki::cfg{UseLocale} ) { require locale; import locale(); } } =pod ---++ ClassMethod new ($session) Constructor for the singleton Search engine object. =cut sub new { my ( $class, $session ) = @_; my $this = bless( {}, $class ); ASSERT($session->isa( 'TWiki')) if DEBUG; $this->{session} = $session; return $this; } # Untaints the search value (text string, regex or search expression) by # 'filtering in' valid characters only. sub _filterSearchString { my $this = shift; my $searchString = shift; my $type = shift; # Use filtering-out of regexes only if (1) on a safe sandbox platform # OR (2) administrator has explicitly configured $forceUnsafeRegexes == 1. # # Only well-secured intranet sites, authenticated for all access # (view, edit, attach, search, etc), AND forced to use unsafe # platforms, should use the $forceUnsafeRegexes flag. my $unsafePlatform = ( not ($this->{session}->{sandbox}->{SAFE} ) ); # FIXME: Use of new global my $useFilterIn = ($unsafePlatform and not $TWiki::cfg{ForceUnsafeRegexes}); ######################################################################## # SMELL: commented out useless condition; $langAlphabetic was always 1,# # and is now removed from TWiki.pm. What was this supposed to do? # ######################################################################## # Non-alphabetic language sites (e.g. Japanese and Chinese) cannot use # filtering-in and must use safe pipes, since TWiki does not currently # support Unicode, required for filtering-in. Alphabetic languages such # as English, French, Russian, Greek and most European languages are # handled by filtering-in. #if ( not $TWiki::langAlphabetic and $unsafePlatform ) { # # Best option is to upgrade Perl. # die "You are using a non-alphabetic language on a non-safe-pipes platform. This is a serious SECURITY RISK,\nso TWiki cannot be used as it is currently installed - please\nread TWiki:Codev/SafePipes for options to avoid or remove this risk."; #} my $mixedAlphaNum = $TWiki::regex{mixedAlphaNum}; my $validChars; # String of valid characters or POSIX # regex elements (e.g. [:alpha:] from # _setupRegexes) - designed to # be used within a character class. if( $type eq 'regex' ) { # Regular expression search - example: soap;wsdl;web service;!shampoo;[Ff]red if ( $useFilterIn ) { # Filter in # TWiki search syntax and limited regex syntax $validChars = ${mixedAlphaNum}.' !;.[]\\*\\+'; } else { # Filter out - only for use on safe pipe platform or # if forced by admin # FIXME: Review and test since first versions were broken # SMELL: CC commented out next two lines as they escape # escape chars in REs #$searchString =~ s/(^|[^\\])(['"`\\])/$1\\$2/g; # Escape all types of quotes and backslashes #$searchString =~ s/([\@\$])\(/$1\\\(/g; # Escape @( ... ) and $( ... ) } } elsif( $type eq 'literal' ) { # Filter in # Literal search - search for exactly what was typed in (old style # TWiki non-regex search) # Alphanumeric, spaces, selected punctuation $validChars = ${mixedAlphaNum}.' \.'; } else { # FIXME: spaces not working - url encoded in search pattern # Filter in # Keyword search (new style, Google-like). # Example: soap +wsdl +"web service" -shampoo $validChars = ${mixedAlphaNum}.' +"-'; } if ( $useFilterIn ) { # Clean up - delete all invalid characters # FIXME: be sure to escape special characters in literal $searchString =~ s/[^${validChars}]+//go; } # Untaint - same for filtering in and out since already sanitised $searchString =~ /^(.*)$/; $searchString = $1; # Limit string length $searchString = substr($searchString, 0, 1500); } =pod ---++ StaticMethod getTextPattern ( $text, $pattern ) Sanitise search pattern - currently used for FormattedSearch only =cut sub getTextPattern { my( $text, $pattern ) = @_; $pattern =~ s/([^\\])([\$\@\%\&\#\'\`\/])/$1\\$2/go; # escape some special chars $pattern = TWiki::Sandbox::untaintUnchecked( $pattern ); my $OK = 0; eval { $OK = ( $text =~ s/$pattern/$1/is ); }; $text = '' unless( $OK ); return $text; } # Split the search string into tokens depending on type of search. # Search is an 'AND' of all tokens - various syntaxes implemented # by this routine. sub _tokensFromSearchString { my( $this, $searchString, $type ) = @_; my @tokens = (); if( $type eq 'regex' ) { # Regular expression search Example: soap;wsdl;web service;!shampoo @tokens = split( /;/, $searchString ); } elsif( $type eq 'literal' ) { # Literal search (old style) $tokens[0] = $searchString; } else { # Keyword search (Google-style) - implemented by converting # to regex format. Example: soap +wsdl +"web service" -shampoo # Prevent tokenizing on spaces in "literal string" $searchString =~ s/(\".*?)\"/&_translateSpace($1)/geo; $searchString =~ s/[\+\-]\s+//go; # Build pattern of stop words my $prefs = $this->{session}->{prefs}; my $stopWords = $prefs->getPreferencesValue( 'SEARCHSTOPWORDS' ) || ''; $stopWords =~ s/[\s\,]+/\|/go; $stopWords =~ s/[\(\)]//go; # Tokenize string taking account of literal strings, then remove # stop words and convert '+' and '-' syntax. @tokens = map { s/^\+//o; s/^\-/\!/o; s/^"//o; $_ } # remove +, change - to !, remove " grep { ! /^($stopWords)$/i } # remove stopwords map { s/$TWiki::TranslationToken/ /go; $_ } # restore space split( /[\s]+/, $searchString ); # split on spaces } return @tokens; } # Convert spaces into translation token characters (typically NULs), # preventing tokenization. # # FIXME: Terminology confusing here! sub _translateSpace { my $text = shift; $text =~ s/\s+/$TWiki::TranslationToken/go; return $text; } # Search a single web based on parameters - @theTokens is a list of # search terms to be ANDed together, $topic is list of one or more topics. # sub _searchTopicsInWeb { my( $this, $web, $topic, $scope, $type, $caseSensitive, @theTokens ) = @_; my @topicList = (); return @topicList unless( @theTokens ); # bail out if no search string my $store = $this->{session}->{store}; if( $topic ) { # limit search to topic list if( $topic =~ /^\^\([\_\-\+$TWiki::regex{mixedAlphaNum}\|]+\)\$$/ ) { # topic list without wildcards # for speed, do not get all topics in web # but convert topic pattern into topic list my $topics = $topic; $topics =~ s/^\^\(//o; $topics =~ s/\)\$//o; # build list from topic pattern @topicList = split( /\|/, $topics ); } else { # topic list with wildcards @topicList = $store->getTopicNames( $web ); if( $caseSensitive ) { # limit by topic name, @topicList = grep( /$topic/, @topicList ); } else { # Codev.SearchTopicNameAndTopicText @topicList = grep( /$topic/i, @topicList ); } } } else { @topicList = $store->getTopicNames( $web ); } # default scope is 'text' $scope = 'text' unless( $scope =~ /^(topic|all)$/ ); # AND search - search once for each token, ANDing result together foreach my $token ( @theTokens ) { # search on each token my $invertSearch = ( $token =~ s/^\!//o ); # flag for AND NOT search my @scopeTextList = (); my @scopeTopicList = (); return @topicList unless( @topicList ); # scope can be 'topic' (default), 'text' or "all" # scope='text', e.g. Perl search on topic name: unless( $scope eq 'text' ) { my $qtoken = $token; # FIXME I18N $qtoken = quotemeta( $qtoken ) if( $type ne 'regex' ); if( $caseSensitive ) { # fix for Codev.SearchWithNoPipe @scopeTopicList = grep( /$qtoken/, @topicList ); } else { @scopeTopicList = grep( /$qtoken/i, @topicList ); } } # scope='text', e.g. grep search on topic text: unless( $scope eq 'topic' ) { # search only for the topic name, ignoring matching lines. # We will make a mess of reporting the matches later on. my $matches = $store->searchInWebContent ( $token, $web, \@topicList, { type => $type, casesensitive => $caseSensitive, files_without_match => 1 } ); @scopeTextList = keys %$matches; } if( @scopeTextList && @scopeTopicList ) { # join 'topic' and 'text' lists push( @scopeTextList, @scopeTopicList ); my %seen = (); # make topics unique @scopeTextList = sort grep { ! $seen{$_} ++ } @scopeTextList; } elsif( @scopeTopicList ) { @scopeTextList = @scopeTopicList; } if( $invertSearch ) { # do AND NOT search my %seen = (); foreach my $topic ( @scopeTextList ) { $seen{$topic} = 1; } @scopeTextList = (); foreach my $topic ( @topicList ) { push( @scopeTextList, $topic ) unless( $seen{$topic} ); } } # reduced topic list for next token @topicList = @scopeTextList; } return @topicList; } sub _makeTopicPattern { my( $topic ) = @_ ; return '' unless( $topic ); # 'Web*, FooBar' ==> ( 'Web*', 'FooBar' ) ==> ( 'Web.*', "FooBar" ) my @arr = map { s/[^\*\_\-\+$TWiki::regex{mixedAlphaNum}]//go; s/\*/\.\*/go; $_ } split( /,\s*/, $topic ); return '' unless( @arr ); # ( 'Web.*', 'FooBar' ) ==> "^(Web.*|FooBar)$" return '^(' . join( '|', @arr ) . ')$'; } =pod ---++ ObjectMethod searchWeb (...) Search one or more webs according to the parameters. If =_callback= is set, that means the caller wants results as soon as they are ready. =_callback_ should be set to a reference to a function which takes =_cbdata= as the first parameter and remaining parameters the same as 'print'. If =_callback= is set, the result is always undef. Otherwise the result is a string containing the rendered search results. If =inline= is set, then the results are *not* decorated with the search template head and tail blocks. Note: If =format= is set, =template= will be ignored. Note: For legacy, if =regex= is defined, it will force type='regex' SMELL: If =template= is defined =bookview= will not work SMELL: it seems that if you define =_callback= or =inline= then you are responsible for converting the TML to HTML yourself! FIXME: =callback= cannot work with format parameter (consider format='| $topic |' =cut sub searchWeb { my $this = shift; ASSERT($this->isa( 'TWiki::Search')) if DEBUG; my %params = @_; my $callback = $params{_callback}; my $cbdata = $params{_cbdata}; my $baseTopic = $params{basetopic} || $this->{session}->{topicName}; my $baseWeb = $params{baseweb} || $this->{session}->{webName}; my $doBookView = TWiki::isTrue( $params{bookview} ); my $caseSensitive = TWiki::isTrue( $params{casesensitive} ); my $excludeTopic = $params{excludetopic} || ''; my $doExpandVars = TWiki::isTrue( $params{expandvariables} ); my $format = $params{format} || ''; my $header = $params{header}; my $inline = $params{inline}; my $limit = $params{limit} || ''; my $doMultiple = TWiki::isTrue( $params{multiple} ); my $nonoise = TWiki::isTrue( $params{nonoise} ); my $noEmpty = TWiki::isTrue( $params{noempty}, $nonoise ); # Note: a defined header overrides noheader my $noHeader = !defined($header) && TWiki::isTrue( $params{noheader}, $nonoise) # Note: This is done for Cairo compatibility || (!$header && $format && $inline); my $noSearch = TWiki::isTrue( $params{nosearch}, $nonoise ); my $noSummary = TWiki::isTrue( $params{nosummary}, $nonoise ); my $zeroResults = 1 - TWiki::isTrue( ($params{zeroresults} || 'on'), $nonoise ); my $noTotal = TWiki::isTrue( $params{nototal}, $nonoise ); my $newLine = $params{newline} || ''; my $sortOrder = $params{order} || ''; my $revSort = TWiki::isTrue( $params{reverse} ); my $scope = $params{scope} || ''; my $searchString = $params{search} || $emptySearch; my $separator = $params{separator}; my $template = $params{template} || ''; my $topic = $params{topic} || ''; my $type = $params{type} || ''; my $webName = $params{web} || ''; my $date = $params{date} || ''; my $recurse = $params{'recurse'} || ''; my $finalTerm = $inline ? ( $params{nofinalnewline} || 0 ) : 0; $baseWeb =~ s/\./\//go; my $session = $this->{session}; my $renderer = $session->{renderer}; # Limit search results if ($limit =~ /(^\d+$)/o) { # only digits, all else is the same as # an empty string. "+10" won't work. $limit = $1; } else { # change 'all' to 0, then to big number $limit = 0; } $limit = 32000 unless( $limit ); $type = 'regex' if( $params{regex} ); # Filter the search string for security and untaint it $searchString = $this->_filterSearchString( $searchString, $type ); my $mixedAlpha = $TWiki::regex{mixedAlpha}; if( defined( $separator )) { $separator =~ s/\$n\(\)/\n/gos; # expand "$n()" to new line $separator =~ s/\$n([^$mixedAlpha]|$)/\n$1/gos; } if( $newLine ) { $newLine =~ s/\$n\(\)/\n/gos; # expand "$n()" to new line $newLine =~ s/\$n([^$mixedAlpha]|$)/\n$1/gos; } my $searchResult = ''; my $homeWeb = $session->{webName}; my $homeTopic = $TWiki::cfg{HomeTopicName}; my $store = $session->{store}; my %excludeWeb; my @tmpWebs; # A value of 'all' or 'on' by itself gets all webs, # otherwise ignored (unless there is a web called 'All'.) my $searchAllFlag = ( $webName =~ /(^|[\,\s])(all|on)([\,\s]|$)/i ); if( $webName ) { foreach my $web ( split( /[\,\s]+/, $webName ) ) { $web =~ s#\.#/#go; # the web processing loop filters for valid web names, # so don't do it here. if ( $web =~ s/^-// ) { $excludeWeb{$web} = 1; } else { push( @tmpWebs, $web ); if( TWiki::isTrue( $recurse ) || $web =~ /^(all|on)$/i ) { my $webarg = ($web =~/^(all|on)$/i) ? undef : $web; push( @tmpWebs, $store->getListOfWebs( 'user,allowed', $webarg )); } } } } else { # default to current web push( @tmpWebs, $session->{webName} ); if ( TWiki::isTrue( $recurse )) { push( @tmpWebs, $store->getListOfWebs( 'user,allowed', $session->{webName} )); } } my @webs; foreach my $web ( @tmpWebs ) { push( @webs, $web ) unless $excludeWeb{$web}; $excludeWeb{$web} = 1; } # E.g. "Bug*, *Patch" ==> "^(Bug.*|.*Patch)$" $topic = _makeTopicPattern( $topic ); # E.g. "Web*, FooBar" ==> "^(Web.*|FooBar)$" $excludeTopic = _makeTopicPattern( $excludeTopic ); my $output = ''; my $tmpl = ''; my $originalSearch = $searchString; my $spacedTopic; if( $format ) { $template = 'searchformat'; } elsif( $template ) { # template definition overrides book and rename views } elsif( $doBookView ) { $template = 'searchbookview'; } else { $template = 'search'; } $tmpl = $session->{templates}->readTemplate( $template ); # SMELL: the only META tags in a template will be METASEARCH # Why the heck are they being filtered???? $tmpl =~ s/\%META{.*?}\%//go; # remove %META{'parent'}% # Split template into 5 sections my( $tmplHead, $tmplSearch, $tmplTable, $tmplNumber, $tmplTail ) = split( /%SPLIT%/, $tmpl ); # Invalid template? if( ! $tmplTail ) { my $mess = CGI::h1('TWiki Installation Error') . 'Incorrect format of '.$template.' template (missing sections? There should be 4 %SPLIT% tags)'; if ( defined $callback ) { &$callback( $cbdata, $mess ); return undef; } else { return $mess; } } # Expand tags in template sections $tmplSearch = $session->handleCommonTags( $tmplSearch, $homeWeb, $homeTopic ); $tmplNumber = $session->handleCommonTags( $tmplNumber, $homeWeb, $homeTopic ); # If not inline search, also expand tags in head and tail sections unless( $inline ) { $tmplHead = $session->handleCommonTags( $tmplHead, $homeWeb, $homeTopic ); if( defined $callback ) { $tmplHead = $renderer->getRenderedVersion( $tmplHead, $homeWeb, $homeTopic ); $tmplHead =~ s|||goi; # remove tags &$callback( $cbdata, $tmplHead ); } else { # don't getRenderedVersion; this will be done by a single # call at the end. $searchResult .= $tmplHead; } } # Generate 'Search:' part showing actual search string used unless( $noSearch ) { my $searchStr = $searchString; $searchStr = '' if( $searchString eq $emptySearch ); $searchStr =~ s/&/&/go; $searchStr =~ s//>/go; $searchStr =~ s/^\.\*$/Index/go; $tmplSearch =~ s/%SEARCHSTRING%/$searchStr/go; if( defined $callback ) { $tmplSearch = $renderer->getRenderedVersion( $tmplSearch, $homeWeb, $homeTopic ); $tmplSearch =~ s|||goi; # remove tag &$callback( $cbdata, $tmplSearch ); } else { # don't getRenderedVersion; will be done later $searchResult .= $tmplSearch; } } # Split the search string into tokens depending on type of search - # each token is ANDed together by actual search my @tokens = $this->_tokensFromSearchString( $searchString, $type ); # Write log entry # FIXME: Move log entry further down to log actual webs searched if( ( $TWiki::cfg{Log}{search} ) && ( ! $inline ) ) { my $t = join( ' ', @webs ); $session->writeLog( 'search', $t, $searchString ); } # Loop through webs my $isAdmin = $session->{user}->isAdmin(); my $ttopics = 0; foreach my $web ( @webs ) { $web =~ s/$TWiki::cfg{NameFilter}//go; $web = TWiki::Sandbox::untaintUnchecked( $web ); next unless $store->webExists( $web ); # can't process what ain't thar my $prefs = $session->{prefs}; my $thisWebNoSearchAll = $prefs->getWebPreferencesValue( 'NOSEARCHALL', $web ) || ''; # make sure we can report this web on an 'all' search # DON'T filter out unless it's part of an 'all' search. next if ( $searchAllFlag && ! $isAdmin && ( $thisWebNoSearchAll =~ /on/i || $web =~ /^[\.\_]/ ) && $web ne $session->{webName} ); # Run the search on topics in this web my @topicList = $this->_searchTopicsInWeb( $web, $topic, $scope, $type, $caseSensitive, @tokens ); # exclude topics, Codev.ExcludeWebTopicsFromSearch if( $caseSensitive && $excludeTopic ) { @topicList = grep( !/$excludeTopic/, @topicList ); } elsif( $excludeTopic ) { @topicList = grep( !/$excludeTopic/i, @topicList ); } next if( $noEmpty && ! @topicList ); # Nothing to show for this web my $topicInfo = {}; # sort the topic list by date, author or topic name, and cache the # info extracted to do the sorting if( $sortOrder eq 'modified' ) { # For performance: # * sort by approx time (to get a rough list) # * shorten list to the limit + some slack # * sort by rev date on shortened list to get the accurate list # SMELL: Ciaro had efficient two stage handling of modified sort. # SMELL: In Dakar this seems to be pointless since latest rev # time is taken from topic instead of dir list. my $slack = 10; if( $limit + 2 * $slack < scalar( @topicList ) ) { # sort by approx latest rev time my @tmpList = map { $_->[1] } sort {$a->[0] <=> $b->[0] } map { [ $store->getTopicLatestRevTime( $web, $_ ), $_ ] } @topicList; @tmpList = reverse( @tmpList ) if( $revSort ); # then shorten list and build the hashes for date and author my $idx = $limit + $slack; @topicList = (); foreach( @tmpList ) { push( @topicList, $_ ); $idx -= 1; last if $idx <= 0; } } $topicInfo = $this->_sortTopics( $web, \@topicList, $sortOrder, !$revSort ); } elsif( $sortOrder =~ /^creat/ || # topic creation time $sortOrder eq 'editby' || # author $sortOrder =~ s/^formfield\((.*)\)$/$1/ # form field ) { $topicInfo = $this->_sortTopics( $web, \@topicList, $sortOrder, !$revSort ); } else { # simple sort, see Codev.SchwartzianTransformMisused # note no extraction of topic info here, as not needed # for the sort. Instead it will be read lazily, later on. if( $revSort ) { @topicList = sort {$b cmp $a} @topicList; } else { @topicList = sort {$a cmp $b} @topicList; } } if( $date ){ use TWiki::Time; my @ends = &TWiki::Time::parseInterval( $date ); my @resultList = (); foreach my $topic ( @topicList ) { # if date falls out of interval: exclude topic from result my $topicdate = $store->getTopicLatestRevTime( $web, $topic ); push( @resultList, $topic ) unless ( $topicdate < $ends[0] || $topicdate > $ends[1] ); } @topicList = @resultList; } # header and footer of $web my( $beforeText, $repeatText, $afterText ) = split( /%REPEAT%/, $tmplTable ); if( defined $header ) { $beforeText = TWiki::expandStandardEscapes($header); $beforeText =~ s/\$web/$web/gos; # expand name of web if( defined( $separator )) { $beforeText .= $separator; } else { $beforeText =~ s/([^\n])$/$1\n/os; # add new line at end if needed } } # output the list of topics in $web my $ntopics = 0; my $headerDone = $noHeader; foreach my $topic ( @topicList ) { my $forceRendering = 0; unless( exists( $topicInfo->{$topic} ) ) { # not previously cached $topicInfo->{$topic} = $this->_extractTopicInfo( $web, $topic, 0, undef ); } my $epochSecs = $topicInfo->{$topic}->{modified}; my $revDate = TWiki::Time::formatTime( $epochSecs ); my $isoDate = TWiki::Time::formatTime( $epochSecs, '$iso', 'gmtime'); my $revUser = $topicInfo->{$topic}->{editby} || 'UnknownUser'; my $ru = $session->{users}->findUser( $revUser ); my $revNum = $topicInfo->{$topic}->{revNum} || 0; # Check security # FIXME - how do we deal with user login not being available if # coming from search script? my $allowView = $topicInfo->{$topic}->{allowView}; next unless $allowView; my ( $meta, $text ); # Special handling for format='...' if( $format ) { ( $meta, $text ) = $this->_getTextAndMeta( $topicInfo, $web, $topic ); if( $doExpandVars ) { if( $web eq $baseWeb && $topic eq $baseTopic ) { # primitive way to prevent recursion $text =~ s/%SEARCH/%SEARCH/g; } $text = $session->handleCommonTags( $text, $web, $topic ); } } my @multipleHitLines = (); if( $doMultiple ) { my $pattern = $tokens[$#tokens]; # last token in an AND search $pattern = quotemeta( $pattern ) if( $type ne 'regex' ); ( $meta, $text ) = $this->_getTextAndMeta( $topicInfo, $web, $topic ) unless $text; if( $caseSensitive ) { @multipleHitLines = reverse grep { /$pattern/ } split( /[\n\r]+/, $text ); } else { @multipleHitLines = reverse grep { /$pattern/i } split( /[\n\r]+/, $text ); } } # SMELL: this loop is a rather hairy; why not do it thus: # while(scalar(@multipleHitLines))? # presumably you are relying on the fact that text will be set # when doMultiple is off, even though @multipleHitLines will # be empty? I can't work it out. do { # multiple=on loop my $out = ''; $text = pop( @multipleHitLines ) if( scalar( @multipleHitLines ) ); if( $format ) { $out = $format; $out = TWiki::expandStandardEscapes( $out ); $out =~ s/\$web/$web/gs; $out =~ s/\$topic\(([^\)]*)\)/TWiki::Render::breakName( $topic, $1 )/ges; $out =~ s/\$topic/$topic/gs; $out =~ s/\$date/$revDate/gs; $out =~ s/\$isodate/$isoDate/gs; $out =~ s/\$rev/$revNum/gs; $out =~ s/\$wikiusername/$ru->webDotWikiName()/ges; $out =~ s/\$wikiname/$ru->wikiName()/ges; $out =~ s/\$username/$ru->login()/ges; my $r1info = {}; $out =~ s/\$createdate/$this->_getRev1Info( $web, $topic, 'date', $r1info )/ges; $out =~ s/\$createusername/$this->_getRev1Info( $web, $topic, 'username', $r1info )/ges; $out =~ s/\$createwikiname/$this->_getRev1Info( $web, $topic, 'wikiname', $r1info )/ges; $out =~ s/\$createwikiusername/$this->_getRev1Info( $web, $topic, 'wikiusername', $r1info )/ges; if( $out =~ m/\$text/ ) { ( $meta, $text ) = $this->_getTextAndMeta( $topicInfo, $web, $topic ) unless $text; if( $topic eq $session->{topicName} ) { # defuse SEARCH in current topic to prevent loop $text =~ s/%SEARCH{.*?}%/SEARCH{...}/go; } $out =~ s/\$text/$text/gos; $forceRendering = 1 unless( $doMultiple ); } } else { $out = $repeatText; } $out =~ s/%WEB%/$web/go; $out =~ s/%TOPICNAME%/$topic/go; $out =~ s/%TIME%/$revDate/o; my $srev = 'r' . $revNum; if( $revNum eq '0' || $revNum eq '1' ) { $srev = CGI::span( { class => 'twikiNew' }, ($this->{session}->{i18n}->maketext('NEW')) ); } $out =~ s/%REVISION%/$srev/o; $out =~ s/%AUTHOR%/$revUser/o; if( ( $inline || $format ) && ( ! ( $forceRendering ) ) ) { # do nothing } else { # don't callback yet because of table # rendering #$out = $session->handleCommonTags( $out, $web, $topic ); #$out = $renderer->getRenderedVersion( $out, $web, $topic ); } if( $doBookView ) { # BookView ( $meta, $text ) = $this->_getTextAndMeta( $topicInfo, $web, $topic ) unless $text; if( $web eq $baseWeb && $topic eq $baseTopic ) { # primitive way to prevent recursion $text =~ s/%SEARCH/%SEARCH/g; } $text = $session->handleCommonTags( $text, $web, $topic ); $text = $session->{renderer}->getRenderedVersion ( $text, $web, $topic ); # FIXME: What about meta data rendering? $out =~ s/%TEXTHEAD%/$text/go; } elsif( $format ) { $out =~ s/\$summary(?:\(([^\)]*)\))?/$renderer->makeTopicSummary( $text, $topic, $web, $1 )/ges; $out =~ s/\$changes(?:\(([^\)]*)\))?/$renderer->summariseChanges($ru,$web,$topic,$1,$revNum)/ges; $out =~ s/\$formfield\(\s*([^\)]*)\s*\)/TWiki::Render::renderFormFieldArg( $meta, $1 )/ges; $out =~ s/\$parent\(([^\)]*)\)/TWiki::Render::breakName( $meta->getParent(), $1 )/ges; $out =~ s/\$parent/$meta->getParent()/ges; $out =~ s/\$formname/$meta->getFormName()/ges; $out =~ s/\$count\((.*?\s*\.\*)\)/_countPattern( $text, $1 )/ges; # FIXME: Allow all regex characters but escape them # Note: The RE requires a .* at the end of a pattern to avoid false positives # in pattern matching $out =~ s/\$pattern\((.*?\s*\.\*)\)/getTextPattern( $text, $1 )/ges; $out =~ s/\r?\n/$newLine/gos if( $newLine ); if( defined( $separator )) { $out .= $separator; } else { # add new line at end if needed # SMELL: why? $out =~ s/([^\n])$/$1\n/s; } } elsif( $noSummary ) { $out =~ s/%TEXTHEAD%//go; $out =~ s/ //go; } else { # regular search view ( $meta, $text ) = $this->_getTextAndMeta( $topicInfo, $web, $topic ) unless $text; $text = $renderer->makeTopicSummary( $text, $topic, $web ); $out =~ s/%TEXTHEAD%/$text/go; } # lazy output of header (only if needed for the first time) unless( $headerDone ) { $headerDone = 1; my $prefs = $session->{prefs}; my $thisWebBGColor = $prefs->getWebPreferencesValue( 'WEBBGCOLOR', $web ) || '\#FF00FF'; $beforeText =~ s/%WEBBGCOLOR%/$thisWebBGColor/go; $beforeText =~ s/%WEB%/$web/go; $beforeText = $session->handleCommonTags ( $beforeText, $web, $topic ); if ( defined $callback ) { $beforeText = $renderer->getRenderedVersion( $beforeText, $web, $topic ); $beforeText =~ s|||goi; # remove tag &$callback( $cbdata, $beforeText ); } else { $searchResult .= $beforeText; } } #don't expand if a format is specified - it breaks tables and stuff unless( $format ) { $out = $renderer->getRenderedVersion( $out, $web, $topic ); } # output topic (or line if multiple=on) if ( defined $callback ) { $out =~ s|||goi; # remove tag &$callback( $cbdata, $out ); } else { $searchResult .= $out; } } while( @multipleHitLines ); # multiple=on loop $ntopics += 1; $ttopics += 1; # delete topic info to clear any cached data undef $topicInfo->{$topic}; last if( $ntopics >= $limit ); } # end topic loop # output footer only if hits in web if( $ntopics ) { # output footer of $web $afterText = $session->handleCommonTags( $afterText, $web, $homeTopic ); if( $inline || $format ) { $afterText =~ s/\n$//os; # remove trailing new line } if ( defined $callback ) { $afterText = $renderer->getRenderedVersion( $afterText, $web, $homeTopic ); $afterText =~ s|||goi; # remove tag &$callback( $cbdata, $afterText ); } else { $searchResult .= $afterText; } } # output number of topics (only if hits in web or if # only searching one web) if( $ntopics || scalar( @webs ) < 2 ) { unless( $noTotal ) { my $thisNumber = $tmplNumber; $thisNumber =~ s/%NTOPICS%/$ntopics/go; if ( defined $callback ) { $thisNumber = $renderer->getRenderedVersion( $thisNumber, $web, $homeTopic ); $thisNumber =~ s|||goi; # remove tag &$callback( $cbdata, $thisNumber ); } else { $searchResult .= $thisNumber; } } } } # end of: foreach my $web ( @webs ) return '' if ( $ttopics == 0 && $zeroResults ); if( $format && ! $finalTerm ) { if( $separator ) { $searchResult =~ s/$separator$//s; # remove separator at end } else { $searchResult =~ s/\n$//os; # remove trailing new line } } unless( $inline ) { $tmplTail = $session->handleCommonTags( $tmplTail, $homeWeb, $homeTopic ); if( defined $callback ) { $tmplTail = $renderer->getRenderedVersion( $tmplTail, $homeWeb, $homeTopic ); $tmplTail =~ s|||goi; # remove tag &$callback( $cbdata, $tmplTail ); } else { $searchResult .= $tmplTail; } } return undef if ( defined $callback ); return $searchResult if $inline; $searchResult = $session->handleCommonTags( $searchResult, $homeWeb, $homeTopic ); $searchResult = $renderer->getRenderedVersion( $searchResult, $homeWeb, $homeTopic ); return $searchResult; } # extract topic info required for sorting and sort. sub _sortTopics{ my ( $this, $web, $topics, $sortfield, $revSort ) = @_; my $topicInfo = {}; foreach my $topic ( @$topics ) { $topicInfo->{$topic} = $this->_extractTopicInfo( $web, $topic, $sortfield ); } if( $revSort ) { @$topics = map { $_->[1] } sort { _compare( $b->[0], $a->[0] ) } map { [ $topicInfo->{$_}->{$sortfield}, $_ ] } @$topics; } else { @$topics = map { $_->[1] } sort { _compare( $a->[0], $b->[0] ) } map { [ $topicInfo->{$_}->{$sortfield}, $_ ] } @$topics; } return $topicInfo; } # RE for a full-spec floating-point number my $number = qr/^[-+]?[0-9]+(\.[0-9]*)?([Ee][-+]?[0-9]+)?$/s; sub _compare { if( $_[0] =~ /$number/o && $_[1] =~ /$number/o ) { # when sorting numbers do it largest first; this is just because # this is what date comparisons need. return $_[1] <=> $_[0]; } else { return $_[1] cmp $_[0]; } } # extract topic info sub _extractTopicInfo { my ( $this, $web, $topic, $sortfield ) = @_; my $info = {}; my $session = $this->{session}; my $store = $session->{store}; my ( $meta, $text ) = $this->_getTextAndMeta( undef, $web, $topic ); $info->{text} = $text; $info->{meta} = $meta; my ( $revdate, $revuser, $revnum ) = $meta->getRevisionInfo(); $info->{editby} = $revuser ? $revuser->webDotWikiName() : ''; $info->{modified} = $revdate; $info->{revNum} = $revnum; $info->{allowView} = $session->{security}-> checkAccessPermission( 'view', $session->{user}, $text, $meta, $topic, $web ); return $info unless $sortfield; if ( $sortfield =~ /^creat/ ) { ( $info->{$sortfield} ) = $meta->getRevisionInfo( 1 ); } elsif ( !defined( $info->{$sortfield} )) { $info->{$sortfield} = TWiki::Render::renderFormFieldArg( $meta, $sortfield ); } return $info; } # get the text and meta for a topic sub _getTextAndMeta { my( $this, $topicInfo, $web, $topic ) = @_; my ( $meta, $text ); my $store = $this->{session}->{store}; # read from cache if it's there if ( $topicInfo ) { $text = $topicInfo->{$topic}->{text}; $meta = $topicInfo->{$topic}->{meta}; } unless( defined $text ) { ( $meta, $text ) = $store->readTopic( undef, $web, $topic, undef ); $text =~ s/%WEB%/$web/gos; $text =~ s/%TOPIC%/$topic/gos; } return ( $meta, $text ); } # Returns the topic revision info of the base version, # attributes are 'date', 'username', 'wikiname', # 'wikiusername'. Revision info is cached in the search # object for speed. sub _getRev1Info { my( $this, $web, $topic, $attr, $info ) = @_; my $key = $web.'.'.$topic; my $store = $this->{session}->{store}; unless( $info->{webTopic} && $info->{webTopic} eq $key ) { my $meta = new TWiki::Meta( $this->{session}, $web, $topic ); my ( $d, $u ) = $meta->getRevisionInfo( 1 ); $info->{date} = $d; $info->{user} = $u; $info->{webTopic} = $key; } if( $attr eq 'username' ) { return $info->{user}->login(); } if( $attr eq 'wikiname' ) { return $info->{user}->wikiName(); } if( $attr eq 'wikiusername' ) { return $info->{user}->webDotWikiName(); } if( $attr eq 'date' ) { return TWiki::Time::formatTime( $info->{date} ); } return 1; } # With the same argument as $pattern, returns a number which is the count of # occurences of the pattern argument. sub _countPattern { my( $theText, $thePattern ) = @_; $thePattern =~ s/([^\\])([\$\@\%\&\#\'\`\/])/$1\\$2/go; # escape some special chars $thePattern =~ /(.*)/; # untaint $thePattern = $1; my $OK = 0; eval { # counting hack, see: http://dev.perl.org/perl6/rfc/110.html $OK = () = $theText =~ /$thePattern/g; }; return $OK; } 1;