0)
die("This version of the Zoom search script requires PHP 4.2.0 or higher. You are currently using: PHP " . phpversion() . " ");
// Absolute locations of the settings file and of every index data file.
// All of them are expected in the same directory as this script.
$SETTINGSFILE    = dirname(__FILE__) . '/settings.php';
$WORDMAPFILE     = dirname(__FILE__) . '/zoom_wordmap.zdat';
$DICTIONARYFILE  = dirname(__FILE__) . '/zoom_dictionary.zdat';
$PAGEDATAFILE    = dirname(__FILE__) . '/zoom_pagedata.zdat';
$SPELLINGFILE    = dirname(__FILE__) . '/zoom_spelling.zdat';
$PAGETEXTFILE    = dirname(__FILE__) . '/zoom_pagetext.zdat';
$PAGEINFOFILE    = dirname(__FILE__) . '/zoom_pageinfo.zdat';
$RECOMMENDEDFILE = dirname(__FILE__) . '/zoom_recommended.zdat';

// The settings, word map and dictionary files are mandatory: report the
// problem and leave the script when any one of them is absent.
if (!(file_exists($SETTINGSFILE) && file_exists($WORDMAPFILE) && file_exists($DICTIONARYFILE)))
{
    print("Zoom files missing error: Zoom is missing one or more of the required index data files. Please make sure the generated index files are uploaded to the same path as this search script. ");
    return;
}

require($SETTINGSFILE);

// The spelling data is only needed when the feature is switched on in the
// settings. A missing file is reported here but does not stop the script.
if ($Spelling == 1)
{
    if (!file_exists($SPELLINGFILE))
    {
        print("Zoom files missing error: Zoom is missing the 'zoom_spelling.zdat' file required for the Spelling Suggestion feature which has been enabled. ");
    }
}
// ----------------------------------------------------------------------------
// Settings
// ----------------------------------------------------------------------------
// The options available in the dropdown menu for number of results
// per page
$PerPageOptions = array(10, 20, 50, 100);
/*
// For foreign language support, setlocale may be required on the server for
// wildcards and highlighting to work. Uncomment the following lines and specify
// the appropriate locale information
//if (setlocale(LC_ALL, "ru_RU.cp1251") == false) // for russian
// print("Failed to change locale setting or locale setting invalid");
*/
// Index format information
// Field positions within one page data record (presumably the columns of a
// line in the page data file -- the reading code is outside this section,
// TODO confirm).
$PAGEDATA_URL = 0;
$PAGEDATA_TITLE = 1;
$PAGEDATA_DESC = 2;
$PAGEDATA_IMG = 3;
// NOTE(review): looks like the maximum length of one page data line, in
// bytes -- confirm against the code that reads the file.
$MaxPageDataLineLen = 5178;
// Field positions within one custom meta field definition.
$METAFIELD_TYPE = 0;
$METAFIELD_NAME = 1;
$METAFIELD_SHOW = 2;
$METAFIELD_FORM = 3;
$METAFIELD_METHOD = 4;
$METAFIELD_DROPDOWN = 5;
// Possible values of the $METAFIELD_TYPE field.
$METAFIELD_TYPE_NUMERIC = 0;
$METAFIELD_TYPE_TEXT = 1;
$METAFIELD_TYPE_DROPDOWN = 2;
$METAFIELD_TYPE_MULTI = 3;
$METAFIELD_TYPE_MONEY = 4;
// Possible values of the $METAFIELD_METHOD field (comparison applied when
// matching a meta field, judging by the names).
$METAFIELD_METHOD_EXACT = 0;
$METAFIELD_METHOD_LESSTHAN = 1;
$METAFIELD_METHOD_LESSTHANORE = 2;
$METAFIELD_METHOD_GREATERTHAN = 3;
$METAFIELD_METHOD_GREATERTHANORE = 4;
$METAFIELD_METHOD_SUBSTRING = 5;
// Sentinel for "no value stored"; 4294967295 is 0xFFFFFFFF, the largest
// unsigned 32-bit integer (a float on 32-bit PHP builds).
$METAFIELD_NOVALUE_MARKER = 4294967295;
// ----------------------------------------------------------------------------
// Parameter initialisation
// ----------------------------------------------------------------------------
// Announce the page encoding in the HTTP response, unless the settings ask
// for no charset to be sent.
if (isset($Charset) && $NoCharset == 0)
{
    header("Content-Type: text/html; charset=" . $Charset);
}

// PHP releases older than 4.1.0 lack $_SERVER / $_GET / $_POST. Where one is
// missing, alias it to the matching long-named array.
// NOTE: aliases created this way are ordinary variables, NOT superglobals.
if (isset($HTTP_SERVER_VARS) && !isset($_SERVER))
{
    $_SERVER = &$HTTP_SERVER_VARS;
}
if (isset($HTTP_GET_VARS) && !isset($_GET))
{
    $_GET = &$HTTP_GET_VARS;
}
if (isset($HTTP_POST_VARS) && !isset($_POST))
{
    $_POST = &$HTTP_POST_VARS;
}
// Undo "magic quotes" escaping of request data and of runtime reads.
//
// Magic quotes only exist before PHP 5.4. From 5.4 on the getters always
// report "off", get_magic_quotes_gpc()/get_magic_quotes_runtime() are
// deprecated in 7.4 and removed in 8.0, and set_magic_quotes_runtime() is
// removed in 7.0. The whole step is therefore limited to the PHP versions
// where it can have an effect, so newer interpreters never reach a call to a
// function that no longer exists.
if (version_compare(PHP_VERSION, "5.4.0", "<"))
{
    // fix get/post variables if magic quotes are enabled
    if (get_magic_quotes_gpc() == 1)
    {
        // foreach replaces the each()-based loops (each() is gone in PHP 8).
        // Array values (e.g. zoom_cat[]) are left untouched in BOTH arrays;
        // stripslashes() only accepts strings.
        if (isset($_GET))
        {
            foreach ($_GET as $key => $value)
            {
                if (!is_array($value))
                    $_GET[$key] = stripslashes($value);
            }
        }
        if (isset($_POST))
        {
            foreach ($_POST as $key => $value)
            {
                if (!is_array($value))
                    $_POST[$key] = stripslashes($value);
            }
        }
    }

    // check magic_quotes for runtime stuff (reading from files, etc)
    if (get_magic_quotes_runtime() == 1)
        set_magic_quotes_runtime(0);
}
// The search terms arrive in the 'zoom_query' GET parameter (GET is used so
// that sub-result pages can be linked to).
//   $IsZoomQuery  1 when the parameter was supplied at all, otherwise 0
//   $query        the raw query string, "" when absent or unusable
$IsZoomQuery = 0;
$query = "";
if (isset($_GET['zoom_query']))
{
    $IsZoomQuery = 1;
    // A request such as ?zoom_query[]=x yields an array here. The rest of the
    // script treats $query as a string (e.g. urlencode($query)), so anything
    // that is not a string is handled like an empty query.
    if (is_string($_GET['zoom_query']))
        $query = $_GET['zoom_query'];
}
// Results per page: taken from 'zoom_per_page' (never below 1), 10 when the
// parameter is not given.
$per_page = isset($_GET['zoom_per_page'])
    ? max(1, intval($_GET['zoom_per_page']))
    : 10;

// Current result page: taken from 'zoom_page' (never below 1). A request
// without the parameter is a new search and starts on page 1.
$NewSearch = isset($_GET['zoom_page']) ? 0 : 1;
$page = ($NewSearch == 0)
    ? max(1, intval($_GET['zoom_page']))
    : 1;

// AND operator.
//   1 = search for ALL terms
//   0 = search for ANY term (default)
// An explicit 'zoom_and' parameter wins; otherwise the $DefaultToAnd setting
// decides.
if (isset($_GET['zoom_and']))
{
    $and = intval($_GET['zoom_and']);
}
else
{
    $and = (isset($DefaultToAnd) && $DefaultToAnd == 1) ? 1 : 0;
}
// Category support: build $cat, the list of category numbers to search.
// array(-1) stands for "search all categories".
if ($UseCats == 1)
{
    $cat = array(-1);
    if (isset($_GET['zoom_cat']))
    {
        // Accept both a single value and zoom_cat[]; drop everything that is
        // not numeric.
        $cat = array_filter(
            is_array($_GET['zoom_cat']) ? $_GET['zoom_cat'] : array($_GET['zoom_cat']),
            "is_numeric"
        );
    }
    $num_zoom_cats = count($cat);
    // NOTE(review): when nothing survives the filter $cat falls back to
    // array(-1) but $num_zoom_cats stays 0 -- confirm that the code using
    // these two variables expects that.
    if ($num_zoom_cats == 0)
    {
        $cat = array(-1);
    }
}
// Sorting option from 'zoom_sort':
//   0 = by relevance (default)
//   1 = by date (if Date/Time is available)
$sort = isset($_GET['zoom_sort']) ? intval($_GET['zoom_sort']) : 0;

// URL this script uses to refer to itself: the configured $LinkBackURL when
// it is set and non-empty, otherwise the current script path.
// NOTE(review): PHP_SELF can carry user-supplied path info; confirm it is
// escaped wherever $SelfURL is written into the page.
if (isset($LinkBackURL) && strlen($LinkBackURL) >= 1)
{
    $SelfURL = $LinkBackURL;
}
else
{
    $SelfURL = $_SERVER['PHP_SELF'];
}

// Link target attribute text, empty unless a target is enabled and configured.
$zoom_target = ($UseLinkTarget == 1 && isset($LinkTarget))
    ? " target=\"" . $LinkTarget . "\" "
    : "";

// Multibyte string functions are used only for UTF-8 indexes and only when
// the mbstring functions are actually available.
$UseMBFunctions = ($UseUTF8 == 1 && function_exists('mb_strtolower')) ? 1 : 0;

// Stemmer instance, created only when stemming is enabled.
if ($UseStemming == 1)
{
    $porterStemmer = new PorterStemmer();
}
// ----------------------------------------------------------------------------
// Template buffers
// ----------------------------------------------------------------------------
// defines for output elements
// Each value is the index of one page fragment in $OutputBuffers. Ids 0-7 are
// the parts of the search form, ids 8-16 the parts of the results display.
$OUTPUT_FORM_START = 0;
$OUTPUT_FORM_END = 1;
$OUTPUT_FORM_SEARCHBOX = 2;
$OUTPUT_FORM_SEARCHBUTTON = 3;
$OUTPUT_FORM_RESULTSPERPAGE = 4;
$OUTPUT_FORM_MATCH = 5;
$OUTPUT_FORM_CATEGORIES = 6;
$OUTPUT_FORM_CUSTOMMETA = 7;
$OUTPUT_HEADING = 8;
$OUTPUT_SUMMARY = 9;
$OUTPUT_SUGGESTION = 10;
$OUTPUT_PAGESCOUNT = 11;
$OUTPUT_SORTING = 12;
$OUTPUT_SEARCHTIME = 13;
$OUTPUT_RECOMMENDED = 14;
$OUTPUT_PAGENUMBERS = 15;
$OUTPUT_CATSUMMARY = 16;
// Number of ids above; keep it in step when adding an output element.
$OUTPUT_TAG_COUNT = 17;
// One empty buffer per output element, filled in as the page is built.
$OutputBuffers = array_fill(0, $OUTPUT_TAG_COUNT, "");
// The list of results itself is collected separately from the buffers above.
$OutputResultsBuffer = "";
// Template tag strings, one entry per output element (17 entries, the same
// number as $OUTPUT_TAG_COUNT).
// NOTE(review): every tag literal in this section is an empty string in this
// copy of the file, so all of the strlen() results below are 0. It looks as
// if markup-like text was stripped from the source -- restore the literals
// from a pristine copy before relying on template tag matching.
$TemplateShowTags = array(
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
""
);
// Further template tags, each with its length precomputed.
$TemplateDefaultTag = "";
$TemplateDefaultTagLen = strlen($TemplateDefaultTag);
$TemplateSearchFormTag = "";
$TemplateSearchFormTagLen = strlen($TemplateSearchFormTag);
$TemplateResultsTag = "";
$TemplateResultsTagLen = strlen($TemplateResultsTag);
$TemplateQueryTag = "";
$TemplateQueryTagLen = strlen($TemplateQueryTag);
// Opening fragment of the search form. NOTE(review): only a line break is
// left inside this literal; the original markup appears to be missing.
$OutputBuffers[$OUTPUT_FORM_START] = "
";
// Indexes for dict structure
// Field positions within one $dict entry. The search loop reads the word text
// through $DICT_WORD and the entry's byte offset into the wordmap file through
// $DICT_PTR, where -1 marks a word that is present but not indexed/skipped.
$DICT_WORD = 0;
$DICT_PTR = 1;
// NOTE(review): judging by the names, the number of variants and the variant
// list of an entry -- not used in this section, confirm against the loader.
$DICT_VARCOUNT = 2;
$DICT_VARIANTS = 3;
// ----------------------------------------------------------------------------
// Functions
// ----------------------------------------------------------------------------
/**
 * Prints the search form in its default layout.
 *
 * The form fragments are taken from the global $OutputBuffers and written in
 * a fixed order: form start, search box, search button, results-per-page
 * selector, match selector, categories, custom meta fields, form end.
 */
function ShowDefaultForm()
{
    global $OutputBuffers;
    global $OUTPUT_FORM_START, $OUTPUT_FORM_END;
    global $OUTPUT_FORM_SEARCHBOX, $OUTPUT_FORM_SEARCHBUTTON, $OUTPUT_FORM_RESULTSPERPAGE;
    global $OUTPUT_FORM_MATCH, $OUTPUT_FORM_CATEGORIES, $OUTPUT_FORM_CUSTOMMETA;

    $formSections = array(
        $OUTPUT_FORM_START,
        $OUTPUT_FORM_SEARCHBOX,
        $OUTPUT_FORM_SEARCHBUTTON,
        $OUTPUT_FORM_RESULTSPERPAGE,
        $OUTPUT_FORM_MATCH,
        $OUTPUT_FORM_CATEGORIES,
        $OUTPUT_FORM_CUSTOMMETA,
        $OUTPUT_FORM_END
    );
    foreach ($formSections as $sectionId)
    {
        print($OutputBuffers[$sectionId]);
    }
}
/**
 * Prints the complete search page in its default layout.
 *
 * Output order: the search form (via ShowDefaultForm()), then heading,
 * summary, category summary, spelling suggestion, page count, recommended
 * links and sorting options, then the result list itself, and finally the
 * page number links and the search time.
 */
function ShowDefaultSearchPage()
{
    global $OutputBuffers;
    global $OutputResultsBuffer;
    global $OUTPUT_HEADING, $OUTPUT_SUMMARY, $OUTPUT_CATSUMMARY, $OUTPUT_SUGGESTION;
    global $OUTPUT_PAGESCOUNT, $OUTPUT_RECOMMENDED, $OUTPUT_SORTING;
    global $OUTPUT_PAGENUMBERS, $OUTPUT_SEARCHTIME;

    ShowDefaultForm();

    // everything that goes above the list of results
    $aboveResults = array(
        $OUTPUT_HEADING,
        $OUTPUT_SUMMARY,
        $OUTPUT_CATSUMMARY,
        $OUTPUT_SUGGESTION,
        $OUTPUT_PAGESCOUNT,
        $OUTPUT_RECOMMENDED,
        $OUTPUT_SORTING
    );
    foreach ($aboveResults as $sectionId)
    {
        print($OutputBuffers[$sectionId]);
    }

    print($OutputResultsBuffer);

    // everything that goes below the list of results
    $belowResults = array($OUTPUT_PAGENUMBERS, $OUTPUT_SEARCHTIME);
    foreach ($belowResults as $sectionId)
    {
        print($OutputBuffers[$sectionId]);
    }
}
function ShowTemplate()
{
global $ZoomInfo;
global $OutputBuffers;
global $TemplateShowTags;
global $OUTPUT_TAG_COUNT;
global $TemplateSearchFormTag, $TemplateSearchFormTagLen;
global $TemplateDefaultTag, $TemplateDefaultTagLen;
global $TemplateResultsTag, $TemplateResultsTagLen;
global $OutputResultsBuffer;
global $TemplateQueryTag, $TemplateQueryTagLen, $queryForHTML;
// DO NOT MODIFY THE TEMPLATE FILENAME BELOW:
$TemplateFilename = "search_template.html";
// Note that there is no practical need to change the TemplateFilename. This file
// is not visible to the end user. The search link on your website should point to
// "search.php", and not the template file.
//
// Note also that you cannot change the filename to a PHP or ASP file.
// The template file will only be treated as a static HTML page and changing the
// extension will not alter this behaviour. Please see this FAQ support page
// for a solution: http://www.wrensoft.com/zoom/support/faq_ssi.html
//Open and print start of result page template
$TemplateFilename = dirname(__FILE__) . "/" . $TemplateFilename;
$template = file ($TemplateFilename);
$numtlines = count($template); //Number of lines in the template
$template_line = 0;
$templatePtr = $template[$template_line];
while ($template_line < $numtlines && $templatePtr != "")
{
$tagPos = strpos($templatePtr, "\n";
// Replace the key text with the following
if ($FormFormat > 0)
{
// Insert the form
$OutputBuffers[$OUTPUT_FORM_SEARCHBOX] = $STR_FORM_SEARCHFOR . " \n";
$OutputBuffers[$OUTPUT_FORM_SEARCHBUTTON] = "\n";
if ($FormFormat == 2)
{
$OutputBuffers[$OUTPUT_FORM_RESULTSPERPAGE] = "" . $STR_FORM_RESULTS_PER_PAGE . "\n";
$OutputBuffers[$OUTPUT_FORM_RESULTSPERPAGE] .= "
\n";
if ($UseCats)
{
$OutputBuffers[$OUTPUT_FORM_CATEGORIES] = "\n";
$OutputBuffers[$OUTPUT_FORM_CATEGORIES] .= $STR_FORM_CATEGORY . " ";
if ($SearchMultiCats)
{
$OutputBuffers[$OUTPUT_FORM_CATEGORIES] .= "
\n";
}
else
{
$OutputBuffers[$OUTPUT_FORM_START] .= "\n";
$OutputBuffers[$OUTPUT_FORM_START] .= "\n";
$OutputBuffers[$OUTPUT_FORM_START] .= "\n";
}
}
// Give up early if no search words provided
$IsEmptyMetaQuery = false;
if (empty($query))
{
$NoSearch = false;
if ($UseMetaFields == 1)
{
if ($IsZoomQuery == 1)
$IsEmptyMetaQuery = true;
else
$NoSearch = true;
}
else
{
// only display 'no query' line if no form is shown
if ($IsZoomQuery == 1)
{
$OutputBuffers[$OUTPUT_SUMMARY] .= "
" . $STR_NO_QUERY . "
";
}
$NoSearch = true;
}
if ($NoSearch)
{
//Let others know about Zoom.
if ($ZoomInfo == 1)
$OutputBuffers[$OUTPUT_PAGENUMBERS] .= "
\n";
// Begin main search loop -----------------------------------------------------
// Reset the counters and flags used while searching.
$pagesCount = $NumPages;
$outputline = $IsMaxLimitExceeded = $wordsmatched = 0;

// $res_table holds one row per indexed page, every column starting at zero:
//   [0] score
//   [1] number of search words matched
//   [2] pagetext pointer #1
//   [3] pagetext pointer #2
//   [4] pagetext pointer #3
//   [5] 'and' user search terms matched
//   [6] combined proximity field
$res_table = array();
for ($i = 0; $i < $pagesCount; $i++)
{
    $res_table[$i] = array(0, 0, 0, 0, 0, 0, 0);
}

$exclude_count = 0;         // search terms given with a leading "-"
$SkippedWords = 0;          // search terms that were skipped
$context_maxgoback = 1;     // raised to the longest exact phrase, in words
$SkippedExactPhrase = 0;
$maxscore = 0;              // highest page score seen so far

// queryForURL is the query prepared to be passed in a URL.
$queryForURL = urlencode($query);
// Find recommended links if any (before stemming).
// Each $rec entry pairs a keyword, or a comma-separated list of keywords,
// ([0]) with an index ([1]); both are handed to RecLinkWordMatch().
// NOTE(review): $num_recs_found is never changed in this block, so it is
// presumably incremented inside RecLinkWordMatch() -- confirm.
$num_recs_found = 0;
if ($Recommended == 1)
{
    for ($rl = 0; $rl < $rec_count && $num_recs_found < $RecommendedMax; $rl++)
    {
        $rec_word = $rec[$rl][0];
        $rec_idx = intval($rec[$rl][1]);
        if (strchr($rec_word, ','))
        {
            // explode() replaces split(), which was removed in PHP 7. The
            // delimiter is a plain comma, so the result is the same.
            $rec_multiwords = explode(",", $rec_word);
            $rec_multiwords_count = count($rec_multiwords);
            // stop at the first keyword of the list that matches
            for ($rlm = 0; $rlm < $rec_multiwords_count; $rlm++)
            {
                if (RecLinkWordMatch($rec_multiwords[$rlm], $rec_idx) == true)
                    break;
            }
        }
        else
            RecLinkWordMatch($rec_word, $rec_idx);
    }
    // NOTE(review): only a line break is left in the literal below; the
    // original closing markup appears to have been stripped from this copy.
    if ($num_recs_found > 0)
        $OutputBuffers[$OUTPUT_RECOMMENDED] .= "
";
}
// Prepare the per-term bookkeeping before the search itself starts.
//   $sw_results[n]         number of wordmap entries found for term n
//   $search_terms_ids[n]   dictionary ids for a main search term
//   $phrase_terms_ids[n]   dictionary ids for the words of an exact phrase
//   $UseWildCards[n]       1 when term n contains "*" or "?"
//   $RegExpSearchWords[n]  term n in a form usable inside a regular expression
$sw_results = array();
$search_terms_ids = array();
$phrase_terms_ids = array();
for ($sw = 0; $sw < $NumSearchWords; $sw++)
{
    $sw_results[$sw] = 0;
    $search_terms_ids[$sw] = array();
    $phrase_terms_ids[$sw] = array();

    $hasWildcard = (strpos($SearchWords[$sw], "*") !== false
        || strpos($SearchWords[$sw], "?") !== false);

    if ($hasWildcard)
    {
        // wildcard term: translate the pattern into a regular expression
        $UseWildCards[$sw] = 1;
        $RegExpSearchWords[$sw] = pattern2regexp($SearchWords[$sw]);
    }
    else
    {
        $UseWildCards[$sw] = 0;
        if ($Highlighting == 1)
        {
            // plain term, needed for highlighting: escape backslash first,
            // then "/" and "+", in that order
            $RegExpSearchWords[$sw] = str_replace(
                array("\\", "/", "+"),
                array("\\\\", "\/", "\+"),
                $SearchWords[$sw]
            );
        }
    }
}
// Main search loop: look up every search term in the dictionary and add the
// scores of the pages it occurs on to $res_table.
//
// For each term the loop
//   1. skips empty or too-short terms and strips a leading "-" (exclusion),
//   2. stems the term when stemming is enabled,
//   3. prepares either exact-phrase data (dictionary ids and wordmap records
//      of every word of the phrase) or a wildcard regular expression,
//   4. walks the dictionary, and for each matching entry reads its wordmap
//      records (8 bytes each: score, prox, pagenum, ptr) and updates the row
//      of every page listed there. Exact phrases are additionally verified
//      by reading the word ids stored in the pagetext file.
//
// $res_table columns: [0] score, [1] search words matched, [2]-[4] pagetext
// pointers used for context, [5] 'and' terms matched, [6] combined proximity.
//
// Change from the previous revision: split() (removed in PHP 7) is replaced
// by explode() when cutting an exact phrase into words.
for ($sw = 0; $sw < $NumSearchWords; $sw++)
{
    if ($SearchWords[$sw] == "")
        continue;

    // check min length
    if (strlen($SearchWords[$sw]) < $MinWordLen)
    {
        SkipSearchWord($sw);
        continue;
    }

    $ExactPhrase = 0;
    $ExcludeTerm = 0;

    // Check exclusion searches
    if ($SearchWords[$sw][0] == "-")
    {
        $SearchWords[$sw] = substr($SearchWords[$sw], 1);
        $ExcludeTerm = 1;
        $exclude_count++;
    }

    // Stem the words if necessary (only AFTER stripping exclusion char).
    // Exact phrases are stemmed word by word further below instead.
    if ($UseStemming == 1)
    {
        if ($AllowExactPhrase == 0 || strpos($SearchWords[$sw], " ") === false)
            $SearchWords[$sw] = $porterStemmer->Stem($SearchWords[$sw]);
    }

    if ($AllowExactPhrase == 1 && strpos($SearchWords[$sw], " ") !== false)
    {
        // Initialise exact phrase matching for this search term
        $ExactPhrase = 1;
        // explode() instead of split(): the delimiter is a single literal
        // space, so both produce the same list of words.
        $phrase_terms = explode(" ", $SearchWords[$sw]);
        //$phrase_terms = preg_split("/\W+/", $SearchWords[$sw], -1, 0 /*PREG_SPLIT_DELIM_CAPTURE*/);
        $num_phrase_terms = count($phrase_terms);
        if ($num_phrase_terms > $context_maxgoback)
            $context_maxgoback = $num_phrase_terms;
        $phrase_terms_data = array();

        if ($UseStemming == 1)
        {
            for ($j = 0; $j < $num_phrase_terms; $j++)
                $phrase_terms[$j] = $porterStemmer->Stem($phrase_terms[$j]);
        }

        // Collect the dictionary id and the wordmap records of every word in
        // the phrase.
        $tmpid = 0;
        $WordNotFound = 0;
        $j = 0;
        for ($j = 0; $j < $num_phrase_terms; $j++)
        {
            $tmpid = GetDictID($phrase_terms[$j]);
            if ($tmpid == -1) // word is not in dictionary
            {
                $WordNotFound = 1;
                break;
            }
            $phrase_terms_ids[$sw][$j] = $tmpid;
            $wordmap_row = $dict[$tmpid][$DICT_PTR];
            if ($wordmap_row != -1)
            {
                fseek($fp_wordmap, $wordmap_row);
                // first 2 bytes: record count, low byte first
                $countbytes = fread($fp_wordmap, 2);
                $phrase_data_count[$j] = ord($countbytes[0]) | ord($countbytes[1])<<8;
                for ($xbi = 0; $xbi < $phrase_data_count[$j]; $xbi++) {
                    $xbindata = fread($fp_wordmap, 8);
                    // NOTE(review): a short read is only reported here; the
                    // unpack() below still runs on the empty data.
                    if (strlen($xbindata) == 0)
                        $OutputResultsBuffer .= "error in wordmap file: expected data not found";
                    $phrase_terms_data[$j][$xbi] = unpack("Cscore/Cprox/vpagenum/Vptr", $xbindata);
                }
            }
            else
            {
                // word exists but is not indexed: treated as a skipped word
                $phrase_data_count[$j] = 0;
                $phrase_terms_data[$j] = 0;
            }
        }
        $phrase_terms_ids[$sw][$j] = 0; // null terminate the list
        if ($WordNotFound == 1)
            continue;
    }
    else if ($UseWildCards[$sw])
    {
        // Build the regular expression used to test dictionary words.
        $pattern = "/";
        // match entire word
        if ($SearchAsSubstring == 0)
            $pattern = $pattern . "\A";
        $pattern = $pattern . $RegExpSearchWords[$sw];
        if ($SearchAsSubstring == 0)
            $pattern = $pattern . "\Z";
        if ($ToLowerSearchWords != 0)
            $pattern = $pattern . "/i";
        else
            $pattern = $pattern . "/";
    }

    for ($i = 0; $i < $dict_count; $i++)
    {
        $dictline = $dict[$i];
        $word = $dict[$i][$DICT_WORD];

        // Decide whether dictionary entry $i matches the search term.
        // if we're not using wildcards, direct match
        if ($ExactPhrase == 1)
        {
            // todo: move to next phrase term if first phrase term is skipped?
            // compare first term in exact phrase
            //$result = wordcasecmp($phrase_terms[0], $word);
            if ($i == $phrase_terms_ids[$sw][0])
                $result = 0;
            else
                $result = 1;
        }
        else if ($UseWildCards[$sw] == 0)
        {
            if ($SearchAsSubstring == 0)
                $result = wordcasecmp($SearchWords[$sw], $word);
            else
            {
                if (mystristr($word, $SearchWords[$sw]) == FALSE)
                    $result = 1; // not matched
                else
                    $result = 0; // matched
            }
        }
        else
        {
            // if we have wildcards...
            $result = !(preg_match($pattern, $word));
        }

        // result = 0 if matched, result != 0 if not matched.
        // word found but indicated to be not indexed or skipped
        if ($result == 0 && $dictline[$DICT_PTR] == -1)
        {
            if ($UseWildCards[$sw] == 0 && $SearchAsSubstring == 0)
            {
                if ($ExactPhrase == 1)
                    $SkippedExactPhrase = 1;
                SkipSearchWord($sw);
                break;
            }
            else
                continue;
        }

        if ($result == 0)
        {
            // keyword found in the dictionary
            $wordsmatched++;
            if ($ExcludeTerm == false && $wordsmatched > $MaxMatches)
            {
                $IsMaxLimitExceeded = true;
                break;
            }

            /// remember the dictionary ID for this matched search term
            $search_terms_ids[$sw] = $i;

            if ($ExactPhrase == 1)
            {
                // we'll use the wordmap data for the first term that we have worked out earlier
                $data = $phrase_terms_data[0];
                $data_count = $phrase_data_count[0];
                $ContextSeeks = 0;
            }
            else
            {
                // seek to position in wordmap file
                fseek($fp_wordmap, $dictline[$DICT_PTR]);
                //print "seeking in wordmap: " . $dictline[1] . " ";
                // first 2 bytes is data count
                $countbytes = fread($fp_wordmap, 2);
                $data_count = ord($countbytes[0]) | ord($countbytes[1])<<8;
                //print "data count: " . $data_count . " ";
                for ($bi = 0; $bi < $data_count; $bi++)
                {
                    $bindata = fread($fp_wordmap, 8);
                    if (strlen($bindata) == 0)
                        $OutputResultsBuffer .= "Error in wordmap file: expected data not found";
                    $data[$bi] = unpack("Cscore/Cprox/vpagenum/Vptr", $bindata);
                }
            }

            $sw_results[$sw] += $data_count;

            // Go through wordmap for each page this word appears on
            for ($j = 0; $j < $data_count; $j++)
            {
                $score = $data[$j]["score"];
                $prox = $data[$j]["prox"];
                $txtptr = $data[$j]["ptr"];
                $ipage = $data[$j]["pagenum"];

                if ($score == 0)
                    continue;

                // a non-zero page boost scales the score by boost/10, rounded up
                if ($pageinfo[$ipage]["boost"] != 0)
                {
                    $score *= ($pageinfo[$ipage]["boost"] / 10);
                    $score = ceil($score);
                }

                if ($ExactPhrase == 1)
                {
                    $maxptr = $data[$j]["ptr"];
                    $maxptr_term = 0;
                    $GotoNextPage = 0;

                    // Check if all of the other words in the phrase appears on this page.
                    for ($xi = 0; $xi < $num_phrase_terms && $GotoNextPage == 0; $xi++)
                    {
                        // see if this word appears at all on this page, if not, we stop scanning page.
                        // do not check for skipped words (data count value of zero)
                        if ($phrase_data_count[$xi] != 0)
                        {
                            // check wordmap for this search phrase to see if it appears on the current page.
                            for ($xbi = 0; $xbi < $phrase_data_count[$xi]; $xbi++)
                            {
                                if ($phrase_terms_data[$xi][$xbi]["pagenum"] == $data[$j]["pagenum"])
                                {
                                    // make sure that words appear in same proximity
                                    $overlapProx = $phrase_terms_data[$xi][$xbi]["prox"] << 1;
                                    if (($data[$j]["prox"] & $phrase_terms_data[$xi][$xbi]["prox"]) == 0 &&
                                        ($data[$j]["prox"] & $overlapProx) == 0)
                                    {
                                        $GotoNextPage = 1;
                                    }
                                    else
                                    {
                                        // intersection, this term appears on both pages, goto next term
                                        // remember biggest pointer.
                                        if ($phrase_terms_data[$xi][$xbi]["ptr"] > $maxptr)
                                        {
                                            $maxptr = $phrase_terms_data[$xi][$xbi]["ptr"];
                                            $maxptr_term = $xi;
                                        }
                                        $score += $phrase_terms_data[$xi][$xbi]["score"];
                                    }
                                    break;
                                }
                            }
                            if ($xbi == $phrase_data_count[$xi]) // if not found
                            {
                                $GotoNextPage = 1;
                                break; // goto next page
                            }
                        }
                    } // end phrase term for loop

                    if ($GotoNextPage == 1)
                    {
                        continue;
                    }

                    // Check how many context seeks we have made.
                    $ContextSeeks++;
                    if ($ContextSeeks > $MaxContextSeeks)
                    {
                        $IsMaxLimitExceeded = true;
                        break;
                    }

                    // ok, so this page contains all of the words in the phrase
                    $FoundPhrase = 0;
                    $FoundFirstWord = 0;

                    // we goto the first occurrence of the first word in pagetext
                    $pos = $maxptr - (($maxptr_term+3) * $DictIDLen); // assume 3 possible punctuations.
                    // do not seek further back than the occurrence of the first word (avoid wrong page)
                    if ($pos < $data[$j]["ptr"])
                        $pos = $data[$j]["ptr"];
                    fseek($fp_pagetext, $pos);

                    // now we look for the phrase within the context of this page
                    do
                    {
                        for ($xi = 0; $xi < $num_phrase_terms; $xi++)
                        {
                            // do...while loop to ignore punctuation marks in context phrase
                            do
                            {
                                // Inlined (and unlooped) the following function for speed reasons
                                //$xword_id = GetNextDictWord($fp_pagetext);
                                // A stored word is $DictIDLen bytes: the
                                // dictionary id, low byte first, followed by
                                // one variant byte.
                                $bytes_buffer = fread($fp_pagetext, $DictIDLen);
                                if ($DictIDLen == 4)
                                {
                                    $xword_id = ord($bytes_buffer[0]);
                                    $xword_id = $xword_id | ord($bytes_buffer[1]) << 8;
                                    $xword_id = $xword_id | ord($bytes_buffer[2]) << (8*2);
                                    $variant_index = $bytes_buffer[3];
                                }
                                else
                                {
                                    $xword_id = ord($bytes_buffer[0]);
                                    $xword_id = $xword_id | ord($bytes_buffer[1]) << 8;
                                    $variant_index = $bytes_buffer[2];
                                }
                                $pos += $DictIDLen;
                                // check if we are at the end of page (wordid = 0) or invalid $xword_id
                                if ($xword_id == 0 || $xword_id == 1 || $xword_id >= $dict_count)
                                    break;
                            } while ($xword_id <= $DictReservedLimit && !feof($fp_pagetext));

                            if ($xword_id == 0 || $xword_id == 1 || $xword_id >= $dict_count)
                                break;

                            // if the words are NOT the same, we break out
                            if ($xword_id != $phrase_terms_ids[$sw][$xi])
                            {
                                // also check against first word
                                if ($xi != 0 && $xword_id == $phrase_terms_ids[$sw][0])
                                    $xi = 0; // matched first word
                                else
                                    break;
                            }

                            // remember how many times we find the first word on this page
                            if ($xi == 0)
                            {
                                $FoundFirstWord++;
                                // remember the position of the 'start' of this phrase
                                $txtptr = $pos - $DictIDLen;
                            }
                        }
                        if ($xi == $num_phrase_terms)
                        {
                            // exact phrase found!
                            $FoundPhrase = 1;
                        }
                    } while ($xword_id != 0 && $FoundPhrase == 0 &&
                        $FoundFirstWord <= $data[$j]["score"]);

                    if ($FoundPhrase != 1)
                        continue; // goto next page.

                    // stop when the search has been running for too long
                    $checktime = time();
                    $checkTimeDiff = abs($starttime - $checktime);
                    if ($checkTimeDiff > $MaxSearchTime)
                    {
                        $IsMaxLimitExceeded = true;
                        break;
                    }
                }

                //Check if page is already in output list
                $pageexists = 0;
                if ($ExcludeTerm == 1)
                {
                    // we clear out the score entry so that it'll be excluded in the filtering stage
                    $res_table[$ipage][0] = 0;
                }
                elseif ($res_table[$ipage][0] == 0)
                {
                    // not in list, count this page as a unique match
                    $res_table[$ipage][0] = $score;
                    $res_table[$ipage][2] = $txtptr;
                    $res_table[$ipage][6] = $prox;
                }
                else
                {
                    // already in list
                    if ($res_table[$ipage][0] > 10000)
                    {
                        // take it easy if its too big (to prevent huge scores)
                        $res_table[$ipage][0] += 1;
                    }
                    else
                    {
                        $res_table[$ipage][0] += $score; //Add in score
                        //$res_table[$ipage][0] *= 2; //Double Score as we have two words matching
                    }

                    // store the next two searchword matches
                    if ($res_table[$ipage][1] > 0 && $res_table[$ipage][1] < $MaxContextKeywords)
                    {
                        if ($res_table[$ipage][3] == 0)
                            $res_table[$ipage][3] = $txtptr;
                        elseif ($res_table[$ipage][4] == 0)
                            $res_table[$ipage][4] = $txtptr;
                    }
                    $res_table[$ipage][6] = $res_table[$ipage][6] & $prox;
                }

                $res_table[$ipage][1] += 1;

                if ($res_table[$ipage][0] > $maxscore)
                    $maxscore = $res_table[$ipage][0];

                // store the 'and' user search terms matched' value
                if ($res_table[$ipage][5] == $sw || $res_table[$ipage][5] == $sw-$SkippedWords-$exclude_count)
                    $res_table[$ipage][5] += 1;
            }

            if ($UseWildCards[$sw] == 0 && $SearchAsSubstring == 0)
                break; //This search word was found, so skip to next
        }
    }
}
//Close the files
fclose($fp_wordmap);
if ($SkippedWords > 0)
{
$OutputBuffers[$OUTPUT_SUMMARY] .= "
\n";
for ($catit = 0; $catit < $NumCats; $catit++)
{
if ($CatCounter[$catit] > 0)
{
// if all the results found belonged in this current category, then we don't show it in the summary
if ($CatCounter[$catit] != $matches)
{
$OutputBuffers[$OUTPUT_CATSUMMARY] .= "
";
}
else
{
// Clear the cat summary if we decided we didn't need to show it afterall
$DisplayCatSummary = 0;
$OutputBuffers[$OUTPUT_CATSUMMARY] = "";
}
}
}
if ($DisplayCatSummary == 1)
$OutputBuffers[$OUTPUT_CATSUMMARY] .= "
\n
\n";
}
if ($Spelling == 1)
{
// load in spellings file
// Each non-empty line becomes one $spell entry, split on spaces into at most
// four fields; $spell_count is the number of entries read.
//
// The file may be missing or unreadable (the start-up check only prints a
// warning for that case). Calling feof() on a failed fopen() handle never
// reports end-of-file, so the read loop runs only with a valid handle and
// stops as soon as fgets() fails. Without a usable file $spell_count is 0 and
// no suggestions are produced.
$i = 0;
$fp_spell = file_exists($SPELLINGFILE) ? fopen($SPELLINGFILE, "rt") : false;
if ($fp_spell)
{
    while (!feof($fp_spell))
    {
        $spline = fgets($fp_spell, $MaxKeyWordLineLen);
        if ($spline === false)
            break; // end of file or read error
        if (strlen($spline) > 0)
        {
            $spell[$i] = explode(" ", $spline, 4);
            $i++;
        }
    }
    fclose($fp_spell);
}
$spell_count = $i;
// Build $SuggestStr, an alternative query in which every search word that
// returned too few results is replaced by a suggested word from the spelling
// data. $SuggestionCount counts how many words were replaced.
$SuggestStr = "";
$SuggestionFound = 0;
$SuggestionCount = 0;
// $word is the suggestion for the current search word; $word2 and $word3 are
// further alternatives, looked up only for single-word searches.
$word = "";
$word2 = "";
$word3 = "";
$tmpWordStr = ""; // for local stemming and comparison
for ($sw = 0; $sw < $NumSearchWords; $sw++)
{
if ($sw_results[$sw] >= $SpellingWhenLessThan)
{
// this word has enough results
if ($sw > 0)
$SuggestStr = $SuggestStr . " ";
$SuggestStr = $SuggestStr . $SearchWords[$sw];
}
else
{
// this word returned less results than threshold, and requires spelling suggestions
// Look for a spelling entry whose code ([0]) equals the code of the search word.
$sw_spcode = GetSPCode($SearchWords[$sw]);
if (strlen($sw_spcode) > 0)
{
$SuggestionFound = 0;
for ($i = 0; $i < $spell_count && $SuggestionFound == 0; $i++)
{
$spcode = $spell[$i][0];
if ($spcode == $sw_spcode)
{
// Fields [1]..[3] of the entry are dictionary ids of candidate words;
// take the first candidate that differs from the search word itself.
$j = 0;
while ($SuggestionFound == 0 && $j < 3 && isset($spell[$i][1+$j]))
{
$dictid = intval($spell[$i][1+$j]);
$word = GetSpellingWord($dictid);
$tmpWordStr = $word;
// compare in stemmed form when the search word has been stemmed
if ($UseStemming == 1)
{
$tmpWordStr = strtolower($tmpWordStr);
$tmpWordStr = $porterStemmer->Stem($tmpWordStr);
}
if (wordcasecmp($tmpWordStr, $SearchWords[$sw]) == 0)
{
// Check that it is not a skipped word or the same word
$SuggestionFound = 0;
}
else
{
$SuggestionFound = 1;
$SuggestionCount++;
if ($NumSearchWords == 1) // if single word search
{
// also fetch the next one or two candidates of this entry, blanking
// any that equals the search word
if ($j < 1 && isset($spell[$i][1+$j+1]))
{
$dictid = intval($spell[$i][1+$j+1]);
$word2 = GetSpellingWord($dictid);
$tmpWordStr = $word2;
if ($UseStemming == 1)
{
$tmpWordStr = strtolower($tmpWordStr);
$tmpWordStr = $porterStemmer->Stem($tmpWordStr);
}
if (wordcasecmp($tmpWordStr, $SearchWords[$sw]) == 0)
$word2 = "";
}
if ($j < 2 && isset($spell[$i][1+$j+2]))
{
$dictid = intval($spell[$i][1+$j+2]);
$word3 = GetSpellingWord($dictid);
$tmpWordStr = $word3;
if ($UseStemming == 1)
{
$tmpWordStr = strtolower($tmpWordStr);
$tmpWordStr = $porterStemmer->Stem($tmpWordStr);
}
if (wordcasecmp($tmpWordStr, $SearchWords[$sw]) == 0)
$word3 = "";
}
}
}
$j++;
}
}
// Stops scanning once an entry's code compares greater than the wanted
// one -- presumably the spelling data is sorted by code; confirm.
elseif (strcmp($spcode, $sw_spcode) > 0)
{
break;
}
}
if ($SuggestionFound == 1)
{
if ($sw > 0)
$SuggestStr = $SuggestStr . " ";
$SuggestStr = $SuggestStr . $word; // add string AFTER so we can preserve order of words
}
}
}
}
if ($SuggestionCount > 0)
{
$OutputBuffers[$OUTPUT_SUGGESTION] .= "
"; // end of results style tag
// Show links to other result pages
if ($num_pages > 1)
{
// 10 results to the left of the current page
$start_range = $page - 10;
if ($start_range < 1)
$start_range = 1;
// 10 to the right
$end_range = $page + 10;
if ($end_range > $num_pages)
$end_range = $num_pages;
$OutputBuffers[$OUTPUT_PAGENUMBERS] .= "