CWIS Developer Documentation
SearchEngine.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: SearchEngine.php
4 #
5 # Open Source Metadata Archive Search Engine (OSMASE)
6 # Copyright 2002-2014 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu
8 #
9 
13 class SearchEngine {
14 
15  # ---- PUBLIC INTERFACE --------------------------------------------------
16 
# logical operators that can be used to combine search terms or groups
const LOGIC_AND = 1;
const LOGIC_OR = 2;

# field type flags (passed to AddField())
const FIELDTYPE_TEXT = 1;
const FIELDTYPE_NUMERIC = 2;
const FIELDTYPE_DATE = 3;

# word state flags (distinct bits, combined with bitwise OR when parsing
#       search strings)
const WORD_PRESENT = 1;
const WORD_EXCLUDED = 2;
const WORD_REQUIRED = 4;
31 
/**
 * Object constructor.  Creates the database connection used by the engine
 * and records how items are located in the database.
 * NOTE(review): the declaration line was missing from this listing; the
 * name and parameter list are reconstructed from the body -- confirm
 * against the original file.
 * @param string $ItemTableName Name of database table containing items.
 * @param string $ItemIdFieldName Name of column in item table that holds
 *      item IDs.
 */
public function __construct($ItemTableName, $ItemIdFieldName)
{
    # create database object for our use
    $this->DB = new Database();

    # save item access parameters
    $this->ItemTableName = $ItemTableName;
    $this->ItemIdFieldName = $ItemIdFieldName;

    # set default debug state
    $this->DebugLevel = 0;
}
50 
/**
 * Register a field with the search engine (or update its settings).
 * @param string $FieldName Name of field.
 * @param int $FieldType Type of field (one of the FIELDTYPE_ constants).
 * @param int $Weight Weight of field; only fields with a positive weight
 *      are indexed by UpdateForItem().
 * @param bool $UsedInKeywordSearch Whether field takes part in keyword
 *      searches (any truthy/falsy value is normalized to TRUE/FALSE).
 */
public function AddField(
        $FieldName, $FieldType, $Weight, $UsedInKeywordSearch)
{
    # normalize keyword search flag to a strict boolean
    $InKeywordSearch = (bool)$UsedInKeywordSearch;

    # record settings for field
    $this->FieldInfo[$FieldName]["FieldType"] = $FieldType;
    $this->FieldInfo[$FieldName]["Weight"] = $Weight;
    $this->FieldInfo[$FieldName]["InKeywordSearch"] = $InKeywordSearch;
}
68 
/**
 * Retrieve the type of a field, as recorded by AddField().
 * @param string $FieldName Name of field.
 * @return int Field type (FIELDTYPE_ constant).
 */
public function FieldType($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["FieldType"];
}
78 
/**
 * Retrieve the search weight of a field, as recorded by AddField().
 * @param string $FieldName Name of field.
 * @return int Field weight.
 */
public function FieldWeight($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["Weight"];
}
88 
/**
 * Report whether a field is included in keyword searches, as recorded
 * by AddField().
 * @param string $FieldName Name of field.
 * @return bool TRUE if field is used in keyword searches, otherwise FALSE.
 */
public function FieldInKeywordSearch($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["InKeywordSearch"];
}
98 
/**
 * Set the debug output level (the constructor initializes it to 0).
 * @param int $NewValue New debug level.
 */
public function DebugLevel($NewValue)
{
    $this->DebugLevel = $NewValue;
}
107 
108 
109  # ---- search functions
110 
/**
 * Perform a keyword search using a single search string.
 * @param mixed $SearchString Search string.  If a SearchParameterSet is
 *      passed instead, the request is handed off to GroupedSearch().
 * @param int $StartingResult Passed through to CleanScores().
 *      (OPTIONAL, defaults to 0)
 * @param int $NumberOfResults Passed through to CleanScores().
 *      (OPTIONAL, defaults to 10)
 * @param string $SortByField Passed through to CleanScores().
 *      (OPTIONAL, defaults to NULL)
 * @param bool $SortDescending Passed through to CleanScores().
 *      (OPTIONAL, defaults to TRUE)
 * @return array Search result scores, as returned by CleanScores() (or by
 *      GroupedSearch() when a SearchParameterSet was supplied).
 */
public function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # pass off the request to grouped search (for now) if appropriate
    # (tests the actual parameter -- the previous check of an undefined
    #       $SearchGroups variable could never succeed)
    if ($SearchString instanceof SearchParameterSet)
    {
        return $this->GroupedSearch($SearchString, $StartingResult,
                $NumberOfResults, $SortByField, $SortDescending);
    }

    # interpret and filter out magic debugging keyword (if any)
    $SearchString = $this->SetDebugLevel($SearchString);
    $this->DMsg(0, "In Search() with search string \"".$SearchString."\"");

    # save start time to use in calculating search time
    $StartTime = microtime(TRUE);

    # clear word counts
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # parse search string into terms
    $Words = $this->ParseSearchStringForWords($SearchString);
    $this->DMsg(1, "Found ".count($Words)." words");

    # parse search string for phrases
    $Phrases = $this->ParseSearchStringForPhrases($SearchString);
    $this->DMsg(1, "Found ".count($Phrases)." phrases");

    # if only excluded terms specified
    if ($this->ExcludedTermCount && !$this->InclusiveTermCount)
    {
        # load all records (so there is something to exclude from)
        $this->DMsg(1, "Loading all records");
        $Scores = $this->LoadScoresForAllRecords();
    }
    else
    {
        # perform searches
        $Scores = $this->SearchForWords($Words);
        $this->DMsg(1, "Found ".count($Scores)." results after word search");
        $Scores = $this->SearchForPhrases($Phrases, $Scores);
        $this->DMsg(1, "Found ".count($Scores)." results after phrase search");
    }

    # if search results found
    if (count($Scores) > 0)
    {
        # handle any excluded words
        $Scores = $this->FilterOnExcludedWords($Words, $Scores);

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # record search time
    $this->LastSearchTime = microtime(TRUE) - $StartTime;

    # return list of items to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
194 
/**
 * Perform a search across one or more fields.
 * @param mixed $SearchStrings Array of search strings indexed by field
 *      name (a value may itself be an array of strings for that field).
 *      If a SearchParameterSet is passed instead, the request is handed
 *      off to GroupedSearch().
 * @param int $StartingResult Passed through to CleanScores().
 *      (OPTIONAL, defaults to 0)
 * @param int $NumberOfResults Passed through to CleanScores().
 *      (OPTIONAL, defaults to 10)
 * @param string $SortByField Passed through to CleanScores().
 *      (OPTIONAL, defaults to NULL)
 * @param bool $SortDescending Passed through to CleanScores().
 *      (OPTIONAL, defaults to TRUE)
 * @return array Search result scores, as returned by CleanScores() (or by
 *      GroupedSearch() when a SearchParameterSet was supplied).
 */
public function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # pass off the request to grouped search (for now) if appropriate
    # (tests and forwards the actual parameter -- the previous code
    #       referred to undefined $SearchGroups / $SearchString variables)
    if ($SearchStrings instanceof SearchParameterSet)
    {
        return $this->GroupedSearch($SearchStrings, $StartingResult,
                $NumberOfResults, $SortByField, $SortDescending);
    }

    # interpret and filter out magic debugging keyword (if any)
    $SearchStrings = $this->SetDebugLevel($SearchStrings);
    $this->DMsg(0, "In FieldedSearch() with "
            .count($SearchStrings)." search strings");

    # save start time to use in calculating search time
    $StartTime = microtime(TRUE);

    # perform search (NULL means no search was actually conducted)
    $Scores = $this->SearchAcrossFields($SearchStrings);
    $Scores = ($Scores === NULL) ? array() : $Scores;

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # record search time
    $this->LastSearchTime = microtime(TRUE) - $StartTime;

    # return list of items to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
247 
/**
 * Perform a search made up of several groups of fielded search strings,
 * combining the per-group results with AND or OR logic.
 * @param mixed $SearchGroups Either a SearchParameterSet (converted via
 *      GetAsLegacyArray(), whose "Logic" entry supplies the logic used to
 *      combine groups) or an array of groups.  Each group may have a
 *      "SearchStrings" entry (passed to SearchAcrossFields()) and a
 *      "Logic" entry (logic used within that group).
 * @param int $StartingResult Passed through to CleanScores().
 *      (OPTIONAL, defaults to 0)
 * @param int $NumberOfResults Passed through to CleanScores().
 *      (OPTIONAL, defaults to 10)
 * @param string $SortByField Passed through to CleanScores().
 *      (OPTIONAL, defaults to NULL)
 * @param bool $SortDescending Passed through to CleanScores().
 *      (OPTIONAL, defaults to TRUE)
 * @return array Search result scores, as returned by CleanScores().
 */
public function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # determine logic used to combine results from separate groups
    if ($SearchGroups instanceof SearchParameterSet)
    {
        # convert to legacy array format and split off top-level logic
        $SearchGroups = $SearchGroups->GetAsLegacyArray();
        $BaseLogic = $SearchGroups["Logic"];
        unset($SearchGroups["Logic"]);
    }
    else
    {
        $BaseLogic = $this->DefaultSearchLogic;
    }

    # interpret and filter out magic debugging keyword (if any)
    foreach ($SearchGroups as $GroupKey => $UnusedGroup)
    {
        if (isset($SearchGroups[$GroupKey]["SearchStrings"]))
        {
            $SearchGroups[$GroupKey]["SearchStrings"] =
                    $this->SetDebugLevel($SearchGroups[$GroupKey]["SearchStrings"]);
        }
    }
    $this->DMsg(0, "In GroupedSearch() with "
            .count($SearchGroups)." search groups");

    # note start time for search timing
    $StartTime = microtime(TRUE);

    # remember current default logic so it can be restored afterwards
    $OriginalLogic = $this->DefaultSearchLogic;

    # accumulate results group by group
    $Scores = array();
    $IsFirstSearch = TRUE;
    foreach ($SearchGroups as $Group)
    {
        $this->DMsg(0, "----- GROUP ---------------------------");

        # use group-specific logic if given, otherwise the saved default
        $this->DefaultSearchLogic = isset($Group["Logic"])
                ? $Group["Logic"] : $OriginalLogic;
        $this->DMsg(2, "Logic is "
                .(($this->DefaultSearchLogic == self::LOGIC_AND) ? "AND" : "OR"));

        # nothing to do for groups without search strings
        if (!isset($Group["SearchStrings"]))
        {
            continue;
        }

        # run search for group (NULL indicates no search was conducted)
        $GroupScores = $this->SearchAcrossFields($Group["SearchStrings"]);
        if ($GroupScores === NULL)
        {
            continue;
        }

        if (($BaseLogic == self::LOGIC_OR) || $IsFirstSearch)
        {
            # merge group results into overall results, summing scores
            foreach ($GroupScores as $ItemId => $Score)
            {
                $Scores[$ItemId] = isset($Scores[$ItemId])
                        ? ($Scores[$ItemId] + $Score) : $Score;
            }
            $IsFirstSearch = FALSE;
        }
        else
        {
            # keep only items present in both result sets, summing scores
            $PreviousScores = $Scores;
            $Scores = array();
            foreach ($GroupScores as $ItemId => $Score)
            {
                if (isset($PreviousScores[$ItemId]))
                {
                    $Scores[$ItemId] = $PreviousScores[$ItemId] + $Score;
                }
            }
        }
    }

    # put default logic back the way we found it
    $this->DefaultSearchLogic = $OriginalLogic;

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # record search time
    $this->LastSearchTime = microtime(TRUE) - $StartTime;

    # return search results to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
386 
/**
 * Add a function to the list of result filter functions.
 * @param string $FunctionName Name of filter function.
 */
public function AddResultFilterFunction($FunctionName)
{
    # append to list of filter functions
    $this->FilterFuncs[] = $FunctionName;
}
396 
/**
 * Get or set the default search logic (AND or OR).
 * @param int $NewSetting New setting (LOGIC_AND or LOGIC_OR).  Values
 *      that loosely equal NULL leave the setting unchanged.  (OPTIONAL)
 * @return int Current default search logic setting.
 */
public function DefaultSearchLogic($NewSetting = NULL)
{
    # (loose comparison retained so existing callers see no change)
    if ($NewSetting != NULL)
    {
        $this->DefaultSearchLogic = $NewSetting;
    }

    # report current setting to caller (return was missing from listing)
    return $this->DefaultSearchLogic;
}
410 
/**
 * Set whether search terms are required by default, by switching the
 * default search logic between AND (required) and OR (not required).
 * @param bool $NewSetting TRUE to require terms by default.
 *      (OPTIONAL, defaults to TRUE)
 */
public function SearchTermsRequiredByDefault($NewSetting = TRUE)
{
    $Logic = $NewSetting ? self::LOGIC_AND : self::LOGIC_OR;
    $this->DefaultSearchLogic($Logic);
}
420 
/**
 * Get the number of results available from the most recent search.
 * NOTE(review): the body was missing from this listing; the returned
 * property is the one the search methods report in their "Ended up
 * with ... results" debug message -- confirm against the original file.
 * @return int Number of results available.
 */
public function NumberOfResults()
{
    return $this->NumberOfResultsAvailable;
}
429 
/**
 * Get the list of (non-excluded) search terms accumulated while parsing
 * search strings.
 * @return array List of search terms.
 */
public function SearchTerms()
{
    $Terms = $this->SearchTermList;
    return $Terms;
}
438 
/**
 * Get the time taken by the most recent search.
 * @return float Elapsed time in seconds (difference of two
 *      microtime(TRUE) readings).
 */
public function SearchTime()
{
    $ElapsedTime = $this->LastSearchTime;
    return $ElapsedTime;
}
447 
/**
 * Calculate the total field weight involved in a fielded search: the sum
 * of the weights of every known field named in the search, plus (when a
 * keyword search is included) the weights of all fields flagged for
 * keyword searching.
 * @param array $SearchStrings Array of search strings, indexed by field
 *      name ("XXXKeywordXXX" denotes a keyword search).
 * @return int Total weight.
 */
public function FieldedSearchWeightScale($SearchStrings)
{
    $TotalWeight = 0;
    $KeywordSearchSeen = FALSE;

    # add up weights of explicitly-named fields
    foreach ($SearchStrings as $FieldName => $UnusedStrings)
    {
        if ($FieldName == "XXXKeywordXXX")
        {
            $KeywordSearchSeen = TRUE;
            continue;
        }
        if (array_key_exists($FieldName, $this->FieldInfo))
        {
            $TotalWeight += $this->FieldInfo[$FieldName]["Weight"];
        }
    }

    # done if there was no keyword search
    if (!$KeywordSearchSeen)
    {
        return $TotalWeight;
    }

    # add in weights of all fields used for keyword searching
    foreach ($this->FieldInfo as $Info)
    {
        if ($Info["InKeywordSearch"])
        {
            $TotalWeight += $Info["Weight"];
        }
    }
    return $TotalWeight;
}
485 
486 
487  # ---- search database update functions
488 
/**
 * Rebuild the search database entries for a single item: existing word
 * counts and item type are deleted, then text from every field with a
 * positive weight is re-indexed.
 * @param int $ItemId ID of item.
 * @param int $ItemType Numeric type of item.  (OPTIONAL, defaults to 0)
 */
public function UpdateForItem($ItemId, $ItemType = 0)
{
    # clear word count added flags for this item
    unset($this->WordCountAdded);

    # force ID to integer for use in SQL (as the INSERT below already did)
    $SqlItemId = intval($ItemId);

    # delete any existing info for this item
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$SqlItemId);
    $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = ".$SqlItemId);

    # save item type
    $this->DB->Query("INSERT INTO SearchItemTypes (ItemId, ItemType)"
            ." VALUES (".$SqlItemId.", ".intval($ItemType).")");

    # for each metadata field
    foreach ($this->FieldInfo as $FieldName => $Info)
    {
        # skip fields that do not have a positive search weight
        if ($Info["Weight"] <= 0)
        {
            continue;
        }

        # retrieve text for field (may be a single value or an array)
        $Text = $this->GetFieldContent($ItemId, $FieldName);
        $Strings = is_array($Text) ? $Text : array($Text);

        # record search info for each piece of text
        foreach ($Strings as $String)
        {
            $this->RecordSearchInfoForText($ItemId, $FieldName,
                    $Info["Weight"], $String,
                    $Info["InKeywordSearch"]);
        }
    }
}
538 
/**
 * Rebuild search database entries for a range of items.
 * @param int $StartingItemId ID of first item to update (items with IDs
 *      greater than or equal to this value are selected, in ID order).
 * @param int $NumberOfItems Maximum number of items to update.
 * @return mixed ID of last item updated, or NULL if no items were found.
 */
public function UpdateForItems($StartingItemId, $NumberOfItems)
{
    # retrieve IDs for specified number of items starting at specified ID
    # (numeric values are forced to integers before being placed in SQL)
    $this->DB->Query("SELECT ".$this->ItemIdFieldName." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemIdFieldName." >= ".intval($StartingItemId)
            ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".intval($NumberOfItems));
    $ItemIds = $this->DB->FetchColumn($this->ItemIdFieldName);

    # update search info for each retrieved item, tracking the last one
    # (initialized so that an empty result set does not leave the return
    #       value undefined)
    $LastItemId = NULL;
    foreach ($ItemIds as $ItemId)
    {
        $this->UpdateForItem($ItemId);
        $LastItemId = $ItemId;
    }

    # return ID of last item updated to caller
    return $LastItemId;
}
563 
/**
 * Remove all search database entries for an item.
 * @param int $ItemId ID of item to drop.
 */
public function DropItem($ItemId)
{
    # force ID to integer for use in SQL (matches UpdateForItem()'s INSERT)
    $SqlItemId = intval($ItemId);

    # drop all entries pertaining to item from word count and type tables
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$SqlItemId);
    $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = ".$SqlItemId);
}
574 
/**
 * Remove all search database entries for a field, along with the field's
 * own entry in the fields table.
 * @param string $FieldName Name of field to drop.
 */
public function DropField($FieldName)
{
    # retrieve our ID for field
    $FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId");

    # nothing to drop if field is not in our fields table
    if ($FieldId === NULL)
    {
        return;
    }

    # (the ID is placed in the SQL as an integer -- the previous \' inside
    #       a double-quoted string emitted a literal backslash, producing
    #       an invalid statement)
    $SqlFieldId = intval($FieldId);

    # drop all entries pertaining to field from word counts table
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = ".$SqlFieldId);

    # drop field from our fields table
    $this->DB->Query("DELETE FROM SearchFields WHERE FieldId = ".$SqlFieldId);

    # discard any cached ID for field, so that a later GetFieldId() call
    #       does not return the ID of the row just deleted
    unset($this->FieldIds[$FieldName]);
}
591 
/**
 * Get the number of entries in the search words table.
 * @return int Count of search terms.
 */
public function SearchTermCount()
{
    $TermCount = $this->DB->Query("SELECT COUNT(*) AS TermCount"
            ." FROM SearchWords", "TermCount");
    return $TermCount;
}
601 
/**
 * Get the number of distinct items that have entries in the word counts
 * table.
 * @return int Count of indexed items.
 */
public function ItemCount()
{
    $ItemCount = $this->DB->Query("SELECT COUNT(DISTINCT ItemId) AS ItemCount"
            ." FROM SearchWordCounts", "ItemCount");
    return $ItemCount;
}
611 
/**
 * Add synonyms for a word.  The word and each synonym are added to the
 * search words table if not already present; a pairing is only stored if
 * it does not already exist in either direction.
 * @param string $Word Word for which synonyms are being added.
 * @param array $Synonyms List of synonyms for word.
 * @return int Number of new synonym pairings added.
 */
public function AddSynonyms($Word, $Synonyms)
{
    # look up ID for word (adding word if necessary)
    $BaseId = $this->GetWordId($Word, TRUE);

    $NumAdded = 0;
    foreach ($Synonyms as $Synonym)
    {
        # look up ID for synonym (adding synonym if necessary)
        $OtherId = $this->GetWordId($Synonym, TRUE);

        # skip pairing if it is already in database (in either order)
        $this->DB->Query("SELECT * FROM SearchWordSynonyms"
                ." WHERE (WordIdA = ".$BaseId
                    ." AND WordIdB = ".$OtherId.")"
                ." OR (WordIdB = ".$BaseId
                    ." AND WordIdA = ".$OtherId.")");
        if ($this->DB->NumRowsSelected() != 0)
        {
            continue;
        }

        # store new pairing
        $this->DB->Query("INSERT INTO SearchWordSynonyms"
                ." (WordIdA, WordIdB)"
                ." VALUES (".$BaseId.", ".$OtherId.")");
        $NumAdded++;
    }

    # report to caller number of new synonyms added
    return $NumAdded;
}
652 
/**
 * Remove synonyms for a word.  Does nothing if the word is not in the
 * search words table.
 * @param string $Word Word for which synonyms are being removed.
 * @param array $Synonyms Synonyms to remove, or NULL to remove all
 *      synonyms for word.  (OPTIONAL, defaults to NULL)
 */
public function RemoveSynonyms($Word, $Synonyms = NULL)
{
    # bail out if word is unknown
    $WordId = $this->GetWordId($Word);
    if ($WordId === NULL)
    {
        return;
    }

    # with no specific synonyms given, remove every pairing for word
    if ($Synonyms === NULL)
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE WordIdA = '".$WordId."'"
                ." OR WordIdB = '".$WordId."'");
        return;
    }

    # otherwise remove pairing (in either order) for each known synonym
    foreach ($Synonyms as $Synonym)
    {
        $SynonymId = $this->GetWordId($Synonym);
        if ($SynonymId === NULL)
        {
            continue;
        }
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE (WordIdA = '".$WordId."'"
                    ." AND WordIdB = '".$SynonymId."')"
                ." OR (WordIdB = '".$WordId."'"
                    ." AND WordIdA = '".$SynonymId."')");
    }
}
697 
/**
 * Remove every synonym pairing from the database.
 */
public function RemoveAllSynonyms()
{
    # empty the synonyms table
    $this->DB->Query("DELETE FROM SearchWordSynonyms");
}
705 
/**
 * Get the synonyms stored for a word.
 * @param string $Word Word for which synonyms should be returned.
 * @return array List of synonyms (empty if the word is unknown or has no
 *      synonyms).
 */
public function GetSynonyms($Word)
{
    # assume no synonyms will be found
    $Synonyms = array();

    # look up ID for word
    $WordId = $this->GetWordId($Word);

    # if word ID was found
    if ($WordId !== NULL)
    {
        # look up IDs of all synonyms for this word
        $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);

        # collect IDs first, because GetWord() below issues further
        #       queries on the same database object
        # (FetchRow must be called as a method -- without parentheses it
        #       was read as a property and no rows were ever fetched)
        $SynonymIds = array();
        while ($Record = $this->DB->FetchRow())
        {
            # pairing may be stored in either order, so take "the other" ID
            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                    ? $Record["WordIdB"] : $Record["WordIdA"];
        }

        # look up word for each synonym ID and add to synonym list
        foreach ($SynonymIds as $SynonymId)
        {
            $Synonyms[] = $this->GetWord($SynonymId);
        }
    }

    # return synonyms to caller
    return $Synonyms;
}
744 
/**
 * Get all stored synonyms, with reciprocal duplicates removed (a pairing
 * is listed under only one of its two words) and sorted alphabetically.
 * @return array Array of synonym lists, indexed by word.
 */
public function GetAllSynonyms()
{
    # build list with every pairing recorded in both directions
    # (a separate database object is used because GetWord() issues its
    #       own queries on $this->DB while we iterate)
    $SynonymList = array();
    $PairDB = new Database();
    $PairDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
    while ($Row = $PairDB->FetchRow())
    {
        $WordA = $this->GetWord($Row["WordIdA"]);
        $WordB = $this->GetWord($Row["WordIdB"]);

        # list second word under first if not already there
        if (!isset($SynonymList[$WordA])
                || !in_array($WordB, $SynonymList[$WordA]))
        {
            $SynonymList[$WordA][] = $WordB;
        }

        # list first word under second if not already there
        if (!isset($SynonymList[$WordB])
                || !in_array($WordA, $SynonymList[$WordB]))
        {
            $SynonymList[$WordB][] = $WordA;
        }
    }

    # remove reciprocal duplicates
    # (iterates over the list as it was when the loop started, while
    #       checking and modifying the live list)
    foreach ($SynonymList as $Word => $Synonyms)
    {
        foreach ($Synonyms as $Synonym)
        {
            # skip unless each word is still listed under the other
            $IsReciprocal = isset($SynonymList[$Synonym])
                    && isset($SynonymList[$Word])
                    && in_array($Word, $SynonymList[$Synonym])
                    && in_array($Synonym, $SynonymList[$Word]);
            if (!$IsReciprocal)
            {
                continue;
            }

            # drop the entry from whichever side has the shorter list
            #       (from the synonym's side when the lists are equal)
            $WordListSize = count($SynonymList[$Word]);
            $SynonymListSize = count($SynonymList[$Synonym]);
            if ($WordListSize < $SynonymListSize)
            {
                $SynonymList[$Word] = array_diff(
                        $SynonymList[$Word], array($Synonym));
                if (count($SynonymList[$Word]) == 0)
                {
                    unset($SynonymList[$Word]);
                }
            }
            else
            {
                $SynonymList[$Synonym] = array_diff(
                        $SynonymList[$Synonym], array($Word));
                if (count($SynonymList[$Synonym]) == 0)
                {
                    unset($SynonymList[$Synonym]);
                }
            }
        }
    }

    # sort each synonym list and then the words themselves (just for
    #       convenience)
    foreach ($SynonymList as $Word => $UnusedSynonyms)
    {
        asort($SynonymList[$Word]);
    }
    ksort($SynonymList);

    # return 2D array of synonyms to caller
    return $SynonymList;
}
837 
/**
 * Replace all stored synonyms with the supplied list.
 * @param array $SynonymList Array of synonym lists, indexed by word.
 */
public function SetAllSynonyms($SynonymList)
{
    # start from a clean slate
    $this->RemoveAllSynonyms();

    # store synonyms for each word in supplied list
    foreach ($SynonymList as $Word => $Synonyms)
    {
        $this->AddSynonyms($Word, $Synonyms);
    }
}
855 
/**
 * Load synonyms from a text file.  Each usable line holds a word followed
 * by one or more synonyms, separated by whitespace and/or commas.
 * @param string $FileName Name of file containing synonyms.
 * @return int Number of new synonym pairings added (0 if the file could
 *      not be read).
 */
public function LoadSynonymsFromFile($FileName)
{
    # assume no synonyms will be added
    $AddCount = 0;

    # read in contents of file
    $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);

    # bail out if file could not be read (file() returns FALSE on failure,
    #       which cannot be counted or iterated)
    if ($Lines === FALSE)
    {
        return $AddCount;
    }

    # for each line of file
    foreach ($Lines as $Line)
    {
        # skip comment lines
        # NOTE(review): pattern is unanchored, so any line containing "#"
        #       anywhere is skipped -- confirm whether that is intended
        if (preg_match("/[\s]*#/", $Line))
        {
            continue;
        }

        # split line into words, discarding the empty entries that
        #       leading/trailing separators would otherwise produce
        $Words = preg_split("/[\s,]+/", $Line, -1, PREG_SPLIT_NO_EMPTY);

        # if synonyms found
        if (count($Words) > 1)
        {
            # separate out word and synonyms
            $Word = array_shift($Words);

            # add synonyms
            $AddCount += $this->AddSynonyms($Word, $Words);
        }
    }

    # return count of synonyms added to caller
    return $AddCount;
}
900 
901 
902  # ---- PRIVATE INTERFACE -------------------------------------------------
903 
# database object, debug level, and item access parameters (set up by
#       the constructor)
protected $DB;
protected $DebugLevel;
protected $ItemTableName;
protected $ItemIdFieldName;

# number of results from the most recent search (read by the search
#       methods when reporting results)
# NOTE(review): this declaration was missing from the listing although
#       the property is used -- confirm visibility against original file
protected $NumberOfResultsAvailable;

# duration of the most recent search, result filter function names, and
#       search behavior settings
protected $LastSearchTime;
protected $FilterFuncs;
protected $DefaultSearchLogic = self::LOGIC_AND;
protected $StemmingEnabled = TRUE;
protected $SynonymsEnabled = TRUE;

# internal working state
private $WordCountAdded;
private $FieldIds;
private $FieldInfo;
private $RequiredTermCount;
private $RequiredTermCounts;
private $InclusiveTermCount;
private $ExcludedTermCount;
private $SearchTermList;

# offset added to database IDs of stems to distinguish them from word IDs
const STEM_ID_OFFSET = 1000000;
925 
926 
927  # ---- common private functions (used in both searching and DB build)
928 
/**
 * Normalize a search string and break it into individual words.  A
 * leading "-" marks a word as excluded, "+" as required, and "~" as
 * explicitly not required; unprefixed words are required when the default
 * search logic is AND.  Term counts and the search term list are updated
 * as a side effect.
 * @param string $SearchString Search string to parse.
 * @param bool $IgnorePhrases If TRUE, quotes and parentheses are removed
 *      but the words inside them are kept; otherwise quoted phrases and
 *      parenthesized groups are removed entirely.  (OPTIONAL, defaults
 *      to FALSE)
 * @return array Word flags (bitwise OR of WORD_ constants), indexed by
 *      (lower case) word.
 */
private function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
{
    # strip off any surrounding whitespace
    $Text = trim($SearchString);

    # set up normalization patterns (applied in sequence, so the order IS
    #       significant, and each pattern is paired with the entry at the
    #       same position in the replacement list below)
    $Patterns = array(
            "/'s[^a-z0-9\\-+~]+/i", # get rid of possessive plurals
            "/'/",                  # get rid of single quotes / apostrophes
            "/\"[^\"]*\"/",         # get rid of phrases  (NOTE: HARD-CODED
                                    #       INDEX BELOW!!!)
            "/\\([^)]*\\)/",        # get rid of groups  (NOTE: HARD-CODED
                                    #       INDEX BELOW!!!)
            "/[^a-z0-9\\-+~]+/i",   # convert non-alphanumerics
                                    #       / non-minus/plus to a space
            "/([^\\s])-+/i",        # convert minus preceded by anything
                                    #       but whitespace to a space
            "/([^\\s])\\++/i",      # convert plus preceded by anything
                                    #       but whitespace to a space
            "/-\\s/i",      # convert minus followed by whitespace to a space
            "/\\+\\s/i",    # convert plus followed by whitespace to a space
            "/~\\s/i",      # convert tilde followed by whitespace to a space
            "/[ ]+/"        # convert multiple spaces to one space
            );
    $Replacements = array(
            " ",        # possessive plurals
            "",         # single quotes / apostrophes
            " ",        # phrases
            " ",        # groups
            " ",        # non-alphanumerics (pattern has no capture group)
            "\\1 ",     # embedded minus (keep the preceding character)
            "\\1 ",     # embedded plus (keep the preceding character --
                        #       this entry was previously " ", which threw
                        #       away the character before the plus)
            " ",        # minus followed by whitespace
            " ",        # plus followed by whitespace
            " ",        # tilde followed by whitespace
            " "         # multiple spaces
            );

    # if we are supposed to ignore phrases and groups (series of words
    #       in quotes or surrounded by parens)
    if ($IgnorePhrases)
    {
        # switch phrase removal to double quote removal (HARD-CODED
        #       INDEX INTO PATTERN LIST!!)
        $Patterns[2] = "/\"/";

        # switch group removal to paren removal (HARD-CODED INDEX
        #       INTO PATTERN LIST!!)
        $Patterns[3] = "/[\(\)]+/";
    }

    # remove punctuation from text and normalize whitespace
    $Text = preg_replace($Patterns, $Replacements, $Text);
    $this->DMsg(2, "Normalized search string is '".$Text."'");

    # convert text to lower case
    $Text = strtolower($Text);

    # strip off any extraneous whitespace
    $Text = trim($Text);

    # start with an empty array
    $Words = array();

    # if we have words left after parsing
    if (strlen($Text) != 0)
    {
        # for each word
        foreach (explode(" ", $Text) as $Word)
        {
            # grab first character of word and strip it off if it is
            #       an option character
            $FirstChar = substr($Word, 0, 1);
            if (in_array($FirstChar, array("-", "+", "~"), TRUE))
            {
                $Word = substr($Word, 1);
            }

            # skip word if nothing is left (e.g. a trailing bare "-")
            if (!strlen($Word))
            {
                continue;
            }

            # note whether this is the first time we have seen word, so
            #       that repeated words are only counted once
            $IsNewWord = !isset($Words[$Word]);

            # set flags and update term counts appropriately
            $Flags = self::WORD_PRESENT;
            if ($FirstChar == "-")
            {
                $Flags |= self::WORD_EXCLUDED;
                if ($IsNewWord)
                {
                    $this->ExcludedTermCount++;
                }
            }
            else
            {
                # word is required if explicitly marked with plus, or if
                #       default logic is AND and word is not marked with
                #       tilde
                if (($FirstChar != "~")
                        && (($this->DefaultSearchLogic == self::LOGIC_AND)
                                || ($FirstChar == "+")))
                {
                    $Flags |= self::WORD_REQUIRED;
                    if ($IsNewWord)
                    {
                        $this->RequiredTermCount++;
                    }
                }
                if ($IsNewWord)
                {
                    $this->InclusiveTermCount++;
                    $this->SearchTermList[] = $Word;
                }
            }

            # store flags to indicate word found (flags from a later
            #       occurrence of a word replace those from an earlier one)
            $Words[$Word] = $Flags;
            $this->DMsg(3, "Word identified (".$Word.")");
        }
    }

    # return normalized words to caller
    return $Words;
}
1057 
/**
 * Get the search database ID for a field, adding the field to the fields
 * table if it is not already there.  IDs are cached per object.
 * @param string $FieldName Name of field.
 * @return int ID for field.
 */
private function GetFieldId($FieldName)
{
    # use cached ID if we have one
    if (isset($this->FieldIds[$FieldName]))
    {
        return $this->FieldIds[$FieldName];
    }

    # look for field in database
    $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'");
    $Row = $this->DB->FetchRow();

    if ($Row)
    {
        # field is already known
        $Id = $Row["FieldId"];
    }
    else
    {
        # field is new, so add it and pick up its freshly-assigned ID
        $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                ."VALUES ('".addslashes($FieldName)."')");
        $Id = $this->DB->LastInsertId();
    }

    # cache ID and return it to caller
    $this->FieldIds[$FieldName] = $Id;
    return $this->FieldIds[$FieldName];
}
1096 
/**
 * Get the search database ID for a word.  Results are held in a static
 * cache.
 * @param string $Word Word to look up.
 * @param bool $AddIfNotFound If TRUE, word is added to the database when
 *      it is not already there.  (OPTIONAL, defaults to FALSE)
 * @return mixed ID for word, or NULL if word was not found and was not
 *      added.
 */
private function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $WordIdCache;

    # use cached ID if we have one
    if (isset($WordIdCache[$Word]))
    {
        return $WordIdCache[$Word];
    }

    # look for word in database
    $Id = $this->DB->Query("SELECT WordId"
            ." FROM SearchWords"
            ." WHERE WordText='".addslashes($Word)."'",
            "WordId");

    # add word (in lower case) if it is missing and caller asked for that
    $ShouldAdd = ($Id === NULL) && $AddIfNotFound;
    if ($ShouldAdd)
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $Id = $this->DB->LastInsertId();
    }

    # cache ID and return it to caller
    $WordIdCache[$Word] = $Id;
    return $Id;
}
1140 
/**
 * Get the ID for a word stem.  Stem IDs are the database ID from the
 * stems table plus STEM_ID_OFFSET.  Results are held in a static cache.
 * @param string $Stem Word stem to look up.
 * @param bool $AddIfNotFound If TRUE, stem is added to the database when
 *      it is not already there.  (OPTIONAL, defaults to FALSE)
 * @return mixed ID for stem, or NULL if stem was not found and was not
 *      added.
 */
private function GetStemId($Stem, $AddIfNotFound = FALSE)
{
    static $StemIdCache;

    # if stem was in ID cache
    if (isset($StemIdCache[$Stem]))
    {
        # use ID from cache
        $StemId = $StemIdCache[$Stem];
    }
    else
    {
        # look up ID in database
        $StemId = $this->DB->Query("SELECT WordId"
                ." FROM SearchStems"
                ." WHERE WordText='".addslashes($Stem)."'",
                "WordId");

        # if ID was not found and caller requested it be added
        if (($StemId === NULL) && $AddIfNotFound)
        {
            # add stem to database
            $this->DB->Query("INSERT INTO SearchStems (WordText)"
                    ." VALUES ('".addslashes(strtolower($Stem))."')");

            # get ID for newly added stem
            $StemId = $this->DB->LastInsertId();
        }

        # if we ended up with an ID
        # (a missing stem must stay NULL -- adding the offset to NULL
        #       previously produced a bogus but valid-looking ID that
        #       was then returned and cached)
        if ($StemId !== NULL)
        {
            # adjust from DB ID value to stem ID value
            $StemId += self::STEM_ID_OFFSET;

            # save ID to cache
            $StemIdCache[$Stem] = $StemId;
        }
    }

    # return ID to caller
    return $StemId;
}
1187 
/**
 * Get the word (or stem) for an ID.  IDs at or above STEM_ID_OFFSET are
 * looked up in the stems table (after removing the offset); all others
 * are looked up in the words table.  Results are held in a static cache.
 * @param int $WordId ID to look up.
 * @return mixed Word text, as returned by the database lookup.
 */
private function GetWord($WordId)
{
    static $WordCache;

    # use cached word if we have one
    if (isset($WordCache[$WordId]))
    {
        return $WordCache[$WordId];
    }

    # adjust search location and database ID if word is stem
    # (the caller's ID is left untouched so that it can be used as the
    #       cache key -- caching under the adjusted ID made stem entries
    #       overwrite, and be returned for, ordinary word IDs)
    $TableName = "SearchWords";
    $DbWordId = $WordId;
    if ($WordId >= self::STEM_ID_OFFSET)
    {
        $TableName = "SearchStems";
        $DbWordId = $WordId - self::STEM_ID_OFFSET;
    }

    # look up word in database
    $Word = $this->DB->Query("SELECT WordText"
            ." FROM ".$TableName
            ." WHERE WordId='".$DbWordId."'",
            "WordText");

    # save word to cache under the ID the caller used
    $WordCache[$WordId] = $Word;

    # return word to caller
    return $Word;
}
1226 
1227 
1228  # ---- private functions used in searching
1229 
/**
 * Perform a search across multiple fields.  Text and keyword searches are
 * run first, then comparison searches, then exclusions and required-word
 * filtering.  Term counts are reset as a side effect.
 * @param array $SearchStrings Array of search strings, indexed by field
 *      name ("XXXKeywordXXX" for keyword searches).  Each value may be a
 *      single string or an array of strings.
 * @return mixed Search result scores, or NULL if no search was conducted.
 */
private function SearchAcrossFields($SearchStrings)
{
    # start by assuming no search will be done
    $Scores = NULL;

    # clear word counts
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # start with no parsed words or phrases for any field
    $Words = array();
    $Phrases = array();

    # for each field
    $NeedComparisonSearch = FALSE;
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if field is keyword or field is text and does not look
            #       like comparison match
            $NotComparisonSearch = !preg_match("/^[><!]=./", $SearchString)
                    && !preg_match("/^[><=]./", $SearchString);
            if (($FieldName == "XXXKeywordXXX")
                    || (isset($this->FieldInfo[$FieldName])
                            && ($this->FieldInfo[$FieldName]["FieldType"]
                                    == self::FIELDTYPE_TEXT)
                            && $NotComparisonSearch))
            {
                $this->DMsg(0, "Searching text field \""
                        .$FieldName."\" for string \"$SearchString\"");

                # normalize text and split into words
                # NOTE(review): when a field has several search strings,
                #       each one replaces the words/phrases saved for the
                #       previous one, so only the last string's exclusions
                #       are applied below -- confirm whether intended
                $Words[$FieldName] =
                        $this->ParseSearchStringForWords($SearchString);

                # calculate scores for matching items
                if (count($Words[$FieldName]))
                {
                    $Scores = $this->SearchForWords(
                            $Words[$FieldName], $FieldName, $Scores);
                    $this->DMsg(3, "Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after word search");
                }

                # split into phrases
                $Phrases[$FieldName] =
                        $this->ParseSearchStringForPhrases($SearchString);

                # handle any phrases
                if (count($Phrases[$FieldName]))
                {
                    $Scores = $this->SearchForPhrases(
                            $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE);
                    $this->DMsg(3, "Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after phrase search");
                }
            }
            else
            {
                # set flag to indicate possible comparison search candidate found
                $NeedComparisonSearch = TRUE;
            }
        }
    }

    # perform comparison searches
    if ($NeedComparisonSearch)
    {
        $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
        $this->DMsg(3, "Have ".(is_array($Scores) ? count($Scores) : 0)
                ." results after comparison search");
    }

    # if no results found and exclusions specified
    # ($Scores may still be NULL at this point, which cannot be passed
    #       to count(), so emptiness is tested without counting)
    if (empty($Scores) && $this->ExcludedTermCount)
    {
        # load all records
        $Scores = $this->LoadScoresForAllRecords();
    }

    # if search results found
    if (!empty($Scores))
    {
        # for each search text string
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # if field is text
                if (($FieldName == "XXXKeywordXXX")
                        || (isset($this->FieldInfo[$FieldName])
                                && ($this->FieldInfo[$FieldName]["FieldType"]
                                        == self::FIELDTYPE_TEXT)))
                {
                    # if there are words in search text
                    if (isset($Words[$FieldName]))
                    {
                        # handle any excluded words
                        $Scores = $this->FilterOnExcludedWords(
                                $Words[$FieldName], $Scores, $FieldName);
                    }

                    # handle any excluded phrases
                    if (isset($Phrases[$FieldName]))
                    {
                        $Scores = $this->SearchForPhrases(
                                $Phrases[$FieldName], $Scores,
                                $FieldName, FALSE, TRUE);
                    }
                }
            }
        }

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # return search result scores to caller
    return $Scores;
}
1369 
/**
 * Search for words in the specified field and add to per-item scores.
 * Words flagged as excluded are skipped here.  For every other word, the
 * counts recorded in SearchWordCounts for the word itself are combined
 * with half-weighted counts for its synonyms (when synonym support is
 * enabled and the word is known) and for its stem (when stemming is
 * enabled), and the combined count is added to the score of each matching
 * item.  Items matching a word flagged as required also have their entry
 * in $this->RequiredTermCounts incremented.
 * @param array $Words Words to search for, with words for the index
 *       and word state flags (WORD_*) for the values.
 * @param string $FieldName Name of field to search.  (OPTIONAL, defaults
 *       to the keyword pseudo-field "XXXKeywordXXX")
 * @param array $Scores Existing scores to add to, with item IDs for the
 *       index.  (OPTIONAL, defaults to starting with no scores)
 * @return array Updated scores, with item IDs for the index.
 */
private function SearchForWords(
        $Words, $FieldName = "XXXKeywordXXX", $Scores = NULL)
{
    $DB = $this->DB;

    # start with empty search result scores list if none passed in
    # (loose comparison is intentional, so any empty value is replaced)
    if ($Scores == NULL)
    {
        $Scores = array();
    }

    # grab field ID
    $FieldId = $this->GetFieldId($FieldName);

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        $this->DMsg(2, "Searching for word '{$Word}' in field ".$FieldName);

        # excluded words contribute nothing to scores
        if ($Flags & self::WORD_EXCLUDED)
        {
            continue;
        }

        # no counts found (yet) for this word
        $Counts = NULL;

        # look up record ID for word
        $this->DMsg(2, "Looking up word \"".$Word."\"");
        $WordId = $this->GetWordId($Word);

        # if word is in DB
        if ($WordId !== NULL)
        {
            # look up counts for word
            $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
                    ."WHERE WordId = ".$WordId
                    ." AND FieldId = ".$FieldId);
            $Counts = $DB->FetchColumn("Count", "ItemId");

            # if synonym support is enabled
            if ($this->SynonymsEnabled)
            {
                # look for any synonyms
                $DB->Query("SELECT WordIdA, WordIdB"
                        ." FROM SearchWordSynonyms"
                        ." WHERE WordIdA = ".$WordId
                        ." OR WordIdB = ".$WordId);

                # if synonyms were found
                if ($DB->NumRowsSelected())
                {
                    # retrieve synonym IDs (the synonym is whichever
                    #       side of the pair is not our word)
                    $SynonymIds = array();
                    while ($Record = $DB->FetchRow())
                    {
                        $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                                ? $Record["WordIdB"]
                                : $Record["WordIdA"];
                    }

                    # for each synonym
                    foreach ($SynonymIds as $SynonymId)
                    {
                        # retrieve counts for synonym
                        $DB->Query("SELECT ItemId,Count"
                                ." FROM SearchWordCounts"
                                ." WHERE WordId = ".$SynonymId
                                ." AND FieldId = ".$FieldId);
                        $SynonymCounts = $DB->FetchColumn("Count", "ItemId");

                        # add in counts at reduced weight
                        $Counts = $this->AddHalvedCounts(
                                $Counts, $SynonymCounts);
                    }
                }
            }
        }

        # if stemming is enabled
        if ($this->StemmingEnabled)
        {
            # retrieve stem ID
            $Stem = PorterStemmer::Stem($Word);
            $this->DMsg(2, "Looking up stem \"".$Stem."\"");
            $StemId = $this->GetStemId($Stem);

            # if ID found for stem
            if ($StemId !== NULL)
            {
                # retrieve counts for stem
                $DB->Query("SELECT ItemId,Count"
                        ." FROM SearchWordCounts"
                        ." WHERE WordId = ".$StemId
                        ." AND FieldId = ".$FieldId);
                $StemCounts = $DB->FetchColumn("Count", "ItemId");

                # add in counts at reduced weight
                $Counts = $this->AddHalvedCounts($Counts, $StemCounts);
            }
        }

        # if counts were found
        if (isset($Counts))
        {
            # for each count
            foreach ($Counts as $ItemId => $Count)
            {
                # if word flagged as required
                if ($Flags & self::WORD_REQUIRED)
                {
                    # increment required word count for record
                    if (isset($this->RequiredTermCounts[$ItemId]))
                    {
                        $this->RequiredTermCounts[$ItemId]++;
                    }
                    else
                    {
                        $this->RequiredTermCounts[$ItemId] = 1;
                    }
                }

                # add to item record score
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += $Count;
                }
                else
                {
                    $Scores[$ItemId] = $Count;
                }
            }
        }
    }

    # return basic scores to caller
    return $Scores;
}

/**
 * Add counts for a related term (synonym or stem) to existing counts,
 * with each related count halved and rounded up before being added.
 * @param array|null $Counts Existing counts, with item IDs for the index,
 *       or NULL if no counts have been found yet.
 * @param array $RelatedCounts Counts for the related term, with item IDs
 *       for the index.
 * @return array|null Updated counts (still NULL if there were no existing
 *       counts and no related counts to add).
 */
private function AddHalvedCounts($Counts, $RelatedCounts)
{
    foreach ($RelatedCounts as $ItemId => $Count)
    {
        # adjust count because it is not an exact match for the word
        $AdjustedCount = ceil($Count / 2);

        # add count to existing counts
        if (isset($Counts[$ItemId]))
        {
            $Counts[$ItemId] += $AdjustedCount;
        }
        else
        {
            $Counts[$ItemId] = $AdjustedCount;
        }
    }
    return $Counts;
}
1542 
/**
 * Extract double-quoted phrases from a search string.  A phrase is
 * recognized only when both its opening and closing quote marks are
 * present.  The character immediately before the opening quote acts as
 * an option prefix:  "-" marks the phrase as excluded, "+" marks it as
 * required, and "~" keeps it from being required when the default search
 * logic is AND.  The excluded/required/inclusive term counters and the
 * search term list on this object are updated the first time each
 * distinct phrase is seen.
 * @param string $SearchString Search string to parse.
 * @return array Word state flags (WORD_*), with (trimmed, slash-escaped)
 *       phrases for the index.
 */
private function ParseSearchStringForPhrases($SearchString)
{
    # break string apart at double quote marks, which leaves the quoted
    #       phrases in the odd-numbered chunks
    $Chunks = explode("\"", $SearchString);
    $ChunkCount = count($Chunks);

    # walk through phrase chunks that have a chunk following them
    #       (i.e. phrases that have a closing quote)
    $Phrases = array();
    for ($Pos = 1;  ($Pos + 1) < $ChunkCount;  $Pos += 2)
    {
        # grab phrase and the character just before its opening quote
        $Phrase = trim(addslashes($Chunks[$Pos]));
        $Prefix = substr($Chunks[$Pos - 1], -1);

        # counters are only bumped the first time a phrase is seen
        $IsNewPhrase = !isset($Phrases[$Phrase]);

        $Flags = self::WORD_PRESENT;
        if ($Prefix == "-")
        {
            # phrase is excluded
            $Flags |= self::WORD_EXCLUDED;
            if ($IsNewPhrase)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            # phrase is required if explicitly marked or if terms are
            #       required by default and it is not marked optional
            $IsRequired = ($Prefix == "+")
                    || (($this->DefaultSearchLogic == self::LOGIC_AND)
                            && ($Prefix != "~"));
            if ($IsRequired)
            {
                $Flags |= self::WORD_REQUIRED;
                if ($IsNewPhrase)
                {
                    $this->RequiredTermCount++;
                }
            }

            # note phrase as an inclusive search term
            if ($IsNewPhrase)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Phrase;
            }
        }

        # save (or overwrite) flags for phrase
        $Phrases[$Phrase] = $Flags;
    }

    # return phrases to caller
    return $Phrases;
}
1602 
/**
 * Search the specified field for a phrase.  This version does no
 * searching:  it ends the script with an error message.  (Presumably it
 * is meant to be overridden by a child class -- confirm.  SearchForPhrases()
 * iterates over the value returned, treating each entry as an item ID.)
 * @param string $FieldName Name of field to search.
 * @param string $Phrase Phrase to look for.
 */
protected function SearchFieldForPhrases($FieldName, $Phrase)
{
    # report the missing implementation and stop
    $ErrorMessage =
            "<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n";
    exit($ErrorMessage);
}
1608 
/**
 * Search for phrases in a field and adjust scores accordingly.  Items
 * containing an excluded phrase are removed from the scores;  items
 * containing a non-excluded phrase have their score increased by the
 * number of words in the phrase multiplied by the field weight, and have
 * their required term count incremented if the phrase is required.  A
 * keyword search is performed by searching each field that is flagged for
 * inclusion in keyword searches.
 * @param array $Phrases Word state flags (WORD_*), with phrases for
 *       the index.
 * @param array $Scores Existing scores, with item IDs for the index
 *       (may be NULL).
 * @param string $FieldName Name of field to search.  (OPTIONAL, defaults
 *       to the keyword pseudo-field "XXXKeywordXXX")
 * @param bool $ProcessNonExcluded Whether to process phrases that are
 *       not flagged as excluded.  (OPTIONAL, defaults to TRUE)
 * @param bool $ProcessExcluded Whether to process phrases that are
 *       flagged as excluded.  (OPTIONAL, defaults to TRUE)
 * @return array Updated scores, with item IDs for the index.
 */
private function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
        $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
{
    # there is nothing to do if there are no phrases
    if (count($Phrases) == 0)
    {
        return $Scores;
    }

    # a keyword search is handled by searching (via a call back to
    #       ourselves) each field marked for inclusion in keyword searches
    if ($FieldName == "XXXKeywordXXX")
    {
        foreach ($this->FieldInfo as $KeywordFieldName => $KeywordFieldInfo)
        {
            if (!$KeywordFieldInfo["InKeywordSearch"])
            {
                continue;
            }
            $Scores = $this->SearchForPhrases(
                    $Phrases, $Scores, $KeywordFieldName,
                    $ProcessNonExcluded, $ProcessExcluded);
        }
        return $Scores;
    }

    # otherwise look for each phrase in the specified field
    foreach ($Phrases as $Phrase => $Flags)
    {
        $this->DMsg(2, "Searching for phrase '".$Phrase
                ."' in field ".$FieldName);

        # work out what (if anything) should be done with matches
        $IsExcluded = ($Flags & self::WORD_EXCLUDED) ? TRUE : FALSE;
        $RemoveMatches = $ProcessExcluded && $IsExcluded;
        $ScoreMatches = $ProcessNonExcluded && !$IsExcluded;
        if (!$RemoveMatches && !$ScoreMatches)
        {
            continue;
        }

        # initialize score list if necessary
        if ($Scores === NULL)
        {
            $Scores = array();
        }

        # retrieve list of items that contain phrase
        $ItemIds = $this->SearchFieldForPhrases($FieldName, $Phrase);

        foreach ($ItemIds as $ItemId)
        {
            # items containing an excluded phrase are knocked off the list
            if ($RemoveMatches)
            {
                unset($Scores[$ItemId]);
                continue;
            }

            # phrase value is based on number of words and field weight
            $WordCount = count(preg_split("/[\s]+/",
                    $Phrase, -1, PREG_SPLIT_NO_EMPTY));
            $PhraseScore = $WordCount
                    * $this->FieldInfo[$FieldName]["Weight"];
            $this->DMsg(2, "Phrase score is ".$PhraseScore);

            # bump up item record score
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += $PhraseScore;
            }
            else
            {
                $Scores[$ItemId] = $PhraseScore;
            }

            # count phrase toward required terms for item if required
            if ($Flags & self::WORD_REQUIRED)
            {
                if (isset($this->RequiredTermCounts[$ItemId]))
                {
                    $this->RequiredTermCounts[$ItemId]++;
                }
                else
                {
                    $this->RequiredTermCounts[$ItemId] = 1;
                }
            }
        }
    }

    # return updated scores to caller
    return $Scores;
}
1704 
/**
 * Remove from the scores any item that contains (in the specified field)
 * a word flagged as excluded.
 * @param array $Words Word state flags (WORD_*), with words for the index.
 * @param array $Scores Existing scores, with item IDs for the index.
 * @param string $FieldName Name of field to check.  (OPTIONAL, defaults
 *       to the keyword pseudo-field "XXXKeywordXXX")
 * @return array Filtered scores, with item IDs for the index.
 */
private function FilterOnExcludedWords($Words, $Scores, $FieldName = "XXXKeywordXXX")
{
    $DB = $this->DB;

    # grab field ID
    $FieldId = $this->GetFieldId($FieldName);

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        # only words flagged as excluded are of interest
        if (!($Flags & self::WORD_EXCLUDED))
        {
            continue;
        }

        # look up record ID for word
        $WordId = $this->GetWordId($Word);

        # a word that is not in the DB cannot appear in any item
        if ($WordId === NULL)
        {
            continue;
        }

        # look up items that contain word in this field
        $DB->Query("SELECT ItemId FROM SearchWordCounts "
                ."WHERE WordId=".$WordId." AND FieldId=".$FieldId);

        # for each item found
        while ($Record = $DB->FetchRow())
        {
            # if item record is in score list
            $ItemId = $Record["ItemId"];
            if (isset($Scores[$ItemId]))
            {
                # remove item record from score list
                $this->DMsg(3, "Filtering out item ".$ItemId
                        ." because it contained word \"".$Word."\"");
                unset($Scores[$ItemId]);
            }
        }
    }

    # returned filtered score list to caller
    return $Scores;
}
1748 
/**
 * Remove from the scores any item that did not match all required terms,
 * by comparing the per-item tallies in $this->RequiredTermCounts against
 * $this->RequiredTermCount.  Scores are returned untouched when no terms
 * were required.
 * @param array $Scores Existing scores, with item IDs for the index.
 * @return array Filtered scores, with item IDs for the index.
 */
private function FilterOnRequiredWords($Scores)
{
    # no filtering is needed if nothing was required
    if (!($this->RequiredTermCount > 0))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        # keep item if it matched enough required terms
        $HasTally = isset($this->RequiredTermCounts[$ItemId]);
        if ($HasTally && !($this->RequiredTermCounts[$ItemId]
                < $this->RequiredTermCount))
        {
            continue;
        }

        # otherwise report why item is being dropped and drop it
        $Shortfall = $HasTally
                ? " (only had ".$this->RequiredTermCounts[$ItemId]
                : " (had none";
        $this->DMsg(4, "Filtering out item ".$ItemId
                ." because it didn't have required word count of "
                .$this->RequiredTermCount
                .$Shortfall
                .")");
        unset($Scores[$ItemId]);
    }

    # return filtered list to caller
    return $Scores;
}
1779 
/**
 * Filter, count, sort, and trim search result scores list.  Any supplied
 * filter functions are applied first, and the number of results left
 * after filtering is saved in $this->NumberOfResultsAvailable.  Results
 * are then ordered by score, or in the order given by
 * GetItemIdsSortedByField() when a sort field is specified, and the
 * requested range of results is returned.
 * @param array $Scores Scores, with item IDs for the index.
 * @param int $StartingResult Zero-based offset of first result to return.
 * @param int $NumberOfResults Maximum number of results to return.
 * @param mixed $SortByField Field to sort results by, or NULL to sort
 *       by score.
 * @param bool $SortDescending TRUE to sort in descending order.
 * @return array Requested range of scores, with item IDs for the index.
 */
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # perform any requested filtering
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # save total number of results available
    $this->NumberOfResultsAvailable = count($Scores);

    # if no sorting field specified
    if ($SortByField === NULL)
    {
        # sort result list by score
        if ($SortDescending)
        {
            arsort($Scores, SORT_NUMERIC);
        }
        else
        {
            asort($Scores, SORT_NUMERIC);
        }
    }
    else
    {
        # get list of item IDs in sorted order
        $SortedIds = $this->GetItemIdsSortedByField(
                $SortByField, $SortDescending);

        # if we have sorted item IDs
        if (count($SortedIds) && count($Scores))
        {
            # strip sorted ID list down to those that appear in search results
            $SortedIds = array_intersect($SortedIds, array_keys($Scores));

            # rebuild score list in sorted order (starting from an empty
            #       list, so that we still have an array if none of the
            #       sorted IDs appear in the search results)
            $NewScores = array();
            foreach ($SortedIds as $Id)
            {
                $NewScores[$Id] = $Scores[$Id];
            }
            $Scores = $NewScores;
        }
        else
        {
            # sort result list by score (always descending here)
            arsort($Scores, SORT_NUMERIC);
        }
    }

    # trim result list to match range requested by caller
    $ScoresKeys = array_slice(
            array_keys($Scores), $StartingResult, $NumberOfResults);
    $TrimmedScores = array();
    foreach ($ScoresKeys as $Key)
    {
        $TrimmedScores[$Key] = $Scores[$Key];
    }

    # returned cleaned search result scores list to caller
    return $TrimmedScores;
}
1839 
/**
 * Apply any registered result filter functions ($this->FilterFuncs) to
 * the scores.  Each function is called with an item ID, and the item is
 * discarded as soon as one of the functions returns a true value.
 * @param array $Scores Existing scores, with item IDs for the index.
 * @return array Filtered scores, with item IDs for the index.
 */
protected function FilterOnSuppliedFunctions($Scores)
{
    # scores pass through untouched if no filter functions have been set
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        foreach ($this->FilterFuncs as $FuncName)
        {
            # move on to next filter if this one accepts the item
            if (!call_user_func($FuncName, $ItemId))
            {
                continue;
            }

            # discard result
            $this->DMsg(2, "Filter callback <i>".$FuncName
                    ."</i> rejected item ".$ItemId);
            unset($Scores[$ItemId]);

            # no need to consult remaining filters for this item
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
1869 
/**
 * Perform comparison searches for non-keyword fields and combine the
 * results with existing scores.  A search string is treated as a
 * comparison if it begins with a comparison operator (>=, <=, !=, >, <,
 * or =) followed by at least one more character, or if its field is
 * known and is not a text field (in which case the operator defaults to
 * "=").  Matching items are retrieved via
 * SearchFieldsForComparisonMatches().  When the default search logic is
 * AND, scores are narrowed to items that matched the comparisons (or
 * seeded with a score of 1 for each match, if there were no prior scores
 * and no inclusive search terms);  otherwise each match adds 1 to the
 * score for the item.
 * @param array $SearchStrings Search string or array of search strings
 *       for each field, with field names for the index.
 * @param array $Scores Existing scores, with item IDs for the index
 *       (may be NULL).
 * @return array Updated scores, with item IDs for the index.  (Scores
 *       are returned unmodified if no comparisons were found.)
 */
private function SearchForComparisonMatches($SearchStrings, $Scores)
{
    # comparisons found (parallel lists that share the same indexes)
    $FieldNames = array();
    $Operators = array();
    $Values = array();
    $Index = 0;

    # for each field
    foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
    {
        # comparisons do not apply to keyword searches
        if ($SearchFieldName == "XXXKeywordXXX")
        {
            continue;
        }

        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # skip search string if it does not look like a comparison
            #       and is not for a (known) non-text field
            $FoundOperator = preg_match("/^[><!]=./", $SearchString)
                    || preg_match("/^[><=]./", $SearchString);
            $IsNonTextField =
                    isset($this->FieldInfo[$SearchFieldName]["FieldType"])
                    && ($this->FieldInfo[$SearchFieldName]["FieldType"]
                            != self::FIELDTYPE_TEXT);
            if (!$FoundOperator && !$IsNonTextField)
            {
                continue;
            }

            # determine value (search string minus any leading operator)
            $Patterns = array("/^[><!]=/", "/^[><=]/");
            $Replacements = array("", "");
            $Value = trim(preg_replace(
                    $Patterns, $Replacements, $SearchString));

            # determine and save operator
            if (!$FoundOperator)
            {
                $Operators[$Index] = "=";
            }
            else
            {
                # (substr() is used rather than string offsets so that
                #       a one-character term cannot cause an
                #       out-of-range offset access)
                $Term = trim($SearchString);
                $FirstChar = substr($Term, 0, 1);
                $FirstTwoChars = substr($Term, 0, 2);
                if ($FirstTwoChars == ">=")
                        {  $Operators[$Index] = ">=";  }
                elseif ($FirstTwoChars == "<=")
                        {  $Operators[$Index] = "<=";  }
                elseif ($FirstTwoChars == "!=")
                        {  $Operators[$Index] = "!=";  }
                elseif ($FirstChar == ">")
                        {  $Operators[$Index] = ">";  }
                elseif ($FirstChar == "<")
                        {  $Operators[$Index] = "<";  }
                elseif ($FirstChar == "=")
                        {  $Operators[$Index] = "=";  }
            }

            # if operator was found
            if (isset($Operators[$Index]))
            {
                # save value
                $Values[$Index] = $Value;

                # save field name
                $FieldNames[$Index] = $SearchFieldName;
                $this->DMsg(3, "Added comparison (field = <i>"
                        .$FieldNames[$Index]."</i> op = <i>"
                        .$Operators[$Index]."</i> val = <i>"
                        .$Values[$Index]."</i>)");

                # move to next comparison array entry
                $Index++;
            }
        }
    }

    # scores are left as they were if no comparisons were found
    if (count($Operators) == 0)
    {
        return $Scores;
    }

    # make sure we have a score list to work with
    if ($Scores === NULL)
    {
        $Scores = array();
    }

    # perform comparisons on fields and gather results
    $Results = $this->SearchFieldsForComparisonMatches(
            $FieldNames, $Operators, $Values);

    # if search logic is set to AND
    if ($this->DefaultSearchLogic == self::LOGIC_AND)
    {
        # if results were found
        if (count($Results))
        {
            # if there were no prior results and no terms for keyword search
            if ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
            {
                # add all results to scores
                foreach ($Results as $ItemId)
                {
                    $Scores[$ItemId] = 1;
                }
            }
            else
            {
                # remove anything from scores that is not part of results
                foreach ($Scores as $ItemId => $Score)
                {
                    if (!in_array($ItemId, $Results))
                    {
                        unset($Scores[$ItemId]);
                    }
                }
            }
        }
        else
        {
            # clear scores
            $Scores = array();
        }
    }
    else
    {
        # add result items to scores
        foreach ($Results as $ItemId)
        {
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += 1;
            }
            else
            {
                $Scores[$ItemId] = 1;
            }
        }
    }

    # return results to caller
    return $Scores;
}
2008 
/**
 * Look for (and strip out) a debug level indicator in search strings,
 * by running every search string through ExtractDebugLevel().
 * @param mixed $SearchStrings Single search string, or array (with field
 *       names for the index) of search strings or arrays of search strings.
 * @return mixed Search string(s), in the same form as supplied, as
 *       returned by ExtractDebugLevel().
 */
private function SetDebugLevel($SearchStrings)
{
    # a lone search string can be handled directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    # otherwise go through the search string(s) for each field
    foreach ($SearchStrings as $FieldName => $FieldStrings)
    {
        # field has just one search string
        if (!is_array($FieldStrings))
        {
            $SearchStrings[$FieldName] =
                    $this->ExtractDebugLevel($FieldStrings);
            continue;
        }

        # field has a list of search strings
        foreach ($FieldStrings as $Index => $SearchString)
        {
            $SearchStrings[$FieldName][$Index] =
                    $this->ExtractDebugLevel($SearchString);
        }
    }

    # return new search info to caller
    return $SearchStrings;
}
2045 
/**
 * Set debug level from (and strip out) any debug level indicator at the
 * start of a search string.  The indicator has the form "DBUGLVL=N",
 * optionally preceded by whitespace, where N is one or two digits, each
 * in the range 1-9.  When an indicator is found, every occurrence of it
 * is removed from the search string.
 * @param string $SearchString Search string to examine.
 * @return string Search string with any debug level indicator removed.
 */
private function ExtractDebugLevel($SearchString)
{
    # if search string begins with debug level indicator
    # (the level is captured directly, rather than derived by rewriting
    #       the whole string, so that a string with no valid indicator
    #       at the start can never be mistaken for a level)
    $Matches = array();
    if (preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
    {
        # set debug level and remove indicator
        $Level = (int)$Matches[1];
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);
        $SearchString = preg_replace(
                "/DBUGLVL=".$Level."/", "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
2064 
/**
 * Load and return a search result scores array that contains every
 * record in the item table, each with a score of 1.
 * @return array Scores (all 1), with item IDs for the index.
 */
private function LoadScoresForAllRecords()
{
    # ask for the ID of every item
    $IdFieldName = $this->ItemIdFieldName;
    $this->DB->Query("SELECT ".$IdFieldName." FROM ".$this->ItemTableName);

    # give each item found a score of 1
    $Scores = array();
    while ($Record = $this->DB->FetchRow())
    {
        $Scores[$Record[$IdFieldName]] = 1;
    }

    # return array with all scores to caller
    return $Scores;
}
2083 
2084 
2085  # ---- private functions used in building search database
2086 
/**
 * Add to the count in SearchWordCounts for a word (and, if stemming is
 * enabled, for the stem of the word) for the specified item and field.
 * The word is counted at the supplied weight, and the stem at half that
 * weight (rounded up).
 * NOTE(review): $this->WordCountAdded is indexed by word ID and field ID
 * only (not item ID), so it is presumably cleared elsewhere before each
 * item is processed -- confirm.
 * @param string $Word Word to update count for.
 * @param int $ItemId ID of item that contains the word.
 * @param int $FieldId ID of field that contains the word.
 * @param int $Weight Amount to add to count.  (OPTIONAL, defaults to 1)
 */
private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
{
    # assemble IDs to update:  the word itself, followed by
    #       its stem (if stemming is enabled)
    $TermIds = array($this->GetWordId($Word, TRUE));
    if ($this->StemmingEnabled)
    {
        $Stem = PorterStemmer::Stem($Word, TRUE);
        $TermIds[] = $this->GetStemId($Stem, TRUE);
    }

    $TermWeight = $Weight;
    foreach ($TermIds as $TermId)
    {
        if (!isset($this->WordCountAdded[$TermId][$FieldId]))
        {
            # no count added yet, so create a new entry in DB
            $this->DB->Query("INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." (".$TermId.", ".$ItemId.", ".$FieldId.", ".$TermWeight.")");

            # remember that we added count for this word
            $this->WordCountAdded[$TermId][$FieldId] = TRUE;
        }
        else
        {
            # count already in DB, so add to it
            $this->DB->Query("UPDATE SearchWordCounts SET Count=Count+".$TermWeight
                    ." WHERE WordId=".$TermId
                    ." AND ItemId=".$ItemId
                    ." AND FieldId=".$FieldId);
        }

        # whatever comes next (the stem) gets half the weight
        $TermWeight = ceil($TermWeight / 2);
    }
}
2134 
/**
 * Retrieve content of the specified field for the specified item.  This
 * version retrieves nothing:  it ends the script with an error message.
 * (Presumably it is meant to be overridden by a child class -- confirm.)
 * @param int $ItemId ID of item.
 * @param string $FieldName Name of field.
 */
protected function GetFieldContent($ItemId, $FieldName)
{
    # report the missing implementation and stop
    $ErrorMessage = "<br>SE - ERROR: GetFieldContent() not implemented<br>\n";
    exit($ErrorMessage);
}
2140 
/**
 * Record word counts in the search database for a piece of text from an
 * item field.  Each word parsed from the text is counted for the field
 * itself (at the default weight used by UpdateWordCount()) and, if
 * requested, for the keyword pseudo-field (at the supplied weight).
 * @param int $ItemId ID of item that text came from.
 * @param string $FieldName Name of field that text came from.
 * @param int $Weight Weight to use for keyword field word counts.
 * @param string $Text Text to record search info for.
 * @param bool $IncludeInKeyword TRUE if words should also be counted
 *       for keyword searches.
 */
private function RecordSearchInfoForText(
        $ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
{
    # break text down into normalized words
    $Words = $this->ParseSearchStringForWords($Text, TRUE);

    # there is nothing to record if no words were left after parsing
    if (count($Words) == 0)
    {
        return;
    }

    # look up IDs of field(s) that counts will be recorded against
    $FieldId = $this->GetFieldId($FieldName);
    if ($IncludeInKeyword)
    {
        $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
    }

    foreach ($Words as $Word => $Flags)
    {
        # count word for the field itself
        $this->UpdateWordCount($Word, $ItemId, $FieldId);

        # count word (at field weight) for keyword searches if requested
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Word, $ItemId, $KeywordFieldId, $Weight);
        }
    }
}
2176 
/**
 * Print debug message if the current debug level is set high enough,
 * i.e. is greater than the level specified for the message.
 * @param int $Level Debug level that must be exceeded for message
 *       to be printed.
 * @param string $Msg Message to print.
 */
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level exceeds level of message
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    print("SE: ".$Msg."<br>\n");
}
2185 
2186  # ---- BACKWARD COMPATIBILITY --------------------------------------------
2187 
2188  # possible types of logical operators
2189  const SEARCHLOGIC_AND = 1;
2190  const SEARCHLOGIC_OR = 2;
2191 }
2192 
2193 ?>
SearchTermCount()
Get total number of search terms indexed by search engine.
SetAllSynonyms($SynonymList)
Set all synonyms.
DropItem($ItemId)
Drop all data pertaining to item from search database.
DropField($FieldName)
Drop all data pertaining to field from search database.
RemoveSynonyms($Word, $Synonyms=NULL)
Remove synonym(s).
NumberOfResults()
Get number of results found by most recent search.
LoadSynonymsFromFile($FileName)
Load synonyms from a file.
Set of parameters used to perform a search.
__construct($ItemTableName, $ItemIdFieldName)
Object constructor.
SQL database abstraction object with smart query caching.
AddField($FieldName, $FieldType, $Weight, $UsedInKeywordSearch)
Add field to include in searching.
Search($SearchString, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search.
GetAllSynonyms()
Get all synonyms.
SearchTermsRequiredByDefault($NewSetting=TRUE)
Set default search logic.
const FIELDTYPE_NUMERIC
FilterOnSuppliedFunctions($Scores)
AddSynonyms($Word, $Synonyms)
Add synonyms.
const FIELDTYPE_DATERANGE
UpdateForItem($ItemId, $ItemType=0)
Update search database for the specified item.
const FIELDTYPE_DATE
SearchTerms()
Get normalized list of search terms.
const WORD_EXCLUDED
GroupedSearch($SearchGroups, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search with logical groups of fielded searches.
ItemCount()
Get total number of items indexed by search engine.
FieldedSearch($SearchStrings, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search across multiple fields, with different values or comparisons specified for each field...
const STEM_ID_OFFSET
FieldWeight($FieldName)
Get search weight for specified field.
FieldInKeywordSearch($FieldName)
Get whether specified field is included in keyword searches.
RemoveAllSynonyms()
Remove all synonyms.
DMsg($Level, $Msg)
const WORD_PRESENT
Core metadata archive search engine class.
FieldedSearchWeightScale($SearchStrings)
Get total of weights for all fields involved in search, useful for assessing scale of scores in search results.
DefaultSearchLogic($NewSetting=NULL)
Get/set default search logic (LOGIC_AND or LOGIC_OR).
FieldType($FieldName)
Get type of specified field (text/numeric/date/daterange).
const FIELDTYPE_TEXT
const WORD_REQUIRED
DebugLevel($NewValue)
Set debug output level.
UpdateForItems($StartingItemId, $NumberOfItems)
Update search database for the specified range of items.
GetFieldContent($ItemId, $FieldName)
AddResultFilterFunction($FunctionName)
Add function that will be called to filter search results.
SearchTime()
Get time that last search took, in seconds.
GetSynonyms($Word)
Get synonyms for word.