3 # FILE: SPTSearchEngine.php
5 # Part of the Collection Workflow Integration System (CWIS)
6 # Copyright 2011-2013 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu/cwis/
14 # pass database handle and config values to real search engine object
15 parent::__construct(
"Resources",
"ResourceId");
17 # for each field defined in schema
19 $Fields = $this->Schema->GetFields();
20 foreach ($Fields as $Field)
22 # determine field type for searching
23 switch ($Field->Type())
35 $FieldType = self::FIELDTYPE_TEXT;
40 $FieldType = self::FIELDTYPE_NUMERIC;
44 $FieldType = self::FIELDTYPE_DATERANGE;
48 $FieldType = self::FIELDTYPE_DATE;
56 exit(
"ERROR: unknown field type "
57 .$Field->Type().
" in SPTSearchEngine.php");
61 if ($FieldType !== NULL)
63 # add field to search engine
64 $this->
AddField($Field->Name(), $FieldType,
65 $Field->SearchWeight(),
66 $Field->IncludeInKeywordSearch());
83 # if this is a reference field
84 $Field = $this->Schema->GetFieldByName($FieldName);
87 # retrieve IDs of referenced items
88 $ReferredItemIds = $Resource->Get($FieldName);
90 # for each referred item
91 $ReturnValue = array();
92 foreach ($ReferredItemIds as $RefId)
94 # retrieve title value for item and add to returned values
96 $ReturnValue[] = $RefResource->GetMapped(
"Title");
99 # return referred item titles to caller
104 # retrieve text (including variants) from resource object and return to caller
105 return $Resource->Get($FieldName, FALSE, TRUE);
109 # overloaded version of method to retrieve resource/phrase match list
112 # normalize and escape search phrase for use in SQL query
113 $SearchPhrase = strtolower(addslashes($Phrase));
115 # query DB for matching list based on field type
116 $Field = $this->Schema->GetFieldByName($FieldName);
117 switch ($Field->Type())
123 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
124 .
"WHERE POSITION('".$SearchPhrase.
"'"
125 .
" IN LOWER(`".$Field->DBFieldName().
"`)) ";
129 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
130 .
"WHERE POSITION('".$SearchPhrase.
"'"
131 .
" IN LOWER(`".$Field->DBFieldName().
"AltText`)) ";
135 $NameTableSize = $this->DB->Query(
"SELECT COUNT(*) AS NameCount"
136 .
" FROM ControlledNames",
"NameCount");
137 $QueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
138 .
"FROM ResourceNameInts, ControlledNames "
139 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ControlledName)) "
140 .
"AND ControlledNames.ControlledNameId"
141 .
" = ResourceNameInts.ControlledNameId "
142 .
"AND ControlledNames.FieldId = ".$Field->Id();
143 $SecondQueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
144 .
"FROM ResourceNameInts, ControlledNames, VariantNames "
145 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(VariantName)) "
146 .
"AND VariantNames.ControlledNameId"
147 .
" = ResourceNameInts.ControlledNameId "
148 .
"AND ControlledNames.ControlledNameId"
149 .
" = ResourceNameInts.ControlledNameId "
150 .
"AND ControlledNames.FieldId = ".$Field->Id();
154 $QueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
155 .
"FROM ResourceNameInts, ControlledNames "
156 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ControlledName)) "
157 .
"AND ControlledNames.ControlledNameId"
158 .
" = ResourceNameInts.ControlledNameId "
159 .
"AND ControlledNames.FieldId = ".$Field->Id();
163 $QueryString =
"SELECT DISTINCT ResourceClassInts.ResourceId "
164 .
"FROM ResourceClassInts, Classifications "
165 .
"WHERE POSITION('".$SearchPhrase
166 .
"' IN LOWER(ClassificationName)) "
167 .
"AND Classifications.ClassificationId"
168 .
" = ResourceClassInts.ClassificationId "
169 .
"AND Classifications.FieldId = ".$Field->Id();
173 $UserId = $this->DB->Query(
"SELECT UserId FROM APUsers "
174 .
"WHERE POSITION('".$SearchPhrase
175 .
"' IN LOWER(UserName)) "
176 .
"OR POSITION('".$SearchPhrase
177 .
"' IN LOWER(RealName))",
"UserId");
180 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
181 .
"WHERE `".$Field->DBFieldName().
"` = ".$UserId;
186 if ($SearchPhrase > 0)
188 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
189 .
"WHERE `".$Field->DBFieldName()
190 .
"` = ".(int)$SearchPhrase;
198 # (these types not yet handled by search engine for phrases)
202 # build match list based on results returned from DB
203 if (isset($QueryString))
205 $this->
DMsg(7,
"Performing phrase search query (<i>".$QueryString.
"</i>)");
206 if ($this->
DebugLevel > 9) { $StartTime = microtime(TRUE); }
207 $this->DB->Query($QueryString);
210 $EndTime = microtime(TRUE);
211 if (($StartTime - $EndTime) > 0.1)
213 printf(
"SE: Query took %.2f seconds<br>\n",
214 ($EndTime - $StartTime));
217 $MatchList = $this->DB->FetchColumn(
"ResourceId");
218 if (isset($SecondQueryString))
220 $this->
DMsg(7,
"Performing second phrase search query"
221 .
" (<i>".$SecondQueryString.
"</i>)");
222 if ($this->
DebugLevel > 9) { $StartTime = microtime(TRUE); }
223 $this->DB->Query($SecondQueryString);
226 $EndTime = microtime(TRUE);
227 if (($StartTime - $EndTime) > 0.1)
229 printf(
"SE: query took %.2f seconds<br>\n",
230 ($EndTime - $StartTime));
233 $MatchList = $MatchList + $this->DB->FetchColumn(
"ResourceId");
238 $MatchList = array();
241 # return list of matching resources to caller
245 # search field for records that meet comparison
248 # use SQL keyword appropriate to current search logic for combining operations
251 # for each comparison
252 foreach ($FieldNames as $Index => $FieldName)
254 $Operator = $Operators[$Index];
255 $Value = $Values[$Index];
257 # determine query based on field type
258 $Field = $this->Schema->GetFieldByName($FieldName);
261 switch ($Field->Type())
269 if (isset($Queries[
"Resources"]))
271 $Queries[
"Resources"] .= $CombineWord;
275 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
276 .
" FROM Resources WHERE ";
280 $User =
new CWUser($Value);
281 $Value = $User->Id();
283 $Queries[
"Resources"] .=
"`".$Field->DBFieldName()
284 .
"` ".$Operator.
" '".addslashes($Value).
"' ";
288 $QueryIndex =
"ResourceNameInts".$Field->Id();
289 if (!isset($Queries[$QueryIndex][
"A"]))
291 $Queries[$QueryIndex][
"A"] =
292 "SELECT DISTINCT ResourceId"
293 .
" FROM ResourceNameInts, ControlledNames "
294 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
296 $CloseQuery[$QueryIndex][
"A"] = TRUE;
297 $ComparisonCount[$QueryIndex][
"A"] = 1;
298 $ComparisonCountField[$QueryIndex][
"A"] =
"ControlledName";
302 $Queries[$QueryIndex][
"A"] .=
" OR ";
303 $ComparisonCount[$QueryIndex][
"A"]++;
305 $Queries[$QueryIndex][
"A"] .=
306 "((ResourceNameInts.ControlledNameId"
307 .
" = ControlledNames.ControlledNameId"
308 .
" AND ControlledName "
309 .$Operator.
" '".addslashes($Value).
"'))";
310 if (!isset($Queries[$QueryIndex][
"B"]))
312 $Queries[$QueryIndex][
"B"] =
313 "SELECT DISTINCT ResourceId"
314 .
" FROM ResourceNameInts, ControlledNames,"
316 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
318 $CloseQuery[$QueryIndex][
"B"] = TRUE;
319 $ComparisonCount[$QueryIndex][
"B"] = 1;
320 $ComparisonCountField[$QueryIndex][
"B"] =
"ControlledName";
324 $Queries[$QueryIndex][
"B"] .=
" OR ";
325 $ComparisonCount[$QueryIndex][
"B"]++;
327 $Queries[$QueryIndex][
"B"] .=
328 "((ResourceNameInts.ControlledNameId"
329 .
" = ControlledNames.ControlledNameId"
330 .
" AND ResourceNameInts.ControlledNameId"
331 .
" = VariantNames.ControlledNameId"
333 .$Operator.
" '".addslashes($Value).
"'))";
337 $QueryIndex =
"ResourceNameInts".$Field->Id();
338 if (!isset($Queries[$QueryIndex]))
340 $Queries[$QueryIndex] =
341 "SELECT DISTINCT ResourceId"
342 .
" FROM ResourceNameInts, ControlledNames "
343 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
345 $CloseQuery[$QueryIndex] = TRUE;
346 $ComparisonCount[$QueryIndex] = 1;
347 $ComparisonCountField[$QueryIndex] =
"ControlledName";
351 $Queries[$QueryIndex] .=
" OR ";
352 $ComparisonCount[$QueryIndex]++;
354 $Queries[$QueryIndex] .=
355 "(ResourceNameInts.ControlledNameId"
356 .
" = ControlledNames.ControlledNameId"
357 .
" AND ControlledName ".$Operator
358 .
" '".addslashes($Value).
"')";
362 $QueryIndex =
"ResourceClassInts".$Field->Id();
363 if (!isset($Queries[$QueryIndex]))
365 $Queries[$QueryIndex] =
"SELECT DISTINCT ResourceId"
366 .
" FROM ResourceClassInts, Classifications"
367 .
" WHERE ResourceClassInts.ClassificationId"
368 .
" = Classifications.ClassificationId"
369 .
" AND Classifications.FieldId"
370 .
" = ".$Field->Id().
" AND ( ";
371 $CloseQuery[$QueryIndex] = TRUE;
372 $ComparisonCount[$QueryIndex] = 1;
373 $ComparisonCountField[$QueryIndex] =
"ClassificationName";
377 $Queries[$QueryIndex] .=
" OR ";
378 $ComparisonCount[$QueryIndex]++;
380 $Queries[$QueryIndex] .=
" ClassificationName "
381 .$Operator.
" '".addslashes($Value).
"'";
385 # if value appears to have time component or text description
386 if (strpos($Value,
":")
387 || strstr($Value,
"day")
388 || strstr($Value,
"week")
389 || strstr($Value,
"month")
390 || strstr($Value,
"year")
391 || strstr($Value,
"hour")
392 || strstr($Value,
"minute"))
394 if (isset($Queries[
"Resources"]))
396 $Queries[
"Resources"] .= $CombineWord;
400 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
401 .
" FROM Resources WHERE ";
404 # flip operator if necessary
405 if (strstr($Value,
"ago"))
407 $OperatorFlipMap = array(
413 $Operator = isset($OperatorFlipMap[$Operator])
414 ? $OperatorFlipMap[$Operator] : $Operator;
417 # use strtotime method to build condition
418 $TimestampValue = strtotime($Value);
419 if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
421 if ((date(
"H:i:s", $TimestampValue) ==
"00:00:00")
422 && (strpos($Value,
"00:00") === FALSE)
423 && ($Operator ==
"<="))
426 date(
"Y-m-d", $TimestampValue).
" 23:59:59";
430 $NormalizedValue = date(
431 "Y-m-d H:i:s", $TimestampValue);
436 $NormalizedValue = addslashes($Value);
438 $Queries[
"Resources"] .=
439 " ( `".$Field->DBFieldName().
"` "
441 .
" '".$NormalizedValue.
"' ) ";
445 # use Date object method to build condition
446 $Date =
new Date($Value);
447 if ($Date->Precision())
449 if (isset($Queries[
"Resources"]))
451 $Queries[
"Resources"] .= $CombineWord;
455 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
456 .
" FROM Resources WHERE ";
458 $Queries[
"Resources"] .=
" ( ".$Date->SqlCondition(
459 $Field->DBFieldName(), NULL, $Operator).
" ) ";
465 $Date =
new Date($Value);
466 if ($Date->Precision())
468 if (isset($Queries[
"Resources"]))
470 $Queries[
"Resources"] .= $CombineWord;
474 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
475 .
" FROM Resources WHERE ";
477 $Queries[
"Resources"] .=
" ( ".$Date->SqlCondition(
478 $Field->DBFieldName().
"Begin",
479 $Field->DBFieldName().
"End", $Operator).
" ) ";
484 $QueryIndex =
"ReferenceInts".$Field->Id();
485 if (!isset($Queries[$QueryIndex]))
487 if (!isset($NameField))
490 $this->Schema->GetFieldByMappedName(
493 $Queries[$QueryIndex] =
494 "SELECT DISTINCT RI.SrcResourceId AS ResourceId"
495 .
" FROM ReferenceInts AS RI, Resources AS R "
496 .
" WHERE RI.FieldId = ".$Field->Id()
498 $CloseQuery[$QueryIndex] = TRUE;
499 $ComparisonCount[$QueryIndex] = 1;
500 $ComparisonCountField[$QueryIndex] =
501 "R.`".$NameField->DBFieldName().
"`";
505 $Queries[$QueryIndex] .= $CombineWord;
506 $ComparisonCount[$QueryIndex]++;
508 $Queries[$QueryIndex] .=
"(R.`".$NameField->DBFieldName().
"` "
509 .$Operator.
" '".addslashes($Value).
"'"
510 .
" AND R.ResourceId = RI.DstResourceId)";
515 # (these types not yet handled by search engine for comparisons)
524 # for each assembled query
525 foreach ($Queries as $QueryIndex => $Query)
527 # if query has multiple parts
528 if (is_array($Query))
530 # for each part of query
531 $ResourceIds = array();
532 foreach ($Query as $PartIndex => $PartQuery)
534 # add closing paren if query was flagged to be closed
535 if (isset($CloseQuery[$QueryIndex][$PartIndex]))
539 && ($ComparisonCount[$QueryIndex][$PartIndex] > 1))
541 $PartQuery .=
"GROUP BY ResourceId"
542 .
" HAVING COUNT(DISTINCT "
543 .$ComparisonCountField[$QueryIndex][$PartIndex]
545 .$ComparisonCount[$QueryIndex][$PartIndex];
549 # perform query and retrieve IDs
550 $this->
DMsg(5,
"Performing comparison query <i>"
552 $this->DB->Query($PartQuery);
553 $ResourceIds = $ResourceIds
554 + $this->DB->FetchColumn(
"ResourceId");
555 $this->
DMsg(5,
"Comparison query produced <i>"
556 .count($ResourceIds).
"</i> results");
561 # add closing paren if query was flagged to be closed
562 if (isset($CloseQuery[$QueryIndex]))
566 && ($ComparisonCount[$QueryIndex] > 1))
568 $Query .=
"GROUP BY ResourceId"
569 .
" HAVING COUNT(DISTINCT "
570 .$ComparisonCountField[$QueryIndex]
572 .$ComparisonCount[$QueryIndex];
576 # perform query and retrieve IDs
577 $this->
DMsg(5,
"Performing comparison query <i>".$Query.
"</i>");
578 $this->DB->Query($Query);
579 $ResourceIds = $this->DB->FetchColumn(
"ResourceId");
580 $this->
DMsg(5,
"Comparison query produced <i>"
581 .count($ResourceIds).
"</i> results");
584 # if we already have some results
587 # if search logic is set to AND
590 # remove anything from results that was not returned from query
591 $Results = array_intersect($Results, $ResourceIds);
595 # add values returned from query to results
596 $Results = array_unique(array_merge($Results, $ResourceIds));
601 # set results to values returned from query
602 $Results = $ResourceIds;
608 # initialize results to empty list
612 # return results to caller
619 return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
623 $TaskPriority = ApplicationFramework::PRIORITY_LOW)
626 $TaskDescription =
"Update search data for"
627 .
" <a href=\"r".$ItemId.
"\"><i>"
628 .$Item->GetMapped(
"Title").
"</i></a>";
629 $GLOBALS[
"AF"]->QueueUniqueTask(array(__CLASS__,
"RunUpdateForItem"),
630 array(intval($ItemId)), $TaskPriority, $TaskDescription);
635 # bail out if item ID is negative (indicating a temporary record)
636 if ($ItemId < 0) {
return; }
638 # check that resource still exists
640 if (!$RFactory->ItemExists($ItemId)) {
return; }
642 # retrieve schema ID of item to use for item type
644 $ItemType = $Resource->SchemaId();
646 # update search data for resource
648 $SearchEngine->UpdateForItem($ItemId, $ItemType);
661 # classifications and names associated with these search results
662 $SearchClasses = array();
663 $SearchNames = array();
665 # disable DB cache for the search suggestions process,
666 # this avoids memory exhaustion.
670 if (count($SearchResults)>0)
672 foreach (array_chunk($SearchResults, 1000, TRUE) as $Chunk)
674 # pull out all the Classifications that were associated
675 # with our search results
676 $DB->Query(
"SELECT ResourceId,ClassificationId FROM ResourceClassInts "
677 .
"WHERE ResourceId IN "
678 .
"(".implode(
",", array_keys($Chunk)).
")");
679 foreach (
$DB->FetchRows() as $Row)
681 $SearchClasses[ $Row[
"ClassificationId"] ] []= $Row[
"ResourceId"] ;
684 # similarly with controlled names
685 $DB->Query(
"SELECT ResourceId,ControlledNameId FROM ResourceNameInts "
686 .
"WHERE ResourceId in "
687 .
"(".implode(
",", array_keys($Chunk)).
")");
688 foreach (
$DB->FetchRows() as $Row)
690 $SearchNames[ $Row[
"ControlledNameId"] ] []= $Row[
"ResourceId"];
695 # generate a map of FieldId -> Field Names for all of the generated facets:
696 $SuggestionsById = array();
698 # pull relevant Classification names out of the DB
699 if (count($SearchClasses)>0)
701 foreach (array_chunk($SearchClasses, 1000, TRUE) as $Chunk)
703 $DB->Query(
"SELECT FieldId,ClassificationId,ClassificationName"
704 .
" FROM Classifications"
705 .
" WHERE ClassificationId"
706 .
" IN (".implode(
",", array_keys($Chunk)).
")");
707 foreach (
$DB->FetchRows() as $Row)
709 $SuggestionsById[$Row[
"FieldId"]] []=
710 array(
"Id" => $Row[
"ClassificationId"],
711 "Name" => $Row[
"ClassificationName"],
712 "Count" => count($SearchClasses[
713 $Row[
"ClassificationId"]]));
718 # pull relevant ControlledNames out of the DB
719 if (count($SearchNames)>0)
721 foreach (array_chunk($SearchNames, 1000, TRUE) as $Chunk)
723 $DB->Query(
"SELECT FieldId,ControlledNameId,ControlledName"
724 .
" FROM ControlledNames"
725 .
" WHERE ControlledNameId"
726 .
" IN (".implode(
",", array_keys($SearchNames)).
")");
727 foreach (
$DB->FetchRows() as $Row)
729 $SuggestionsById[$Row[
"FieldId"]] []=
730 array(
"Id" => $Row[
"ControlledNameId"],
731 "Name" => $Row[
"ControlledName"],
733 $SearchNames[$Row[
"ControlledNameId"]]));
738 # translate the suggestions that we have in terms of the
739 # FieldIds to suggestions in terms of the field names
740 $SuggestionsByFieldName = array();
742 # if we have suggestions to offer
743 if (count($SuggestionsById)>0)
747 # gill in an array that maps FieldNames to search links
748 # which would be appropriate for that field
749 foreach ($SuggestionsById as $FieldId => $FieldValues)
751 $ThisField = $Schema->GetField($FieldId);
753 # bail on fields that didn't exist and on fields that the
754 # current user cannot view, and on fields that are
755 # disabled for advanced searching
756 if (is_object($ThisField) &&
758 $ThisField->IncludeInFacetedSearch() &&
759 $ThisField->Enabled() &&
760 $User->HasPriv($ThisField->ViewingPrivileges()))
762 $SuggestionsByFieldName[ $ThisField->Name() ] = array();
764 foreach ($FieldValues as $Value)
766 $SuggestionsByFieldName [ $ThisField->Name() ] [$Value[
"Id"]] =
767 array(
"Name" => $Value[
"Name"],
"Count" => $Value[
"Count"] );
773 ksort($SuggestionsByFieldName);
775 return $SuggestionsByFieldName;
780 # functions for backward compatability w/ old SPT code
SearchFieldForPhrases($FieldName, $Phrase)
SQL database abstraction object with smart query caching.
AddField($FieldName, $FieldType, $Weight, $UsedInKeywordSearch)
Add field to include in searching.
UpdateForResource($ItemId)
UpdateForItem($ItemId, $ItemType=0)
Update search database for the specified item.
GetFieldContent($ItemId, $FieldName)
Overloaded version of method to retrieve text from DB.
static RunUpdateForItem($ItemId)
SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
Represents a "resource" in CWIS.
static GetResultFacets($SearchResults, $User)
Generate a list of suggested additional search terms that can be used for faceted searching...
static GetItemIdsSortedByField($FieldName, $SortDescending)
Core metadata archive search engine class.
DefaultSearchLogic($NewSetting=NULL)
Get/set default search logic (LOGIC_AND or LOGIC_OR).
DebugLevel($NewValue)
Set debug output level.
Factory for Resource objects.
CWIS-specific user class.
static QueueUpdateForItem($ItemId, $TaskPriority=ApplicationFramework::PRIORITY_LOW)