CWIS Developer Documentation
RSSClient.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: RSSClient.php
4 #
5 # Part of the ScoutLib application support library
6 # Copyright 2002-2013 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu/
8 #
9 
/**
* Implements an RSS client for fetching, parsing, and caching RSS feeds.
*
* The XML is parsed with an XMLParser object (defined elsewhere in the
* library).  When a cache database object is supplied, fetched feeds are
* stored in / read from the RSSClientCache table.
*/
class RSSClient {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    /**
    * Object constructor.  Fetches the feed (from the cache when a fresh
    * entry exists, otherwise from the server) and parses it.
    * @param string $ServerUrl URL of the RSS feed.
    * @param object $CacheDB Database object used for caching, or NULL for
    *       no caching.  (Must provide the Query(), FetchField(), FetchRow()
    *       and NumRowsSelected() methods used by this class.)
    * @param int $RefreshTime Maximum age of a cache entry, in seconds.
    * @param string $Encoding Character encoding handed to the XML parser.
    * @param int $DebugLevel Debug verbosity.  Any non-zero value prints a
    *       summary line; values above 3 are passed (minus 3) to the parser.
    */
    function __construct($ServerUrl, $CacheDB = NULL, $RefreshTime = 600,
            $Encoding = "UTF-8", $DebugLevel = 0)
    {
        # save settings
        $this->DebugLevel = $DebugLevel;
        $this->Encoding = $Encoding;
        $this->CacheDB = $CacheDB;
        $this->RefreshTime = $RefreshTime;

        # query server (or cache) for XML text
        $this->XmlText = $this->QueryServerWithCaching(
                $ServerUrl, $this->CacheDB, $this->RefreshTime);

        # create XML parser and parse text
        $this->ParseXmlText();

        if ($this->DebugLevel) { print("RSSClient->RSSClient() returned ".strlen($this->XmlText)." characters from server query<br>\n"); }
    }

    /**
    * Legacy (PHP 4-style) constructor name, kept so that code calling it
    * explicitly keeps working.  Same-named methods are no longer treated
    * as constructors by PHP 8, hence __construct() above.
    * @see __construct()
    */
    function RSSClient($ServerUrl, $CacheDB = NULL, $RefreshTime = 600,
            $Encoding = "UTF-8", $DebugLevel = 0)
    {
        $this->__construct($ServerUrl, $CacheDB, $RefreshTime,
                $Encoding, $DebugLevel);
    }

    /**
    * Get or set the RSS feed URL.  Setting a different URL re-fetches and
    * re-parses the feed.
    * @param string $NewValue New feed URL.  (OPTIONAL)
    * @return string Current feed URL.
    */
    function ServerUrl($NewValue = NULL)
    {
        # if new RSS server URL supplied
        if (($NewValue != NULL) && ($NewValue != $this->ServerUrl))
        {
            # re-read XML from server at new URL
            # (QueryServerWithCaching() also records the new URL)
            $this->XmlText = $this->QueryServerWithCaching(
                    $NewValue, $this->CacheDB, $this->RefreshTime);

            # create new XML parser and parse text
            $this->ParseXmlText();
        }

        # return RSS server URL to caller
        return $this->ServerUrl;
    }

    /**
    * Get or set the character encoding of the RSS feed.  Setting a
    * different encoding re-reads and re-parses the feed.
    * @param string $NewValue New encoding.  (OPTIONAL)
    * @return string Current encoding.
    */
    function Encoding($NewValue = NULL)
    {
        # if new encoding supplied
        if (($NewValue != NULL) && ($NewValue != $this->Encoding))
        {
            # save new value
            $this->Encoding = $NewValue;

            # re-read XML from server (or cache)
            $this->XmlText = $this->QueryServerWithCaching(
                    $this->ServerUrl, $this->CacheDB, $this->RefreshTime);

            # create new XML parser and parse text
            $this->ParseXmlText();
        }

        # return encoding to caller
        return $this->Encoding;
    }

    /**
    * Try to automatically detect and set the encoding of the RSS feed.
    * The encoding named in the XML declaration wins; failing that, the
    * charset cached from the Content-Type response header is used (only
    * available when caching is enabled); failing that, ISO-8859-1.
    */
    function AutodetectEncoding()
    {
        # if neither the XML file nor the HTTP response headers specify an
        # encoding, there is an overwhelming chance that it's ISO-8859-1, so
        # use it as the default
        $Encoding = "ISO-8859-1";

        # only match up to the encoding portion of the XML declaration
        # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd
        $S = '[ \t\r\n]';
        $Eq = "{$S}?={$S}?";
        $VersionNum = '1.0';
        $EncName = '[A-Za-z]([A-Za-z0-9._]|-)*';
        $VersionInfo = "{$S}version{$Eq}('{$VersionNum}'|\"{$VersionNum}\")";
        $EncodingDecl = "{$S}encoding{$Eq}('{$EncName}'|\"{$EncName}\")";
        $XMLDecl = "<\?xml{$VersionInfo}({$EncodingDecl})?";
        $RegEx = "/{$XMLDecl}/";

        # try to find the encoding (capture group 3 is the quoted encoding
        # name and is only present when an encoding is declared)
        $Matches = array();
        preg_match($RegEx, $this->XmlText, $Matches);

        # give precedence to the encoding specified within the XML file since
        # a RSS feed publisher might not have access to HTTP response headers
        if (count($Matches) >= 4)
        {
            # also need to strip off the quotes
            $Encoding = trim($Matches[3], "'\"");
        }

        # then give precedence to the charset parameter in the Content-Type
        # response header, as recorded in the cache
        else if ($this->CacheDB)
        {
            $DB = $this->CacheDB;
            $ServerUrl = addslashes($this->ServerUrl);

            # get the cached entry for this feed
            $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".$ServerUrl."'");
            $Exists = ($DB->NumRowsSelected() > 0);
            $Cache = $DB->FetchRow();

            # if cached and charset parameter was given in the response headers
            if ($Exists && isset($Cache["Charset"])
                    && strlen($Cache["Charset"]))
            {
                $Encoding = $Cache["Charset"];
            }
        }

        $this->Encoding($Encoding);
    }

    /**
    * Retrieve the RSS items from the RSS feed.
    * @param int $NumberOfItems Maximum number of items to return, or NULL
    *       for all items.  (OPTIONAL)
    * @param string $ChannelName Currently unused.  (OPTIONAL)
    * @return array List of items, each an array with the keys "title",
    *       "description", "link" and "enclosure" (empty if no items found).
    */
    function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
    {
        # start by assuming no items will be found
        $Items = array();

        # move parser to area in XML with items
        $Parser = $this->Parser;
        $Parser->SeekToRoot();
        $Result = $Parser->SeekTo("rss");
        if ($Result === NULL)
        {
            $Result = $Parser->SeekTo("rdf:RDF");
        }
        else
        {
            $Parser->SeekTo("channel");
        }

        # if items are found
        $ItemCount = $Parser->SeekTo("item");
        if ($ItemCount)
        {
            # for each record
            $Index = 0;
            do
            {
                # retrieve item info
                $Items[$Index] = array(
                        "title" => $Parser->GetData("title"),
                        "description" => $Parser->GetData("description"),
                        "link" => $Parser->GetData("link"),
                        "enclosure" => $Parser->GetAttributes("enclosure"),
                        );

                $Index++;
            }
            while ($Parser->NextItem() && (($NumberOfItems == NULL) || ($Index < $NumberOfItems)));
        }

        # return records to caller
        return $Items;
    }

    /**
    * Retrieve the channel title as given in the RSS feed.
    * @return Channel title, as returned by the parser.
    */
    function GetChannelTitle()
    {
        if (!isset($this->ChannelTitle)) { $this->LoadChannelInfo(); }
        return $this->ChannelTitle;
    }

    /**
    * Retrieve the URL to the site of the channel in the RSS feed.
    * @return Channel link, as returned by the parser.
    */
    function GetChannelLink()
    {
        if (!isset($this->ChannelLink)) { $this->LoadChannelInfo(); }
        return $this->ChannelLink;
    }

    /**
    * Get the description of the channel as given in the RSS feed.
    * @return Channel description, as returned by the parser.
    */
    function GetChannelDescription()
    {
        if (!isset($this->ChannelDescription)) { $this->LoadChannelInfo(); }
        return $this->ChannelDescription;
    }

    /**
    * Determine whether the RSS client is using cached data.
    * @return bool TRUE if the most recent query was answered from the
    *       cache, otherwise FALSE.
    */
    function UsedCachedData()
    {
        return $this->CachedDataWasUsed;
    }

    # ---- PRIVATE INTERFACE -------------------------------------------------

    var $CacheDB;
    var $RefreshTime;
    var $ServerUrl;
    var $SetSpec;
    var $DebugLevel;
    var $Encoding;
    var $XmlText;
    var $Parser;
    var $ChannelTitle;
    var $ChannelLink;
    var $ChannelDescription;
    var $CachedDataWasUsed;

    /**
    * Set the current level of verbosity for debug output.
    * @param int $NewLevel New debug level.
    */
    function SetDebugLevel($NewLevel)
    {
        $this->DebugLevel = $NewLevel;
    }

    /**
    * Get the XML text at the given URL, along with the type and character
    * encoding of the text as announced in the Content-Type response header.
    * @param string $Url URL to fetch.
    * @return array Three values: the text (FALSE if the fetch failed), the
    *       media type, and the charset.  Type and charset are NULL unless
    *       a Content-Type header with a charset parameter was received.
    */
    function GetXmlInfo($Url)
    {
        # (errors are deliberately suppressed; failure is reported to the
        # caller via the FALSE return value)
        $Text = @file_get_contents($Url);
        $Type = NULL;
        $Charset = NULL;

        # get the type and charset if the fetch was successful
        if ($Text !== FALSE)
        {
            # this must come after file_get_contents() and before any other
            # remote fetching is done (PHP only sets this variable for HTTP
            # requests, so it is absent for e.g. local files)
            $Headers = isset($http_response_header)
                    ? $http_response_header : array();

            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
            $LWS = '([ \t]*|\r\n[ \t]+)';
            $Token = '[!\x23-\x27*+-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
            # (four backslashes so that the regex sees an escaped backslash
            # followed by a character class, i.e. backslash + any CHAR)
            $QuotedPair = '\\\\[\x00-\x7F]';
            $QdText = "([^\\x00-\\x1F\\x7F\"]|{$LWS})";
            $QuotedString = "\"({$QdText}|{$QuotedPair})*\"";
            $Value = "({$Token}|{$QuotedString})";
            $Parameter = "{$Token}{$LWS}={$LWS}{$Value}";

            # these make the Content-Type regex specific to Content-Type
            # values with charset parameters in them, but make capturing
            # the charset much easier
            $BasicParameter = "(;{$LWS}{$Parameter})*";
            $CharsetParameter = "(;{$LWS}charset{$LWS}={$LWS}{$Value})";
            $ModParameter = "{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
            $MediaType = "({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";

            # back to the spec
            $ContentType = "Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
            $RegEx = "/^{$ContentType}$/i";

            foreach ($Headers as $Header)
            {
                $Matches = array();
                preg_match($RegEx, $Header, $Matches);

                # group 3 is the media type, group 19 the charset value
                if (isset($Matches[3]) && isset($Matches[19]))
                {
                    $Type = $Matches[3];

                    # the charset may be given as a quoted string
                    $Charset = trim($Matches[19], '"');
                    break;
                }
            }
        }

        return array($Text, $Type, $Charset);
    }

    /**
    * Load the XML of an RSS feed from the cache, if available, or from the
    * server.  Freshly fetched XML is written to the cache when a cache
    * database is available.  Also records the URL in $this->ServerUrl and
    * whether the cache was used in $this->CachedDataWasUsed.
    * @param string $ServerUrl URL of the RSS feed.
    * @param object $CacheDB Database object for caching, or NULL to keep
    *       using the one already set on this object (if any).
    * @param int $RefreshTime Maximum age of a cache entry, in seconds.
    * @return string XML text, or an empty string if the fetch failed.
    */
    function QueryServerWithCaching($ServerUrl, $CacheDB, $RefreshTime)
    {
        # save RSS server URL
        $this->ServerUrl = $ServerUrl;

        # save caching info (if any)
        if ($CacheDB)
        {
            $this->CacheDB = $CacheDB;
        }
        $DB = $this->CacheDB;

        # assume cache will not be used
        $this->CachedDataWasUsed = FALSE;

        # NOTE(review): values are escaped with addslashes() as in the rest
        # of this class; switch to the database layer's own escaping or
        # parameter binding if it offers one.
        $EscapedUrl = addslashes($ServerUrl);

        # if caching info was supplied
        if ($DB)
        {
            # look up cached information for this server
            $QueryTimeCutoff = date("Y-m-d H:i:s", (time() - $RefreshTime));
            $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".$EscapedUrl."'
                AND LastQueryTime > '".$QueryTimeCutoff."'");

            # if we have cached info that has not expired
            if ($CachedXml = $DB->FetchField("CachedXml"))
            {
                # use cached info
                $this->CachedDataWasUsed = TRUE;
                return $CachedXml;
            }
        }

        # query server for XML text (whether or not caching is enabled)
        list($Text, $Type, $Charset) = $this->GetXmlInfo($ServerUrl);

        # if query failed
        if ($Text === FALSE)
        {
            return "";
        }

        # if caching is enabled
        if ($DB)
        {
            # clear out any old cache entries
            $DB->Query("
                DELETE FROM RSSClientCache
                WHERE ServerUrl = '".$EscapedUrl."'");

            # save info in cache (type and charset may be NULL)
            $DB->Query("
                INSERT INTO RSSClientCache
                (ServerUrl, CachedXml, Type, Charset, LastQueryTime)
                VALUES (
                '".$EscapedUrl."',
                '".addslashes($Text)."',
                '".addslashes((string)$Type)."',
                '".addslashes((string)$Charset)."',
                NOW())");
        }

        # return query result to caller
        return $Text;
    }

    /**
    * Load information from the current RSS channel (title, link and
    * description) into the corresponding object properties.
    */
    function LoadChannelInfo()
    {
        $Parser = $this->Parser;
        $Parser->SeekToRoot();
        $Result = $Parser->SeekTo("rss");
        if ($Result === NULL)
        {
            $Result = $Parser->SeekTo("rdf:RDF");
        }
        $Parser->SeekTo("channel");
        $this->ChannelTitle = $Parser->GetData("title");
        $this->ChannelLink = $Parser->GetData("link");
        $this->ChannelDescription = $Parser->GetData("description");
    }

    /**
    * Create a fresh XML parser for the current encoding and parse the
    * current XML text with it.  Channel info loaded from previously
    * parsed text is discarded so that it gets reloaded on demand.
    */
    private function ParseXmlText()
    {
        $this->Parser = new XMLParser($this->Encoding);
        if ($this->DebugLevel > 3)
        {
            $this->Parser->SetDebugLevel($this->DebugLevel - 3);
        }
        $this->Parser->ParseText($this->XmlText);

        # forget channel info belonging to any previously-parsed feed
        $this->ChannelTitle = NULL;
        $this->ChannelLink = NULL;
        $this->ChannelDescription = NULL;
    }

}
UsedCachedData()
Determine whether the RSS client is using cached data.
Definition: RSSClient.php:254
GetItems($NumberOfItems=NULL, $ChannelName=NULL)
Retrieve the RSS items from the RSS feed.
Definition: RSSClient.php:179
GetChannelTitle()
Retrieve the channel title as given in the RSS feed.
Definition: RSSClient.php:224
Implements an RSS client for fetching, parsing, and caching RSS feeds.
Definition: RSSClient.php:13
$ChannelDescription
Definition: RSSClient.php:272
GetChannelLink()
Retrieve the URL to the site of the channel in the RSS feed.
Definition: RSSClient.php:234
ServerUrl($NewValue=NULL)
Get or set the RSS feed URL.
Definition: RSSClient.php:58
RSSClient($ServerUrl, $CacheDB=NULL, $RefreshTime=600, $Encoding="UTF-8", $DebugLevel=0)
Object constructor.
Definition: RSSClient.php:29
Encoding($NewValue=NULL)
Get or set the character encoding of the RSS feed.
Definition: RSSClient.php:88
$CachedDataWasUsed
Definition: RSSClient.php:273
SetDebugLevel($NewLevel)
Set the current level of verbosity for debug output.
Definition: RSSClient.php:280
LoadChannelInfo()
Load information from the current RSS channel.
Definition: RSSClient.php:424
AutodetectEncoding()
Try to automatically detect and set the encoding of the RSS feed.
Definition: RSSClient.php:117
QueryServerWithCaching($ServerUrl, $CacheDB, $RefreshTime)
Load the XML of an RSS feed from the cache, if available, or from the server.
Definition: RSSClient.php:354
GetXmlInfo($Url)
Get the XML text at the given URL, along with the type and character encoding of the text.
Definition: RSSClient.php:294
GetChannelDescription()
Get the description of the channel as given in the RSS feed.
Definition: RSSClient.php:244