You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

402 lines
9.3 KiB

<?
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* Written by Reverend Jim (jim@revjim.net)
*
* http://revjim.net/code/feedParser/
*/
/**
 * Turns RSS (0.9x / 1.0 / 2.0) and RDF feed documents into plain PHP arrays.
 *
 * The raw XML is handed to an XMLParser object (a class that is not defined
 * in this file). The methods below only rely on the following shape of the
 * tree it returns: every element is an array with a 'tag' entry holding an
 * upper-case "PREFIX:NAME" string and an optional 'children' list whose
 * entries are either nested element arrays or plain text strings.
 */
class feedParser {
    var $version = "0.5";

    // Named Latin-1 HTML entities mapped to numeric character references,
    // so that feeds using them can be read by an XML parser that only knows
    // the predefined XML entities.
    var $entities = array(
        'nbsp' => "&#160;",
        'iexcl' => "&#161;",
        'cent' => "&#162;",
        'pound' => "&#163;",
        'curren' => "&#164;",
        'yen' => "&#165;",
        'brvbar' => "&#166;",
        'sect' => "&#167;",
        'uml' => "&#168;",
        'copy' => "&#169;",
        'ordf' => "&#170;",
        'laquo' => "&#171;",
        'not' => "&#172;",
        'shy' => "&#173;",
        'reg' => "&#174;",
        'macr' => "&#175;",
        'deg' => "&#176;",
        'plusmn' => "&#177;",
        'sup2' => "&#178;",
        'sup3' => "&#179;",
        'acute' => "&#180;",
        'micro' => "&#181;",
        'para' => "&#182;",
        'middot' => "&#183;",
        'cedil' => "&#184;",
        'sup1' => "&#185;",
        'ordm' => "&#186;",
        'raquo' => "&#187;",
        'frac14' => "&#188;",
        'frac12' => "&#189;",
        'frac34' => "&#190;",
        'iquest' => "&#191;",
        'Agrave' => "&#192;",
        'Aacute' => "&#193;",
        'Acirc' => "&#194;",
        'Atilde' => "&#195;",
        'Auml' => "&#196;",
        'Aring' => "&#197;",
        'AElig' => "&#198;",
        'Ccedil' => "&#199;",
        'Egrave' => "&#200;",
        'Eacute' => "&#201;",
        'Ecirc' => "&#202;",
        'Euml' => "&#203;",
        'Igrave' => "&#204;",
        'Iacute' => "&#205;",
        'Icirc' => "&#206;",
        'Iuml' => "&#207;",
        'ETH' => "&#208;",
        'Ntilde' => "&#209;",
        'Ograve' => "&#210;",
        'Oacute' => "&#211;",
        'Ocirc' => "&#212;",
        'Otilde' => "&#213;",
        'Ouml' => "&#214;",
        'times' => "&#215;",
        'Oslash' => "&#216;",
        'Ugrave' => "&#217;",
        'Uacute' => "&#218;",
        'Ucirc' => "&#219;",
        'Uuml' => "&#220;",
        'Yacute' => "&#221;",
        'THORN' => "&#222;",
        'szlig' => "&#223;",
        'agrave' => "&#224;",
        'aacute' => "&#225;",
        'acirc' => "&#226;",
        'atilde' => "&#227;",
        'auml' => "&#228;",
        'aring' => "&#229;",
        'aelig' => "&#230;",
        'ccedil' => "&#231;",
        'egrave' => "&#232;",
        'eacute' => "&#233;",
        'ecirc' => "&#234;",
        'euml' => "&#235;",
        'igrave' => "&#236;",
        'iacute' => "&#237;",
        'icirc' => "&#238;",
        'iuml' => "&#239;",
        'eth' => "&#240;",
        'ntilde' => "&#241;",
        'ograve' => "&#242;",
        'oacute' => "&#243;",
        'ocirc' => "&#244;",
        'otilde' => "&#245;",
        'ouml' => "&#246;",
        'divide' => "&#247;",
        'oslash' => "&#248;",
        'ugrave' => "&#249;",
        'uacute' => "&#250;",
        'ucirc' => "&#251;",
        'uuml' => "&#252;",
        'yacute' => "&#253;",
        'thorn' => "&#254;",
        'yuml' => "&#255;"
    );

    // Namespace prefix => namespace URI. The prefixes are the ones matched
    // against in the tag names handled by the parse*/get* methods.
    var $namespaces = array(
        'DC' => 'http://purl.org/dc/elements/1.1/',
        'RDF' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'RSS' => 'http://purl.org/rss/1.0/',
        'RSS2'=> 'http://backend.userland.com/rss2',
        'RDF2' => 'http://my.netscape.com/rdf/simple/0.9/'
    );

    /**
     * Runs the XML text through XMLParser and returns the resulting tree.
     *
     * @param string $xmldata Raw feed document.
     * @return mixed Whatever XMLParser::getXmlTree() returns.
     */
    function buildStruct($xmldata) {
        // Create a parser object
        $p = new XMLParser;
        // Define our known namespaces
        foreach ($this->namespaces as $space => $uri) {
            $p->definens($space, $uri);
        }
        // Define base namespace
        $p->definens("UNDEF");
        // Replace named HTML entities before the XML parser sees them
        $this->parseEntities($xmldata);
        // Hand the document to the parser.
        $p->setXmlData($xmldata);
        // Tell the parser to build the tree.
        $p->buildXmlTree();
        return $p->getXmlTree();
    }

    /**
     * Replaces every "&name;" listed in $this->entities with its numeric
     * character reference and strips empty references ("&;", "& ;", ...).
     * The string is modified in place.
     *
     * @param string $data Document text, passed by reference.
     */
    function parseEntities(&$data) {
        $map = array();
        foreach ($this->entities as $entity => $replace) {
            $map['&' . $entity . ';'] = $replace;
        }
        // A single strtr() pass gives the same result as one replacement per
        // entity: no replacement text contains another entity name.
        if (!empty($map)) {
            $data = strtr($data, $map);
        }
        $data = preg_replace('/&[ ]*;/', '', $data);
    }

    /**
     * Parses a complete feed document.
     *
     * @param string $xmldata Raw feed document.
     * @return array|null The result of parseRSS()/parseRDF() for the last
     *                    recognised root element. Unrecognised root elements
     *                    append a line to the 'warning' entry. NULL when the
     *                    tree contains no element at all.
     */
    function parseFeed($xmldata) {
        $info = null;
        $data = $this->buildStruct($xmldata);
        if (!is_array($data)) {
            return $info;
        }
        foreach ($data as $child) {
            if (!is_array($child)) {
                continue;
            }
            $tag = $this->_tag($child);
            switch ($tag) {
                case "RSS:RSS":
                case "UNDEF:RSS":
                case "RSS2:RSS":
                    $info = $this->parseRSS($child);
                    break;
                case "RDF:RDF":
                    $info = $this->parseRDF($child);
                    break;
                default:
                    if (!is_array($info)) {
                        $info = array();
                    }
                    if (!isset($info["warning"])) {
                        $info["warning"] = "";
                    }
                    $info["warning"] .= "Unknown document format: " . $tag . "\n";
                    break;
            }
        }
        return $info;
    }

    /**
     * Extracts channel and items from an RDF root element.
     *
     * @param array $data The RDF:RDF element.
     * @return array array('channel' => array|null, 'item' => array|null);
     *               an entry is NULL when no such element was found.
     */
    function parseRDF(&$data) {
        $channel = null;
        $item = null;
        foreach ($this->_elements($data) as $child) {
            switch ($this->_tag($child)) {
                case "RSS:CHANNEL":
                case "RDF2:CHANNEL":
                    $channel = $this->getRDFChannel($child);
                    break;
                case "RSS:ITEM":
                case "RDF2:ITEM":
                    $item[] = $this->getRDFItem($child);
                    break;
                default:
                    break;
            }
        }
        return array('channel' => $channel, 'item' => $item);
    }

    /**
     * Extracts channel and items from an RSS root element.
     *
     * @param array $data The RSS element.
     * @return array|null Result of getRSSChannel() for the last channel
     *                    element found, or NULL when there is none.
     */
    function parseRSS(&$data) {
        $info = null;
        foreach ($this->_elements($data) as $child) {
            switch ($this->_tag($child)) {
                case "RSS:CHANNEL":
                case "RSS2:CHANNEL":
                case "UNDEF:CHANNEL":
                    $info = $this->getRSSChannel($child);
                    break;
                default:
                    break;
            }
        }
        return $info;
    }

    /**
     * Reads title, link, description and creator (taken from the webmaster
     * element) of an RDF channel element.
     *
     * @param array $data Channel element.
     * @return array|null Found fields keyed by name, NULL when none found.
     */
    function getRDFChannel($data) {
        $channel = null;
        foreach ($this->_elements($data) as $child) {
            switch ($this->_tag($child)) {
                case "RSS:TITLE":
                case "RDF2:TITLE":
                    $channel['title'] = $this->_text($child);
                    break;
                case "RSS:LINK":
                case "RDF2:LINK":
                    $channel['link'] = $this->_text($child);
                    break;
                case "RSS:DESCRIPTION":
                case "RDF2:DESCRIPTION":
                    $channel['description'] = $this->_text($child);
                    break;
                case "RSS:WEBMASTER":
                    $channel['creator'] = $this->_text($child);
                    break;
                default:
                    break;
            }
        }
        return $channel;
    }

    /**
     * Reads an RSS channel element together with the items nested in it.
     *
     * @param array $data Channel element.
     * @return array array('channel' => array|null, 'item' => array|null).
     *               'channel' may hold title, link, description and
     *               lastbuilddate (a strtotime() result).
     */
    function getRSSChannel($data) {
        $channel = null;
        $item = null;
        foreach ($this->_elements($data) as $child) {
            switch ($this->_tag($child)) {
                case "UNDEF:TITLE":
                case "RSS:TITLE":
                case "RSS2:TITLE":
                    $channel['title'] = $this->_text($child);
                    break;
                case "UNDEF:LINK":
                case "RSS:LINK":
                case "RSS2:LINK":
                    $channel['link'] = $this->_text($child);
                    break;
                case "UNDEF:DESCRIPTION":
                case "RSS:DESCRIPTION":
                case "RSS2:DESCRIPTION":
                    $channel['description'] = $this->_text($child);
                    break;
                case "UNDEF:ITEM":
                case "RSS:ITEM":
                case "RSS2:ITEM":
                    $item[] = $this->getRSSItem($child);
                    break;
                case "UNDEF:LASTBUILDDATE":
                case "RSS:LASTBUILDDATE":
                case "RSS2:LASTBUILDDATE":
                    $channel['lastbuilddate'] = strtotime((string) $this->_text($child));
                    break;
                default:
                    break;
            }
        }
        return array('channel' => $channel, 'item' => $item);
    }

    /**
     * Reads title, link, description and dc:date of an RDF item element.
     *
     * @param array $data Item element.
     * @return array|null Found fields keyed by name, NULL when none found.
     *                    A dc:date yields both 'date' (offset ignored) and
     *                    'locdate' (offset applied), see dcDateToUnixTime().
     */
    function getRDFItem($data) {
        $item = null;
        foreach ($this->_elements($data) as $child) {
            switch ($this->_tag($child)) {
                case "RSS:TITLE":
                case "RDF2:TITLE":
                    $item['title'] = $this->_text($child);
                    break;
                case "RSS:LINK":
                case "RDF2:LINK":
                    $item['link'] = $this->_text($child);
                    break;
                case "RSS:DESCRIPTION":
                case "RDF2:DESCRIPTION":
                    $item["description"] = $this->_text($child);
                    break;
                case "DC:DATE":
                    $stamp = $this->_text($child);
                    $item["date"] = $this->dcDateToUnixTime($stamp, 0);
                    $item["locdate"] = $this->dcDateToUnixTime($stamp, 1);
                    break;
                default:
                    break;
            }
        }
        return $item;
    }

    /**
     * Reads title, link, description and the date (dc:date or pubDate) of
     * an RSS item element.
     *
     * @param array $data Item element.
     * @return array|null Found fields keyed by name, NULL when none found.
     *                    For pubDate, 'date' and 'locdate' hold the same
     *                    strtotime() result.
     */
    function getRSSItem($data) {
        $item = null;
        foreach ($this->_elements($data) as $child) {
            switch ($this->_tag($child)) {
                case "UNDEF:TITLE":
                case "RSS:TITLE":
                case "RSS2:TITLE":
                    $item['title'] = $this->_text($child);
                    break;
                case "UNDEF:LINK":
                case "RSS:LINK":
                case "RSS2:LINK":
                    $item['link'] = $this->_text($child);
                    break;
                case "UNDEF:DESCRIPTION":
                case "RSS:DESCRIPTION":
                case "RSS2:DESCRIPTION":
                    $item["description"] = $this->_text($child);
                    break;
                case "DC:DATE":
                    $stamp = $this->_text($child);
                    $item["date"] = $this->dcDateToUnixTime($stamp, 0);
                    $item["locdate"] = $this->dcDateToUnixTime($stamp, 1);
                    break;
                case "UNDEF:PUBDATE":
                case "RSS:PUBDATE":
                case "RSS2:PUBDATE":
                    $published = strtotime((string) $this->_text($child));
                    $item["date"] = $published;
                    $item["locdate"] = $published;
                    break;
                default:
                    break;
            }
        }
        return $item;
    }

    /**
     * Converts a W3CDTF / dc:date value ("YYYY-MM-DD", optionally followed
     * by "Thh:mm", "Thh:mm:ss" or fractional seconds and a "Z" or "+hh:mm" /
     * "-hh:mm" zone designator) to a Unix timestamp.
     *
     * @param string $dcdate Date text.
     * @param int    $cvttz  1: honour the zone designator of the value.
     *                       Anything else: drop the designator, so that
     *                       strtotime() reads the wall-clock time in PHP's
     *                       default timezone.
     * @return int|false Timestamp, or FALSE when strtotime() cannot parse it.
     */
    function dcDateToUnixTime($dcdate, $cvttz = 1) {
        $parts = explode("T", trim((string) $dcdate), 2);
        $date = $parts[0];
        $time = isset($parts[1]) ? $parts[1] : '';

        $clock = '';
        $zone = '';
        // 1: hh:mm  2: :ss  3: Z  4: sign  5: offset hours  6: offset minutes
        $pattern = '/^([0-9]{2}:[0-9]{2})(:[0-9]{2})?(?:[.,][0-9]+)?\s*'
            . '(?:(Z)|([+-])([0-9]{2}):?([0-9]{2}))?/i';
        if (preg_match($pattern, $time, $m)) {
            // Always hand strtotime() a full hh:mm:ss clock.
            $clock = $m[1] . ((isset($m[2]) && $m[2] !== '') ? $m[2] : ':00');
            if (!empty($m[3])) {
                $zone = '+0000';
            } elseif (!empty($m[4])) {
                $zone = $m[4] . $m[5] . $m[6];
            }
        }

        $stamp = $date;
        if ($clock !== '') {
            $stamp .= " " . $clock;
            if ($cvttz == 1) {
                $stamp .= $zone;
            }
        }
        return strtotime($stamp);
    }

    /**
     * Internal helper: returns the child entries of $data that are element
     * arrays, skipping text entries. Returns an empty list when $data has
     * no usable 'children' entry.
     */
    function _elements($data) {
        $found = array();
        if (is_array($data) && isset($data['children']) && is_array($data['children'])) {
            foreach ($data['children'] as $child) {
                if (is_array($child)) {
                    $found[] = $child;
                }
            }
        }
        return $found;
    }

    /**
     * Internal helper: tag name of an element, or '' when it has none.
     */
    function _tag($node) {
        return isset($node['tag']) ? $node['tag'] : '';
    }

    /**
     * Internal helper: first child of an element (its text for the simple
     * elements read here), or NULL for an empty element.
     */
    function _text($node) {
        return isset($node['children'][0]) ? $node['children'][0] : null;
    }
}
?>