You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
401 lines
9.3 KiB
401 lines
9.3 KiB
<?
|
|
/*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
/* Written by Reverend Jim (jim@revjim.net)
|
|
*
|
|
* http://revjim.net/code/feedParser/
|
|
*/
|
|
|
|
|
|
class feedParser {
|
|
|
|
var $version = "0.5";
|
|
var $entities = array(
|
|
'nbsp' => " ",
|
|
'iexcl' => "¡",
|
|
'cent' => "¢",
|
|
'pound' => "£",
|
|
'curren' => "¤",
|
|
'yen' => "¥",
|
|
'brvbar' => "¦",
|
|
'sect' => "§",
|
|
'uml' => "¨",
|
|
'copy' => "©",
|
|
'ordf' => "ª",
|
|
'laquo' => "«",
|
|
'not' => "¬",
|
|
'shy' => "­",
|
|
'reg' => "®",
|
|
'macr' => "¯",
|
|
'deg' => "°",
|
|
'plusmn' => "±",
|
|
'sup2' => "²",
|
|
'sup3' => "³",
|
|
'acute' => "´",
|
|
'micro' => "µ",
|
|
'para' => "¶",
|
|
'middot' => "·",
|
|
'cedil' => "¸",
|
|
'sup1' => "¹",
|
|
'ordm' => "º",
|
|
'raquo' => "»",
|
|
'frac14' => "¼",
|
|
'frac12' => "½",
|
|
'frac34' => "¾",
|
|
'iquest' => "¿",
|
|
'Agrave' => "À",
|
|
'Aacute' => "Á",
|
|
'Acirc' => "Â",
|
|
'Atilde' => "Ã",
|
|
'Auml' => "Ä",
|
|
'Aring' => "Å",
|
|
'AElig' => "Æ",
|
|
'Ccedil' => "Ç",
|
|
'Egrave' => "È",
|
|
'Eacute' => "É",
|
|
'Ecirc' => "Ê",
|
|
'Euml' => "Ë",
|
|
'Igrave' => "Ì",
|
|
'Iacute' => "Í",
|
|
'Icirc' => "Î",
|
|
'Iuml' => "Ï",
|
|
'ETH' => "Ð",
|
|
'Ntilde' => "Ñ",
|
|
'Ograve' => "Ò",
|
|
'Oacute' => "Ó",
|
|
'Ocirc' => "Ô",
|
|
'Otilde' => "Õ",
|
|
'Ouml' => "Ö",
|
|
'times' => "×",
|
|
'Oslash' => "Ø",
|
|
'Ugrave' => "Ù",
|
|
'Uacute' => "Ú",
|
|
'Ucirc' => "Û",
|
|
'Uuml' => "Ü",
|
|
'Yacute' => "Ý",
|
|
'THORN' => "Þ",
|
|
'szlig' => "ß",
|
|
'agrave' => "à",
|
|
'aacute' => "á",
|
|
'acirc' => "â",
|
|
'atilde' => "ã",
|
|
'auml' => "ä",
|
|
'aring' => "å",
|
|
'aelig' => "æ",
|
|
'ccedil' => "ç",
|
|
'egrave' => "è",
|
|
'eacute' => "é",
|
|
'ecirc' => "ê",
|
|
'euml' => "ë",
|
|
'igrave' => "ì",
|
|
'iacute' => "í",
|
|
'icirc' => "î",
|
|
'iuml' => "ï",
|
|
'eth' => "ð",
|
|
'ntilde' => "ñ",
|
|
'ograve' => "ò",
|
|
'oacute' => "ó",
|
|
'ocirc' => "ô",
|
|
'otilde' => "õ",
|
|
'ouml' => "ö",
|
|
'divide' => "÷",
|
|
'oslash' => "ø",
|
|
'ugrave' => "ù",
|
|
'uacute' => "ú",
|
|
'ucirc' => "û",
|
|
'uuml' => "ü",
|
|
'yacute' => "ý",
|
|
'thorn' => "þ",
|
|
'yuml' => "ÿ"
|
|
);
|
|
|
|
// Known XML namespaces: short prefix => namespace URI.
// buildStruct() passes every pair to the XML parser's definens().
// NOTE(review): the prefixes look like what appears before the colon in the
// tag names the methods of this class switch on (e.g. "RSS:CHANNEL",
// "DC:DATE") -- confirm against XMLParser.
var $namespaces = array(
|
|
'DC' => 'http://purl.org/dc/elements/1.1/',
|
|
'RDF' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
|
'RSS' => 'http://purl.org/rss/1.0/',
|
|
'RSS2'=> 'http://backend.userland.com/rss2',
|
|
'RDF2' => 'http://my.netscape.com/rdf/simple/0.9/'
|
|
);
|
|
|
|
/**
 * Turn raw feed XML into the tree structure produced by XMLParser.
 *
 * Registers every prefix/URI pair from $this->namespaces plus the "UNDEF"
 * base namespace, rewrites named entities in the input via parseEntities(),
 * then asks the parser to build its tree and returns that tree.
 *
 * @param string $xmldata Raw XML text of the feed.
 * @return mixed The value of XMLParser::getXmlTree(); parseFeed() only
 *               processes it when it is an array.
 */
function buildStruct($xmldata) {
    $parser = new XMLParser();

    // Register the namespaces this class knows about.
    foreach (array_keys($this->namespaces) as $prefix) {
        $parser->definens($prefix, $this->namespaces[$prefix]);
    }

    // Base namespace; it is registered without a URI.
    $parser->definens("UNDEF");

    // parseEntities() receives its argument by reference and edits it in place.
    $this->parseEntities($xmldata);

    // Feed the cleaned-up document to the parser and build the tree.
    $parser->setXmlData($xmldata);
    $parser->buildXmlTree();

    return $parser->getXmlTree();
}
|
|
|
|
/**
 * Replace named character entities in $data, in place.
 *
 * Every "&name;" whose name is a key of $this->entities is swapped for the
 * mapped character. Afterwards any "&;" (optionally with spaces between the
 * ampersand and the semicolon) is removed.
 *
 * @param string $data Text to rewrite; passed by reference and overwritten.
 * @return void
 */
function parseEntities(&$data) {
    $patterns = array();
    $replacements = array();

    foreach ($this->entities as $name => $character) {
        $patterns[] = '/&' . $name . ';/';
        $replacements[] = $character;
    }

    // Last rule: drop empty references such as "&;" or "& ;".
    $patterns[] = '/&[ ]*;/';
    $replacements[] = '';

    // With array arguments preg_replace() applies the pattern/replacement
    // pairs one after another, in the order they were collected above.
    $data = preg_replace($patterns, $replacements, $data);
}
|
|
|
|
|
|
/**
 * Parse a feed document and return its channel/item information.
 *
 * The XML is turned into a tree by buildStruct(). Every array node at the
 * top of that tree is dispatched on its tag: RSS roots go to parseRSS(), RDF
 * roots go to parseRDF(), and any other tag is noted under the "warning" key
 * of the result.
 *
 * @param string $xmldata Raw XML text of the feed.
 * @return array|null Result of parseRSS()/parseRDF(), possibly carrying a
 *                    "warning" string; null when the tree is not an array or
 *                    holds no array nodes.
 */
function parseFeed($xmldata) {
    // Plain call, plain assignment: call-time pass-by-reference (f(&$x)) is a
    // fatal error since PHP 5.4, and buildStruct() does not return a reference.
    $data = $this->buildStruct($xmldata);

    // Defined up front so the final return never reads an undefined variable.
    $info = null;

    if (!is_array($data)) {
        return $info;
    }

    foreach ($data as $child) {
        if (!is_array($child)) {
            continue;
        }

        $tag = isset($child['tag']) ? $child['tag'] : null;

        switch ($tag) {
            case "RSS:RSS":
            case "UNDEF:RSS":
            case "RSS2:RSS":
                // parseRSS() declares its parameter by reference itself.
                $info = $this->parseRSS($child);
                break;
            case "RDF:RDF":
                $info = $this->parseRDF($child);
                break;
            default:
                // Create the key before appending so ".=" has something to
                // append to.
                if (!isset($info["warning"])) {
                    $info["warning"] = "";
                }
                $info["warning"] .= "Unknown document format: " . $tag . "\n";
                break;
        }
    }

    return $info;
}
|
|
|
|
/**
 * Extract the channel and the items from an RDF root node.
 *
 * Channel nodes ("RSS:CHANNEL", "RDF2:CHANNEL") are handed to
 * getRDFChannel(); if several are present the last one wins. Item nodes
 * ("RSS:ITEM", "RDF2:ITEM") are handed to getRDFItem() and collected in
 * document order. All other children are ignored.
 *
 * @param array $data RDF root node; its 'children' entry is walked.
 * @return array array('channel' => array|null, 'item' => array|null); an
 *               entry is null when no matching node was found.
 */
function parseRDF(&$data) {
    // Both results are defined up front so the return statement is always
    // well-formed, even for a document without channel or items.
    $channel = null;
    $item = null;

    if (isset($data['children']) && is_array($data['children'])) {
        foreach ($data['children'] as $child) {
            if (!is_array($child)) {
                continue;
            }

            $tag = isset($child['tag']) ? $child['tag'] : null;

            switch ($tag) {
                case "RSS:CHANNEL":
                case "RDF2:CHANNEL":
                    // No "&" at the call site: call-time pass-by-reference
                    // is a fatal error since PHP 5.4.
                    $channel = $this->getRDFChannel($child);
                    break;
                case "RSS:ITEM":
                case "RDF2:ITEM":
                    $item[] = $this->getRDFItem($child);
                    break;
                default:
                    break;
            }
        }
    }

    return array('channel' => $channel, 'item' => $item);
}
|
|
|
|
/**
 * Extract channel and item information from an RSS root node.
 *
 * The first-level children are scanned for a channel node ("RSS:CHANNEL",
 * "RSS2:CHANNEL" or "UNDEF:CHANNEL"), which is handed to getRSSChannel().
 * If several channels are present the last one wins.
 *
 * @param array $data RSS root node; its 'children' entry is walked.
 * @return array|null The value returned by getRSSChannel(), or null when the
 *                    node has no channel child.
 */
function parseRSS(&$data) {
    // Defined up front: a document without a channel yields null instead of
    // reading an undefined variable.
    $info = null;

    if (!isset($data['children']) || !is_array($data['children'])) {
        return $info;
    }

    foreach ($data['children'] as $child) {
        if (!is_array($child)) {
            continue;
        }

        $tag = isset($child['tag']) ? $child['tag'] : null;

        switch ($tag) {
            case "RSS:CHANNEL":
            case "RSS2:CHANNEL":
            case "UNDEF:CHANNEL":
                // No "&" at the call site: call-time pass-by-reference is a
                // fatal error since PHP 5.4.
                $info = $this->getRSSChannel($child);
                break;
            default:
                break;
        }
    }

    return $info;
}
|
|
|
|
/**
 * Collect the descriptive fields of an RDF channel node.
 *
 * For each recognised child element the first entry of that element's
 * 'children' list is stored under the matching key:
 *   title       <- RSS:TITLE, RDF2:TITLE
 *   link        <- RSS:LINK, RDF2:LINK
 *   description <- RSS:DESCRIPTION, RDF2:DESCRIPTION
 *   creator     <- RSS:WEBMASTER
 * A recognised element without content yields null for its key.
 *
 * @param array $data Channel node; its 'children' entry is walked.
 * @return array|null The collected fields, or null when no recognised
 *                    element was present.
 */
function getRDFChannel($data) {
    // Tag name => key in the returned array.
    $fields = array(
        "RSS:TITLE"        => 'title',
        "RDF2:TITLE"       => 'title',
        "RSS:LINK"         => 'link',
        "RDF2:LINK"        => 'link',
        "RSS:DESCRIPTION"  => 'description',
        "RDF2:DESCRIPTION" => 'description',
        "RSS:WEBMASTER"    => 'creator',
    );

    // Stays null (not an empty array) when nothing matches, which is the
    // value callers have always received for an empty channel.
    $channel = null;

    if (!isset($data['children']) || !is_array($data['children'])) {
        return $channel;
    }

    foreach ($data['children'] as $child) {
        if (!is_array($child) || !isset($child['tag']) || !is_string($child['tag'])) {
            continue;
        }
        if (!isset($fields[$child['tag']])) {
            continue;
        }

        // Empty elements have no first child; store null rather than
        // reading a missing offset.
        $channel[$fields[$child['tag']]] =
            isset($child['children'][0]) ? $child['children'][0] : null;
    }

    return $channel;
}
|
|
|
|
/**
 * Collect the channel fields and the items of an RSS channel node.
 *
 * Recognised children (each in its UNDEF:, RSS: and RSS2: spelling):
 *   TITLE, LINK, DESCRIPTION -> first child stored under 'title', 'link',
 *                               'description' of the channel
 *   LASTBUILDDATE            -> first child run through strtotime() and
 *                               stored under 'lastbuilddate'
 *   ITEM                     -> parsed by getRSSItem() and appended to the
 *                               item list
 *
 * @param array $data Channel node; its 'children' entry is walked.
 * @return array array('channel' => array|null, 'item' => array|null); an
 *               entry is null when the node held nothing for it.
 */
function getRSSChannel($data) {
    // Defined up front so the return statement never reads an undefined
    // variable when the channel is empty or has no items.
    $channel = null;
    $item = null;

    if (isset($data['children']) && is_array($data['children'])) {
        foreach ($data['children'] as $child) {
            if (!is_array($child)) {
                continue;
            }

            $tag = isset($child['tag']) ? $child['tag'] : null;
            // Empty elements have no first child; treat their content as null.
            $text = isset($child['children'][0]) ? $child['children'][0] : null;

            switch ($tag) {
                case "UNDEF:TITLE":
                case "RSS:TITLE":
                case "RSS2:TITLE":
                    $channel['title'] = $text;
                    break;
                case "UNDEF:LINK":
                case "RSS:LINK":
                case "RSS2:LINK":
                    $channel['link'] = $text;
                    break;
                case "UNDEF:DESCRIPTION":
                case "RSS:DESCRIPTION":
                case "RSS2:DESCRIPTION":
                    $channel['description'] = $text;
                    break;
                case "UNDEF:ITEM":
                case "RSS:ITEM":
                case "RSS2:ITEM":
                    // No "&" at the call site: call-time pass-by-reference
                    // is a fatal error since PHP 5.4.
                    $item[] = $this->getRSSItem($child);
                    break;
                case "UNDEF:LASTBUILDDATE":
                case "RSS:LASTBUILDDATE":
                case "RSS2:LASTBUILDDATE":
                    // strtotime() only accepts strings; anything else is
                    // recorded as an unparseable date (false).
                    $channel['lastbuilddate'] = is_string($text) ? strtotime($text) : false;
                    break;
                default:
                    break;
            }
        }
    }

    return array('channel' => $channel, 'item' => $item);
}
|
|
|
|
/**
 * Collect the fields of a single RDF item node.
 *
 * Recognised children and the keys they fill (value = first child of the
 * element, null when the element is empty):
 *   RSS:TITLE / RDF2:TITLE             -> 'title'
 *   RSS:LINK / RDF2:LINK               -> 'link'
 *   RSS:DESCRIPTION / RDF2:DESCRIPTION -> 'description'
 *   DC:DATE -> 'date'    (dcDateToUnixTime() with $cvttz = 0)
 *              'locdate' (dcDateToUnixTime() with $cvttz = 1)
 *
 * @param array $data Item node; its 'children' entry is walked.
 * @return array|null The collected fields, or null when no recognised
 *                    element was present.
 */
function getRDFItem($data) {
    // Stays null when nothing matches, so an empty item is returned as null
    // instead of reading an undefined variable.
    $item = null;

    if (!isset($data['children']) || !is_array($data['children'])) {
        return $item;
    }

    foreach ($data['children'] as $child) {
        if (!is_array($child)) {
            continue;
        }

        $tag = isset($child['tag']) ? $child['tag'] : null;
        // Empty elements have no first child; treat their content as null.
        $text = isset($child['children'][0]) ? $child['children'][0] : null;

        switch ($tag) {
            case "RSS:TITLE":
            case "RDF2:TITLE":
                $item['title'] = $text;
                break;
            case "RSS:LINK":
            case "RDF2:LINK":
                $item['link'] = $text;
                break;
            case "RSS:DESCRIPTION":
            case "RDF2:DESCRIPTION":
                $item["description"] = $text;
                break;
            case "DC:DATE":
                $item["date"] = $this->dcDateToUnixTime($text, 0);
                $item["locdate"] = $this->dcDateToUnixTime($text, 1);
                break;
            default:
                break;
        }
    }

    return $item;
}
|
|
|
|
/**
 * Collect the fields of a single RSS item node.
 *
 * Recognised children (UNDEF:, RSS: and RSS2: spellings unless noted) and
 * the keys they fill; the value is the first child of the element, or null
 * when the element is empty:
 *   TITLE, LINK, DESCRIPTION -> 'title', 'link', 'description'
 *   DC:DATE -> 'date'    (dcDateToUnixTime() with $cvttz = 0)
 *              'locdate' (dcDateToUnixTime() with $cvttz = 1)
 *   PUBDATE -> 'date' and 'locdate', both the strtotime() of the text
 *
 * @param array $data Item node; its 'children' entry is walked.
 * @return array|null The collected fields, or null when no recognised
 *                    element was present.
 */
function getRSSItem($data) {
    // Stays null when nothing matches, so an empty item is returned as null
    // instead of reading an undefined variable.
    $item = null;

    if (!isset($data['children']) || !is_array($data['children'])) {
        return $item;
    }

    foreach ($data['children'] as $child) {
        if (!is_array($child)) {
            continue;
        }

        $tag = isset($child['tag']) ? $child['tag'] : null;
        // Empty elements have no first child; treat their content as null.
        $text = isset($child['children'][0]) ? $child['children'][0] : null;

        switch ($tag) {
            case "UNDEF:TITLE":
            case "RSS:TITLE":
            case "RSS2:TITLE":
                $item['title'] = $text;
                break;
            case "UNDEF:LINK":
            case "RSS:LINK":
            case "RSS2:LINK":
                $item['link'] = $text;
                break;
            case "UNDEF:DESCRIPTION":
            case "RSS:DESCRIPTION":
            case "RSS2:DESCRIPTION":
                $item["description"] = $text;
                break;
            case "DC:DATE":
                $item["date"] = $this->dcDateToUnixTime($text, 0);
                $item["locdate"] = $this->dcDateToUnixTime($text, 1);
                break;
            case "UNDEF:PUBDATE":
            case "RSS:PUBDATE":
            case "RSS2:PUBDATE":
                // Parsed once and stored under both keys. strtotime() only
                // accepts strings; anything else is recorded as false.
                $stamp = is_string($text) ? strtotime($text) : false;
                $item["date"] = $stamp;
                $item["locdate"] = $stamp;
                break;
            default:
                break;
        }
    }

    return $item;
}
|
|
|
|
/**
 * Convert a dc:date style timestamp ("YYYY-MM-DDThh:mm[:ss][.frac][zone]")
 * into a Unix timestamp via strtotime().
 *
 * Accepted after the "T": hours and minutes, optional seconds, optional
 * fractional seconds (discarded), and an optional zone written either as
 * "Z" or as a signed offset ("+02:00", "-0500"). A value without a time
 * part is handed to strtotime() as a plain date.
 *
 * @param string $dcdate Timestamp text.
 * @param int    $cvttz  1 (default): honour the zone in the text, so the
 *                       result is the instant it denotes. Any other value:
 *                       drop the zone and let strtotime() read the clock
 *                       time in PHP's default timezone.
 * @return int|false Unix timestamp, or whatever strtotime() reports for
 *                   text it cannot parse.
 */
function dcDateToUnixTime($dcdate,$cvttz = 1) {
    $dcdate = trim((string) $dcdate);

    // Split once on the date/time separator; a date-only value has no
    // second part.
    $parts = explode("T", $dcdate, 2);
    $date = $parts[0];
    $time = isset($parts[1]) ? $parts[1] : '';

    $found = preg_match(
        '/^([0-9]{2}:[0-9]{2})(?::([0-9]{2}))?(?:[.,][0-9]+)?(Z|[+-][0-9]{2}:?[0-9]{2})?/',
        $time,
        $m
    );

    if (!$found) {
        // No usable time of day: interpret the date on its own.
        return strtotime($date);
    }

    // Seconds are optional in the input; default them to zero.
    $seconds = (isset($m[2]) && $m[2] !== '') ? $m[2] : '00';
    $clock = $m[1] . ':' . $seconds;

    $zone = isset($m[3]) ? $m[3] : '';

    if ($cvttz == 1 && $zone !== '') {
        // Hand the zone to strtotime() as "+hhmm"; "Z" is UTC.
        $offset = ($zone === 'Z') ? '+0000' : str_replace(':', '', $zone);
        return strtotime($date . " " . $clock . $offset);
    }

    return strtotime($date . " " . $clock);
}
|
|
}
|
|
|
|
?>
|
|
|