You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							401 lines
						
					
					
						
							9.3 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							401 lines
						
					
					
						
							9.3 KiB
						
					
					
<?php
/*
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/*  Written by Reverend Jim (jim@revjim.net)
 *
 *  http://revjim.net/code/feedParser/
 */

/**
 * Turns RSS / RDF feed XML into a plain array structure.
 *
 * The XML is first parsed into a tree by XMLParser (a class defined outside
 * this file). The node shape relied on here is the one implied by the
 * accesses in this class: each element is an array with a 'tag' key of the
 * form "PREFIX:NAME" and a 'children' list whose entries are either child
 * element arrays or text strings.
 *
 * Result shape of parseFeed(), parseRDF() and parseRSS():
 *   array(
 *     'channel' => array('title', 'link', 'description', ...) or null,
 *     'item'    => list of array('title', 'link', 'description',
 *                                'date', 'locdate') or null
 *   )
 */
class feedParser {

	/** Version string of this parser. */
	public $version = "0.5";

	/**
	 * HTML Latin-1 named entities (U+00A0 .. U+00FF) and the character each
	 * one is replaced with before the XML is parsed. These names are not
	 * predefined in XML, so they are substituted up front.
	 *
	 * Values are UTF-8 encoded; this assumes the file is saved as UTF-8.
	 * The two invisible characters (nbsp, shy) are written as byte escapes
	 * so that editors cannot silently turn them into something else.
	 */
	public $entities = array(
		'nbsp' =>   "\xC2\xA0", // U+00A0 NO-BREAK SPACE
		'iexcl' =>  "¡",
		'cent' =>   "¢",
		'pound' =>  "£",
		'curren' => "¤",
		'yen' =>    "¥",
		'brvbar' => "¦",
		'sect' =>   "§",
		'uml' =>    "¨",
		'copy' =>   "©",
		'ordf' =>   "ª",
		'laquo' =>  "«",
		'not' =>    "¬",
		'shy' =>    "\xC2\xAD", // U+00AD SOFT HYPHEN
		'reg' =>    "®",
		'macr' =>   "¯",
		'deg' =>    "°",
		'plusmn' => "±",
		'sup2' =>   "²",
		'sup3' =>   "³",
		'acute' =>  "´",
		'micro' =>  "µ",
		'para' =>   "¶",
		'middot' => "·",
		'cedil' =>  "¸",
		'sup1' =>   "¹",
		'ordm' =>   "º",
		'raquo' =>  "»",
		'frac14' => "¼",
		'frac12' => "½",
		'frac34' => "¾",
		'iquest' => "¿",
		'Agrave' => "À",
		'Aacute' => "Á",
		'Acirc' =>  "Â",
		'Atilde' => "Ã",
		'Auml' =>   "Ä",
		'Aring' =>  "Å",
		'AElig' =>  "Æ",
		'Ccedil' => "Ç",
		'Egrave' => "È",
		'Eacute' => "É",
		'Ecirc' =>  "Ê",
		'Euml' =>   "Ë",
		'Igrave' => "Ì",
		'Iacute' => "Í",
		'Icirc' =>  "Î",
		'Iuml' =>   "Ï",
		'ETH' =>    "Ð",
		'Ntilde' => "Ñ",
		'Ograve' => "Ò",
		'Oacute' => "Ó",
		'Ocirc' =>  "Ô",
		'Otilde' => "Õ",
		'Ouml' =>   "Ö",
		'times' =>  "×",
		'Oslash' => "Ø",
		'Ugrave' => "Ù",
		'Uacute' => "Ú",
		'Ucirc' =>  "Û",
		'Uuml' =>   "Ü",
		'Yacute' => "Ý",
		'THORN' =>  "Þ",
		'szlig' =>  "ß",
		'agrave' => "à",
		'aacute' => "á",
		'acirc' =>  "â",
		'atilde' => "ã",
		'auml' =>   "ä",
		'aring' =>  "å",
		'aelig' =>  "æ",
		'ccedil' => "ç",
		'egrave' => "è",
		'eacute' => "é",
		'ecirc' =>  "ê",
		'euml' =>   "ë",
		'igrave' => "ì",
		'iacute' => "í",
		'icirc' =>  "î",
		'iuml' =>   "ï",
		'eth' =>    "ð",
		'ntilde' => "ñ",
		'ograve' => "ò",
		'oacute' => "ó",
		'ocirc' =>  "ô",
		'otilde' => "õ",
		'ouml' =>   "ö",
		'divide' => "÷",
		'oslash' => "ø",
		'ugrave' => "ù",
		'uacute' => "ú",
		'ucirc' =>  "û",
		'uuml' =>   "ü",
		'yacute' => "ý",
		'thorn' =>  "þ",
		'yuml' =>   "ÿ"
	);

	/**
	 * Namespace prefix => namespace URI. Each pair is registered with the
	 * XML parser in buildStruct(); the prefixes are the ones matched in the
	 * "PREFIX:NAME" tag comparisons throughout this class.
	 */
	public $namespaces = array(
		'DC' => 'http://purl.org/dc/elements/1.1/',
		'RDF' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
		'RSS' => 'http://purl.org/rss/1.0/',
		'RSS2'=> 'http://backend.userland.com/rss2',
		'RDF2' => 'http://my.netscape.com/rdf/simple/0.9/'
	);

	/**
	 * Parse raw feed XML into the XMLParser tree structure.
	 *
	 * @param string $xmldata Raw XML text of the feed.
	 * @return mixed Whatever XMLParser::getXmlTree() returns (callers in
	 *               this class only use it when it is an array).
	 */
	public function buildStruct($xmldata) {
		$p = new XMLParser;

		// Register every known namespace under its short prefix.
		foreach ($this->namespaces as $space => $uri) {
			$p->definens($space, $uri);
		}

		// Register the prefix used for the "UNDEF:*" tags matched below
		// (presumably elements without a known namespace - confirm against
		// XMLParser::definens).
		$p->definens("UNDEF");

		// Replace HTML named entities in the local copy before parsing.
		$this->parseEntities($xmldata);

		$p->setXmlData($xmldata);
		$p->buildXmlTree();

		return $p->getXmlTree();
	}

	/**
	 * Replace the HTML named entities listed in $this->entities with their
	 * characters, in place, then strip "&;" sequences whose name is empty
	 * or consists only of spaces.
	 *
	 * @param string $data Text to rewrite; modified by reference.
	 * @return void
	 */
	public function parseEntities(&$data) {
		$search = array();
		$replace = array();
		foreach ($this->entities as $entity => $char) {
			$search[] = '&' . $entity . ';';
			$replace[] = $char;
		}

		// Entity names are plain alphanumerics, so a literal replacement is
		// equivalent to the per-entity regex and needs only one pass setup.
		$data = str_replace($search, $replace, $data);

		// Drop an ampersand followed only by spaces and a semicolon.
		$data = preg_replace('/&[ ]*;/', '', $data);
	}

	/**
	 * Parse a feed document and dispatch on its root element.
	 *
	 * @param string $xmldata Raw XML text of the feed.
	 * @return array|null Feed info (see class comment). A 'warning' key
	 *                    holding newline-terminated messages is added when
	 *                    an unrecognised root element is seen. Null when
	 *                    nothing was recognised and nothing was warned about.
	 */
	public function parseFeed($xmldata) {
		$data = $this->buildStruct($xmldata);
		$info = null;
		$warning = '';

		if (is_array($data)) {
			foreach ($data as $child) {
				if (!is_array($child)) {
					continue;
				}
				$tag = isset($child['tag']) ? $child['tag'] : '';
				switch ($tag) {
					case "RSS:RSS":
					case "UNDEF:RSS":
					case "RSS2:RSS":
						$info = $this->parseRSS($child);
						break;
					case "RDF:RDF":
						$info = $this->parseRDF($child);
						break;
					default:
						$warning .= "Unknown document format: " . $tag . "\n";
						break;
				}
			}
		}

		// Warnings are collected separately so that a recognised root
		// element cannot overwrite them and they never get appended to an
		// undefined variable.
		if ($warning !== '') {
			if (!is_array($info)) {
				$info = array();
			}
			$info["warning"] = $warning;
		}

		return $info;
	}

	/**
	 * Extract channel and items from an RDF root element, where items are
	 * siblings of the channel element.
	 *
	 * @param array $data The RDF:RDF element node.
	 * @return array array('channel' => array|null, 'item' => array|null)
	 */
	public function parseRDF(&$data) {
		$channel = null;
		$item = null;

		foreach ($this->childElements($data) as $child) {
			switch ($child['tag']) {
				case "RSS:CHANNEL":
				case "RDF2:CHANNEL":
					$channel = $this->getRDFChannel($child);
					break;
				case "RSS:ITEM":
				case "RDF2:ITEM":
					$item[] = $this->getRDFItem($child);
					break;
				default:
					break;
			}
		}

		return array('channel' => $channel, 'item' => $item);
	}

	/**
	 * Extract channel and items from an RSS root element, where items are
	 * nested inside the channel element.
	 *
	 * @param array $data The RSS root element node.
	 * @return array|null Result of getRSSChannel() for the last channel
	 *                    found, or null when there is no channel element.
	 */
	public function parseRSS(&$data) {
		$info = null;

		foreach ($this->childElements($data) as $child) {
			switch ($child['tag']) {
				case "RSS:CHANNEL":
				case "RSS2:CHANNEL":
				case "UNDEF:CHANNEL":
					$info = $this->getRSSChannel($child);
					break;
				default:
					break;
			}
		}

		return $info;
	}

	/**
	 * Read title, link, description and creator from an RDF channel node.
	 *
	 * @param array $data Channel element node.
	 * @return array|null Channel fields, or null when none were present.
	 */
	public function getRDFChannel($data) {
		$channel = null;

		foreach ($this->childElements($data) as $child) {
			switch ($child['tag']) {
				case "RSS:TITLE":
				case "RDF2:TITLE":
					$channel['title'] = $this->firstChild($child);
					break;
				case "RSS:LINK":
				case "RDF2:LINK":
					$channel['link'] = $this->firstChild($child);
					break;
				case "RSS:DESCRIPTION":
				case "RDF2:DESCRIPTION":
					$channel['description'] = $this->firstChild($child);
					break;
				case "RSS:WEBMASTER":
					$channel['creator'] = $this->firstChild($child);
					break;
				default:
					break;
			}
		}

		return $channel;
	}

	/**
	 * Read channel fields and nested items from an RSS channel node.
	 *
	 * @param array $data Channel element node.
	 * @return array array('channel' => array|null, 'item' => array|null);
	 *               'lastbuilddate' in the channel is a Unix timestamp, or
	 *               false when the text cannot be parsed.
	 */
	public function getRSSChannel($data) {
		$channel = null;
		$item = null;

		foreach ($this->childElements($data) as $child) {
			switch ($child['tag']) {
				case "UNDEF:TITLE":
				case "RSS:TITLE":
				case "RSS2:TITLE":
					$channel['title'] = $this->firstChild($child);
					break;
				case "UNDEF:LINK":
				case "RSS:LINK":
				case "RSS2:LINK":
					$channel['link'] = $this->firstChild($child);
					break;
				case "UNDEF:DESCRIPTION":
				case "RSS:DESCRIPTION":
				case "RSS2:DESCRIPTION":
					$channel['description'] = $this->firstChild($child);
					break;
				case "UNDEF:ITEM":
				case "RSS:ITEM":
				case "RSS2:ITEM":
					$item[] = $this->getRSSItem($child);
					break;
				case "UNDEF:LASTBUILDDATE":
				case "RSS:LASTBUILDDATE":
				case "RSS2:LASTBUILDDATE":
					$channel['lastbuilddate'] = $this->textToTime($this->firstChild($child));
					break;
				default:
					break;
			}
		}

		return array('channel' => $channel, 'item' => $item);
	}

	/**
	 * Read title, link, description and dc:date from an RDF item node.
	 *
	 * @param array $data Item element node.
	 * @return array|null Item fields, or null when none were present.
	 *                    'date' ignores the UTC offset in dc:date, 'locdate'
	 *                    applies it (see dcDateToUnixTime()).
	 */
	public function getRDFItem($data) {
		$item = null;

		foreach ($this->childElements($data) as $child) {
			switch ($child['tag']) {
				case "RSS:TITLE":
				case "RDF2:TITLE":
					$item['title'] = $this->firstChild($child);
					break;
				case "RSS:LINK":
				case "RDF2:LINK":
					$item['link'] = $this->firstChild($child);
					break;
				case "RSS:DESCRIPTION":
				case "RDF2:DESCRIPTION":
					$item["description"] = $this->firstChild($child);
					break;
				case "DC:DATE":
					$dcdate = $this->firstChild($child);
					$item["date"] = $this->dcDateToUnixTime($dcdate, 0);
					$item["locdate"] = $this->dcDateToUnixTime($dcdate, 1);
					break;
				default:
					break;
			}
		}

		return $item;
	}

	/**
	 * Read title, link, description and date from an RSS item node.
	 *
	 * @param array $data Item element node.
	 * @return array|null Item fields, or null when none were present. For
	 *                    pubDate both 'date' and 'locdate' hold the same
	 *                    strtotime() result.
	 */
	public function getRSSItem($data) {
		$item = null;

		foreach ($this->childElements($data) as $child) {
			switch ($child['tag']) {
				case "UNDEF:TITLE":
				case "RSS:TITLE":
				case "RSS2:TITLE":
					$item['title'] = $this->firstChild($child);
					break;
				case "UNDEF:LINK":
				case "RSS:LINK":
				case "RSS2:LINK":
					$item['link'] = $this->firstChild($child);
					break;
				case "UNDEF:DESCRIPTION":
				case "RSS:DESCRIPTION":
				case "RSS2:DESCRIPTION":
					$item["description"] = $this->firstChild($child);
					break;
				case "DC:DATE":
					$dcdate = $this->firstChild($child);
					$item["date"] = $this->dcDateToUnixTime($dcdate, 0);
					$item["locdate"] = $this->dcDateToUnixTime($dcdate, 1);
					break;
				case "UNDEF:PUBDATE":
				case "RSS:PUBDATE":
				case "RSS2:PUBDATE":
					$stamp = $this->textToTime($this->firstChild($child));
					$item["date"] = $stamp;
					$item["locdate"] = $stamp;
					break;
				default:
					break;
			}
		}

		return $item;
	}

	/**
	 * Convert a dc:date value ("YYYY-MM-DD" optionally followed by
	 * "Thh:mm[:ss[.fraction]]" and "Z" or "+hh:mm" / "-hh:mm") to a Unix
	 * timestamp.
	 *
	 * @param string $dcdate The dc:date text.
	 * @param int    $cvttz  1: honour the UTC designator/offset in the value.
	 *                       Anything else: ignore it, so the wall-clock time
	 *                       is interpreted in PHP's default timezone.
	 * @return int|false Timestamp, or false when strtotime() cannot parse
	 *                   the value.
	 */
	public function dcDateToUnixTime($dcdate, $cvttz = 1) {
		if (!is_string($dcdate)) {
			return false;
		}

		$parts = explode("T", trim($dcdate), 2);
		$date = $parts[0];
		$time = isset($parts[1]) ? $parts[1] : '';

		// Seconds, fractional seconds and the zone designator are all
		// optional; requiring them would silently drop the time of day for
		// values such as "2003-05-01T12:00:00Z".
		$pattern = '/^([0-9]{2}:[0-9]{2}(?::[0-9]{2})?)(?:\.[0-9]+)?(Z|[+-][0-9]{2}:?[0-9]{2})?/';
		if (!preg_match($pattern, $time, $m)) {
			// Date-only (or unrecognised time): fall back to the date part.
			return strtotime($date);
		}

		$stamp = $date . " " . $m[1];
		if ($cvttz == 1 && isset($m[2]) && $m[2] !== '') {
			// "+05:00" is passed on as "+0500"; "Z" means UTC.
			$stamp .= ($m[2] === 'Z') ? ' UTC' : str_replace(':', '', $m[2]);
		}

		return strtotime($stamp);
	}

	/**
	 * Return the child entries of a node that are themselves element
	 * arrays carrying a 'tag'; text children and malformed nodes are
	 * skipped.
	 */
	private function childElements($node) {
		$elements = array();

		if (is_array($node) && isset($node['children']) && is_array($node['children'])) {
			foreach ($node['children'] as $child) {
				if (is_array($child) && isset($child['tag'])) {
					$elements[] = $child;
				}
			}
		}

		return $elements;
	}

	/** Return the first child of a node, or null for an empty element. */
	private function firstChild($node) {
		return isset($node['children'][0]) ? $node['children'][0] : null;
	}

	/** strtotime() for node text; false when the value is not a string. */
	private function textToTime($text) {
		return is_string($text) ? strtotime($text) : false;
	}
}
?>