rss_php.php 5.2 KB

<?php
/*
RSS_PHP - the PHP DOM based RSS Parser
Author: <rssphp.net>
Published: 200801 :: blacknet :: via rssphp.net
RSS_PHP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY.
Usage:
See the documentation at http://rssphp.net/documentation
Examples:
Can be found online at http://rssphp.net/examples
*/
  12. class rss_php {
  13. public $document;
  14. public $channel;
  15. public $items;
  16. /****************************
  17. public load methods
  18. ***/
  19. # load RSS by URL
  20. public function load($url=false, $unblock=true) {
  21. if($url) {
  22. $ch = curl_init();
  23. curl_setopt($ch, CURLOPT_URL, $url);
  24. curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
  25. curl_setopt($ch, CURLOPT_HTTPHEADER,array("Accept-Language: es-es,en"));
  26. curl_setopt($ch, CURLOPT_TIMEOUT, 60);
  27. curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  28. curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
  29. curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
  30. curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
  31. $date = new DateTime();
  32. $expires = gmdate('D, d-M-Y H:i:s \G\M\T', $date->getTimestamp() + 31536000000);
  33. curl_setopt($ch, CURLOPT_COOKIE, "FreedomCookie=true;path=/;expires=".$expires);
  34. //Save Page
  35. $result = curl_exec($ch);
  36. curl_close($ch);
  37. $this->loadParser($result);
  38. }
  39. }
  40. # load raw RSS data
  41. public function loadRSS($rawxml=false) {
  42. if($rawxml) {
  43. $this->loadParser($rawxml);
  44. }
  45. }
  46. /****************************
  47. public load methods
  48. @param $includeAttributes BOOLEAN
  49. return array;
  50. ***/
  51. # return full rss array
  52. public function getRSS($includeAttributes=false) {
  53. if($includeAttributes) {
  54. return $this->document;
  55. }
  56. return $this->valueReturner();
  57. }
  58. # return channel data
  59. public function getChannel($includeAttributes=false) {
  60. if($includeAttributes) {
  61. return $this->channel;
  62. }
  63. return $this->valueReturner($this->channel);
  64. }
  65. # return rss items
  66. public function getItems($includeAttributes=false) {
  67. if($includeAttributes) {
  68. return $this->items;
  69. }
  70. return $this->valueReturner($this->items);
  71. }
  72. /****************************
  73. internal methods
  74. ***/
  75. private function loadParser($rss=false) {
  76. if($rss) {
  77. $this->document = array();
  78. $this->channel = array();
  79. $this->items = array();
  80. $DOMDocument = new DOMDocument;
  81. $DOMDocument->strictErrorChecking = false;
  82. $DOMDocument->loadXML($rss);
  83. $this->document = $this->extractDOM($DOMDocument->childNodes);
  84. }
  85. }
  86. private function valueReturner($valueBlock=false) {
  87. if(!$valueBlock) {
  88. $valueBlock = $this->document;
  89. }
  90. foreach($valueBlock as $valueName => $values) {
  91. if(isset($values['value'])) {
  92. $values = $values['value'];
  93. }
  94. if(is_array($values)) {
  95. $valueBlock[$valueName] = $this->valueReturner($values);
  96. } else {
  97. $valueBlock[$valueName] = $values;
  98. }
  99. }
  100. return $valueBlock;
  101. }
  102. private function extractDOM($nodeList,$parentNodeName=false) {
  103. $itemCounter = 0;
  104. foreach($nodeList as $values) {
  105. if(substr($values->nodeName,0,1) != '#') {
  106. if($values->nodeName == 'item') {
  107. $nodeName = $values->nodeName.':'.$itemCounter;
  108. $itemCounter++;
  109. } else {
  110. $nodeName = $values->nodeName;
  111. }
  112. $tempNode[$nodeName] = array();
  113. if($values->attributes) {
  114. for($i=0;$values->attributes->item($i);$i++) {
  115. $tempNode[$nodeName]['properties'][$values->attributes->item($i)->nodeName] = $values->attributes->item($i)->nodeValue;
  116. }
  117. }
  118. if(!$values->firstChild) {
  119. $tempNode[$nodeName]['value'] = $values->textContent;
  120. } else {
  121. $tempNode[$nodeName]['value'] = $this->extractDOM($values->childNodes, $values->nodeName);
  122. }
  123. if(in_array($parentNodeName, array('channel','rdf:RDF'))) {
  124. if($values->nodeName == 'item') {
  125. $this->items[] = $tempNode[$nodeName]['value'];
  126. } elseif(!in_array($values->nodeName, array('rss','channel'))) {
  127. $this->channel[$values->nodeName] = $tempNode[$nodeName];
  128. }
  129. }
  130. } elseif(substr($values->nodeName,1) == 'text') {
  131. $tempValue = trim(preg_replace('/\s\s+/',' ',str_replace("\n",' ', $values->textContent)));
  132. if($tempValue) {
  133. $tempNode = $tempValue;
  134. }
  135. } elseif(substr($values->nodeName,1) == 'cdata-section'){
  136. $tempNode = $values->textContent;
  137. }
  138. }
  139. return $tempNode;
  140. }
  141. private function randomContext() {
  142. $headerstrings = array();
  143. $headerstrings['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.'.rand(0,2).'; en-US; rv:1.'.rand(2,9).'.'.rand(0,4).'.'.rand(1,9).') Gecko/2007'.rand(10,12).rand(10,30).' Firefox/2.0.'.rand(0,1).'.'.rand(1,9);
  144. $headerstrings['Accept-Charset'] = rand(0,1) ? 'en-gb,en;q=0.'.rand(3,8) : 'en-us,en;q=0.'.rand(3,8);
  145. $headerstrings['Accept-Language'] = 'en-us,en;q=0.'.rand(4,6);
  146. $setHeaders = 'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'."\r\n".
  147. 'Accept-Charset: '.$headerstrings['Accept-Charset']."\r\n".
  148. 'Accept-Language: '.$headerstrings['Accept-Language']."\r\n".
  149. 'User-Agent: '.$headerstrings['User-Agent']."\r\n";
  150. $contextOptions = array(
  151. 'http'=>array(
  152. 'method'=>"GET",
  153. 'header'=>$setHeaders
  154. )
  155. );
  156. return stream_context_create($contextOptions);
  157. }
  158. }
  159. ?>