[ Index ]
 

Code source de XOOPS 2.0.17.1

Accédez au Source d'autres logiciels libres

title

Body

[fermer]

/htdocs/class/ -> snoopy.php (source)

   1  <?php
   2  
   3  /*************************************************
   4  
   5  Snoopy - the PHP net client
   6  Author: Monte Ohrt <monte@ispi.net>
   7  Copyright (c): 1999-2000 ispi, all rights reserved
   8  Version: 1.01
   9  
  10   * This library is free software; you can redistribute it and/or
  11   * modify it under the terms of the GNU Lesser General Public
  12   * License as published by the Free Software Foundation; either
  13   * version 2.1 of the License, or (at your option) any later version.
  14   *
  15   * This library is distributed in the hope that it will be useful,
  16   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18   * Lesser General Public License for more details.
  19   *
  20   * You should have received a copy of the GNU Lesser General Public
  21   * License along with this library; if not, write to the Free Software
  22   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  23  
  24  You may contact the author of Snoopy by e-mail at:
  25  monte@ispi.net
  26  
  27  Or, write to:
  28  Monte Ohrt
  29  CTO, ispi
  30  237 S. 70th suite 220
  31  Lincoln, NE 68510
  32  
  33  The latest version of Snoopy can be obtained from:
  34  http://snoopy.sourceforge.net/
  35  
  36  *************************************************/
  37  
  38  class Snoopy
  39  {
  40      /**** Public variables ****/
  41      
  42      /* user definable vars */
  43  
  44      var $host            =    "www.php.net";        // host name we are connecting to
  45      var $port            =    80;                    // port we are connecting to
  46      var $proxy_host        =    "";                    // proxy host to use
  47      var $proxy_port        =    "";                    // proxy port to use
  48      var $proxy_user        =    "";                    // proxy user to use
  49      var $proxy_pass        =    "";                    // proxy password to use
  50      
  51      var $agent            =    "Snoopy v1.2.3";    // agent we masquerade as
  52      var    $referer        =    "";                    // referer info to pass
  53      var $cookies        =    array();            // array of cookies to pass
  54                                                  // $cookies["username"]="joe";
  55      var    $rawheaders        =    array();            // array of raw headers to send
  56                                                  // $rawheaders["Content-type"]="text/html";
  57  
  58      var $maxredirs        =    5;                    // http redirection depth maximum. 0 = disallow
  59      var $lastredirectaddr    =    "";                // contains address of last redirected address
  60      var    $offsiteok        =    true;                // allows redirection off-site
  61      var $maxframes        =    0;                    // frame content depth maximum. 0 = disallow
  62      var $expandlinks    =    true;                // expand links to fully qualified URLs.
  63                                                  // this only applies to fetchlinks()
  64                                                  // submitlinks(), and submittext()
  65      var $passcookies    =    true;                // pass set cookies back through redirects
  66                                                  // NOTE: this currently does not respect
  67                                                  // dates, domains or paths.
  68      
  69      var    $user            =    "";                    // user for http authentication
  70      var    $pass            =    "";                    // password for http authentication
  71      
  72      // http accept types
  73      var $accept            =    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  74      
  75      var $results        =    "";                    // where the content is put
  76          
  77      var $error            =    "";                    // error messages sent here
  78      var    $response_code    =    "";                    // response code returned from server
  79      var    $headers        =    array();            // headers returned from server sent here
  80      var    $maxlength        =    500000;                // max return data length (body)
  81      var $read_timeout    =    0;                    // timeout on read operations, in seconds
  82                                                  // supported only since PHP 4 Beta 4
  83                                                  // set to 0 to disallow timeouts
  84      var $timed_out        =    false;                // if a read operation timed out
  85      var    $status            =    0;                    // http request status
  86  
  87      var $temp_dir        =    "/tmp";                // temporary directory that the webserver
  88                                                  // has permission to write to.
  89                                                  // under Windows, this should be C:\temp
  90  
  91      var    $curl_path        =    "/usr/local/bin/curl";
  92                                                  // Snoopy will use cURL for fetching
  93                                                  // SSL content if a full system path to
  94                                                  // the cURL binary is supplied here.
  95                                                  // set to false if you do not have
  96                                                  // cURL installed. See http://curl.haxx.se
  97                                                  // for details on installing cURL.
  98                                                  // Snoopy does *not* use the cURL
  99                                                  // library functions built into php,
 100                                                  // as these functions are not stable
 101                                                  // as of this Snoopy release.
 102      
 103      /**** Private variables ****/    
 104      
 105      var    $_maxlinelen    =    4096;                // max line length (headers)
 106      
 107      var $_httpmethod    =    "GET";                // default http request method
 108      var $_httpversion    =    "HTTP/1.0";            // default http request version
 109      var $_submit_method    =    "POST";                // default submit method
 110      var $_submit_type    =    "application/x-www-form-urlencoded";    // default submit type
 111      var $_mime_boundary    =   "";                    // MIME boundary for multipart/form-data submit type
 112      var $_redirectaddr    =    false;                // will be set if page fetched is a redirect
 113      var $_redirectdepth    =    0;                    // increments on an http redirect
 114      var $_frameurls        =     array();            // frame src urls
 115      var $_framedepth    =    0;                    // increments on frame depth
 116      
 117      var $_isproxy        =    false;                // set if using a proxy server
 118      var $_fp_timeout    =    30;                    // timeout for socket connection
 119  
 120  /*======================================================================*\
 121      Function:    fetch
 122      Purpose:    fetch the contents of a web page
 123                  (and possibly other protocols in the
 124                  future like ftp, nntp, gopher, etc.)
 125      Input:        $URI    the location of the page to fetch
 126      Output:        $this->results    the output text from the fetch
 127  \*======================================================================*/
 128  
 129  	function fetch($URI)
 130      {
 131      
 132          //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
 133          $URI_PARTS = parse_url($URI);
 134          if (!empty($URI_PARTS["user"]))
 135              $this->user = $URI_PARTS["user"];
 136          if (!empty($URI_PARTS["pass"]))
 137              $this->pass = $URI_PARTS["pass"];
 138          if (empty($URI_PARTS["query"]))
 139              $URI_PARTS["query"] = '';
 140          if (empty($URI_PARTS["path"]))
 141              $URI_PARTS["path"] = '';
 142                  
 143          switch(strtolower($URI_PARTS["scheme"]))
 144          {
 145              case "http":
 146                  $this->host = $URI_PARTS["host"];
 147                  if(!empty($URI_PARTS["port"]))
 148                      $this->port = $URI_PARTS["port"];
 149                  if($this->_connect($fp))
 150                  {
 151                      if($this->_isproxy)
 152                      {
 153                          // using proxy, send entire URI
 154                          $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
 155                      }
 156                      else
 157                      {
 158                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 159                          // no proxy, send only the path
 160                          $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
 161                      }
 162                      
 163                      $this->_disconnect($fp);
 164  
 165                      if($this->_redirectaddr)
 166                      {
 167                          /* url was redirected, check if we've hit the max depth */
 168                          if($this->maxredirs > $this->_redirectdepth)
 169                          {
 170                              // only follow redirect if it's on this site, or offsiteok is true
 171                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 172                              {
 173                                  /* follow the redirect */
 174                                  $this->_redirectdepth++;
 175                                  $this->lastredirectaddr=$this->_redirectaddr;
 176                                  $this->fetch($this->_redirectaddr);
 177                              }
 178                          }
 179                      }
 180  
 181                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 182                      {
 183                          $frameurls = $this->_frameurls;
 184                          $this->_frameurls = array();
 185                          
 186                          while(list(,$frameurl) = each($frameurls))
 187                          {
 188                              if($this->_framedepth < $this->maxframes)
 189                              {
 190                                  $this->fetch($frameurl);
 191                                  $this->_framedepth++;
 192                              }
 193                              else
 194                                  break;
 195                          }
 196                      }                    
 197                  }
 198                  else
 199                  {
 200                      return false;
 201                  }
 202                  return true;                    
 203                  break;
 204              case "https":
 205                  if(!$this->curl_path)
 206                      return false;
 207                  if(function_exists("is_executable"))
 208                      if (!is_executable($this->curl_path))
 209                          return false;
 210                  $this->host = $URI_PARTS["host"];
 211                  if(!empty($URI_PARTS["port"]))
 212                      $this->port = $URI_PARTS["port"];
 213                  if($this->_isproxy)
 214                  {
 215                      // using proxy, send entire URI
 216                      $this->_httpsrequest($URI,$URI,$this->_httpmethod);
 217                  }
 218                  else
 219                  {
 220                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 221                      // no proxy, send only the path
 222                      $this->_httpsrequest($path, $URI, $this->_httpmethod);
 223                  }
 224  
 225                  if($this->_redirectaddr)
 226                  {
 227                      /* url was redirected, check if we've hit the max depth */
 228                      if($this->maxredirs > $this->_redirectdepth)
 229                      {
 230                          // only follow redirect if it's on this site, or offsiteok is true
 231                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 232                          {
 233                              /* follow the redirect */
 234                              $this->_redirectdepth++;
 235                              $this->lastredirectaddr=$this->_redirectaddr;
 236                              $this->fetch($this->_redirectaddr);
 237                          }
 238                      }
 239                  }
 240  
 241                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 242                  {
 243                      $frameurls = $this->_frameurls;
 244                      $this->_frameurls = array();
 245  
 246                      while(list(,$frameurl) = each($frameurls))
 247                      {
 248                          if($this->_framedepth < $this->maxframes)
 249                          {
 250                              $this->fetch($frameurl);
 251                              $this->_framedepth++;
 252                          }
 253                          else
 254                              break;
 255                      }
 256                  }                    
 257                  return true;                    
 258                  break;
 259              default:
 260                  // not a valid protocol
 261                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 262                  return false;
 263                  break;
 264          }        
 265          return true;
 266      }
 267  
 268  /*======================================================================*\
 269      Function:    submit
 270      Purpose:    submit an http form
 271      Input:        $URI    the location to post the data
 272                  $formvars    the formvars to use.
 273                      format: $formvars["var"] = "val";
 274                  $formfiles  an array of files to submit
 275                      format: $formfiles["var"] = "/dir/filename.ext";
 276      Output:        $this->results    the text output from the post
 277  \*======================================================================*/
 278  
 279  	function submit($URI, $formvars="", $formfiles="")
 280      {
 281          unset($postdata);
 282          
 283          $postdata = $this->_prepare_post_body($formvars, $formfiles);
 284              
 285          $URI_PARTS = parse_url($URI);
 286          if (!empty($URI_PARTS["user"]))
 287              $this->user = $URI_PARTS["user"];
 288          if (!empty($URI_PARTS["pass"]))
 289              $this->pass = $URI_PARTS["pass"];
 290          if (empty($URI_PARTS["query"]))
 291              $URI_PARTS["query"] = '';
 292          if (empty($URI_PARTS["path"]))
 293              $URI_PARTS["path"] = '';
 294  
 295          switch(strtolower($URI_PARTS["scheme"]))
 296          {
 297              case "http":
 298                  $this->host = $URI_PARTS["host"];
 299                  if(!empty($URI_PARTS["port"]))
 300                      $this->port = $URI_PARTS["port"];
 301                  if($this->_connect($fp))
 302                  {
 303                      if($this->_isproxy)
 304                      {
 305                          // using proxy, send entire URI
 306                          $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
 307                      }
 308                      else
 309                      {
 310                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 311                          // no proxy, send only the path
 312                          $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 313                      }
 314                      
 315                      $this->_disconnect($fp);
 316  
 317                      if($this->_redirectaddr)
 318                      {
 319                          /* url was redirected, check if we've hit the max depth */
 320                          if($this->maxredirs > $this->_redirectdepth)
 321                          {                        
 322                              if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 323                                  $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);                        
 324                              
 325                              // only follow redirect if it's on this site, or offsiteok is true
 326                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 327                              {
 328                                  /* follow the redirect */
 329                                  $this->_redirectdepth++;
 330                                  $this->lastredirectaddr=$this->_redirectaddr;
 331                                  if( strpos( $this->_redirectaddr, "?" ) > 0 )
 332                                      $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 333                                  else
 334                                      $this->submit($this->_redirectaddr,$formvars, $formfiles);
 335                              }
 336                          }
 337                      }
 338  
 339                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 340                      {
 341                          $frameurls = $this->_frameurls;
 342                          $this->_frameurls = array();
 343                          
 344                          while(list(,$frameurl) = each($frameurls))
 345                          {                                                        
 346                              if($this->_framedepth < $this->maxframes)
 347                              {
 348                                  $this->fetch($frameurl);
 349                                  $this->_framedepth++;
 350                              }
 351                              else
 352                                  break;
 353                          }
 354                      }                    
 355                      
 356                  }
 357                  else
 358                  {
 359                      return false;
 360                  }
 361                  return true;                    
 362                  break;
 363              case "https":
 364                  if(!$this->curl_path)
 365                      return false;
 366                  if(function_exists("is_executable"))
 367                      if (!is_executable($this->curl_path))
 368                          return false;
 369                  $this->host = $URI_PARTS["host"];
 370                  if(!empty($URI_PARTS["port"]))
 371                      $this->port = $URI_PARTS["port"];
 372                  if($this->_isproxy)
 373                  {
 374                      // using proxy, send entire URI
 375                      $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 376                  }
 377                  else
 378                  {
 379                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 380                      // no proxy, send only the path
 381                      $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 382                  }
 383  
 384                  if($this->_redirectaddr)
 385                  {
 386                      /* url was redirected, check if we've hit the max depth */
 387                      if($this->maxredirs > $this->_redirectdepth)
 388                      {                        
 389                          if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 390                              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);                        
 391  
 392                          // only follow redirect if it's on this site, or offsiteok is true
 393                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 394                          {
 395                              /* follow the redirect */
 396                              $this->_redirectdepth++;
 397                              $this->lastredirectaddr=$this->_redirectaddr;
 398                              if( strpos( $this->_redirectaddr, "?" ) > 0 )
 399                                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 400                              else
 401                                  $this->submit($this->_redirectaddr,$formvars, $formfiles);
 402                          }
 403                      }
 404                  }
 405  
 406                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 407                  {
 408                      $frameurls = $this->_frameurls;
 409                      $this->_frameurls = array();
 410  
 411                      while(list(,$frameurl) = each($frameurls))
 412                      {                                                        
 413                          if($this->_framedepth < $this->maxframes)
 414                          {
 415                              $this->fetch($frameurl);
 416                              $this->_framedepth++;
 417                          }
 418                          else
 419                              break;
 420                      }
 421                  }                    
 422                  return true;                    
 423                  break;
 424                  
 425              default:
 426                  // not a valid protocol
 427                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 428                  return false;
 429                  break;
 430          }        
 431          return true;
 432      }
 433  
 434  /*======================================================================*\
 435      Function:    fetchlinks
 436      Purpose:    fetch the links from a web page
 437      Input:        $URI    where you are fetching from
 438      Output:        $this->results    an array of the URLs
 439  \*======================================================================*/
 440  
 441  	function fetchlinks($URI)
 442      {
 443          if ($this->fetch($URI))
 444          {            
 445              if($this->lastredirectaddr)
 446                  $URI = $this->lastredirectaddr;
 447              if(is_array($this->results))
 448              {
 449                  for($x=0;$x<count($this->results);$x++)
 450                      $this->results[$x] = $this->_striplinks($this->results[$x]);
 451              }
 452              else
 453                  $this->results = $this->_striplinks($this->results);
 454  
 455              if($this->expandlinks)
 456                  $this->results = $this->_expandlinks($this->results, $URI);
 457              return true;
 458          }
 459          else
 460              return false;
 461      }
 462  
 463  /*======================================================================*\
 464      Function:    fetchform
 465      Purpose:    fetch the form elements from a web page
 466      Input:        $URI    where you are fetching from
 467      Output:        $this->results    the resulting html form
 468  \*======================================================================*/
 469  
 470  	function fetchform($URI)
 471      {
 472          
 473          if ($this->fetch($URI))
 474          {            
 475  
 476              if(is_array($this->results))
 477              {
 478                  for($x=0;$x<count($this->results);$x++)
 479                      $this->results[$x] = $this->_stripform($this->results[$x]);
 480              }
 481              else
 482                  $this->results = $this->_stripform($this->results);
 483              
 484              return true;
 485          }
 486          else
 487              return false;
 488      }
 489      
 490      
 491  /*======================================================================*\
 492      Function:    fetchtext
 493      Purpose:    fetch the text from a web page, stripping the links
 494      Input:        $URI    where you are fetching from
 495      Output:        $this->results    the text from the web page
 496  \*======================================================================*/
 497  
 498  	function fetchtext($URI)
 499      {
 500          if($this->fetch($URI))
 501          {            
 502              if(is_array($this->results))
 503              {
 504                  for($x=0;$x<count($this->results);$x++)
 505                      $this->results[$x] = $this->_striptext($this->results[$x]);
 506              }
 507              else
 508                  $this->results = $this->_striptext($this->results);
 509              return true;
 510          }
 511          else
 512              return false;
 513      }
 514  
 515  /*======================================================================*\
 516      Function:    submitlinks
 517      Purpose:    grab links from a form submission
 518      Input:        $URI    where you are submitting from
 519      Output:        $this->results    an array of the links from the post
 520  \*======================================================================*/
 521  
 522  	function submitlinks($URI, $formvars="", $formfiles="")
 523      {
 524          if($this->submit($URI,$formvars, $formfiles))
 525          {            
 526              if($this->lastredirectaddr)
 527                  $URI = $this->lastredirectaddr;
 528              if(is_array($this->results))
 529              {
 530                  for($x=0;$x<count($this->results);$x++)
 531                  {
 532                      $this->results[$x] = $this->_striplinks($this->results[$x]);
 533                      if($this->expandlinks)
 534                          $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 535                  }
 536              }
 537              else
 538              {
 539                  $this->results = $this->_striplinks($this->results);
 540                  if($this->expandlinks)
 541                      $this->results = $this->_expandlinks($this->results,$URI);
 542              }
 543              return true;
 544          }
 545          else
 546              return false;
 547      }
 548  
 549  /*======================================================================*\
 550      Function:    submittext
 551      Purpose:    grab text from a form submission
 552      Input:        $URI    where you are submitting from
 553      Output:        $this->results    the text from the web page
 554  \*======================================================================*/
 555  
 556  	function submittext($URI, $formvars = "", $formfiles = "")
 557      {
 558          if($this->submit($URI,$formvars, $formfiles))
 559          {            
 560              if($this->lastredirectaddr)
 561                  $URI = $this->lastredirectaddr;
 562              if(is_array($this->results))
 563              {
 564                  for($x=0;$x<count($this->results);$x++)
 565                  {
 566                      $this->results[$x] = $this->_striptext($this->results[$x]);
 567                      if($this->expandlinks)
 568                          $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 569                  }
 570              }
 571              else
 572              {
 573                  $this->results = $this->_striptext($this->results);
 574                  if($this->expandlinks)
 575                      $this->results = $this->_expandlinks($this->results,$URI);
 576              }
 577              return true;
 578          }
 579          else
 580              return false;
 581      }
 582  
 583      
 584  
 585  /*======================================================================*\
 586      Function:    set_submit_multipart
 587      Purpose:    Set the form submission content type to
 588                  multipart/form-data
 589  \*======================================================================*/
 590  	function set_submit_multipart()
 591      {
 592          $this->_submit_type = "multipart/form-data";
 593      }
 594  
 595      
 596  /*======================================================================*\
 597      Function:    set_submit_normal
 598      Purpose:    Set the form submission content type to
 599                  application/x-www-form-urlencoded
 600  \*======================================================================*/
 601  	function set_submit_normal()
 602      {
 603          $this->_submit_type = "application/x-www-form-urlencoded";
 604      }
 605  
 606  
 607  // XOOPS2 Hack begin
 608  // Added on March 4, 2003 by onokazu@xoops.org
 609  /*======================================================================*\
 610      Function:    set_submit_xml
 611      Purpose:    Set the submission content type to
 612                  text/xml
 613  \*======================================================================*/
 614  	function set_submit_xml()
 615      {
 616          $this->_submit_type = "text/xml";
 617      }
 618  // XOOPS2 Hack end
 619  
 620  
 621  /*======================================================================*\
 622      Private functions
 623  \*======================================================================*/
 624      
 625      
 626  /*======================================================================*\
 627      Function:    _striplinks
 628      Purpose:    strip the hyperlinks from an html document
 629      Input:        $document    document to strip.
 630      Output:        $match        an array of the links
 631  \*======================================================================*/
 632  
 633  	function _striplinks($document)
 634      {    
 635          preg_match_all("'<\s*a\s.*?href\s*=\s*            # find <a href=
 636                          ([\"\'])?                    # find single or double quote
 637                          (?(1) (.*?)\\1 | ([^\s\>]+))        # if quote found, match up to next matching
 638                                                      # quote, otherwise match up to next space
 639                          'isx",$document,$links);
 640                          
 641  
 642          // catenate the non-empty matches from the conditional subpattern
 643  
 644          while(list($key,$val) = each($links[2]))
 645          {
 646              if(!empty($val))
 647                  $match[] = $val;
 648          }                
 649          
 650          while(list($key,$val) = each($links[3]))
 651          {
 652              if(!empty($val))
 653                  $match[] = $val;
 654          }        
 655          
 656          // return the links
 657          return $match;
 658      }
 659  
 660  /*======================================================================*\
 661      Function:    _stripform
 662      Purpose:    strip the form elements from an html document
 663      Input:        $document    document to strip.
 664      Output:        $match        a string of the matched form elements, joined by CRLF
 665  \*======================================================================*/
 666  
 667  	function _stripform($document)
 668      {    
 669          preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
 670          
 671          // catenate the matches
 672          $match = implode("\r\n",$elements[0]);
 673                  
 674          // return the links
 675          return $match;
 676      }
 677  
 678      
 679      
 680  /*======================================================================*\
 681      Function:    _striptext
 682      Purpose:    strip the text from an html document
 683      Input:        $document    document to strip.
 684      Output:        $text        the resulting text
 685  \*======================================================================*/
 686  
 687  	function _striptext($document)
 688      {
 689          
 690          // I didn't use preg eval (//e) since that is only available in PHP 4.0.
 691          // so, list your entities one by one here. I included some of the
 692          // more common ones.
 693                                  
 694          $search = array("'<script[^>]*?>.*?</script>'si",    // strip out javascript
 695                          "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
 696                          "'([\r\n])[\s]+'",                    // strip out white space
 697                          "'&(quot|#34|#034|#x22);'i",        // replace html entities
 698                          "'&(amp|#38|#038|#x26);'i",            // added hexadecimal values
 699                          "'&(lt|#60|#060|#x3c);'i",
 700                          "'&(gt|#62|#062|#x3e);'i",
 701                          "'&(nbsp|#160|#xa0);'i",
 702                          "'&(iexcl|#161);'i",
 703                          "'&(cent|#162);'i",
 704                          "'&(pound|#163);'i",
 705                          "'&(copy|#169);'i",
 706                          "'&(reg|#174);'i",
 707                          "'&(deg|#176);'i",
 708                          "'&(#39|#039|#x27);'",
 709                          "'&(euro|#8364);'i",                // europe
 710                          "'&a(uml|UML);'",                    // german
 711                          "'&o(uml|UML);'",
 712                          "'&u(uml|UML);'",
 713                          "'&A(uml|UML);'",
 714                          "'&O(uml|UML);'",
 715                          "'&U(uml|UML);'",
 716                          "'&szlig;'i",
 717                          );
 718          $replace = array(    "",
 719                              "",
 720                              "\\1",
 721                              "\"",
 722                              "&",
 723                              "<",
 724                              ">",
 725                              " ",
 726                              chr(161),
 727                              chr(162),
 728                              chr(163),
 729                              chr(169),
 730                              chr(174),
 731                              chr(176),
 732                              chr(39),
 733                              chr(128),
 734                              chr(228),
 735                              chr(246),
 736                              chr(252),
 737                              chr(196),
 738                              chr(214),
 739                              chr(220),
 740                              chr(223),
 741                          );
 742                      
 743          $text = preg_replace($search,$replace,$document);
 744                                  
 745          return $text;
 746      }
 747  
 748  /*======================================================================*\
 749      Function:    _expandlinks
 750      Purpose:    expand each link into a fully qualified URL
 751      Input:        $links            the links to qualify
 752                  $URI            the full URI to get the base from
 753      Output:        $expandedLinks    the expanded links
 754  \*======================================================================*/
 755  
 756  	function _expandlinks($links,$URI)
 757      {
 758          
 759          preg_match("/^[^\?]+/",$URI,$match);
 760  
 761          $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
 762          $match = preg_replace("|/$|","",$match);
 763          $match_part = parse_url($match);
 764          $match_root =
 765          $match_part["scheme"]."://".$match_part["host"];
 766                  
 767          $search = array(     "|^http://".preg_quote($this->host)."|i",
 768                              "|^(\/)|i",
 769                              "|^(?!http://)(?!mailto:)|i",
 770                              "|/\./|",
 771                              "|/[^\/]+/\.\./|"
 772                          );
 773                          
 774          $replace = array(    "",
 775                              $match_root."/",
 776                              $match."/",
 777                              "/",
 778                              "/"
 779                          );            
 780                  
 781          $expandedLinks = preg_replace($search,$replace,$links);
 782  
 783          return $expandedLinks;
 784      }
 785  
 786  /*======================================================================*\
 787      Function:    _httprequest
 788      Purpose:    go get the http data from the server
 789      Input:        $url        the url to fetch
 790                  $fp            the current open file pointer
 791                  $URI        the full URI
 792                  $body        body contents to send if any (POST)
 793      Output:        true on success, false if reading the response timed out
 794  \*======================================================================*/
 795      
 796  	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
 797      {
 798          $cookie_headers = '';
 799          if($this->passcookies && $this->_redirectaddr)
 800              $this->setcookies();
 801              
 802          $URI_PARTS = parse_url($URI);
 803          if(empty($url))
 804              $url = "/";
 805          $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";        
 806          if(!empty($this->agent))
 807              $headers .= "User-Agent: ".$this->agent."\r\n";
 808          if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
 809              $headers .= "Host: ".$this->host;
 810              if(!empty($this->port))
 811                  $headers .= ":".$this->port;
 812              $headers .= "\r\n";
 813          }
 814          if(!empty($this->accept))
 815              $headers .= "Accept: ".$this->accept."\r\n";
 816          if(!empty($this->referer))
 817              $headers .= "Referer: ".$this->referer."\r\n";
 818          if(!empty($this->cookies))
 819          {            
 820              if(!is_array($this->cookies))
 821                  $this->cookies = (array)$this->cookies;
 822      
 823              reset($this->cookies);
 824              if ( count($this->cookies) > 0 ) {
 825                  $cookie_headers .= 'Cookie: ';
 826                  foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 827                  $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
 828                  }
 829                  $headers .= substr($cookie_headers,0,-2) . "\r\n";
 830              } 
 831          }
 832          if(!empty($this->rawheaders))
 833          {
 834              if(!is_array($this->rawheaders))
 835                  $this->rawheaders = (array)$this->rawheaders;
 836              while(list($headerKey,$headerVal) = each($this->rawheaders))
 837                  $headers .= $headerKey.": ".$headerVal."\r\n";
 838          }
 839          if(!empty($content_type)) {
 840              $headers .= "Content-type: $content_type";
 841              if ($content_type == "multipart/form-data")
 842                  $headers .= "; boundary=".$this->_mime_boundary;
 843              $headers .= "\r\n";
 844          }
 845          if(!empty($body))    
 846              $headers .= "Content-length: ".strlen($body)."\r\n";
 847          if(!empty($this->user) || !empty($this->pass))    
 848              $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
 849          
 850          //add proxy auth headers
 851          if(!empty($this->proxy_user))    
 852              $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
 853  
 854  
 855          $headers .= "\r\n";
 856          
 857          // set the read timeout if needed
 858          if ($this->read_timeout > 0)
 859              socket_set_timeout($fp, $this->read_timeout);
 860          $this->timed_out = false;
 861          
 862          fwrite($fp,$headers.$body,strlen($headers.$body));
 863          
 864          $this->_redirectaddr = false;
 865          unset($this->headers);
 866                          
 867          while($currentHeader = fgets($fp,$this->_maxlinelen))
 868          {
 869              if ($this->read_timeout > 0 && $this->_check_timeout($fp))
 870              {
 871                  $this->status=-100;
 872                  return false;
 873              }
 874                  
 875              if($currentHeader == "\r\n")
 876                  break;
 877                          
 878              // if a header begins with Location: or URI:, set the redirect
 879              if(preg_match("/^(Location:|URI:)/i",$currentHeader))
 880              {
 881                  // get URL portion of the redirect
 882                  preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
 883                  // look for :// in the Location header to see if hostname is included
 884                  if(!preg_match("|\:\/\/|",$matches[2]))
 885                  {
 886                      // no host in the path, so prepend
 887                      $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
 888                      // eliminate double slash
 889                      if(!preg_match("|^/|",$matches[2]))
 890                              $this->_redirectaddr .= "/".$matches[2];
 891                      else
 892                              $this->_redirectaddr .= $matches[2];
 893                  }
 894                  else
 895                      $this->_redirectaddr = $matches[2];
 896              }
 897          
 898              if(preg_match("|^HTTP/|",$currentHeader))
 899              {
 900                  if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
 901                  {
 902                      $this->status= $status[1];
 903                  }                
 904                  $this->response_code = $currentHeader;
 905              }
 906                  
 907              $this->headers[] = $currentHeader;
 908          }
 909  
 910          $results = '';
 911          do {
 912              $_data = fread($fp, $this->maxlength);
 913              if (strlen($_data) == 0) {
 914                  break;
 915              }
 916              $results .= $_data;
 917          } while(true);
 918  
 919          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
 920          {
 921              $this->status=-100;
 922              return false;
 923          }
 924          
 925          // check if there is a a redirect meta tag
 926          
 927          if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
 928  
 929          {
 930              $this->_redirectaddr = $this->_expandlinks($match[1],$URI);    
 931          }
 932  
 933          // have we hit our frame depth and is there frame src to fetch?
 934          if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
 935          {
 936              $this->results[] = $results;
 937              for($x=0; $x<count($match[1]); $x++)
 938                  $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
 939          }
 940          // have we already fetched framed content?
 941          elseif(is_array($this->results))
 942              $this->results[] = $results;
 943          // no framed content
 944          else
 945              $this->results = $results;
 946          
 947          return true;
 948      }
 949  
 950  /*======================================================================*\
 951      Function:    _httpsrequest
 952      Purpose:    go get the https data from the server using curl
 953      Input:        $url        the url to fetch
 954                  $URI        the full URI
 955                  $body        body contents to send if any (POST)
 956      Output:        true on success, false if cURL could not retrieve the document
 957  \*======================================================================*/
 958      
 959  	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
 960      {
 961          if($this->passcookies && $this->_redirectaddr)
 962              $this->setcookies();
 963  
 964          $headers = array();        
 965                      
 966          $URI_PARTS = parse_url($URI);
 967          if(empty($url))
 968              $url = "/";
 969          // GET ... header not needed for curl
 970          //$headers[] = $http_method." ".$url." ".$this->_httpversion;        
 971          if(!empty($this->agent))
 972              $headers[] = "User-Agent: ".$this->agent;
 973          if(!empty($this->host))
 974              if(!empty($this->port))
 975                  $headers[] = "Host: ".$this->host.":".$this->port;
 976              else
 977                  $headers[] = "Host: ".$this->host;
 978          if(!empty($this->accept))
 979              $headers[] = "Accept: ".$this->accept;
 980          if(!empty($this->referer))
 981              $headers[] = "Referer: ".$this->referer;
 982          if(!empty($this->cookies))
 983          {            
 984              if(!is_array($this->cookies))
 985                  $this->cookies = (array)$this->cookies;
 986      
 987              reset($this->cookies);
 988              if ( count($this->cookies) > 0 ) {
 989                  $cookie_str = 'Cookie: ';
 990                  foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 991                  $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
 992                  }
 993                  $headers[] = substr($cookie_str,0,-2);
 994              }
 995          }
 996          if(!empty($this->rawheaders))
 997          {
 998              if(!is_array($this->rawheaders))
 999                  $this->rawheaders = (array)$this->rawheaders;
1000              while(list($headerKey,$headerVal) = each($this->rawheaders))
1001                  $headers[] = $headerKey.": ".$headerVal;
1002          }
1003          if(!empty($content_type)) {
1004              if ($content_type == "multipart/form-data")
1005                  $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
1006              else
1007                  $headers[] = "Content-type: $content_type";
1008          }
1009          if(!empty($body))    
1010              $headers[] = "Content-length: ".strlen($body);
1011          if(!empty($this->user) || !empty($this->pass))    
1012              $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
1013              
1014          for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
1015              $safer_header = strtr( $headers[$curr_header], "\"", " " );
1016              $cmdline_params .= " -H \"".$safer_header."\"";
1017          }
1018          
1019          if(!empty($body))
1020              $cmdline_params .= " -d \"$body\"";
1021          
1022          if($this->read_timeout > 0)
1023              $cmdline_params .= " -m ".$this->read_timeout;
1024          
1025          $headerfile = tempnam($temp_dir, "sno");
1026  
1027          $safer_URI = strtr( $URI, "\"", " " ); // strip quotes from the URI to avoid shell access
1028          exec($this->curl_path." -D \"$headerfile\"".$cmdline_params." \"".$safer_URI."\"",$results,$return);
1029          
1030          if($return)
1031          {
1032              $this->error = "Error: cURL could not retrieve the document, error $return.";
1033              return false;
1034          }
1035              
1036              
1037          $results = implode("\r\n",$results);
1038          
1039          $result_headers = file("$headerfile");
1040                          
1041          $this->_redirectaddr = false;
1042          unset($this->headers);
1043                          
1044          for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1045          {
1046              
1047              // if a header begins with Location: or URI:, set the redirect
1048              if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1049              {
1050                  // get URL portion of the redirect
1051                  preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1052                  // look for :// in the Location header to see if hostname is included
1053                  if(!preg_match("|\:\/\/|",$matches[2]))
1054                  {
1055                      // no host in the path, so prepend
1056                      $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1057                      // eliminate double slash
1058                      if(!preg_match("|^/|",$matches[2]))
1059                              $this->_redirectaddr .= "/".$matches[2];
1060                      else
1061                              $this->_redirectaddr .= $matches[2];
1062                  }
1063                  else
1064                      $this->_redirectaddr = $matches[2];
1065              }
1066          
1067              if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1068                  $this->response_code = $result_headers[$currentHeader];
1069  
1070              $this->headers[] = $result_headers[$currentHeader];
1071          }
1072  
1073          // check if there is a a redirect meta tag
1074          
1075          if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1076          {
1077              $this->_redirectaddr = $this->_expandlinks($match[1],$URI);    
1078          }
1079  
1080          // have we hit our frame depth and is there frame src to fetch?
1081          if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
1082          {
1083              $this->results[] = $results;
1084              for($x=0; $x<count($match[1]); $x++)
1085                  $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1086          }
1087          // have we already fetched framed content?
1088          elseif(is_array($this->results))
1089              $this->results[] = $results;
1090          // no framed content
1091          else
1092              $this->results = $results;
1093  
1094          unlink("$headerfile");
1095          
1096          return true;
1097      }
1098  
1099  /*======================================================================*\
1100      Function:    setcookies()
1101      Purpose:    set cookies for a redirection
1102  \*======================================================================*/
1103      
1104  	function setcookies()
1105      {
1106          for($x=0; $x<count($this->headers); $x++)
1107          {
1108          if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1109              $this->cookies[$match[1]] = urldecode($match[2]);
1110          }
1111      }
1112  
1113      
1114  /*======================================================================*\
1115      Function:    _check_timeout
1116      Purpose:    checks whether timeout has occurred
1117      Input:        $fp    file pointer
1118  \*======================================================================*/
1119  
1120  	function _check_timeout($fp)
1121      {
1122          if ($this->read_timeout > 0) {
1123              $fp_status = socket_get_status($fp);
1124              if ($fp_status["timed_out"]) {
1125                  $this->timed_out = true;
1126                  return true;
1127              }
1128          }
1129          return false;
1130      }
1131  
1132  /*======================================================================*\
1133      Function:    _connect
1134      Purpose:    make a socket connection
1135      Input:        $fp    file pointer
1136  \*======================================================================*/
1137      
1138  	function _connect(&$fp)
1139      {
1140          if(!empty($this->proxy_host) && !empty($this->proxy_port))
1141              {
1142                  $this->_isproxy = true;
1143                  
1144                  $host = $this->proxy_host;
1145                  $port = $this->proxy_port;
1146              }
1147          else
1148          {
1149              $host = $this->host;
1150              $port = $this->port;
1151          }
1152      
1153          $this->status = 0;
1154          
1155          if($fp = fsockopen(
1156                      $host,
1157                      $port,
1158                      $errno,
1159                      $errstr,
1160                      $this->_fp_timeout
1161                      ))
1162          {
1163              // socket connection succeeded
1164  
1165              return true;
1166          }
1167          else
1168          {
1169              // socket connection failed
1170              $this->status = $errno;
1171              switch($errno)
1172              {
1173                  case -3:
1174                      $this->error="socket creation failed (-3)";
1175                  case -4:
1176                      $this->error="dns lookup failure (-4)";
1177                  case -5:
1178                      $this->error="connection refused or timed out (-5)";
1179                  default:
1180                      $this->error="connection failed (".$errno.")";
1181              }
1182              return false;
1183          }
1184      }
1185  /*======================================================================*\
1186      Function:    _disconnect
1187      Purpose:    disconnect a socket connection
1188      Input:        $fp    file pointer
1189  \*======================================================================*/
1190      
1191  	function _disconnect($fp)
1192      {
1193          return(fclose($fp));
1194      }
1195  
1196      
1197  /*======================================================================*\
1198      Function:    _prepare_post_body
1199      Purpose:    Prepare post body according to encoding type
1200      Input:        $formvars  - form variables
1201                  $formfiles - form upload files
1202      Output:        post body
1203  \*======================================================================*/
1204      
1205  	function _prepare_post_body($formvars, $formfiles)
1206      {
1207          settype($formvars, "array");
1208          settype($formfiles, "array");
1209          $postdata = '';
1210  
1211          if (count($formvars) == 0 && count($formfiles) == 0)
1212              return;
1213          
1214          switch ($this->_submit_type) {
1215              case "application/x-www-form-urlencoded":
1216                  reset($formvars);
1217                  while(list($key,$val) = each($formvars)) {
1218                      if (is_array($val) || is_object($val)) {
1219                          while (list($cur_key, $cur_val) = each($val)) {
1220                              $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1221                          }
1222                      } else
1223                          $postdata .= urlencode($key)."=".urlencode($val)."&";
1224                  }
1225                  break;
1226  
1227              case "multipart/form-data":
1228                  $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1229                  
1230                  reset($formvars);
1231                  while(list($key,$val) = each($formvars)) {
1232                      if (is_array($val) || is_object($val)) {
1233                          while (list($cur_key, $cur_val) = each($val)) {
1234                              $postdata .= "--".$this->_mime_boundary."\r\n";
1235                              $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1236                              $postdata .= "$cur_val\r\n";
1237                          }
1238                      } else {
1239                          $postdata .= "--".$this->_mime_boundary."\r\n";
1240                          $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1241                          $postdata .= "$val\r\n";
1242                      }
1243                  }
1244                  
1245                  reset($formfiles);
1246                  while (list($field_name, $file_names) = each($formfiles)) {
1247                      settype($file_names, "array");
1248                      while (list(, $file_name) = each($file_names)) {
1249                          if (!is_readable($file_name)) continue;
1250  
1251                          $fp = fopen($file_name, "r");
1252                          $file_content = fread($fp, filesize($file_name));
1253                          fclose($fp);
1254                          $base_name = basename($file_name);
1255  
1256                          $postdata .= "--".$this->_mime_boundary."\r\n";
1257                          $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1258                          $postdata .= "$file_content\r\n";
1259                      }
1260                  }
1261                  $postdata .= "--".$this->_mime_boundary."--\r\n";
1262                  break;
1263              // XOOPS2 Hack begin
1264              // Added on March 4, 2003 by onokazu@xoops.org
1265              case "text/xml":
1266              default:
1267                  $postdata = $formvars[0];
1268                  break;
1269              // XOOPS2 Hack end
1270          }
1271  
1272          return $postdata;
1273      }
1274  }
1275  
1276  ?>


Généré le : Sun Nov 25 11:44:32 2007 par Balluche grâce à PHPXref 0.7
  Clicky Web Analytics