[ Index ]
 

Code source de WordPress 2.1.2

Accédez au Source d'autres logiciels libres | Soutenez Angelica Josefina !

title

Body

[fermer]

/wp-includes/ -> class-snoopy.php (source)

   1  <?php
   2  
   3  /*************************************************
   4  
   5  Snoopy - the PHP net client
   6  Author: Monte Ohrt <monte@ispi.net>
   7  Copyright (c): 1999-2000 ispi, all rights reserved
   8  Version: 1.01
   9  
  10   * This library is free software; you can redistribute it and/or
  11   * modify it under the terms of the GNU Lesser General Public
  12   * License as published by the Free Software Foundation; either
  13   * version 2.1 of the License, or (at your option) any later version.
  14   *
  15   * This library is distributed in the hope that it will be useful,
  16   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18   * Lesser General Public License for more details.
  19   *
  20   * You should have received a copy of the GNU Lesser General Public
  21   * License along with this library; if not, write to the Free Software
  22   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  23  
  24  You may contact the author of Snoopy by e-mail at:
  25  monte@ispi.net
  26  
  27  Or, write to:
  28  Monte Ohrt
  29  CTO, ispi
  30  237 S. 70th suite 220
  31  Lincoln, NE 68510
  32  
  33  The latest version of Snoopy can be obtained from:
  34  http://snoopy.sourceforge.net/
  35  
  36  *************************************************/
  37  
  38  if ( !in_array('Snoopy', get_declared_classes() ) ) :
  39  class Snoopy
  40  {
  41      /**** Public variables ****/
  42  
  43      /* user definable vars */
  44  
  45      var $host            =    "www.php.net";        // host name we are connecting to
  46      var $port            =    80;                    // port we are connecting to
  47      var $proxy_host        =    "";                    // proxy host to use
  48      var $proxy_port        =    "";                    // proxy port to use
  49      var $proxy_user        =    "";                    // proxy user to use
  50      var $proxy_pass        =    "";                    // proxy password to use
  51  
  52      var $agent            =    "Snoopy v1.2.3";    // agent we masquerade as
  53      var    $referer        =    "";                    // referer info to pass
  54      var $cookies        =    array();            // array of cookies to pass
  55                                                  // $cookies["username"]="joe";
  56      var    $rawheaders        =    array();            // array of raw headers to send
  57                                                  // $rawheaders["Content-type"]="text/html";
  58  
  59      var $maxredirs        =    5;                    // http redirection depth maximum. 0 = disallow
  60      var $lastredirectaddr    =    "";                // contains address of last redirected address
  61      var    $offsiteok        =    true;                // allows redirection off-site
  62      var $maxframes        =    0;                    // frame content depth maximum. 0 = disallow
  63      var $expandlinks    =    true;                // expand links to fully qualified URLs.
  64                                                  // this only applies to fetchlinks()
  65                                                  // submitlinks(), and submittext()
  66      var $passcookies    =    true;                // pass set cookies back through redirects
  67                                                  // NOTE: this currently does not respect
  68                                                  // dates, domains or paths.
  69  
  70      var    $user            =    "";                    // user for http authentication
  71      var    $pass            =    "";                    // password for http authentication
  72  
  73      // http accept types
  74      var $accept            =    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  75  
  76      var $results        =    "";                    // where the content is put
  77  
  78      var $error            =    "";                    // error messages sent here
  79      var    $response_code    =    "";                    // response code returned from server
  80      var    $headers        =    array();            // headers returned from server sent here
  81      var    $maxlength        =    8192;                // max return data length (body)
  82      var $read_timeout    =    0;                    // timeout on read operations, in seconds
  83                                                  // supported only since PHP 4 Beta 4
  84                                                  // set to 0 to disallow timeouts
  85      var $timed_out        =    false;                // if a read operation timed out
  86      var    $status            =    0;                    // http request status
  87  
  88      var $temp_dir        =    "/tmp";                // temporary directory that the webserver
  89                                                  // has permission to write to.
  90                                                  // under Windows, this should be C:\temp
  91  
  92      var    $curl_path        =    "/usr/local/bin/curl";
  93                                                  // Snoopy will use cURL for fetching
  94                                                  // SSL content if a full system path to
  95                                                  // the cURL binary is supplied here.
  96                                                  // set to false if you do not have
  97                                                  // cURL installed. See http://curl.haxx.se
  98                                                  // for details on installing cURL.
  99                                                  // Snoopy does *not* use the cURL
 100                                                  // library functions built into php,
 101                                                  // as these functions are not stable
 102                                                  // as of this Snoopy release.
 103  
 104      /**** Private variables ****/
 105  
 106      var    $_maxlinelen    =    4096;                // max line length (headers)
 107  
 108      var $_httpmethod    =    "GET";                // default http request method
 109      var $_httpversion    =    "HTTP/1.0";            // default http request version
 110      var $_submit_method    =    "POST";                // default submit method
 111      var $_submit_type    =    "application/x-www-form-urlencoded";    // default submit type
 112      var $_mime_boundary    =   "";                    // MIME boundary for multipart/form-data submit type
 113      var $_redirectaddr    =    false;                // will be set if page fetched is a redirect
 114      var $_redirectdepth    =    0;                    // increments on an http redirect
 115      var $_frameurls        =     array();            // frame src urls
 116      var $_framedepth    =    0;                    // increments on frame depth
 117  
 118      var $_isproxy        =    false;                // set if using a proxy server
 119      var $_fp_timeout    =    30;                    // timeout for socket connection
 120  
 121  /*======================================================================*\
 122      Function:    fetch
 123      Purpose:    fetch the contents of a web page
 124                  (and possibly other protocols in the
 125                  future like ftp, nntp, gopher, etc.)
 126      Input:        $URI    the location of the page to fetch
 127      Output:        $this->results    the output text from the fetch
 128  \*======================================================================*/
 129  
 130  	function fetch($URI)
 131      {
 132  
 133          //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
 134          $URI_PARTS = parse_url($URI);
 135          if (!empty($URI_PARTS["user"]))
 136              $this->user = $URI_PARTS["user"];
 137          if (!empty($URI_PARTS["pass"]))
 138              $this->pass = $URI_PARTS["pass"];
 139          if (empty($URI_PARTS["query"]))
 140              $URI_PARTS["query"] = '';
 141          if (empty($URI_PARTS["path"]))
 142              $URI_PARTS["path"] = '';
 143  
 144          switch(strtolower($URI_PARTS["scheme"]))
 145          {
 146              case "http":
 147                  $this->host = $URI_PARTS["host"];
 148                  if(!empty($URI_PARTS["port"]))
 149                      $this->port = $URI_PARTS["port"];
 150                  if($this->_connect($fp))
 151                  {
 152                      if($this->_isproxy)
 153                      {
 154                          // using proxy, send entire URI
 155                          $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
 156                      }
 157                      else
 158                      {
 159                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 160                          // no proxy, send only the path
 161                          $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
 162                      }
 163  
 164                      $this->_disconnect($fp);
 165  
 166                      if($this->_redirectaddr)
 167                      {
 168                          /* url was redirected, check if we've hit the max depth */
 169                          if($this->maxredirs > $this->_redirectdepth)
 170                          {
 171                              // only follow redirect if it's on this site, or offsiteok is true
 172                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 173                              {
 174                                  /* follow the redirect */
 175                                  $this->_redirectdepth++;
 176                                  $this->lastredirectaddr=$this->_redirectaddr;
 177                                  $this->fetch($this->_redirectaddr);
 178                              }
 179                          }
 180                      }
 181  
 182                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 183                      {
 184                          $frameurls = $this->_frameurls;
 185                          $this->_frameurls = array();
 186  
 187                          while(list(,$frameurl) = each($frameurls))
 188                          {
 189                              if($this->_framedepth < $this->maxframes)
 190                              {
 191                                  $this->fetch($frameurl);
 192                                  $this->_framedepth++;
 193                              }
 194                              else
 195                                  break;
 196                          }
 197                      }
 198                  }
 199                  else
 200                  {
 201                      return false;
 202                  }
 203                  return true;
 204                  break;
 205              case "https":
 206                  if(!$this->curl_path)
 207                      return false;
 208                  if(function_exists("is_executable"))
 209                      if (!is_executable($this->curl_path))
 210                          return false;
 211                  $this->host = $URI_PARTS["host"];
 212                  if(!empty($URI_PARTS["port"]))
 213                      $this->port = $URI_PARTS["port"];
 214                  if($this->_isproxy)
 215                  {
 216                      // using proxy, send entire URI
 217                      $this->_httpsrequest($URI,$URI,$this->_httpmethod);
 218                  }
 219                  else
 220                  {
 221                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 222                      // no proxy, send only the path
 223                      $this->_httpsrequest($path, $URI, $this->_httpmethod);
 224                  }
 225  
 226                  if($this->_redirectaddr)
 227                  {
 228                      /* url was redirected, check if we've hit the max depth */
 229                      if($this->maxredirs > $this->_redirectdepth)
 230                      {
 231                          // only follow redirect if it's on this site, or offsiteok is true
 232                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 233                          {
 234                              /* follow the redirect */
 235                              $this->_redirectdepth++;
 236                              $this->lastredirectaddr=$this->_redirectaddr;
 237                              $this->fetch($this->_redirectaddr);
 238                          }
 239                      }
 240                  }
 241  
 242                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 243                  {
 244                      $frameurls = $this->_frameurls;
 245                      $this->_frameurls = array();
 246  
 247                      while(list(,$frameurl) = each($frameurls))
 248                      {
 249                          if($this->_framedepth < $this->maxframes)
 250                          {
 251                              $this->fetch($frameurl);
 252                              $this->_framedepth++;
 253                          }
 254                          else
 255                              break;
 256                      }
 257                  }
 258                  return true;
 259                  break;
 260              default:
 261                  // not a valid protocol
 262                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 263                  return false;
 264                  break;
 265          }
 266          return true;
 267      }
 268  
 269  /*======================================================================*\
 270      Function:    submit
 271      Purpose:    submit an http form
 272      Input:        $URI    the location to post the data
 273                  $formvars    the formvars to use.
 274                      format: $formvars["var"] = "val";
 275                  $formfiles  an array of files to submit
 276                      format: $formfiles["var"] = "/dir/filename.ext";
 277      Output:        $this->results    the text output from the post
 278  \*======================================================================*/
 279  
 280  	function submit($URI, $formvars="", $formfiles="")
 281      {
 282          unset($postdata);
 283  
 284          $postdata = $this->_prepare_post_body($formvars, $formfiles);
 285  
 286          $URI_PARTS = parse_url($URI);
 287          if (!empty($URI_PARTS["user"]))
 288              $this->user = $URI_PARTS["user"];
 289          if (!empty($URI_PARTS["pass"]))
 290              $this->pass = $URI_PARTS["pass"];
 291          if (empty($URI_PARTS["query"]))
 292              $URI_PARTS["query"] = '';
 293          if (empty($URI_PARTS["path"]))
 294              $URI_PARTS["path"] = '';
 295  
 296          switch(strtolower($URI_PARTS["scheme"]))
 297          {
 298              case "http":
 299                  $this->host = $URI_PARTS["host"];
 300                  if(!empty($URI_PARTS["port"]))
 301                      $this->port = $URI_PARTS["port"];
 302                  if($this->_connect($fp))
 303                  {
 304                      if($this->_isproxy)
 305                      {
 306                          // using proxy, send entire URI
 307                          $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
 308                      }
 309                      else
 310                      {
 311                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 312                          // no proxy, send only the path
 313                          $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 314                      }
 315  
 316                      $this->_disconnect($fp);
 317  
 318                      if($this->_redirectaddr)
 319                      {
 320                          /* url was redirected, check if we've hit the max depth */
 321                          if($this->maxredirs > $this->_redirectdepth)
 322                          {
 323                              if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 324                                  $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
 325  
 326                              // only follow redirect if it's on this site, or offsiteok is true
 327                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 328                              {
 329                                  /* follow the redirect */
 330                                  $this->_redirectdepth++;
 331                                  $this->lastredirectaddr=$this->_redirectaddr;
 332                                  if( strpos( $this->_redirectaddr, "?" ) > 0 )
 333                                      $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 334                                  else
 335                                      $this->submit($this->_redirectaddr,$formvars, $formfiles);
 336                              }
 337                          }
 338                      }
 339  
 340                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 341                      {
 342                          $frameurls = $this->_frameurls;
 343                          $this->_frameurls = array();
 344  
 345                          while(list(,$frameurl) = each($frameurls))
 346                          {
 347                              if($this->_framedepth < $this->maxframes)
 348                              {
 349                                  $this->fetch($frameurl);
 350                                  $this->_framedepth++;
 351                              }
 352                              else
 353                                  break;
 354                          }
 355                      }
 356  
 357                  }
 358                  else
 359                  {
 360                      return false;
 361                  }
 362                  return true;
 363                  break;
 364              case "https":
 365                  if(!$this->curl_path)
 366                      return false;
 367                  if(function_exists("is_executable"))
 368                      if (!is_executable($this->curl_path))
 369                          return false;
 370                  $this->host = $URI_PARTS["host"];
 371                  if(!empty($URI_PARTS["port"]))
 372                      $this->port = $URI_PARTS["port"];
 373                  if($this->_isproxy)
 374                  {
 375                      // using proxy, send entire URI
 376                      $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 377                  }
 378                  else
 379                  {
 380                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 381                      // no proxy, send only the path
 382                      $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 383                  }
 384  
 385                  if($this->_redirectaddr)
 386                  {
 387                      /* url was redirected, check if we've hit the max depth */
 388                      if($this->maxredirs > $this->_redirectdepth)
 389                      {
 390                          if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 391                              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
 392  
 393                          // only follow redirect if it's on this site, or offsiteok is true
 394                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 395                          {
 396                              /* follow the redirect */
 397                              $this->_redirectdepth++;
 398                              $this->lastredirectaddr=$this->_redirectaddr;
 399                              if( strpos( $this->_redirectaddr, "?" ) > 0 )
 400                                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 401                              else
 402                                  $this->submit($this->_redirectaddr,$formvars, $formfiles);
 403                          }
 404                      }
 405                  }
 406  
 407                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 408                  {
 409                      $frameurls = $this->_frameurls;
 410                      $this->_frameurls = array();
 411  
 412                      while(list(,$frameurl) = each($frameurls))
 413                      {
 414                          if($this->_framedepth < $this->maxframes)
 415                          {
 416                              $this->fetch($frameurl);
 417                              $this->_framedepth++;
 418                          }
 419                          else
 420                              break;
 421                      }
 422                  }
 423                  return true;
 424                  break;
 425  
 426              default:
 427                  // not a valid protocol
 428                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 429                  return false;
 430                  break;
 431          }
 432          return true;
 433      }
 434  
 435  /*======================================================================*\
 436      Function:    fetchlinks
 437      Purpose:    fetch the links from a web page
 438      Input:        $URI    where you are fetching from
 439      Output:        $this->results    an array of the URLs
 440  \*======================================================================*/
 441  
 442  	function fetchlinks($URI)
 443      {
 444          if ($this->fetch($URI))
 445          {
 446              if($this->lastredirectaddr)
 447                  $URI = $this->lastredirectaddr;
 448              if(is_array($this->results))
 449              {
 450                  for($x=0;$x<count($this->results);$x++)
 451                      $this->results[$x] = $this->_striplinks($this->results[$x]);
 452              }
 453              else
 454                  $this->results = $this->_striplinks($this->results);
 455  
 456              if($this->expandlinks)
 457                  $this->results = $this->_expandlinks($this->results, $URI);
 458              return true;
 459          }
 460          else
 461              return false;
 462      }
 463  
 464  /*======================================================================*\
 465      Function:    fetchform
 466      Purpose:    fetch the form elements from a web page
 467      Input:        $URI    where you are fetching from
 468      Output:        $this->results    the resulting html form
 469  \*======================================================================*/
 470  
 471  	function fetchform($URI)
 472      {
 473  
 474          if ($this->fetch($URI))
 475          {
 476  
 477              if(is_array($this->results))
 478              {
 479                  for($x=0;$x<count($this->results);$x++)
 480                      $this->results[$x] = $this->_stripform($this->results[$x]);
 481              }
 482              else
 483                  $this->results = $this->_stripform($this->results);
 484  
 485              return true;
 486          }
 487          else
 488              return false;
 489      }
 490  
 491  
 492  /*======================================================================*\
 493      Function:    fetchtext
 494      Purpose:    fetch the text from a web page, stripping the links
 495      Input:        $URI    where you are fetching from
 496      Output:        $this->results    the text from the web page
 497  \*======================================================================*/
 498  
 499  	function fetchtext($URI)
 500      {
 501          if($this->fetch($URI))
 502          {
 503              if(is_array($this->results))
 504              {
 505                  for($x=0;$x<count($this->results);$x++)
 506                      $this->results[$x] = $this->_striptext($this->results[$x]);
 507              }
 508              else
 509                  $this->results = $this->_striptext($this->results);
 510              return true;
 511          }
 512          else
 513              return false;
 514      }
 515  
 516  /*======================================================================*\
 517      Function:    submitlinks
 518      Purpose:    grab links from a form submission
 519      Input:        $URI    where you are submitting from
 520      Output:        $this->results    an array of the links from the post
 521  \*======================================================================*/
 522  
 523  	function submitlinks($URI, $formvars="", $formfiles="")
 524      {
 525          if($this->submit($URI,$formvars, $formfiles))
 526          {
 527              if($this->lastredirectaddr)
 528                  $URI = $this->lastredirectaddr;
 529              if(is_array($this->results))
 530              {
 531                  for($x=0;$x<count($this->results);$x++)
 532                  {
 533                      $this->results[$x] = $this->_striplinks($this->results[$x]);
 534                      if($this->expandlinks)
 535                          $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 536                  }
 537              }
 538              else
 539              {
 540                  $this->results = $this->_striplinks($this->results);
 541                  if($this->expandlinks)
 542                      $this->results = $this->_expandlinks($this->results,$URI);
 543              }
 544              return true;
 545          }
 546          else
 547              return false;
 548      }
 549  
 550  /*======================================================================*\
 551      Function:    submittext
 552      Purpose:    grab text from a form submission
 553      Input:        $URI    where you are submitting from
 554      Output:        $this->results    the text from the web page
 555  \*======================================================================*/
 556  
 557  	function submittext($URI, $formvars = "", $formfiles = "")
 558      {
 559          if($this->submit($URI,$formvars, $formfiles))
 560          {
 561              if($this->lastredirectaddr)
 562                  $URI = $this->lastredirectaddr;
 563              if(is_array($this->results))
 564              {
 565                  for($x=0;$x<count($this->results);$x++)
 566                  {
 567                      $this->results[$x] = $this->_striptext($this->results[$x]);
 568                      if($this->expandlinks)
 569                          $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 570                  }
 571              }
 572              else
 573              {
 574                  $this->results = $this->_striptext($this->results);
 575                  if($this->expandlinks)
 576                      $this->results = $this->_expandlinks($this->results,$URI);
 577              }
 578              return true;
 579          }
 580          else
 581              return false;
 582      }
 583  
 584  
 585  
 586  /*======================================================================*\
 587      Function:    set_submit_multipart
 588      Purpose:    Set the form submission content type to
 589                  multipart/form-data
 590  \*======================================================================*/
 591  	function set_submit_multipart()
 592      {
 593          $this->_submit_type = "multipart/form-data";
 594      }
 595  
 596  
 597  /*======================================================================*\
 598      Function:    set_submit_normal
 599      Purpose:    Set the form submission content type to
 600                  application/x-www-form-urlencoded
 601  \*======================================================================*/
 602  	function set_submit_normal()
 603      {
 604          $this->_submit_type = "application/x-www-form-urlencoded";
 605      }
 606  
 607  
 608  
 609  
 610  /*======================================================================*\
 611      Private functions
 612  \*======================================================================*/
 613  
 614  
 615  /*======================================================================*\
 616      Function:    _striplinks
 617      Purpose:    strip the hyperlinks from an html document
 618      Input:        $document    document to strip.
 619      Output:        $match        an array of the links
 620  \*======================================================================*/
 621  
 622  	function _striplinks($document)
 623      {
 624          preg_match_all("'<\s*a\s.*?href\s*=\s*            # find <a href=
 625                          ([\"\'])?                    # find single or double quote
 626                          (?(1) (.*?)\\1 | ([^\s\>]+))        # if quote found, match up to next matching
 627                                                      # quote, otherwise match up to next space
 628                          'isx",$document,$links);
 629  
 630  
 631          // catenate the non-empty matches from the conditional subpattern
 632  
 633          while(list($key,$val) = each($links[2]))
 634          {
 635              if(!empty($val))
 636                  $match[] = $val;
 637          }
 638  
 639          while(list($key,$val) = each($links[3]))
 640          {
 641              if(!empty($val))
 642                  $match[] = $val;
 643          }
 644  
 645          // return the links
 646          return $match;
 647      }
 648  
 649  /*======================================================================*\
 650      Function:    _stripform
 651      Purpose:    strip the form elements from an html document
 652      Input:        $document    document to strip.
 653      Output:        $match        an array of the links
 654  \*======================================================================*/
 655  
	function _stripform($document)
    {
        // Extracts the form-related markup (FORM, INPUT, SELECT, TEXTAREA and
        // OPTION tags, opening or closing) from $document.
        //
        // Input:   $document   HTML text to scan
        // Output:  one string holding every matched element, joined by "\r\n"
        //
        // Modifiers: U = ungreedy, s = dot matches newline, i = case-insensitive.
        // For OPTION tags the conditional part also captures the text that
        // follows, up to the next option/select tag.
        $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

        preg_match_all($pattern, $document, $found);

        // index 0 holds the full-pattern matches
        return implode("\r\n", $found[0]);
    }
 666  
 667  
 668  
 669  /*======================================================================*\
 670      Function:    _striptext
 671      Purpose:    strip the text from an html document
 672      Input:        $document    document to strip.
 673      Output:        $text        the resulting text
 674  \*======================================================================*/
 675  
	function _striptext($document)
    {
        // Reduces an HTML document to plain text: removes <script> blocks and
        // all tags, collapses whitespace that follows a line break, and
        // decodes a fixed list of character entities.
        //
        // Input:   $document   HTML text
        // Output:  the resulting text
        //
        // The rules are applied one after another, in the order listed, each
        // on the output of the previous one. "&amp;" is therefore decoded
        // LAST: decoding it earlier would turn "&amp;lt;" into "&lt;" and
        // then, wrongly, into "<".
        $rules = array(
            "'<script[^>]*?>.*?</script>'si"    => "",        // strip out javascript
            "'<[\/\!]*?[^<>]*?>'si"             => "",        // strip out html tags
            "'([\r\n])[\s]+'"                   => "\\1",     // strip out white space
            "'&(quot|#34|#034|#x22);'i"         => "\"",      // replace html entities
            "'&(lt|#60|#060|#x3c);'i"           => "<",
            "'&(gt|#62|#062|#x3e);'i"           => ">",
            "'&(nbsp|#160|#xa0);'i"             => " ",
            "'&(iexcl|#161);'i"                 => chr(161),
            "'&(cent|#162);'i"                  => chr(162),
            "'&(pound|#163);'i"                 => chr(163),
            "'&(copy|#169);'i"                  => chr(169),
            "'&(reg|#174);'i"                   => chr(174),
            "'&(deg|#176);'i"                   => chr(176),
            "'&(#39|#039|#x27);'"               => chr(39),
            "'&(euro|#8364);'i"                 => chr(128),  // europe
            "'&a(uml|UML);'"                    => "ä",       // german
            "'&o(uml|UML);'"                    => "ö",
            "'&u(uml|UML);'"                    => "ü",
            "'&A(uml|UML);'"                    => "Ä",
            "'&O(uml|UML);'"                    => "Ö",
            "'&U(uml|UML);'"                    => "Ü",
            "'&szlig;'i"                        => "ß",
            "'&(amp|#38|#038|#x26);'i"          => "&",       // must stay last, see above
        );

        // NOTE(review): the chr() replacements are single bytes while the
        // umlaut literals take whatever encoding this file is saved in --
        // confirm the intended output charset.
        $text = preg_replace(array_keys($rules), array_values($rules), $document);

        return $text;
    }
 736  
 737  /*======================================================================*\
 738      Function:    _expandlinks
 739      Purpose:    expand each link into a fully qualified URL
 740      Input:        $links            the links to qualify
 741                  $URI            the full URI to get the base from
 742      Output:        $expandedLinks    the expanded links
 743  \*======================================================================*/
 744  
	function _expandlinks($links,$URI)
    {
        // Turns relative links into fully qualified URLs.
        //
        // Input:   $links   a link, or an array of links, to qualify
        //          $URI     the full URI of the document the links came from
        // Output:  the expanded link(s), same shape as $links
        //
        // Links starting with http://, https:// or mailto: are treated as
        // absolute and left alone.

        // base URI without its query string
        $base = "";
        if(preg_match("/^[^\?]+/",$URI,$match))
            $base = $match[0];

        // Drop a trailing "/name.ext" so that $base names the directory.
        // The (?<!:/) guard stops the "//example.com" of a URI that has no
        // path from being mistaken for a file name.
        $base = preg_replace("|(?<!:/)/[^\/\.]+\.[^\/\.]+$|","",$base);
        $base = preg_replace("|/$|","",$base);

        // scheme://host[:port] of the base, used for root-relative links
        $base_part = parse_url($base);
        if(!is_array($base_part))
            $base_part = array();

        $base_root = (isset($base_part["scheme"]) ? $base_part["scheme"] : "")
                    ."://"
                    .(isset($base_part["host"]) ? $base_part["host"] : "");
        if(isset($base_part["port"]))
            $base_root .= ":".$base_part["port"];

        // The rules run in order, each on the result of the previous one:
        //  1. strip "http://<our host>" when a path follows (the lookahead
        //     keeps "host:port" and "host.other.tld" from matching)
        //  2. root-relative link  -> prefix with the base root
        //  3. any other non-absolute link -> prefix with the base directory
        //  4. collapse "/./"
        //  5. collapse "/dir/../"
        $search = array(    "|^http://".preg_quote($this->host,"|")."(?=/)|i",
                            "|^(\/)|i",
                            "|^(?!https?://)(?!mailto:)|i",
                            "|/\./|",
                            "|/[^\/]+/\.\./|"
                        );

        $replace = array(   "",
                            $base_root."/",
                            $base."/",
                            "/",
                            "/"
                        );

        $expandedLinks = preg_replace($search,$replace,$links);

        return $expandedLinks;
    }
 774  
 775  /*======================================================================*\
 776      Function:    _httprequest
 777      Purpose:    go get the http data from the server
 778      Input:        $url        the url to fetch
 779                  $fp            the current open file pointer
 780                  $URI        the full URI
 781                  $body        body contents to send if any (POST)
 782      Output:        true on success, false if reading the response timed out
 783  \*======================================================================*/
 784  
	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
    {
        // Sends one HTTP request over the already-open stream $fp and reads
        // the whole response.
        //
        // Input:   $url           request target for the request line ("/" if empty)
        //          $fp            open stream to the server (or proxy)
        //          $URI           the full URI; its scheme is used for redirects
        //          $http_method   request method, e.g. GET or POST
        //          $content_type  value for the Content-type header, if any
        //          $body          request body, if any
        // Output:  true on success; false (with $this->status = -100) when a
        //          read timed out
        //
        // On success the response is stored on the object: status,
        // response_code, headers, _redirectaddr, results and _frameurls.

        $cookie_headers = '';
        if($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        if(empty($url))
            $url = "/";

        // ---- build the request head ----
        $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
        if(!empty($this->agent))
            $headers .= "User-Agent: ".$this->agent."\r\n";
        // a Host entry in rawheaders takes precedence over the generated one
        if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
            $headers .= "Host: ".$this->host;
            if(!empty($this->port))
                $headers .= ":".$this->port;
            $headers .= "\r\n";
        }
        if(!empty($this->accept))
            $headers .= "Accept: ".$this->accept."\r\n";
        if(!empty($this->referer))
            $headers .= "Referer: ".$this->referer."\r\n";
        if(!empty($this->cookies))
        {
            if(!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            if ( count($this->cookies) > 0 ) {
                $cookie_headers .= 'Cookie: ';
                foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                    $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
                }
                // drop the trailing "; "
                $headers .= substr($cookie_headers,0,-2) . "\r\n";
            }
        }
        if(!empty($this->rawheaders))
        {
            if(!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            // foreach always starts at the first element, so the raw headers
            // are also sent on follow-up requests (redirects, frames)
            foreach($this->rawheaders as $headerKey => $headerVal)
                $headers .= $headerKey.": ".$headerVal."\r\n";
        }
        if(!empty($content_type)) {
            $headers .= "Content-type: $content_type";
            if ($content_type == "multipart/form-data")
                $headers .= "; boundary=".$this->_mime_boundary;
            $headers .= "\r\n";
        }
        if(!empty($body))
            $headers .= "Content-length: ".strlen($body)."\r\n";
        if(!empty($this->user) || !empty($this->pass))
            $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

        //add proxy auth headers
        if(!empty($this->proxy_user))
            $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

        // blank line terminates the request head
        $headers .= "\r\n";

        // set the read timeout if needed
        if ($this->read_timeout > 0)
            socket_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;

        fwrite($fp,$headers.$body,strlen($headers.$body));

        // ---- read the response head ----
        $this->_redirectaddr = false;
        // start from an empty list so that headers is an array even when the
        // server sends nothing back
        $this->headers = array();

        while($currentHeader = fgets($fp,$this->_maxlinelen))
        {
            if ($this->read_timeout > 0 && $this->_check_timeout($fp))
            {
                $this->status=-100;
                return false;
            }

            // an empty line ends the header section; accept a bare LF as
            // well as CRLF
            if(rtrim($currentHeader,"\r\n") === "")
                break;

            // if a header begins with Location: or URI:, set the redirect
            // (whitespace after the colon is optional)
            if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches))
            {
                // look for :// in the Location header to see if hostname is included
                if(!preg_match("|\:\/\/|",$matches[2]))
                {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                    // eliminate double slash
                    if(!preg_match("|^/|",$matches[2]))
                        $this->_redirectaddr .= "/".$matches[2];
                    else
                        $this->_redirectaddr .= $matches[2];
                }
                else
                    $this->_redirectaddr = $matches[2];
            }

            // status line: keep the numeric code and the raw line
            if(preg_match("|^HTTP/|",$currentHeader))
            {
                if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
                {
                    $this->status= $status[1];
                }
                $this->response_code = $currentHeader;
            }

            $this->headers[] = $currentHeader;
        }

        // ---- read the response body until the stream yields no more data ----
        $results = '';
        while(true)
        {
            $_data = fread($fp, $this->maxlength);
            if ($_data === false || strlen($_data) == 0)
                break;
            $results .= $_data;
        }

        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // check if there is a redirect meta tag

        if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
        {
            $this->results[] = $results;
            for($x=0; $x<count($match[1]); $x++)
                $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif(is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return true;
    }
 938  
 939  /*======================================================================*\
 940      Function:    _httpsrequest
 941      Purpose:    go get the https data from the server using curl
 942      Input:        $url        the url to fetch
 943                  $URI        the full URI
 944                  $body        body contents to send if any (POST)
 945      Output:        true on success, false if the request could not be completed
 946  \*======================================================================*/
 947  
	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
    {
        // Fetches an https URI by running the external curl binary
        // ($this->curl_path) and parsing the headers it dumps to a temp file.
        //
        // Input:   $url           unused here (curl builds the request line itself)
        //          $URI           the full URI to fetch
        //          $http_method   unused here; curl sends a POST when -d is given
        //          $content_type  value for the Content-type header, if any
        //          $body          request body, if any
        // Output:  true on success; false (with $this->error set) when the
        //          temp file cannot be created or curl exits non-zero
        //
        // On success the response is stored on the object: response_code,
        // headers, _redirectaddr, results and _frameurls.

        if($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $headers = array();

        $URI_PARTS = parse_url($URI);

        // ---- collect the request headers ----
        if(!empty($this->agent))
            $headers[] = "User-Agent: ".$this->agent;
        if(!empty($this->host))
        {
            if(!empty($this->port))
                $headers[] = "Host: ".$this->host.":".$this->port;
            else
                $headers[] = "Host: ".$this->host;
        }
        if(!empty($this->accept))
            $headers[] = "Accept: ".$this->accept;
        if(!empty($this->referer))
            $headers[] = "Referer: ".$this->referer;
        if(!empty($this->cookies))
        {
            if(!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            if ( count($this->cookies) > 0 ) {
                $cookie_str = 'Cookie: ';
                foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                    $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
                }
                // drop the trailing "; "
                $headers[] = substr($cookie_str,0,-2);
            }
        }
        if(!empty($this->rawheaders))
        {
            if(!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            foreach($this->rawheaders as $headerKey => $headerVal)
                $headers[] = $headerKey.": ".$headerVal;
        }
        if(!empty($content_type)) {
            if ($content_type == "multipart/form-data")
                $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
            else
                $headers[] = "Content-type: $content_type";
        }
        if(!empty($body))
            $headers[] = "Content-length: ".strlen($body);
        if(!empty($this->user) || !empty($this->pass))
            $headers[] = "Authorization: Basic ".base64_encode($this->user.":".$this->pass);

        // ---- build the curl command line ----
        // Every value is passed through escapeshellarg() on its own, so
        // quotes or shell metacharacters in a header, the body or the URI
        // cannot break out of their argument.
        // NOTE(review): on Windows escapeshellarg() replaces "%" with a
        // space, which would damage percent-encoded URIs -- confirm if
        // Windows hosts matter.
        $cmdline_params = "";
        foreach($headers as $header)
            $cmdline_params .= " -H ".escapeshellarg($header);

        if(!empty($body))
            $cmdline_params .= " -d ".escapeshellarg($body);

        if($this->read_timeout > 0)
            $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

        // Use the object's temp_dir when one is set; an empty directory
        // makes tempnam() fall back to the system temp directory.
        // NOTE(review): assumes the class declares a temp_dir property --
        // not visible from this part of the file.
        $temp_dir = !empty($this->temp_dir) ? $this->temp_dir : "";
        $headerfile = tempnam($temp_dir, "sno");
        if($headerfile === false)
        {
            $this->error = "Error: could not create a temporary file for the cURL headers.";
            return false;
        }

        $results = array();
        exec(escapeshellcmd($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

        if($return)
        {
            // do not leave the temp file behind on failure
            unlink($headerfile);
            $this->error = "Error: cURL could not retrieve the document, error $return.";
            return false;
        }

        // exec() returns the output split into lines; put it back together
        $results = implode("\r\n",$results);

        // the header dump is fully read into memory, so the file can go now
        $result_headers = file($headerfile);
        unlink($headerfile);
        if(!is_array($result_headers))
            $result_headers = array();

        // ---- parse the response headers ----
        $this->_redirectaddr = false;
        $this->headers = array();

        foreach($result_headers as $header_line)
        {
            // if a header begins with Location: or URI:, set the redirect
            // (case-insensitive, whitespace after the colon is optional)
            if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($header_line),$matches))
            {
                // look for :// in the Location header to see if hostname is included
                if(!preg_match("|\:\/\/|",$matches[2]))
                {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                    // eliminate double slash
                    if(!preg_match("|^/|",$matches[2]))
                        $this->_redirectaddr .= "/".$matches[2];
                    else
                        $this->_redirectaddr .= $matches[2];
                }
                else
                    $this->_redirectaddr = $matches[2];
            }

            if(preg_match("|^HTTP/|",$header_line))
                $this->response_code = $header_line;

            $this->headers[] = $header_line;
        }

        // check if there is a redirect meta tag

        if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
        {
            $this->results[] = $results;
            for($x=0; $x<count($match[1]); $x++)
                $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif(is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return true;
    }
1087  
1088  /*======================================================================*\
1089      Function:    setcookies()
1090      Purpose:    set cookies for a redirection
1091  \*======================================================================*/
1092  
1093  	function setcookies()
1094      {
1095          for($x=0; $x<count($this->headers); $x++)
1096          {
1097          if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1098              $this->cookies[$match[1]] = urldecode($match[2]);
1099          }
1100      }
1101  
1102  
1103  /*======================================================================*\
1104      Function:    _check_timeout
1105      Purpose:    checks whether timeout has occurred
1106      Input:        $fp    file pointer
1107  \*======================================================================*/
1108  
1109  	function _check_timeout($fp)
1110      {
1111          if ($this->read_timeout > 0) {
1112              $fp_status = socket_get_status($fp);
1113              if ($fp_status["timed_out"]) {
1114                  $this->timed_out = true;
1115                  return true;
1116              }
1117          }
1118          return false;
1119      }
1120  
1121  /*======================================================================*\
1122      Function:    _connect
1123      Purpose:    make a socket connection
1124      Input:        $fp    file pointer
1125  \*======================================================================*/
1126  
1127  	function _connect(&$fp)
1128      {
1129          if(!empty($this->proxy_host) && !empty($this->proxy_port))
1130              {
1131                  $this->_isproxy = true;
1132  
1133                  $host = $this->proxy_host;
1134                  $port = $this->proxy_port;
1135              }
1136          else
1137          {
1138              $host = $this->host;
1139              $port = $this->port;
1140          }
1141  
1142          $this->status = 0;
1143  
1144          if($fp = fsockopen(
1145                      $host,
1146                      $port,
1147                      $errno,
1148                      $errstr,
1149                      $this->_fp_timeout
1150                      ))
1151          {
1152              // socket connection succeeded
1153  
1154              return true;
1155          }
1156          else
1157          {
1158              // socket connection failed
1159              $this->status = $errno;
1160              switch($errno)
1161              {
1162                  case -3:
1163                      $this->error="socket creation failed (-3)";
1164                  case -4:
1165                      $this->error="dns lookup failure (-4)";
1166                  case -5:
1167                      $this->error="connection refused or timed out (-5)";
1168                  default:
1169                      $this->error="connection failed (".$errno.")";
1170              }
1171              return false;
1172          }
1173      }
1174  /*======================================================================*\
1175      Function:    _disconnect
1176      Purpose:    disconnect a socket connection
1177      Input:        $fp    file pointer
1178  \*======================================================================*/
1179  
1180  	function _disconnect($fp)
1181      {
1182          return(fclose($fp));
1183      }
1184  
1185  
1186  /*======================================================================*\
1187      Function:    _prepare_post_body
1188      Purpose:    Prepare post body according to encoding type
1189      Input:        $formvars  - form variables
1190                  $formfiles - form upload files
1191      Output:        post body
1192  \*======================================================================*/
1193  
1194  	function _prepare_post_body($formvars, $formfiles)
1195      {
1196          settype($formvars, "array");
1197          settype($formfiles, "array");
1198          $postdata = '';
1199  
1200          if (count($formvars) == 0 && count($formfiles) == 0)
1201              return;
1202  
1203          switch ($this->_submit_type) {
1204              case "application/x-www-form-urlencoded":
1205                  reset($formvars);
1206                  while(list($key,$val) = each($formvars)) {
1207                      if (is_array($val) || is_object($val)) {
1208                          while (list($cur_key, $cur_val) = each($val)) {
1209                              $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1210                          }
1211                      } else
1212                          $postdata .= urlencode($key)."=".urlencode($val)."&";
1213                  }
1214                  break;
1215  
1216              case "multipart/form-data":
1217                  $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1218  
1219                  reset($formvars);
1220                  while(list($key,$val) = each($formvars)) {
1221                      if (is_array($val) || is_object($val)) {
1222                          while (list($cur_key, $cur_val) = each($val)) {
1223                              $postdata .= "--".$this->_mime_boundary."\r\n";
1224                              $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1225                              $postdata .= "$cur_val\r\n";
1226                          }
1227                      } else {
1228                          $postdata .= "--".$this->_mime_boundary."\r\n";
1229                          $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1230                          $postdata .= "$val\r\n";
1231                      }
1232                  }
1233  
1234                  reset($formfiles);
1235                  while (list($field_name, $file_names) = each($formfiles)) {
1236                      settype($file_names, "array");
1237                      while (list(, $file_name) = each($file_names)) {
1238                          if (!is_readable($file_name)) continue;
1239  
1240                          $fp = fopen($file_name, "r");
1241                          while (!feof($fp)) {
1242                              $file_content .= fread($fp, filesize($file_name));
1243                          }
1244                          fclose($fp);
1245                          $base_name = basename($file_name);
1246  
1247                          $postdata .= "--".$this->_mime_boundary."\r\n";
1248                          $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1249                          $postdata .= "$file_content\r\n";
1250                      }
1251                  }
1252                  $postdata .= "--".$this->_mime_boundary."--\r\n";
1253                  break;
1254          }
1255  
1256          return $postdata;
1257      }
1258  }
1259  endif;
1260  
1261  ?>


Généré le : Fri Mar 30 19:41:27 2007 par Balluche grâce à PHPXref 0.7