[ Index ] |
|
Code source de Horde 3.1.3 |
<?php
/**
 * The Text_reST_Parser:: class implements a parser for reStructuredText
 * documents.
 *
 * $Horde: framework/Text_reST/reST/Parser.php,v 1.8.2.8 2006/01/01 21:28:39 jan Exp $
 *
 * Copyright 2003-2006 Jason M. Felice <jfelice@cronosys.com>
 *
 * See the enclosed file COPYING for license information (LGPL). If you did not
 * receive this file, see http://www.fsf.org/copyleft/lgpl.html.
 *
 * The code avoids constructs that no longer parse or run on current PHP
 * (`=& new`, curly-brace string offsets, class-named constructors only), so
 * it relies on PHP 5+ object-handle semantics instead of explicit object
 * references.
 *
 * @author  Jason M. Felice <jfelice@cronosys.com>
 * @package Text_reST
 */
class Text_reST_Parser {

    /**
     * The parse tree.
     *
     * @var Text_reST
     */
    var $_document;

    /**
     * A hash of adornment levels.
     *
     * The keys are one-character or two-character strings.  The one-character
     * strings represent underline adornments of the specified character, and
     * the double-character keys are the underline-and-overline styles.  The
     * values associated with the keys are integers representing the
     * adornment's heading level.
     *
     * @var array
     */
    var $_adornmentLevels = array();

    /**
     * Constructor.
     */
    function __construct()
    {
    }

    /**
     * Legacy (class-named) constructor, kept so existing code that calls it
     * explicitly keeps working.
     */
    function Text_reST_Parser()
    {
        $this->__construct();
    }

    /**
     * Returns a parse tree representing a document.
     *
     * All per-document state (line buffer, heading levels, state stack and
     * the reference/definition tables) is reset first, so one parser instance
     * can be used for several documents.
     *
     * @param string $text  This is the text of the document to parse.
     *
     * @return Text_reST  The parse tree.
     */
    function &parse($text)
    {
        // Only pull in the node class when nothing has defined it yet.
        if (!class_exists('Text_reST')) {
            require_once dirname(__FILE__) . '/../reST.php';
        }

        $this->_text = (string)$text;
        $this->_lineBuffer = array();
        $this->_adornmentLevels = array();
        $this->_stateStack = array();
        $this->_anonymousReferences = array();
        $this->_anonymousDefinitions = array();
        $this->_namedReferences = array();
        $this->_namedDefinitions = array();

        $this->_document = new Text_reST('Document');
        $this->_pushState($this->_document, 'Section', 0);

        while ($this->_next()) {
            $line = $this->_lineBuffer[0];

            //
            // Parse a `..' directive.  We rewrite an `__' directive to a
            // `.. __: ' directive here.
            //
            if (preg_match('/^(\.\.|__)\s+(.*?)\s*$/', $line, $m)) {
                $directive = ($m[1] == '__') ? '__: ' . $m[2] : $m[2];

                // Indented follow-up lines belong to the directive.  Any
                // amount of leading spaces is accepted (tabs have already
                // been expanded by _getLine()).
                while ($this->_ensureLines(2) &&
                       preg_match('/^ +([^\s].*?)\s*$/',
                                  $this->_lineBuffer[1], $m)) {
                    $directive .= ' ' . $m[1];
                    $this->_next();
                }
                $this->_parseDirective($directive);
                continue;
            }

            //
            // Look for overline-and-underline headings.
            //
            if ($this->_ensureLines(3) &&
                preg_match('/^(.+?)\s*$/', $this->_lineBuffer[1], $lineMatch) &&
                $this->_checkAdornment(array(0, 2), strlen($lineMatch[1]))) {

                $chr = $line[0];

                // Step over the overline and the title; the loop's own
                // _next() then steps over the underline.
                $this->_next();
                $this->_next();

                // The title may be indented between the two adornments.
                preg_match('/^\s*(.*?)\s*$/', $lineMatch[1], $titleMatch);
                $this->_startSection($chr . $chr, $titleMatch[1]);
                continue;
            }

            //
            // Look for underline headings.
            //
            if ($this->_ensureLines(2) &&
                preg_match('/^([^\s].*?)\s*$/', $line, $lineMatch) &&
                $this->_checkAdornment(array(1), strlen($lineMatch[1]))) {

                $chr = $this->_lineBuffer[1][0];
                $this->_next();
                $this->_startSection($chr, $lineMatch[1]);
                continue;
            }

            //
            // Parse a `::' paragraph.
            //
            if (preg_match('/^\s*::\s*$/', $line)) {
                // Without a following line there is no literal block.
                if ($this->_next()) {
                    $this->_parseLiteralBlock();
                }
                continue;
            }

            //
            // Parse a paragraph.  We end the paragraph when we return to
            // a lower indentation level or encounter a blank line.
            //
            if (preg_match('/^(\s*)([^\s].*?)\s*$/', $line, $m)) {
                $paragraph = $m[2];
                $indent = strlen($m[1]);
                while ($this->_ensureLines(2) &&
                       preg_match('/^(\s*)([^\s].*?)\s*$/',
                                  $this->_lineBuffer[1], $m)) {
                    if (strlen($m[1]) < $indent) {
                        break;
                    }
                    $paragraph .= ' ' . $m[2];
                    $this->_next();
                }

                // "text::" keeps one colon and introduces a literal block;
                // "text ::" drops the marker entirely.
                $trailingLiteral = false;
                if (preg_match('/^(.*[^\s]:):\s*$/', $paragraph, $m)) {
                    $paragraph = $m[1];
                    $trailingLiteral = true;
                } elseif (preg_match('/^(.*?)\s*::\s*$/', $paragraph, $m)) {
                    $paragraph = $m[1];
                    $trailingLiteral = true;
                }

                $this->_makeNode($this->_currentNode, 'Paragraph', array(),
                                 $paragraph);

                if ($trailingLiteral && $this->_next()) {
                    $this->_parseLiteralBlock();
                }
                continue;
            }

            // XXX: Handle garbage line.  Currently such lines (including
            // blank ones) are skipped.
        }

        // Hand back a local handle so that a caller binding the result by
        // reference is not tied to $this->_document.
        $document = $this->_document;
        return $document;
    }

    /**
     * Opens a new section for a heading and adds the heading node to it.
     *
     * A known adornment style re-uses its recorded level (closing deeper
     * sections first); an unknown style becomes one level deeper than the
     * current section and is recorded.
     *
     * @access private
     *
     * @param string $adornmentType  Key into $_adornmentLevels.
     * @param string $title          The heading text (parsed as inline text).
     */
    function _startSection($adornmentType, $title)
    {
        if (isset($this->_adornmentLevels[$adornmentType])) {
            $level = $this->_adornmentLevels[$adornmentType];
            $this->_popToLevel('Section', $level - 1);
        } else {
            $level = $this->_getStateLevel('Section') + 1;
            $this->_adornmentLevels[$adornmentType] = $level;
        }

        $section = $this->_makeNode($this->_currentNode, 'Section',
                                    array('level' => $level));
        $this->_pushState($section, 'Section', $level);
        $this->_makeNode($this->_currentNode, 'Heading',
                         array('level' => $level), $title);
    }

    /**
     * Creates a parse node, optionally attaching it to a parent and filling
     * it with parsed inline text.
     *
     * @access private
     *
     * @param Text_reST &$parent   The node to append to, or null for none.
     * @param string $type         The node type.
     * @param array $props         Property name => value pairs to set.
     * @param string $childText    Inline text to parse into the node, or
     *                             null for none.
     *
     * @return Text_reST  The new node.
     */
    function &_makeNode(&$parent, $type, $props = array(), $childText = null)
    {
        $node = new Text_reST($type);
        foreach ($props as $name => $value) {
            $node->setProperty($name, $value);
        }
        if (!is_null($parent)) {
            $parent->appendChild($node);
        }
        if (!is_null($childText)) {
            $this->_parseInline($node, $childText);
        }
        return $node;
    }

    /**
     * Checks multiple adornment lines in the line buffer and makes sure they
     * are adornments and that all are identical adornments.
     *
     * @access private
     *
     * @param array $lines        An array of line numbers to check if they are
     *                            adornments.
     * @param integer $minLength  The minimum length for this adornment.  The
     *                            default is 1.
     *
     * @return boolean  Whether this line is an adornment which matches the
     *                  above criteria.
     */
    function _checkAdornment($lines = array(0), $minLength = 1)
    {
        $chr = null;
        foreach ($lines as $i) {
            if (!isset($this->_lineBuffer[$i]) ||
                !preg_match('/^([^a-zA-Z0-9\x7f-\xff\s]+)\s*$/',
                            $this->_lineBuffer[$i], $m)) {
                return false;
            }

            $length = strlen($m[1]);
            if (is_null($chr)) {
                // The first line fixes the character and the exact length
                // every later line has to have.
                if ($length < $minLength) {
                    return false;
                }
                $chr = $m[1][0];
                $minLength = $length;
            } elseif ($length != $minLength) {
                return false;
            }

            // The whole run must consist of that one character.
            if ($m[1] !== str_repeat($chr, $length)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Parses inline markup and appends the resulting children to a node.
     *
     * The text is consumed from the left: at each position the first
     * matching construct (strong, emphasis, literal, interpreted text,
     * links, bare URLs, e-mail addresses, backslash escapes) is turned into
     * a child; anything else is appended one character at a time.
     *
     * @access private
     *
     * @param Text_reST &$node  The node receiving the children.
     * @param string $text      The inline text to parse.
     *
     * @return Text_reST  The node that was passed in.
     */
    function &_parseInline(&$node, $text)
    {
        static $schemas = array('http',
                                'https',
                                'ftp',
                                'irc',
                                'telnet',
                                'news');

        // Loop-invariant, so build it once.
        $urlPattern = '/^((?:' . join('|', $schemas) . '):\/\/[-0-9a-z#%&+.\/:;?_\\~]+[-0-9a-z#%&+\/_\\~])(.*)$/i';

        $text = (string)$text;
        while (strlen($text) > 0) {
            if (preg_match('/^\*\*((?:\\\\.|[^\\\\])*?)\*\*(.*)$/', $text, $m)) {
                $this->_makeNode($node, 'Interpreted-Text',
                                 array('role' => 'strong'),
                                 $m[1]);
                $text = $m[2];
            } elseif (preg_match('/^\*((?:\\\\.|[^\\\\])*?)\*(.*)$/', $text, $m)) {
                $this->_makeNode($node, 'Interpreted-Text',
                                 array('role' => 'emphasis'),
                                 $m[1]);
                $text = $m[2];
            } elseif (preg_match('/^``(.*?)``(.*)$/', $text, $m)) {
                // Literal content is stored verbatim, never re-parsed.
                $sub = $this->_makeNode($node, 'Interpreted-Text',
                                        array('role' => 'literal'));
                $sub->appendChild($m[1]);
                $text = $m[2];
            } elseif (preg_match('/^:([a-z-]+):`((?:\\\\.|[^\\\\])*?)`(.*)$/',
                                 $text, $m)) {
                // Role prefix: :role:`text`
                $this->_appendInterpretedText($node, $m[1], $m[2]);
                $text = $m[3];
            } elseif (preg_match('/^`((?:\\\\.|[^\\\\])*?)`:([a-z-]+):(.*)$/',
                                 $text, $m)) {
                // Role suffix: `text`:role:
                $this->_appendInterpretedText($node, $m[2], $m[1]);
                $text = $m[3];
            } elseif (preg_match('/^`((?:\\\\.|[^\\\\])*?)`__(.*)$/',
                                 $text, $m)) {
                $this->_parseLink($node, $m[1], true);
                $text = $m[2];
            } elseif (preg_match('/^`((?:\\\\.|[^\\\\])*?)`_(.*)$/',
                                 $text, $m)) {
                $this->_parseLink($node, $m[1], false);
                $text = $m[2];
            } elseif (preg_match('/^`((?:\\\\.|[^\\\\])*?)`(.*)$/',
                                 $text, $m)) {
                $this->_makeNode($node, 'Interpreted-Text',
                                 array('role' => 'title-reference'),
                                 $m[1]);
                $text = $m[2];
            } elseif (preg_match($urlPattern, $text, $m)) {
                $sub = $this->_makeNode($node, 'Link', array('href' => $m[1]));
                $sub->appendChild($m[1]);
                $text = $m[2];
            } elseif (preg_match('/^([a-z0-9-]+@[a-z0-9-\.]+\.[a-z0-9-]+)(.*)$/i',
                                 $text, $m)) {
                $sub = $this->_makeNode($node, 'Link',
                                        array('href' => 'mailto:' . $m[1]));
                $sub->appendChild($m[1]);
                $text = $m[2];
            } elseif (preg_match('/^(\w+)_\b(.*)$/', $text, $m)) {
                $this->_parseLink($node, $m[1], false);
                $text = $m[2];
            } elseif (preg_match('/^\\\\\s(.*)$/', $text, $m)) {
                // Backslash-escaped whitespace characters are removed from
                // the document.
                $text = $m[1];
            } elseif (preg_match('/^\\\\(.)(.*)$/', $text, $m)) {
                // Any other escaped character is emitted literally.
                $node->appendChild($m[1]);
                $text = $m[2];
            } else {
                // XXX: We should try to use a regexp to grab as much text as
                // possible, then fall through to the single-character case
                // if we can't get anything.
                $node->appendChild(substr($text, 0, 1));
                $text = (string)substr($text, 1);
            }
        }

        // Return a local handle rather than a reference to the caller's
        // variable.
        $result = $node;
        return $result;
    }

    /**
     * Appends an interpreted-text node with the given role.
     *
     * The short role names `sup' and `sub' are expanded.  Content of the
     * `literal' role is stored verbatim; content of any other role is parsed
     * as inline text.
     *
     * @access private
     *
     * @param Text_reST &$node  The parent node.
     * @param string $role      The role name as written in the document.
     * @param string $content   The text between the backquotes.
     */
    function _appendInterpretedText(&$node, $role, $content)
    {
        static $aliases = array('sup' => 'superscript',
                                'sub' => 'subscript');

        if (isset($aliases[$role])) {
            $role = $aliases[$role];
        }

        $sub = $this->_makeNode($node, 'Interpreted-Text',
                                array('role' => $role));
        if ($role == 'literal') {
            $sub->appendChild($content);
        } else {
            $this->_parseInline($sub, $content);
        }
    }

    /**
     * Parses an anonymous or named link.
     *
     * @access private
     *
     * @param Text_reST &$node    The parent node for the link.
     * @param string $text        The text to parse.
     * @param boolean $anonymous  Whether this is an anonymous link.
     *
     * @return Text_reST  The new link node.
     */
    function &_parseLink(&$node, $text, $anonymous = false)
    {
        $link = $this->_makeNode($node, 'Link');

        if (preg_match('/<(.*)>/', $text, $m)) {
            // Embedded target: "label <target>".
            $link->setProperty('href', $m[1]);
            if (preg_match('/^([^<]+?)\s*</', $text, $m)) {
                $link->appendChild($m[1]);
                if (!$anonymous) {
                    $link->setProperty('name', $this->_normalizeName($m[1]));
                }
            }
        } else {
            if (!$anonymous) {
                $link->setProperty('name', $this->_normalizeName($text));
            }
            $link->appendChild($text);
        }

        // Anonymous links without a target wait for the next anonymous
        // definition; named links are matched up by name.
        if ($anonymous && is_null($link->getProperty('href'))) {
            $this->_queueAnonymousReference($link, 'link');
        } elseif (!$anonymous && !is_null($link->getProperty('name'))) {
            $this->_putNamedReference($link, 'link');
        }

        return $link;
    }

    /**
     * Normalizes an object name.
     * This means that we lowercase it and normalize any whitespace in it.
     *
     * @param string $name  A name to normalize.
     *
     * @return string  The normalized name.
     */
    function _normalizeName($name)
    {
        return preg_replace('/\s+/', ' ', strtolower($name));
    }

    /**
     * Prefixes a link target with `mailto:' when it is a bare e-mail
     * address; any other target is returned unchanged.
     *
     * @access private
     *
     * @param string $target  The link target from a definition.
     *
     * @return string  The target to store as the link's href.
     */
    function _mailtoIfEmail($target)
    {
        if (preg_match('/^[a-z0-9-]+@[a-z0-9-\.]+\.[a-z0-9-]+$/i', $target)) {
            return 'mailto:' . $target;
        }
        return $target;
    }

    /**
     * Parses and executes a `..' directive.
     *
     * Only anonymous (`__: target') and named (`_name: target') link
     * definitions are handled; any other directive is ignored.
     *
     * @access private
     *
     * @param string $text  A directive to execute, less the leading `.. '.
     */
    function _parseDirective($text)
    {
        if (preg_match('/^__:\s*(.*?)\s*$/', $text, $m)) {
            //
            // Anonymous link definition
            //
            $defn = new Text_reST('Link');
            $defn->setProperty('href', $this->_mailtoIfEmail($m[1]));
            $this->_queueAnonymousDefinition($defn, 'link');
        } elseif (preg_match('/^\s*_(.*?):\s*(.*?)\s*$/', $text, $m)) {
            //
            // Named link definition
            //
            $defn = new Text_reST('Link');
            $defn->setProperty('name', $this->_normalizeName($m[1]));
            $defn->setProperty('href', $this->_mailtoIfEmail($m[2]));
            $this->_putNamedDefinition($defn, 'link');
        }
    }

    /**
     * Skips blank lines until we find one we can get the indentation level
     * from, then, gathers lines until we have a different level.
     *
     * On entry the first candidate line is at the head of the line buffer.
     * On return the buffer is arranged so that the caller's next _next()
     * lands on the first line that was not consumed here (or fails at the end
     * of the input).
     *
     * @access private
     *
     * @return boolean  Whether a literal block was added.
     */
    function _parseLiteralBlock()
    {
        if (!$this->_ensureLines(1)) {
            return false;
        }

        while (preg_match('/^\s*$/', $this->_lineBuffer[0])) {
            if (!$this->_next()) {
                return false;
            }
        }

        if (!preg_match('/^(\s+)(.*?)\s*$/', $this->_lineBuffer[0], $m)) {
            // Not indented, so not a literal block.  The line has not been
            // consumed; park a dummy line in front of it so the caller's
            // _next() does not skip it.
            array_unshift($this->_lineBuffer, '');
            return false;
        }
        $level = strlen($m[1]);
        $text = $m[2];

        // Accept lines indented at least as deep as the first one (keeping
        // any extra indentation) as well as blank lines.
        $re = '/^(?: {' . $level . '}(.*?)|())\s*$/';
        $unconsumed = false;
        while ($this->_next()) {
            if (!preg_match($re, $this->_lineBuffer[0], $m)) {
                $unconsumed = true;
                break;
            }
            $text .= "\n" . $m[1];
        }

        $block = $this->_makeNode($this->_currentNode, 'Literal-Block', array());
        $block->appendChild(preg_replace('/\s+$/s', '', $text));

        // XXX: Dirty hack!  The caller advances with _next(), which would
        // skip the line that ended the block, so put a dummy line in front
        // of it.  When the block ran to the end of the input there is
        // nothing left to protect; adding the dummy line then would make the
        // last literal line be parsed a second time.
        if ($unconsumed) {
            array_unshift($this->_lineBuffer, '');
        }
        return true;
    }

    //----
    // Line-reading members
    //----

    /**
     * The remainder of the text we are parsing, being modified by _getLine()
     * and _next().
     *
     * @access private
     * @var string
     */
    var $_text;

    /**
     * An array of the lines we have peeked at.
     *
     * The first element is the line we are currently working with and so on.
     *
     * @access private
     * @var array
     */
    var $_lineBuffer = array();

    /**
     * Retrieves the next line from a block of text.
     *
     * We replace tabs with 8 spaces.
     *
     * @access private
     *
     * @return string  The next line without its newline, or null when no
     *                 text is left.
     */
    function _getLine()
    {
        if (strlen((string)$this->_text) == 0) {
            return null;
        }

        $eol = strpos($this->_text, "\n");
        if ($eol === false) {
            // Last line, without a trailing newline.
            $line = $this->_text;
            $this->_text = '';
        } else {
            $line = substr($this->_text, 0, $eol);
            $this->_text = (string)substr($this->_text, $eol + 1);
        }
        return str_replace("\t", str_repeat(' ', 8), $line);
    }

    /**
     * Bumps to the next line in the input.
     *
     * @access private
     *
     * @return boolean  Whether there was another line to move to.
     */
    function _next()
    {
        // Special case the first time 'round.
        if (count($this->_lineBuffer) == 0) {
            return $this->_ensureLines(1);
        }

        // Only drop the current line when there is one to replace it.
        if (!$this->_ensureLines(2)) {
            return false;
        }
        array_shift($this->_lineBuffer);
        return true;
    }

    /**
     * Makes sure there is a certain number of lines at minimum in the line
     * buffer.
     *
     * @access private
     *
     * @param integer $count  This is the number of lines which must be in the
     *                        buffer.
     *
     * @return boolean  Whether or not we succeeded.  We can fail at
     *                  end-of-file.
     */
    function _ensureLines($count = 1)
    {
        while (count($this->_lineBuffer) < $count) {
            $line = $this->_getLine();
            if (is_null($line)) {
                return false;
            }
            $this->_lineBuffer[] = $line;
        }
        return true;
    }

    //----
    // Anonymous references and definitions
    //----

    var $_anonymousReferences = array();
    var $_anonymousDefinitions = array();

    /**
     * Since anonymous references and definitions (e.g. footnotes, links) do
     * not need to be defined "in lockstep" according to the spec, we create
     * the partial parse node in both places and use this nifty system to
     * queue or merge in each place.  Note that the reference is the "master"
     * node.  The definition gets thrown away since it really isn't in the
     * parse tree anyway.
     *
     * @access private
     *
     * @param object &$node  The node to queue or merge to.
     * @param string $type   The type of anonymous object.
     */
    function _queueAnonymousReference(&$node, $type)
    {
        if (!array_key_exists($type, $this->_anonymousDefinitions)) {
            $this->_anonymousDefinitions[$type] = array();
        }

        if (count($this->_anonymousDefinitions[$type]) > 0) {
            // A definition is already waiting: pair it with this reference.
            $defn = array_shift($this->_anonymousDefinitions[$type]);
            $this->_mergeNodeProperties($node, $defn);
        } else {
            $this->_anonymousReferences[$type][] = $node;
        }
    }

    /**
     * Handles an anonymous definition.
     *
     * @access private
     *
     * @param object &$node  The node to queue or merge from.
     * @param string $type   The type of anonymous object.
     */
    function _queueAnonymousDefinition(&$node, $type)
    {
        if (!array_key_exists($type, $this->_anonymousReferences)) {
            $this->_anonymousReferences[$type] = array();
        }

        if (count($this->_anonymousReferences[$type]) > 0) {
            // A reference is already waiting: pair it with this definition.
            $ref = array_shift($this->_anonymousReferences[$type]);
            $this->_mergeNodeProperties($ref, $node);
        } else {
            $this->_anonymousDefinitions[$type][] = $node;
        }
    }

    /**
     * Merges the properties from each node into the other node.
     *
     * The node type is not changed (for the case where we have a footnote
     * reference and a footnote definition), but both nodes will have all
     * properties.
     *
     * @access private
     *
     * @param object &$node  The reference node.
     * @param object &$defn  The definition node.
     */
    function _mergeNodeProperties(&$node, &$defn)
    {
        // XXX: We should make sure there is no collision.  As it stands the
        // definition's value wins for a property both nodes have.
        foreach ($defn->_properties as $name => $value) {
            $node->setProperty($name, $value);
        }
        foreach ($node->_properties as $name => $value) {
            $defn->setProperty($name, $value);
        }
    }

    //----
    // Named references and definitions
    //----

    var $_namedReferences = array();
    var $_namedDefinitions = array();

    /**
     * Stores a named reference parse node in a hash so we can later merge
     * properties with a definition.  If we already have a definition, do
     * the merge now.
     *
     * Only the first reference for a given type and name is stored; a later
     * one is rejected without being merged.
     *
     * @access private
     *
     * @param Text_reST &$node  The parse tree node.
     * @param string $type      The type of named reference.
     *
     * @return boolean  Whether or not we successfully added the reference.
     */
    function _putNamedReference(&$node, $type)
    {
        $name = $node->getProperty('name');
        if (isset($this->_namedReferences[$type][$name])) {
            return false;
        }

        $this->_namedReferences[$type][$name] = $node;
        if (isset($this->_namedDefinitions[$type][$name])) {
            $defn = $this->_namedDefinitions[$type][$name];
            $this->_mergeNodeProperties($node, $defn);
        }
        return true;
    }

    /**
     * The inverse of {@link _putNamedReference()}.
     *
     * @access private
     *
     * @param Text_reST &$node  The definition node.
     * @param string $type      The type of named definition.
     *
     * @return boolean  Whether or not we successfully added the definition.
     */
    function _putNamedDefinition(&$node, $type)
    {
        $name = $node->getProperty('name');
        if (isset($this->_namedDefinitions[$type][$name])) {
            return false;
        }

        $this->_namedDefinitions[$type][$name] = $node;
        if (isset($this->_namedReferences[$type][$name])) {
            $ref = $this->_namedReferences[$type][$name];
            $this->_mergeNodeProperties($ref, $node);
        }
        return true;
    }

    //----
    // State stack management
    //----

    /**
     * The state stack.
     *
     * It is used to keep track of nested body-level elements and how they
     * might end.
     *
     * @var array
     */
    var $_stateStack = array();

    /**
     * The node new body-level elements are appended to; this is the node of
     * the state on top of the stack.
     *
     * @var Text_reST
     */
    var $_currentNode;

    /**
     * Pushes a new state and makes its node the current node.
     *
     * @access private
     *
     * @param Text_reST &$node   The parse node for the new state.
     * @param string $stateType  The state type.
     * @param mixed $level       The nesting level of the state.
     */
    function _pushState(&$node, $stateType, $level)
    {
        $this->_stateStack[] = new Text_reST_Parser_state($node, $stateType, $level);
        $this->_currentNode = $node;
    }

    /**
     * Returns the level of the innermost state of the given type.
     *
     * @access private
     *
     * @param string $stateType  The state type to look for.
     *
     * @return mixed  The level, or 0 when no such state is on the stack.
     */
    function _getStateLevel($stateType)
    {
        for ($i = count($this->_stateStack) - 1; $i >= 0; $i--) {
            if ($this->_stateStack[$i]->stateType == $stateType) {
                return $this->_stateStack[$i]->level;
            }
        }
        return 0;
    }

    /**
     * Pops states until the innermost state of the given type is at or
     * below the given level.
     *
     * @access private
     *
     * @param string $stateType  The state type to look at.
     * @param mixed $level       The level to return to.
     */
    function _popToLevel($stateType, $level)
    {
        while ($this->_getStateLevel($stateType) > $level) {
            $this->_pop();
        }
    }

    /**
     * Pops the top state and makes the node of the new top state (if any)
     * the current node.
     *
     * @access private
     */
    function _pop()
    {
        array_pop($this->_stateStack);
        $depth = count($this->_stateStack);
        if ($depth) {
            $this->_currentNode = $this->_stateStack[$depth - 1]->node;
        }
    }

}

/**
 * This class represents a node on the parser's state stack.
 *
 * @package Text_reST
 */
class Text_reST_Parser_state {

    var $node;
    var $stateType;
    var $level;

    /**
     * Constructor.
     *
     * @param object &$node      This is the parse node associated with this
     *                           state.  Block-level elements parsed in this
     *                           state will be children of this node.
     * @param string $stateType  Currently only 'Section'.
     * @param mixed $level       This is the nesting level of this state type.
     */
    function __construct(&$node, $stateType, $level)
    {
        $this->node = $node;
        $this->stateType = $stateType;
        $this->level = $level;
    }

    /**
     * Legacy (class-named) constructor, kept so existing code that calls it
     * explicitly keeps working.
     *
     * @param object &$node      See {@link __construct()}.
     * @param string $stateType  See {@link __construct()}.
     * @param mixed $level       See {@link __construct()}.
     */
    function Text_reST_Parser_state(&$node, $stateType, $level)
    {
        $this->__construct($node, $stateType, $level);
    }

}
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sun Feb 25 18:01:28 2007 | par Balluche grâce à PHPXref 0.7 |