[ Index ] |
|
Code source de eZ Publish 3.9.0 |
1 <?php 2 // 3 // Definition of eZDiffTextEngine class 4 // 5 // <creation-tag> 6 // 7 // SOFTWARE NAME: eZ publish 8 // SOFTWARE RELEASE: 3.9.0 9 // BUILD VERSION: 17785 10 // COPYRIGHT NOTICE: Copyright (C) 1999-2006 eZ systems AS 11 // SOFTWARE LICENSE: GNU General Public License v2.0 12 // NOTICE: > 13 // This program is free software; you can redistribute it and/or 14 // modify it under the terms of version 2.0 of the GNU General 15 // Public License as published by the Free Software Foundation. 16 // 17 // This program is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 // GNU General Public License for more details. 21 // 22 // You should have received a copy of version 2.0 of the GNU General 23 // Public License along with this program; if not, write to the Free 24 // Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 25 // MA 02110-1301, USA. 26 // 27 // 28 29 /*! \file ezdifftextengine.php 30 eZDiffTextEngine class 31 */ 32 33 /*! 34 \class eZDiffTextEngine ezdifftextengine.php 35 \ingroup eZDiff 36 \brief eZDiff provides an access point the diff system 37 38 The eZDiffEngine class is an abstract class, providing interface and shared code 39 for the different available DiffEngine. 40 */ 41 42 include_once ( 'lib/ezdiff/classes/ezdiffengine.php' ); 43 include_once ( 'lib/ezdiff/classes/ezdiffmatrix.php' ); 44 45 class eZDiffTextEngine extends eZDiffEngine 46 { 47 function eZDiffTextEngine() 48 { 49 } 50 51 /*! 52 This function calculates changes in plain text and creates an object to hold 53 overview of changes. 54 */ 55 function createDifferenceObject( $fromData, $toData ) 56 { 57 include_once ( 'lib/ezdiff/classes/eztextdiff.php' ); 58 59 $pattern = array( '/[ ][ ]+/', 60 '/ \n( \n)+/', 61 '/^ /m', 62 '/(\n){3,}/' ); 63 $replace = array( ' ', 64 "\n", 65 '', 66 "\n\n" ); 67 68 $old = preg_replace( $pattern, $replace, $fromData ); 69 $new = preg_replace( $pattern, $replace, $toData ); 70 71 $oldArray = split( "\r\n", $old ); 72 $newArray = split( "\r\n", $new ); 73 74 $oldSums = array(); 75 foreach( $oldArray as $paragraph ) 76 { 77 $oldSums[] = crc32( $paragraph ); 78 } 79 80 $newSums = array(); 81 foreach( $newArray as $paragraph ) 82 { 83 $newSums[] = crc32( $paragraph ); 84 } 85 86 $changes = new eZTextDiff(); 87 88 $pre = $this->preProcess( $oldSums, $newSums ); 89 $out = $this->createOutput( $pre, $oldArray, $newArray ); 90 91 $changes->setChanges( $out ); 92 93 return $changes; 94 } 95 96 /*! 97 This Method will create the differences array which is processed by the templates 98 */ 99 function createOutput( $arr, $oldArray, $newArray ) 100 { 101 $diff = array(); 102 103 foreach( $arr as $item ) 104 { 105 $old = null; 106 $new = null; 107 foreach ( $item as $par => $state ) 108 { 109 switch ( $state ) 110 { 111 case 'unchanged': 112 { 113 $index = $par; 114 }break; 115 116 case 'changed': 117 { 118 $old = $oldArray[$par]; 119 $new = $newArray[$par]; 120 }break; 121 122 case 'added': 123 { 124 $new = $newArray[$par]; 125 }break; 126 127 case 'removed': 128 { 129 $old = $oldArray[$par]; 130 }break; 131 } 132 } 133 134 if ( $old === null ) 135 { 136 if ( $new === null ) 137 { 138 // unchanged paragraph 139 $text = $newArray[$index]; 140 $this->addNewLine( $text ); 141 $diff[] = array( 'unchanged' => $text, 142 'status' => 0 ); 143 } 144 else 145 { 146 // added paragraph 147 $text = $new; 148 $this->addNewLine( $text ); 149 $diff[] = array( 'added' => $text, 150 'status' => 2 ); 151 } 152 } 153 elseif ( $new === null ) 154 { 155 // removed paragraph 156 $text = $old; 157 $this->addNewLine( $text ); 158 $diff[] = array( 'removed' => $text, 159 'status' => 1 ); 160 } 161 else 162 { 163 // changed paragraph 164 $diffText = $this->buildDiff( explode( ' ', $old ), explode( ' ', $new ) ); 165 $size = count( $diffText ) - 1; 166 167 foreach( $diffText as $number => $change ) 168 { 169 $state = $change['status']; 170 switch( $state ) 171 { 172 case '0': 173 { 174 if ( $number == $size ) 175 $this->addNewLine( $change['unchanged'] ); 176 }break; 177 178 case '1': 179 { 180 if ( $number == $size ) 181 $this->addNewLine( $change['removed'] ); 182 }break; 183 184 case '2': 185 { 186 if ( $number == $size ) 187 $this->addNewLine( $change['added'] ); 188 }break; 189 } 190 $diff[] = $change; 191 } 192 } 193 } 194 $output = $this->postProcessDiff( $diff ); 195 return $output; 196 } 197 198 /*! 199 \private 200 This method will add a newline after unempty paragraphs 201 */ 202 function addNewLine( &$text ) 203 { 204 $text .= "\n"; 205 } 206 207 /*! 208 This method will determine which paragraphs which need to be diffed. 209 */ 210 function preProcess( $oldArray, $newArray ) 211 { 212 $substr = $this->substringMatrix( $oldArray, $newArray ); 213 214 $nOldWords = count( $oldArray ); 215 $nNewWords = count( $newArray ); 216 217 /* 218 $tmp = $substr['lengthMatrix']; 219 print( "<pre>" ); 220 $this->dumpMatrix( $tmp, $nOldWords, $nNewWords ); 221 print( "</pre>" ); 222 */ 223 224 $strings = $this->substrings( $substr, $oldArray, $newArray ); 225 226 //Merge detected paragraphs 227 $mergedStrings = array(); 228 foreach ( $strings as $sstring ) 229 { 230 $mergedStrings = $mergedStrings + $sstring; 231 } 232 unset( $strings ); 233 234 //Check for changes in lead & inner paragraphs 235 $offset = 0; 236 $delOffset = 0; 237 $internalOffset = 0; 238 $merged = array(); 239 240 foreach ( $mergedStrings as $key => $wordArray ) 241 { 242 $oldOffset = $wordArray['oldOffset']; 243 244 //Check for inserted paragraphs 245 $nk = $internalOffset; 246 while ( $key > $offset ) 247 { 248 $merged[$nk] = array( $offset => 'added' ); 249 $nk++; 250 $offset++; 251 } 252 253 //Check for removed paragraphs 254 $k = $internalOffset; 255 while ( $oldOffset > $delOffset ) 256 { 257 if ( $k < $nk ) 258 { 259 // Paragraph is changed 260 if ( array_key_exists( $delOffset, $merged[$k] ) ) 261 { 262 // Old & new paragraph places is the same 263 $merged[$k][$delOffset] = 'changed'; 264 } 265 else 266 { 267 $merged[$k][$delOffset] = 'removed'; 268 } 269 } 270 else 271 { 272 $merged[$k] = array( $delOffset => 'removed' ); 273 } 274 $k++; 275 $delOffset++; 276 } 277 278 $internalOffset = ($k > $nk) ? $k:$nk; 279 280 //The default state - unchanged paragraph 281 $merged[$internalOffset] = array( $key => 'unchanged' ); 282 $internalOffset++; 283 $delOffset++; 284 $offset++; 285 } 286 287 //Check for appended paragraphs 288 $nk = $internalOffset; 289 while ( $nNewWords > $offset ) 290 { 291 $merged[$nk] = array( $offset => 'added' ); 292 $nk++; 293 $offset++; 294 } 295 296 //Check for end-deleted paragraphs 297 $k = $internalOffset; 298 while ( $nOldWords > $delOffset ) 299 { 300 if ( $k < $nk ) 301 { 302 if ( array_key_exists( $delOffset, $merged[$k] ) ) 303 { 304 $merged[$k][$delOffset] = 'changed'; 305 } 306 else 307 { 308 $merged[$k][$delOffset] = 'removed'; 309 } 310 } 311 else 312 { 313 $merged[$k] = array( $delOffset => 'removed' ); 314 } 315 $k++; 316 $delOffset++; 317 } 318 319 return $merged; 320 } 321 322 323 /*! 324 \private 325 This method will contruct a diff output for plain text. 326 */ 327 function buildDiff( $oldArray, $newArray ) 328 { 329 $substr = $this->substringMatrix( $oldArray, $newArray ); 330 331 $nOldWords = count( $oldArray ); 332 $nNewWords = count( $newArray ); 333 334 /* 335 $tmp = $substr['lengthMatrix']; 336 print( "<pre>" ); 337 $this->dumpMatrix( $tmp, $nOldWords, $nNewWords ); 338 print( "</pre>" ); 339 */ 340 341 $strings = $this->substrings( $substr, $oldArray, $newArray ); 342 $len = count( $strings ); 343 344 //Merge detected substrings 345 $mergedStrings = array(); 346 foreach ( $strings as $sstring ) 347 { 348 $mergedStrings = $mergedStrings + $sstring; 349 } 350 unset( $strings ); 351 352 //Check for changes in lead & inner words 353 $differences = array(); 354 $offset = 0; 355 $delOffset = 0; 356 357 foreach ( $mergedStrings as $key => $wordArray ) 358 { 359 $oldOffset = $wordArray['oldOffset']; 360 361 // Added words 362 while ( $key > $offset ) 363 { 364 $differences[] = array( 'added' => $newArray[$offset], 365 'status' => 2 ); 366 $offset++; 367 } 368 369 // Removed words 370 while ( $oldOffset > $delOffset ) 371 { 372 $differences[] = array( 'removed' => $oldArray[$delOffset], 373 'status' => 1 ); 374 $delOffset++; 375 } 376 377 //The default state - unchanged paragraph 378 $differences[] = array( 'unchanged' => $newArray[$key], 379 'status' => 0 ); 380 $delOffset++; 381 $offset++; 382 } 383 384 // Appended words 385 while ( $nNewWords > $offset ) 386 { 387 $differences[] = array( 'added' => $newArray[$offset], 388 'status' => 2 ); 389 $offset++; 390 } 391 392 // Words, removed at the paragraph end 393 while ( $nOldWords > $delOffset ) 394 { 395 $differences[] = array( 'removed' => $oldArray[$delOffset], 396 'status' => 1 ); 397 $delOffset++; 398 } 399 400 $output = $this->postProcessDiff( $differences ); 401 return $output; 402 403 } 404 405 /*! 406 \private 407 This method will chain together similar changes, in order to create a more connected output. 408 */ 409 function postProcessDiff( $diffArray ) 410 { 411 $string = ""; 412 $diff = array(); 413 $item = current( $diffArray ); 414 415 $lastStatus = $item['status']; 416 417 $mode = array( 0 => 'unchanged', 418 1 => 'removed', 419 2 => 'added' ); 420 421 while ( $item = current( $diffArray ) ) 422 { 423 if ( $item['status'] != $lastStatus ) 424 { 425 $diff[] = array( $mode[$lastStatus] => $string, 426 'status' => $lastStatus ); 427 $lastStatus = $item['status']; 428 $string =""; 429 continue; 430 } 431 432 $string .= $item[$mode[$lastStatus]] . " "; 433 next( $diffArray ); 434 } 435 if ( strlen( $string ) > 0 ) 436 { 437 $diff[] = array( $mode[$lastStatus] => $string, 438 'status' => $lastStatus ); 439 } 440 return $diff; 441 } 442 443 444 /*! 445 \private 446 This method will detect discontinuous substrings in the matrix. 447 \return Array of substrings 448 */ 449 function substrings( $sub, $old, $new ) 450 { 451 $matrix = $sub['lengthMatrix']; 452 $val = $sub['maxLength']; 453 $row = $sub['maxRow']; 454 $col = $sub['maxCol']; 455 456 if ( $val == 0 ) 457 { 458 //No common substrings were found 459 return array(); 460 } 461 462 $rows = count( $old ); 463 $cols = count( $new ); 464 465 $lcsOffsets = $this->findLongestSubstringOffsets( $sub ); 466 $lcsPlacement = $this->substringPlacement( $lcsOffsets['newStart'], $lcsOffsets['newEnd'], $cols ); 467 468 $substringSet = array(); 469 470 $substringArray = $this->traceSubstring( $row, $col, $matrix, $val, $new ); 471 $substring = $substringArray['substring']; 472 unset( $substringArray ); 473 $substringSet[] = array_reverse( $substring, true ); 474 $substring = array(); 475 476 //Get more text 477 if ( $lcsPlacement['hasTextLeft'] ) 478 { 479 $row = $lcsOffsets['oldStart']; 480 $col = $lcsOffsets['newStart']; 481 482 if ( $row != 0 ) 483 { 484 $row--; 485 } 486 487 if ( $col != 0 ) 488 { 489 $col--; 490 } 491 492 $done = false; 493 $info = true; 494 $prevRowUsed = -1; 495 496 while ( !$done && $info ) 497 { 498 if ( $prevRowUsed == $row ) 499 { 500 $done = true; 501 continue; 502 } 503 504 $info = $this->localSubstring( 'left', $row, $col, $rows, $cols, $matrix ); 505 $prevRowUsed = $row; 506 if ( $info ) 507 { 508 $substringArray = $this->traceSubstring( $info['remainRow'], $info['remainCol'], $matrix, $info['remainMax'], $new ); 509 $substring = $substringArray['substring']; 510 array_unshift( $substringSet, array_reverse( $substring, true ) ); 511 $substring = array(); 512 513 if ( $info['remainCol'] == 0 || $substringArray['lastCol'] == 0 ) 514 { 515 $done = true; 516 } 517 else 518 { 519 $row = $substringArray['lastRow']; 520 $col = $substringArray['lastCol']; 521 522 if ( $row != 0 ) 523 { 524 $row--; 525 } 526 if ( $col != 0 ) 527 { 528 $col--; 529 } 530 } 531 unset( $substringArray ); 532 } 533 } 534 } 535 536 537 if ( $lcsPlacement['hasTextRight'] ) 538 { 539 //reset search location in matrix 540 $row = $sub['maxRow']; 541 $col = $sub['maxCol']; 542 543 if ( $row != $rows-1 ) 544 { 545 $row++; 546 } 547 548 if ( $col != $cols-1 ) 549 { 550 $col++; 551 } 552 553 $done = false; 554 $info = true; 555 $prevRowUsed = -1; 556 557 while( !$done && $info ) 558 { 559 if ( $prevRowUsed == $row ) 560 { 561 $done = true; 562 continue; 563 } 564 $info = $this->localSubstring( 'right', $row, $col, $rows, $cols, $matrix ); 565 $prevRowUsed = $row; 566 if ( $info ) 567 { 568 $substringArray = $this->traceSubstring( $info['remainRow'], $info['remainCol'], $matrix, $info['remainMax'], $new ); 569 $substring = $substringArray['substring']; 570 $substringSet[] = array_reverse( $substring, true ); 571 $substring = array(); 572 573 if ( $info['remainCol'] == $cols-1 || $substringArray['lastRow'] == $cols-1 ) 574 { 575 $done = true; 576 } 577 else 578 { 579 $row = $info['remainRow']; 580 $col = $info['remainCol']; 581 582 if ( $row != $rows-1 ) 583 { 584 $row++; 585 } 586 587 if ( $col != $cols-1 ) 588 { 589 $col++; 590 } 591 } 592 unset( $substringArray ); 593 } 594 } 595 } 596 return $substringSet; 597 } 598 599 /*! 600 \private 601 This method checks a patch of the length matrix for the longest substring. 602 Depending on the \a $direction a sub matrix is searched for valid substrings. 603 */ 604 function localSubstring( $direction, $row, $col, $rows, $cols, $matrix ) 605 { 606 $colMax = 0; 607 $prevColMax = 0; 608 $colMaxRow = 0; 609 $colMaxCol = 0; 610 611 $remainMax = 0; 612 $remainRow = 0; 613 $remainCol = 0; 614 615 switch( $direction ) 616 { 617 case 'right': 618 { 619 for ( $j = $col; $j < $cols; $j++ ) 620 { 621 $startRow = $row; 622 $colMax = 0; 623 624 while ( $startRow < $rows ) 625 { 626 $matVal = $matrix->get( $startRow, $j ); 627 if ( $matVal > $colMax ) 628 { 629 $colMax = $matVal; 630 $colMaxRow = $startRow; 631 $colMaxCol = $j; 632 } 633 $startRow++; 634 } 635 636 if ( $colMax > $remainMax ) 637 { 638 //Set best candidate thus far 639 $remainMax = $colMax; 640 $remainRow = $colMaxRow; 641 $remainCol = $colMaxCol; 642 } 643 644 if ( ( $prevColMax > 1 ) && ( $colMax < $prevColMax ) ) 645 { 646 break 2; 647 } 648 649 $prevColMax = $colMax; 650 } 651 }break; 652 653 case 'left': 654 { 655 for ( $j = $col; $j >= 0; $j-- ) 656 { 657 $startRow = $row; 658 $colMax = 0; 659 660 while ( $startRow >= 0 ) 661 { 662 $matVal = $matrix->get( $startRow, $j ); 663 if ( $matVal > $colMax ) 664 { 665 $colMax = $matVal; 666 $colMaxRow = $startRow; 667 $colMaxCol = $j; 668 } 669 $startRow--; 670 } 671 if ( $colMax > $remainMax ) 672 { 673 //Set best candidate thus far 674 $remainMax = $colMax; 675 $remainRow = $colMaxRow; 676 $remainCol = $colMaxCol; 677 } 678 if ( ( $prevColMax > 1 ) && ( $colMax < $prevColMax ) ) 679 { 680 break 2; 681 } 682 683 $prevColMax = $colMax; 684 } 685 }break; 686 } 687 688 if ( $remainMax > 0 ) 689 { 690 return array( 'remainMax' => $remainMax, 691 'remainRow' => $remainRow, 692 'remainCol' => $remainCol ); 693 } 694 else 695 { 696 return false; 697 } 698 } 699 700 /*! 701 \private 702 This method will backtrace found substrings, it will start from the endpoint of the 703 string and work towards its start. 704 \return Substring with endpoing at \a $row, \a $col 705 */ 706 function traceSubstring( $row, $col, $matrix, $val, $new ) 707 { 708 $substring = array(); 709 while( $row >= 0 && $col >= 0 ) 710 { 711 if ( $matrix->get( $row, $col ) == $val ) 712 { 713 $substring[$col] = array( 'word' => $new[$col], 714 'oldOffset' => $row ); 715 716 if ( $val > 1 ) 717 { 718 $val--; 719 } 720 else if ( $val == 1 ) 721 { 722 break; 723 } 724 725 $row--; 726 $col--; 727 if ( $row < 0 || $col < 0 ) 728 break; 729 } 730 } 731 return array( 'substring' => $substring, 732 'lastRow' => $row, 733 'lastCol' => $col ); 734 } 735 736 /*! 737 \private 738 This method will return an array with boolean values indicating whether 739 the specified offsets \a $startOffset and \a $endOffset have additional 740 text to either the left or right side. 741 */ 742 function substringPlacement( $startOffset, $endOffset, $totalStringLength ) 743 { 744 $leftText = false; 745 $rightText = false; 746 747 if ( $startOffset > 0 ) 748 $leftText = true; 749 750 if ( $endOffset < $totalStringLength-1 ) 751 $rightText = true; 752 753 return array( 'hasTextLeft' => $leftText, 754 'hasTextRight' => $rightText ); 755 } 756 757 /*! 758 \private 759 Helper method to matrices. 760 */ 761 function dumpMatrix( $arr, $rows, $cols ) 762 { 763 for ( $i = 0; $i < $rows; $i++ ) 764 { 765 for ( $j = 0; $j < $cols; $j++ ) 766 { 767 print( $arr->get( $i, $j ) . " " ); 768 if ( $j == $cols-1 ) 769 print( "\n" ); 770 } 771 } 772 } 773 774 /*! 775 \private 776 This method calculates the offsets in the old and new text for the 777 longest common substring. 778 */ 779 function findLongestSubstringOffsets( $varArray ) 780 { 781 //The columns yields the offsets in the new text. 782 //The rows gives us the offsets in the old text. 783 $lengthMatrix = $varArray['lengthMatrix']; 784 $max = $varArray['maxLength']; 785 $len = $max; 786 $maxRow = $varArray['maxRow']; 787 $maxCol = $varArray['maxCol']; 788 $newStart = 0; 789 $newEnd = 0; 790 $oldStart = 0; 791 $oldEnd = 0; 792 793 while ( $len > 0 && $maxRow >= 0 && $maxCol >= 0) 794 { 795 $len = $lengthMatrix->get( $maxRow, $maxCol ); 796 797 if ( $lengthMatrix->get( $maxRow, $maxCol ) == $max ) 798 { 799 $newEnd = $maxCol; 800 $oldEnd = $maxRow; 801 } 802 803 if ( $lengthMatrix->get( $maxRow, $maxCol ) == 1 ) 804 { 805 $newStart = $maxCol; 806 $oldStart = $maxRow; 807 } 808 809 $maxRow--; 810 $maxCol--; 811 } 812 return array( 'newStart' => $newStart, 813 'newEnd' => $newEnd, 814 'oldStart' => $oldStart, 815 'oldEnd' => $oldEnd ); 816 } 817 818 /*! 819 \private 820 This function find the longest common substrings in \a $old and \a $new 821 It will build a matrix consisting of the length of detected substrings. 822 823 The function will build a structure like the following: 824 array = ( 'maxLength' => 825 'maxRow' => 826 'maxCol' => 827 'lengthMatrix' => ) 828 829 \return Matrix containing the length of longest common substring string and where it is found in the substring length matrix. 830 */ 831 function substringMatrix( $old, $new ) 832 { 833 $maxLength = 0; 834 $sizeOld = count( $old ); 835 $sizeNew = count( $new ); 836 $matrix = new eZDiffMatrix( $sizeOld, $sizeNew ); 837 838 $maxC = 0; 839 $maxR = 0; 840 841 for ( $row = 0; $row < $sizeOld; $row++ ) 842 { 843 for ( $col = 0; $col < $sizeNew; $col++ ) 844 { 845 if ( $old[$row] === $new[$col] ) 846 { 847 if ( $row > 0 && $col > 0 ) 848 { 849 $val = 1 + $matrix->get( $row-1, $col-1 ); 850 $matrix->set( $row, $col, $val ); 851 } 852 else if ( $row > 0 && $col == 0 ) 853 { 854 $val = 1 + $matrix->get( $row-1, $col ); 855 $matrix->set( $row, $col, $val ); 856 } 857 else if ( $row == 0 && $col > 0 ) 858 { 859 $val = 1 + $matrix->get( $row, $col-1 ); 860 $matrix->set( $row, $col, $val ); 861 } 862 else if ( $row == 0 && $col == 0 ) 863 { 864 $matrix->set( $row, $col, 1 ); 865 } 866 867 if ( $matrix->get( $row, $col ) > $maxLength ) 868 { 869 $maxLength = $matrix->get( $row, $col ); 870 $maxR = $row; 871 $maxC = $col; 872 } 873 } 874 else 875 { 876 $matrix->set( $row, $col, 0 ); 877 } 878 } 879 } 880 return array( 'maxLength' => $maxLength, 881 'maxRow' => $maxR, 882 'maxCol' => $maxC, 883 'lengthMatrix' => $matrix ); 884 } 885 886 /*! 887 \private 888 Removes empty elements from array 889 \return array without empty elements 890 */ 891 function trimEmptyArrayElements( $array ) 892 { 893 foreach( $array as $key => $value ) 894 { 895 if ( empty( $value ) ) 896 { 897 unset( $array[$key] ); 898 } 899 } 900 //Calls array_merge to reset the array keys. 901 return array_merge( $array ); 902 } 903 } 904 905 ?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sat Feb 24 10:30:04 2007 | par Balluche grâce à PHPXref 0.7 |