[ Index ] |
|
Code source de SPIP Agora 1.4 |
1 #! /usr/bin/perl -w 2 3 # Spell Checker Plugin for HTMLArea-3.0 4 # Sponsored by www.americanbible.org 5 # Implementation by Mihai Bazon, http://dynarch.com/mishoo/ 6 # 7 # (c) dynarch.com 2003. 8 # Distributed under the same terms as HTMLArea itself. 9 # This notice MUST stay intact for use (see license.txt). 10 # 11 # $Id: spell-check-logic.cgi,v 1.1 2004/11/22 18:04:57 trivoallan Exp $ 12 13 use strict; 14 use utf8; 15 use Encode; 16 use Text::Aspell; 17 use XML::DOM; 18 use CGI; 19 20 my $TIMER_start = undef; 21 eval { 22 use Time::HiRes qw( gettimeofday tv_interval ); 23 $TIMER_start = [gettimeofday()]; 24 }; 25 # use POSIX qw( locale_h ); 26 27 binmode STDIN, ':utf8'; 28 binmode STDOUT, ':utf8'; 29 30 my $debug = 0; 31 32 my $speller = new Text::Aspell; 33 my $cgi = new CGI; 34 35 my $total_words = 0; 36 my $total_mispelled = 0; 37 my $total_suggestions = 0; 38 my $total_words_suggested = 0; 39 40 # FIXME: report a nice error... 41 die "Can't create speller!" unless $speller; 42 43 my $dict = $cgi->param('dictionary') || $cgi->cookie('dictionary') || 'en'; 44 45 # add configurable option for this 46 $speller->set_option('lang', $dict); 47 $speller->set_option('encoding', 'UTF-8'); 48 #setlocale(LC_CTYPE, $dict); 49 50 # ultra, fast, normal, bad-spellers 51 # bad-spellers seems to cause segmentation fault 52 $speller->set_option('sug-mode', 'normal'); 53 54 my %suggested_words = (); 55 keys %suggested_words = 128; 56 57 my $file_content = decode('UTF-8', $cgi->param('content')); 58 $file_content = parse_with_dom($file_content); 59 60 my $ck_dictionary = $cgi->cookie(-name => 'dictionary', 61 -value => $dict, 62 -expires => '+30d'); 63 64 print $cgi->header(-type => 'text/html; charset: utf-8', 65 -cookie => $ck_dictionary); 66 67 my $js_suggested_words = make_js_hash(\%suggested_words); 68 my $js_spellcheck_info = make_js_hash_from_array 69 ([ 70 [ 'Total words' , $total_words ], 71 [ 'Mispelled words' , $total_mispelled . ' in dictionary \"'.$dict.'\"' ], 72 [ 'Total suggestions' , $total_suggestions ], 73 [ 'Total words suggested' , $total_words_suggested ], 74 [ 'Spell-checked in' , defined $TIMER_start ? (tv_interval($TIMER_start) . ' seconds') : 'n/a' ] 75 ]); 76 77 print qq^<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> 78 <html> 79 <head> 80 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 81 <link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" /> 82 <script type="text/javascript"> 83 var suggested_words = { $js_suggested_words }; 84 var spellcheck_info = { $js_spellcheck_info }; </script> 85 </head> 86 <body onload="window.parent.finishedSpellChecking();">^; 87 88 print $file_content; 89 if ($cgi->param('init') eq '1') { 90 my @dicts = $speller->dictionary_info(); 91 my $dictionaries = ''; 92 foreach my $i (@dicts) { 93 next if $i->{jargon}; 94 my $name = $i->{name}; 95 if ($name eq $dict) { 96 $name = '@'.$name; 97 } 98 $dictionaries .= ',' . $name; 99 } 100 $dictionaries =~ s/^,//; 101 print qq^<div id="HA-spellcheck-dictionaries">$dictionaries</div>^; 102 } 103 104 print '</body></html>'; 105 106 # Perl is beautiful. 107 sub spellcheck { 108 my $node = shift; 109 my $doc = $node->getOwnerDocument; 110 my $check = sub { # called for each word in the text 111 # input is in UTF-8 112 my $word = shift; 113 my $already_suggested = defined $suggested_words{$word}; 114 ++$total_words; 115 if (!$already_suggested && $speller->check($word)) { 116 return undef; 117 } else { 118 # we should have suggestions; give them back to browser in UTF-8 119 ++$total_mispelled; 120 if (!$already_suggested) { 121 # compute suggestions for this word 122 my @suggestions = $speller->suggest($word); 123 my $suggestions = decode($speller->get_option('encoding'), join(',', @suggestions)); 124 $suggested_words{$word} = $suggestions; 125 ++$total_suggestions; 126 $total_words_suggested += scalar @suggestions; 127 } 128 # HA-spellcheck-error 129 my $err = $doc->createElement('span'); 130 $err->setAttribute('class', 'HA-spellcheck-error'); 131 my $tmp = $doc->createTextNode; 132 $tmp->setNodeValue($word); 133 $err->appendChild($tmp); 134 return $err; 135 } 136 }; 137 while ($node->getNodeValue =~ /([\p{IsWord}']+)/) { 138 my $word = $1; 139 my $before = $`; 140 my $after = $'; 141 my $df = &$check($word); 142 if (!$df) { 143 $before .= $word; 144 } 145 { 146 my $parent = $node->getParentNode; 147 my $n1 = $doc->createTextNode; 148 $n1->setNodeValue($before); 149 $parent->insertBefore($n1, $node); 150 $parent->insertBefore($df, $node) if $df; 151 $node->setNodeValue($after); 152 } 153 } 154 }; 155 156 sub check_inner_text { 157 my $node = shift; 158 my $text = ''; 159 for (my $i = $node->getFirstChild; defined $i; $i = $i->getNextSibling) { 160 if ($i->getNodeType == TEXT_NODE) { 161 spellcheck($i); 162 } 163 } 164 }; 165 166 sub parse_with_dom { 167 my ($text) = @_; 168 $text = '<spellchecker>'.$text.'</spellchecker>'; 169 170 my $parser = new XML::DOM::Parser; 171 if ($debug) { 172 open(FOO, '>:utf8', '/tmp/foo'); 173 print FOO $text; 174 close FOO; 175 } 176 my $doc = $parser->parse($text); 177 my $nodes = $doc->getElementsByTagName('*'); 178 my $n = $nodes->getLength; 179 180 for (my $i = 0; $i < $n; ++$i) { 181 my $node = $nodes->item($i); 182 if ($node->getNodeType == ELEMENT_NODE) { 183 check_inner_text($node); 184 } 185 } 186 187 my $ret = $doc->toString; 188 $ret =~ s{<spellchecker>(.*)</spellchecker>}{$1}sg; 189 return $ret; 190 }; 191 192 sub make_js_hash { 193 my ($hash) = @_; 194 my $js_hash = ''; 195 while (my ($key, $val) = each %$hash) { 196 $js_hash .= ',' if $js_hash; 197 $js_hash .= '"'.$key.'":"'.$val.'"'; 198 } 199 return $js_hash; 200 }; 201 202 sub make_js_hash_from_array { 203 my ($array) = @_; 204 my $js_hash = ''; 205 foreach my $i (@$array) { 206 $js_hash .= ',' if $js_hash; 207 $js_hash .= '"'.$i->[0].'":"'.$i->[1].'"'; 208 } 209 return $js_hash; 210 };
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sat Feb 24 14:40:03 2007 | par Balluche grâce à PHPXref 0.7 |