[ Index ] |
|
Code source de Horde 3.1.3 |
<?php

require_once 'PEAR.php';
require_once 'File.php';

/**
 * The File_CSV package allows reading and creating of CSV data and files.
 *
 * $Horde: framework/File_CSV/CSV.php,v 1.4.2.7 2006/03/30 10:15:29 selsky Exp $
 *
 * Copyright 2002-2003 Tomas Von Veschler Cox <cox@idecnet.com>
 * Copyright 2005-2006 Jan Schneider <jan@horde.org>
 *
 * This source file is subject to version 2.0 of the PHP license, that is
 * bundled with this package in the file LICENSE, and is available at through
 * the world-wide-web at http://www.php.net/license/2_02.txt.  If you did not
 * receive a copy of the PHP license and are unable to obtain it through the
 * world-wide-web, please send a note to license@php.net so we can mail you a
 * copy immediately.
 *
 * All methods are meant to be called statically (File_CSV::read(...)). The
 * class keeps its state (open file pointers, current line number, warning
 * stack) in function-level static variables.
 *
 * @author  Tomas Von Veschler Cox <cox@idecnet.com>
 * @author  Jan Schneider <jan@horde.org>
 * @since   Horde 3.1
 * @package File_CSV
 */
class File_CSV {

    /**
     * Discovers the format of a CSV file (the number of fields, the separator,
     * the quote string, and the line break).
     *
     * We can't use the auto_detect_line_endings PHP setting, because it's not
     * supported by fgets() contrary to what the manual says.
     *
     * @static
     *
     * @param string $file       The CSV file name.
     * @param array  $extraSeps  Extra separators that should be checked for.
     *
     * @return array  The format hash with the keys 'fields', 'sep', 'quote'
     *                and, if a line break could be detected, 'crlf'. A
     *                PEAR_Error is returned if the file cannot be opened.
     */
    function discoverFormat($file, $extraSeps = array())
    {
        if (!$fp = @fopen($file, 'r')) {
            return PEAR::raiseError('Could not open file: ' . $file);
        }

        $seps = array_merge(array("\t", ';', ':', ','), $extraSeps);
        $matches = array();
        $crlf = null;
        $conf = array();

        /* Take the first 10 lines and store the number of occurrences for
         * each separator in each line. fgets() is compared against false so
         * that a final line consisting of "0" is not mistaken for EOF. */
        for ($i = 0; ($i < 10) && (($line = fgets($fp)) !== false);) {
            /* Do we have Mac line endings? With bare "\r" line breaks
             * fgets() hands us several logical lines at once. The limit
             * leaves room for one extra piece that swallows the remainder,
             * so that no counted piece contains more than one line. */
            $lines = preg_split('/\r(?!\n)/', $line, 11 - $i);
            $count = count($lines);
            if ($count > 1) {
                $crlf = "\r";
            }
            for ($j = 0; ($i < 10) && ($j < $count); $j++) {
                $line = $lines[$j];
                if (!isset($crlf)) {
                    foreach (array("\r\n", "\n") as $eol) {
                        if (substr($line, -strlen($eol)) == $eol) {
                            $crlf = $eol;
                            break;
                        }
                    }
                }
                $i++;
                foreach ($seps as $sep) {
                    $matches[$sep][$i] = substr_count($line, $sep);
                }
            }
        }
        if (isset($crlf)) {
            $conf['crlf'] = $crlf;
        }

        /* Group the results by amount of equal occurrences: for every
         * separator find the per-line count that shows up most often and
         * remember how many lines agreed on it. */
        $fields = array();
        $amount = array();
        foreach ($matches as $sep => $lines) {
            $times = array(0 => 0);
            foreach ($lines as $num) {
                if ($num > 0) {
                    $times[$num] = isset($times[$num]) ? $times[$num] + 1 : 1;
                }
            }
            arsort($times);
            reset($times);
            $fields[$sep] = key($times);
            $amount[$sep] = $times[key($times)];
        }
        /* The separator with the most agreeing lines wins. */
        arsort($amount);
        reset($amount);
        $sep = key($amount);

        /* $sep is null if no line could be read (e.g. an empty file); the
         * data is then treated as having one single field. */
        $conf['fields'] = isset($fields[$sep]) ? $fields[$sep] + 1 : 1;
        $conf['sep'] = $sep;

        /* Test if there are fields with quotes around in the first 5
         * lines. The separator is escaped because it may be a character
         * with a special meaning in regular expressions. */
        $quotes = '"\'';
        $quote = '';
        $sepRe = preg_quote((string)$sep, '|');
        rewind($fp);
        for ($i = 0; ($i < 5) && (($line = fgets($fp)) !== false); $i++) {
            if (preg_match("|{$sepRe}([{$quotes}]).*([{$quotes}]){$sepRe}|U", $line, $match)) {
                if ($match[1] == $match[2]) {
                    $quote = $match[1];
                    break;
                }
            }
            if (preg_match("|^([{$quotes}]).*([{$quotes}]){$sepRe}|", $line, $match) ||
                preg_match("|([{$quotes}]).*([{$quotes}]){$sepRe}\s$|Us", $line, $match))
            {
                if ($match[1] == $match[2]) {
                    $quote = $match[1];
                    break;
                }
            }
        }
        $conf['quote'] = $quote;

        fclose($fp);

        // XXX What about trying to discover the "header"?
        return $conf;
    }

    /**
     * Reads a row from a CSV file and returns it as an array.
     *
     * Simple lines are split with explode(); lines that look like they
     * contain Mac line endings, multi-line quoted fields or an unexpected
     * number of separators are re-read with readQuoted().
     *
     * If the configuration declares exactly one field, the raw line
     * (including its line break) is returned as the only element.
     *
     * @param string $file  The name of the CSV file.
     * @param array  $conf  The configuration for the CSV file.
     *
     * @return array|boolean  The CSV data or false if no more data available.
     *                        A PEAR_Error is returned if the file cannot be
     *                        opened or the configuration is invalid.
     */
    function read($file, &$conf)
    {
        $fp = File_CSV::getPointer($file, $conf, FILE_MODE_READ);
        if (is_a($fp, 'PEAR_Error')) {
            return $fp;
        }

        /* Strict comparison: a last line "0" without line break is data,
         * not the end of the file. */
        $line = fgets($fp);
        if ($line === false) {
            return false;
        }

        /* Use readQuoted() if we have Mac line endings. */
        if (preg_match('/\r(?!\n)/', $line)) {
            fseek($fp, -strlen($line), SEEK_CUR);
            return File_CSV::readQuoted($file, $conf);
        }

        File_CSV::_line(File_CSV::_line() + 1);

        if ($conf['fields'] == 1) {
            return array($line);
        }

        $fields = explode($conf['sep'], $line);
        $lastIdx = count($fields) - 1;
        if ($conf['quote']) {
            $last = $fields[$lastIdx];
            /* The last field opens a quote that is not closed before the
             * line break, i.e. the field continues on the next line. */
            $openQuote = substr($last, -strlen($conf['crlf'])) == $conf['crlf'] &&
                $last[0] == $conf['quote'] &&
                $last[strlen(rtrim($last)) - 1] != $conf['quote'];

            /* Fallback to read the line with readQuoted() when we guess
             * that the simple explode won't work right. */
            // XXX perhaps there is a separator inside a quoted field
            //preg_match("|{$conf['quote']}.*{$conf['sep']}.*{$conf['quote']}|U", $line)
            if ($openQuote || count($fields) != $conf['fields']) {
                fseek($fp, -strlen($line), SEEK_CUR);
                return File_CSV::readQuoted($file, $conf);
            }

            $fields[$lastIdx] = rtrim($last);
            foreach ($fields as $k => $v) {
                $fields[$k] = File_CSV::unquote($v, $conf['quote'], $conf['crlf']);
            }
        } else {
            $fields[$lastIdx] = rtrim($fields[$lastIdx]);
        }

        if (count($fields) < $conf['fields']) {
            /* Pad short rows with empty fields. */
            File_CSV::warning(sprintf(_("Wrong number of fields in line %d. Expected %d, found %d."), File_CSV::_line(), $conf['fields'], count($fields)));
            $fields = array_merge($fields, array_fill(0, $conf['fields'] - count($fields), ''));
        } elseif (count($fields) > $conf['fields']) {
            /* Cut off surplus fields. */
            File_CSV::warning(sprintf(_("More fields found in line %d than the expected %d."), File_CSV::_line(), $conf['fields']));
            array_splice($fields, $conf['fields']);
        }

        return $fields;
    }

    /**
     * Reads a row from a CSV file and returns it as an array.
     * This method is able to read fields with multiline data and normalizes
     * linebreaks to single newline characters (0x0a).
     *
     * The file is consumed character by character. An empty line yields an
     * empty array. A record at the very end of the file that is not
     * terminated by a line break is returned as well.
     *
     * @param string $file  The name of the CSV file.
     * @param array  $conf  The configuration for the CSV file.
     *
     * @return array|boolean  The CSV data or false if no more data available.
     *                        A PEAR_Error is returned if the file cannot be
     *                        opened or the configuration is invalid.
     */
    function readQuoted($file, &$conf)
    {
        $fp = File_CSV::getPointer($file, $conf, FILE_MODE_READ);
        if (is_a($fp, 'PEAR_Error')) {
            return $fp;
        }

        /* A buffer with all characters of the current field read so far. */
        $buff = '';
        /* The current character. */
        $c = null;
        /* The read fields. */
        $ret = array();
        /* The number of the current field. */
        $i = 1;
        /* Are we inside a quoted field? */
        $in_quote = false;

        while (($ch = fgetc($fp)) !== false) {
            /* Previous character. */
            $prev = $c;
            /* Current character. */
            $c = $ch;

            /* Simple character. */
            if ($c != $conf['quote'] &&
                $c != $conf['sep'] &&
                strpos($conf['crlf'], $c) === false) {
                $buff .= $c;
                continue;
            }

            if ($conf['quote'] && $c == $conf['quote'] &&
                ($prev == $conf['sep'] ||
                 $prev == substr($conf['crlf'], -1) ||
                 $prev === null))
            {
                /* Quote begin. */
                $in_quote = true;
            } elseif ($in_quote) {
                /* Quote end? */
                if ($c == $conf['sep'] && $prev == $conf['quote']) {
                    $in_quote = false;
                } elseif ($c == substr($conf['crlf'], -1)) {
                    /* How far back the closing quote would have to be:
                     * two characters if the first character of a two
                     * character line break is already in the buffer. */
                    $sub = (strlen($conf['crlf']) > 1 &&
                            $prev == substr($conf['crlf'], -2, 1))
                        ? 2
                        : 1;
                    if ((strlen($buff) >= $sub) &&
                        ($buff[strlen($buff) - $sub] == $conf['quote']))
                    {
                        $in_quote = false;
                    } else {
                        /* A line break inside of a quoted field. */
                        File_CSV::_line(File_CSV::_line() + 1);
                    }
                }
            }

            /* End of line or end of field. */
            if (!$in_quote &&
                ($c == $conf['sep'] || $c == substr($conf['crlf'], -1))) {
                /* More fields than expected. */
                if ($c == $conf['sep'] &&
                    (count($ret) + 1) == $conf['fields']) {
                    /* Skip the rest of the line. */
                    while ($c !== false && $c != substr($conf['crlf'], -1)) {
                        $c = fgetc($fp);
                    }
                    File_CSV::warning(sprintf(_("More fields found in line %d than the expected %d."), File_CSV::_line(), $conf['fields']));
                }

                /* Drop the first character of the line break from the
                 * buffered field. */
                if ($prev == substr($conf['crlf'], -2, 1)) {
                    $buff = substr($buff, 0, -1);
                }

                /* Less fields than expected. */
                if (($c == substr($conf['crlf'], -1)) &&
                    ($i != $conf['fields'])) {
                    if ($i == 1) {
                        /* Skip empty lines. */
                        return $ret;
                    }
                    File_CSV::warning(sprintf(_("Wrong number of fields in line %d. Expected %d, found %d."), File_CSV::_line(), $conf['fields'], $i));

                    $ret[] = File_CSV::unquote($buff, $conf['quote'], $conf['crlf']);
                    $ret = array_merge($ret, array_fill(0, $conf['fields'] - $i, ''));
                    return $ret;
                }

                $ret[] = File_CSV::unquote($buff, $conf['quote'], $conf['crlf']);
                if (count($ret) == $conf['fields']) {
                    return $ret;
                }

                $buff = '';
                $i++;
                continue;
            }
            $buff .= $c;
        }

        /* The data ended without a trailing line break. Return the pending
         * record instead of silently dropping it. */
        if (strlen($buff) || count($ret)) {
            $ret[] = File_CSV::unquote($buff, $conf['quote'], $conf['crlf']);
            if (count($ret) < $conf['fields']) {
                File_CSV::warning(sprintf(_("Wrong number of fields in line %d. Expected %d, found %d."), File_CSV::_line(), $conf['fields'], count($ret)));
                $ret = array_merge($ret, array_fill(0, $conf['fields'] - count($ret), ''));
            }
            return $ret;
        }

        return feof($fp) ? false : $ret;
    }

    /**
     * Writes a hash into a CSV file.
     *
     * Non-numeric values are wrapped in the configured quote character (if
     * any). Quote characters inside of the values are not escaped.
     *
     * @param string $file    The name of the CSV file.
     * @param array  $fields  The CSV data. Only the values are used; they
     *                        are written in their array order.
     * @param array  $conf    The configuration for the CSV file.
     *
     * @return boolean  True on success, PEAR_Error on failure.
     */
    function write($file, $fields, &$conf)
    {
        $fp = File_CSV::getPointer($file, $conf, FILE_MODE_WRITE);
        if (is_a($fp, 'PEAR_Error')) {
            return $fp;
        }

        if (count($fields) != $conf['fields']) {
            return PEAR::raiseError(sprintf(_("Wrong number of fields. Expected %d, found %d."), $conf['fields'], count($fields)));
        }

        /* Re-index so that associative or sparse arrays work too. */
        $fields = array_values($fields);
        $lastIdx = count($fields) - 1;

        $write = '';
        foreach ($fields as $idx => $value) {
            if (!is_numeric($value) && $conf['quote']) {
                $write .= $conf['quote'] . $value . $conf['quote'];
            } else {
                $write .= $value;
            }
            /* Separator between fields, line break after the last one. */
            $write .= ($idx < $lastIdx) ? $conf['sep'] : $conf['crlf'];
        }

        if (!fwrite($fp, $write)) {
            return PEAR::raiseError(sprintf(_("Cannot write to file \"%s\""), $file));
        }

        return true;
    }

    /**
     * Removes surrounding quotes from a string and normalizes linebreaks.
     *
     * Only fields that both start and end with the quote character are
     * changed; anything else is returned as is.
     *
     * @param string $field  The string to unquote.
     * @param string $quote  The quote character.
     * @param string $crlf   The linebreak character.
     *
     * @return string  The unquoted data.
     */
    function unquote($field, $quote, $crlf)
    {
        $length = strlen($field);

        /* Skip empty fields (form: ;;) and fields too short to carry both
         * an opening and a closing quote. */
        if ($length < 2) {
            return $field;
        }
        if ($quote && $field[0] == $quote && $field[$length - 1] == $quote) {
            return str_replace($crlf, "\n", substr($field, 1, -1));
        }
        return $field;
    }

    /**
     * Sets or gets the current line being parsed.
     *
     * The counter is shared by all files handled through this class.
     *
     * @param integer $line  If specified, the current line.
     *
     * @return integer  The current line.
     */
    function _line($line = null)
    {
        static $current_line = 0;

        if (!is_null($line)) {
            $current_line = $line;
        }

        return $current_line;
    }

    /**
     * Adds a warning to or retrieves and resets the warning stack.
     *
     * @param string $warning  A warning string. If not specified, the
     *                         existing warnings will be returned instead and
     *                         the warning stack gets emptied.
     *
     * @return array  If no parameter has been specified, the list of existing
     *                warnings.
     */
    function warning($warning = null)
    {
        static $warnings = array();

        if (is_null($warning)) {
            $return = $warnings;
            $warnings = array();
            return $return;
        }

        $warnings[] = $warning;
    }

    /**
     * Returns or creates the file descriptor associated with a file.
     *
     * Pointers are cached per file name and configuration. The
     * configuration is validated and completed with defaults before the
     * cache key is built; otherwise the defaults written back into $conf
     * would make the caller's next call miss the cache and reopen the file.
     *
     * If a file is opened for reading and $conf['header'] is set, the first
     * row is read and discarded.
     *
     * @static
     *
     * @param string $file  The name of the file.
     * @param array  $conf  The configuration.
     * @param string $mode  The open mode. FILE_MODE_READ or FILE_MODE_WRITE.
     *
     * @return resource  The file resource or PEAR_Error on error.
     */
    function getPointer($file, &$conf, $mode = FILE_MODE_READ)
    {
        static $resources = array();
        static $config = array();

        if (is_a($error = File_CSV::_checkConfig($conf), 'PEAR_Error')) {
            return $error;
        }

        $signature = $file . serialize($conf);
        if (isset($resources[$signature])) {
            $conf = $config[$signature];
            return $resources[$signature];
        }
        $config[$signature] = $conf;

        $fp = @fopen($file, $mode);
        if (!is_resource($fp)) {
            return PEAR::raiseError(sprintf(_("Can't open file \"%s\"."), $file));
        }
        /* Register the pointer before skipping the header, because read()
         * looks it up through this method again. */
        $resources[$signature] = $fp;
        File_CSV::_line(0);

        if ($mode == FILE_MODE_READ && !empty($conf['header'])) {
            if (is_a($header = File_CSV::read($file, $conf), 'PEAR_Error')) {
                return $header;
            }
        }

        return $fp;
    }

    /**
     * Checks the configuration given by the user and fills in defaults.
     *
     * A missing or empty 'quote' is set to an empty string and a missing
     * 'crlf' is set to "\n".
     *
     * @param array $conf  The configuration assoc array.
     *
     * @return mixed  A PEAR_Error if the configuration is invalid, nothing
     *                otherwise.
     */
    function _checkConfig(&$conf)
    {
        // check conf
        if (!is_array($conf)) {
            return PEAR::raiseError('Invalid configuration.');
        }

        if (!isset($conf['fields']) || !is_numeric($conf['fields'])) {
            return PEAR::raiseError(_("The number of fields must be numeric."));
        }

        if (isset($conf['sep'])) {
            if (strlen($conf['sep']) != 1) {
                return PEAR::raiseError(_("The separator must be one single character."));
            }
        } elseif ($conf['fields'] > 1) {
            /* A separator is only optional for single-field data. */
            return PEAR::raiseError(_("No separator specified."));
        }

        if (!empty($conf['quote'])) {
            if (strlen($conf['quote']) != 1) {
                return PEAR::raiseError(_("The quote character must be one single character."));
            }
        } else {
            $conf['quote'] = '';
        }

        if (!isset($conf['crlf'])) {
            $conf['crlf'] = "\n";
        }
    }

}
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sun Feb 25 18:01:28 2007 | par Balluche grâce à PHPXref 0.7 |