eZ Publish 3.9.0 : /support/lupdate-ezpublish3/fetchtr

[Sommaire] [Imprimer]
   1  //
   2  // Finds i18n data from php files
   3  //
   4  // This file is based on fetchtr.cpp from lupdate/Qt Linguist,
   5  // which is Copyright (C) 2000 Trolltech AS (www.trolltech.com).
   6  //
   7  // Gunnstein Lye <gl@ez.no>
   8  // Created on: <10-Dec-2002 18:46:17 gl>
   9  //
  10  // Copyright (C) 1999-2006 eZ systems as. All rights reserved.
  11  //
  12  // This program is free software; you can redistribute it and/or
  13  // modify it under the terms of the GNU General Public License
  14  // as published by the Free Software Foundation; either version 2
  15  // of the License, or (at your option) any later version.
  16  //
  17  // This program is distributed in the hope that it will be useful,
  18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  // GNU General Public License for more details.
  21  //
  22  // You should have received a copy of the GNU General Public License
  23  // along with this program; if not, write to the Free Software
  24  // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  25  //
  26  // The GNU General Public License is also available online at:
  27  //
  28  // http://www.gnu.org/copyleft/gpl.html
  29  //
  30  
  31  #include <metatranslator.h>
  32  
  33  #include <ctype.h>
  34  #include <errno.h>
  35  
  36  #include <qfile.h>
  37  #include <qfileinfo.h>
  38  #include <qregexp.h>
  39  #include <qstring.h>
  40  #include <qtextstream.h>
  41  
  42  
/*
  The first part of this source file is the PHP tokenizer.  We skip
  most of PHP; the only tokens that interest us are defined here.
  Thus, the code fragment

  function main()
  {
      printf( "Hello, world!\n" );
      return 0;
  }

  is broken down into the following tokens:

      Ident Ident LeftParen RightParen
      LeftBrace
      Ident LeftParen String RightParen Semicolon
      Ident Semicolon
      RightBrace.

  Notice that the 0 doesn't produce any token, and that keywords such
  as "function" and "return" are reported as plain identifiers.
*/
  64  
  65  enum { Tok_Eof, Tok_class, Tok_namespace, Tok_return, Tok_tr,
  66         Tok_trUtf8, Tok_translate, Tok_Ident, Tok_i18n, Tok_x18n,
  67         Tok_Comment, Tok_String, Tok_SString, Tok_Colon, Tok_Gulbrandsen,
  68         Tok_LeftBrace, Tok_RightBrace, Tok_LeftParen, Tok_RightParen,
  69         Tok_Comma, Tok_Semicolon };
  70  
  71  /*
  72    The tokenizer maintains the following global variables. The names
  73    should be self-explanatory.
  74  */
  75  static QCString yyFileName;
  76  static int yyCh;
  77  static char yyIdent[128];
  78  static size_t yyIdentLen;
  79  static char yyComment[65536];
  80  static size_t yyCommentLen;
  81  static char yyString[16384];
  82  static size_t yyStringLen;
  83  static int yyBraceDepth;
  84  static int yyParenDepth;
  85  static int yyLineNo;
  86  static int yyCurLineNo;
  87  
  88  // the file to read from (if reading from a file)
  89  static FILE *yyInFile;
  90  
  91  // the string to read from and current position in the string (otherwise)
  92  static QString yyInStr;
  93  static int yyInPos;
  94  
  95  static int (*getChar)();
  96  
  97  static int getCharFromFile()
  98  {
  99      int c = getc( yyInFile );
 100      if ( c == '\n' )
 101          yyCurLineNo++;
 102      return c;
 103  }
 104  
 105  static void startTokenizer( const char *fileName, int (*getCharFunc)() )
 106  {
 107      yyInPos = 0;
 108      getChar = getCharFunc;
 109  
 110      yyFileName = fileName;
 111      yyCh = getChar();
 112      yyBraceDepth = 0;
 113      yyParenDepth = 0;
 114      yyCurLineNo = 1;
 115  }
 116  
 117  static int getToken()
 118  {
 119      const char tab[] = "abfnrtv";
 120      const char backTab[] = "\a\b\f\n\r\t\v";
 121      uint n;
 122  
 123      yyIdentLen = 0;
 124      yyCommentLen = 0;
 125      yyStringLen = 0;
 126  
 127      while ( yyCh != EOF ) {
 128          yyLineNo = yyCurLineNo;
 129  
 130          if ( isalpha(yyCh) || yyCh == '_' ) {
 131              do {
 132                  if ( yyIdentLen < sizeof(yyIdent) - 1 )
 133                      yyIdent[yyIdentLen++] = (char) yyCh;
 134                  yyCh = getChar();
 135              } while ( isalnum(yyCh) || yyCh == '_' );
 136              yyIdent[yyIdentLen] = '\0';
 137  
 138              switch ( yyIdent[0] ) {
 139                  case 'e':
 140                      if ( strcmp(yyIdent + 1, "zi18n") == 0 )
 141                          return Tok_i18n;
 142                      else if ( strcmp(yyIdent + 1, "zx18n") == 0 )
 143                          return Tok_x18n;
 144                      break;
 145              }
 146              return Tok_Ident;
 147          } else {
 148              switch ( yyCh ) {
 149                  case '/':
 150                      yyCh = getChar();
 151                      if ( yyCh == '/' ) {
 152                          do {
 153                              yyCh = getChar();
 154                          } while ( yyCh != EOF && yyCh != '\n' );
 155                      } else if ( yyCh == '*' ) {
 156                          bool metAster = FALSE;
 157                          bool metAsterSlash = FALSE;
 158  
 159                          while ( !metAsterSlash ) {
 160                              yyCh = getChar();
 161                              if ( yyCh == EOF ) {
 162                                  qWarning( "%s: Unterminated PHP comment starting at"
 163                                            " line %d", (const char *) yyFileName,
 164                                            yyLineNo );
 165                                  yyComment[yyCommentLen] = '\0';
 166                                  return Tok_Comment;
 167                              }
 168                              if ( yyCommentLen < sizeof(yyComment) - 1 )
 169                                  yyComment[yyCommentLen++] = (char) yyCh;
 170  
 171                              if ( yyCh == '*' )
 172                                  metAster = TRUE;
 173                              else if ( metAster && yyCh == '/' )
 174                                  metAsterSlash = TRUE;
 175                              else
 176                                  metAster = FALSE;
 177                          }
 178                          yyCh = getChar();
 179                          yyCommentLen -= 2;
 180                          yyComment[yyCommentLen] = '\0';
 181                          return Tok_Comment;
 182                      }
 183                      break;
 184                  case '"':
 185                      yyCh = getChar();
 186  
 187                      while ( yyCh != EOF && yyCh != '\n' && yyCh != '"' ) {
 188                          if ( yyCh == '\\' ) {
 189                              yyCh = getChar();
 190  
 191                              if ( yyCh == 'x' ) {
 192                                  QCString hex = "0";
 193  
 194                                  yyCh = getChar();
 195                                  while ( isxdigit(yyCh) ) {
 196                                      hex += (char) yyCh;
 197                                      yyCh = getChar();
 198                                  }
 199                                  sscanf( hex, "%x", &n );
 200                                  if ( yyStringLen < sizeof(yyString) - 1 )
 201                                      yyString[yyStringLen++] = (char) n;
 202                              } else if ( yyCh >= '0' && yyCh < '8' ) {
 203                                  QCString oct = "";
 204  
 205                                  do {
 206                                      oct += (char) yyCh;
 207                                      yyCh = getChar();
 208                                  } while ( yyCh >= '0' && yyCh < '8' );
 209                                  sscanf( oct, "%o", &n );
 210                                  if ( yyStringLen < sizeof(yyString) - 1 )
 211                                      yyString[yyStringLen++] = (char) n;
 212                              } else {
 213                                  const char *p = strchr( tab, yyCh );
 214                                  if ( yyStringLen < sizeof(yyString) - 1 )
 215                                      yyString[yyStringLen++] = ( p == 0 ) ?
 216                                          (char) yyCh : backTab[p - tab];
 217                                  yyCh = getChar();
 218                              }
 219                          } else {
 220                              if ( yyStringLen < sizeof(yyString) - 1 )
 221                                  yyString[yyStringLen++] = (char) yyCh;
 222                              yyCh = getChar();
 223                          }
 224                      }
 225                      yyString[yyStringLen] = '\0';
 226  
 227  //                     if ( yyCh != '"' )
 228  //                         qWarning( "%s:%d: Unterminated PHP string",
 229  //                                   (const char *) yyFileName, yyLineNo );
 230  
 231                      if ( yyCh == EOF ) {
 232                          return Tok_Eof;
 233                      } else {
 234                          yyCh = getChar();
 235                          return Tok_String;
 236                      }
 237                      break;
 238                  case '\'':
 239                      yyCh = getChar();
 240  
 241                      while ( yyCh != EOF && yyCh != '\n' && yyCh != '\'' ) {
 242                          if ( yyCh == '\\' ) {
 243                              yyCh = getChar();
 244  
 245                              if ( yyCh == 'x' ) {
 246                                  QCString hex = "0";
 247  
 248                                  yyCh = getChar();
 249                                  while ( isxdigit(yyCh) ) {
 250                                      hex += (char) yyCh;
 251                                      yyCh = getChar();
 252                                  }
 253                                  sscanf( hex, "%x", &n );
 254                                  if ( yyStringLen < sizeof(yyString) - 1 )
 255                                      yyString[yyStringLen++] = (char) n;
 256                              } else if ( yyCh >= '0' && yyCh < '8' ) {
 257                                  QCString oct = "";
 258  
 259                                  do {
 260                                      oct += (char) yyCh;
 261                                      yyCh = getChar();
 262                                  } while ( yyCh >= '0' && yyCh < '8' );
 263                                  sscanf( oct, "%o", &n );
 264                                  if ( yyStringLen < sizeof(yyString) - 1 )
 265                                      yyString[yyStringLen++] = (char) n;
 266                              } else {
 267                                  const char *p = strchr( tab, yyCh );
 268                                  if ( yyStringLen < sizeof(yyString) - 1 )
 269                                      yyString[yyStringLen++] = ( p == 0 ) ?
 270                                          (char) yyCh : backTab[p - tab];
 271                                  yyCh = getChar();
 272                              }
 273                          } else {
 274                              if ( yyStringLen < sizeof(yyString) - 1 )
 275                                  yyString[yyStringLen++] = (char) yyCh;
 276                              yyCh = getChar();
 277                          }
 278                      }
 279                      yyString[yyStringLen] = '\0';
 280  
 281  //                     if ( yyCh != '\'' )
 282  //                         qWarning( "%s:%d: Unterminated PHP string",
 283  //                                   (const char *) yyFileName, yyLineNo );
 284  
 285                      if ( yyCh == EOF ) {
 286                          return Tok_Eof;
 287                      } else {
 288                          yyCh = getChar();
 289                          return Tok_SString;
 290                      }
 291                      break;
 292                  case ':':
 293                      yyCh = getChar();
 294                      if ( yyCh == ':' ) {
 295                          yyCh = getChar();
 296                          return Tok_Gulbrandsen;
 297                      }
 298                      return Tok_Colon;
 299                  case '{':
 300                      yyBraceDepth++;
 301                      yyCh = getChar();
 302                      return Tok_LeftBrace;
 303                  case '}':
 304                      yyBraceDepth--;
 305                      yyCh = getChar();
 306                      return Tok_RightBrace;
 307                  case '(':
 308                      yyParenDepth++;
 309                      yyCh = getChar();
 310                      return Tok_LeftParen;
 311                  case ')':
 312                      yyParenDepth--;
 313                      yyCh = getChar();
 314                      return Tok_RightParen;
 315                  case ',':
 316                      yyCh = getChar();
 317                      return Tok_Comma;
 318                  case ';':
 319                      yyCh = getChar();
 320                      return Tok_Semicolon;
 321                  default:
 322                      yyCh = getChar();
 323              }
 324          }
 325      }
 326      return Tok_Eof;
 327  }
 328  
/*
  The second part of this source file is the parser. It accomplishes
  a very easy task: it finds all calls to ezi18n() and ezx18n() whose
  leading arguments are string literals, as in
  ezi18n( 'context', 'Hello' ) or ezx18n( 'ext', 'context', 'Hello' ),
  and records the context and the source text of each call. A string
  literal given as the next argument is recorded as the comment when
  it is immediately followed by the closing parenthesis.

  The parser also tracks Class::method prefixes and brace depth
  (presumably inherited from the Qt Linguist original); that state is
  not used when messages are recorded.
*/
 338  
 339  static int yyTok;
 340  
 341  static bool match( int t )
 342  {
 343      bool matches = ( yyTok == t );
 344      if ( matches )
 345          yyTok = getToken();
 346      return matches;
 347  }
 348  
 349  static bool matchString( QCString *s )
 350  {
 351      bool matches = ( yyTok == Tok_String );
 352      *s = "";
 353      while ( yyTok == Tok_String ) {
 354          *s += yyString;
 355          yyTok = getToken();
 356      }
 357      return matches;
 358  }
 359  
 360  static bool matchSString( QCString *s )
 361  {
 362      bool matches = ( yyTok == Tok_SString );
 363      *s = "";
 364      while ( yyTok == Tok_SString ) {
 365          *s += yyString;
 366          yyTok = getToken();
 367      }
 368      return matches;
 369  }
 370  
 371  static void parse( MetaTranslator *tor, const char *initialContext,
 372                     const char *defaultContext )
 373  {
 374      QMap<QCString, QCString> qualifiedContexts;
 375      QStringList namespaces;
 376      QCString context;
 377      QCString ext;
 378      QCString text;
 379      QCString comment;
 380      QCString functionContext = initialContext;
 381      QCString prefix;
 382      bool utf8 = FALSE;
 383  
 384      yyTok = getToken();
 385      while ( yyTok != Tok_Eof ) {
 386          switch ( yyTok ) {
 387              case Tok_i18n:
 388                  utf8 = FALSE;
 389                  yyTok = getToken();
 390                  if ( match( Tok_LeftParen ) &&
 391                       ( matchString( &context ) || matchSString( &context ) ) &&
 392                       match( Tok_Comma ) &&
 393                       ( matchString( &text ) || matchSString( &text ) ) )
 394                  {
 395                      if ( ( match( Tok_Comma ) &&
 396                             ( matchString( &comment ) || matchSString( &comment ) ) &&
 397                             match( Tok_RightParen ) ) == false )
 398                      {
 399                          comment = "";
 400                      }
 401                      tor->insert( MetaTranslatorMessage( context, text, comment, QString::null, utf8 ) );
 402                  }
 403  //                 else
 404  //                     qDebug( " --- token failed ------------" );
 405                  break;
 406              case Tok_x18n:
 407                  utf8 = FALSE;
 408                  yyTok = getToken();
 409                  if ( match( Tok_LeftParen ) &&
 410                       ( matchString( &ext ) || matchSString( &ext ) ) &&
 411                       match( Tok_Comma ) &&
 412                       ( matchString( &context ) || matchSString( &context ) ) &&
 413                       match( Tok_Comma ) &&
 414                       ( matchString( &text ) || matchSString( &text ) ) )
 415                  {
 416                      if ( ( match( Tok_Comma ) &&
 417                             ( matchString( &comment ) || matchSString( &comment ) ) &&
 418                             match( Tok_RightParen ) ) == false )
 419                      {
 420                          comment = "";
 421                      }
 422                      tor->insert( MetaTranslatorMessage( context, text, comment, QString::null, utf8 ) );
 423                  }
 424  //                 else
 425  //                     qDebug( " --- token failed ------------" );
 426                  break;
 427              case Tok_Ident:
 428                  if ( !prefix.isNull() )
 429                      prefix += "::";
 430                  prefix += yyIdent;
 431                  yyTok = getToken();
 432                  if ( yyTok != Tok_Gulbrandsen )
 433                      prefix = (const char *) 0;
 434                  break;
 435              case Tok_Comment:
 436                  comment = yyComment;
 437                  comment = comment.simplifyWhiteSpace();
 438                  yyTok = getToken();
 439                  break;
 440              case Tok_Gulbrandsen:
 441                  // at top level?
 442                  if ( yyBraceDepth == (int) namespaces.count() && yyParenDepth == 0 )
 443                      functionContext = prefix;
 444                  yyTok = getToken();
 445                  break;
 446              case Tok_RightBrace:
 447              case Tok_Semicolon:
 448                  if ( yyBraceDepth >= 0 &&
 449                       yyBraceDepth + 1 == (int) namespaces.count() )
 450                      namespaces.remove( namespaces.fromLast() );
 451                  if ( yyBraceDepth == (int) namespaces.count() ) {
 452                      functionContext = defaultContext;
 453                  }
 454                  yyTok = getToken();
 455                  break;
 456              default:
 457                  yyTok = getToken();
 458          }
 459      }
 460  
 461  //     if ( yyBraceDepth != 0 )
 462  //         qWarning( "%s: Unbalanced braces in PHP code", (const char *) yyFileName );
 463  //     if ( yyParenDepth != 0 )
 464  //         qWarning( "%s: Unbalanced parentheses in PHP code", (const char *) yyFileName );
 465  }
 466  
 467  void fetchtr_php( QFileInfo *fi, MetaTranslator *tor, bool mustExist )
 468  {
 469      char *defaultContext = "";
 470      yyInFile = fopen( fi->filePath().latin1(), "r" );
 471      if ( yyInFile == 0 )
 472      {
 473          if ( mustExist )
 474              qWarning( "lupdate error: cannot open PHP source file '%s': %s",
 475                        fi->filePath().latin1(), strerror(errno) );
 476          return;
 477      }
 478  
 479      startTokenizer( fi->fileName().latin1(), getCharFromFile );
 480      parse( tor, 0, defaultContext );
 481      fclose( yyInFile );
 482  }
Code source de eZ Publish 3.9.0

/support/lupdate-ezpublish3/ -> fetchtr_php.cpp (source)