[Uml-devel] KDE/kdesdk/umbrello/umbrello/codeimport/kdevcppparser

Jean Vittor jean.vittor at free.fr
Sat Mar 29 15:58:45 UTC 2008


SVN commit 791448 by jvittor:

Use global rules

 M  +142 -105  lexer.cpp  
 M  +6 -6      lexer.h  
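
For readers unfamiliar with the pattern, the change replaces Spirit rules that were
rebuilt inside each call with grammar objects defined once at file scope, each handing
its value back through a closure. The following standalone sketch is not part of the
commit; it assumes the same Boost.Spirit classic (pre-2.0) headers lexer.cpp already
uses, and the names uint_closure, number_grammar and number_g are purely illustrative.

// Sketch (not from the commit) of the "global grammar with a result closure"
// pattern introduced by this change.
#include <iostream>
#include <boost/spirit/core.hpp>
#include <boost/spirit/attribute/closure.hpp>
#include <boost/spirit/phoenix/primitives.hpp>
#include <boost/spirit/phoenix/operators.hpp>

using namespace boost::spirit;
using namespace phoenix;

// Closure whose first member carries the grammar's return value,
// analogous to result_closure<> in lexer.cpp.
struct uint_closure : closure<uint_closure, unsigned> {
  member1 result_;
};

// Defined once at file scope instead of being rebuilt on every call.
struct number_grammar : grammar<number_grammar, uint_closure::context_t> {
  template <typename ScannerT>
  struct definition {
    rule<ScannerT> main;
    rule<ScannerT> const& start() const { return main; }
    definition(number_grammar const& self) {
      // uint_p passes the parsed value to its semantic action as arg1.
      main = uint_p[self.result_ = arg1];
    }
  };
} number_g;

int main() {
  unsigned value = 0;
  // An action attached to the grammar receives its closure value.
  parse_info<char const*> info = parse("42", number_g[var(value) = arg1]);
  std::cout << (info.hit ? value : 0u) << std::endl;  // prints 42
  return 0;
}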


--- trunk/KDE/kdesdk/umbrello/umbrello/codeimport/kdevcppparser/lexer.cpp #791447:791448
@@ -29,6 +29,7 @@
 #include <q3valuelist.h>
 
 #include <boost/bind.hpp>
+#include <boost/spirit/dynamic/if.hpp>
 #include <boost/spirit/phoenix/functions.hpp>
 
 namespace boost { namespace spirit { namespace impl {
@@ -66,17 +67,16 @@
 
 #endif
 
-Lexer::CharRule gr_charLiteral =
-  !ch_p('L') >> ch_p('\'')
-  >> *((anychar_p - '\'' - '\\')
-       | (ch_p('\\') >> (ch_p('\'') | '\\')))
-  >> '\'';
-Lexer::CharRule gr_numberLiteral = digit_p >> *(alnum_p | '.');
-Lexer::CharRule gr_stringLiteral =
-  ch_p('"') >> *((anychar_p - '"' - '\\') | str_p("\\\"") | "\\\\") >> '"';
-Lexer::CharRule gr_identifier = +(alnum_p | '_');
-Lexer::CharRule gr_whiteSpaces = *(blank_p | (ch_p('\\') >> eol_p));
+/** Utility closure that defines a result value.
+    Used to return values from grammars (copied from cpp lexer in spirit
+    repository). */
+template < typename ResultT >
+struct result_closure : closure<result_closure<ResultT>, ResultT> {
+  typedef closure<result_closure<ResultT>, ResultT> base_t;
+  typename base_t::member1 result_;
+};
 
+/** Stuff to construct a QString from iterators */
 struct constructQString_impl {
   template <typename _Arg1, typename _Arg2>
   struct result {
@@ -89,8 +89,74 @@
   }
 };
 
-function<constructQString_impl> const constructQString = constructQString_impl();
+const function<constructQString_impl> constructQString =
+  constructQString_impl();
 
+struct identifier :
+  grammar<identifier, result_closure<QString>::context_t>
+{
+  template < typename ScannerT >
+  struct definition {
+    typedef rule<ScannerT> rule_t;
+    rule_t main;
+
+    rule_t const& start() const {return main;}
+
+    definition( identifier const& self) {
+      main = (lexeme_d[
+		       (+(alnum_p | '_'))
+		       [self.result_ = constructQString(arg1, arg2)]
+		       ]);
+    }
+  };
+} identifier_g;
+
+struct operator_ :
+  grammar<operator_, result_closure<Token>::context_t>
+{
+  template < typename ScannerT >
+  struct definition {
+    typedef rule<ScannerT, result_closure<int>::context_t> rule_t;
+    rule_t main;
+
+    rule_t const& start() const {return main;}
+
+    definition( operator_ const& self) {
+      main =
+	(str_p("::")[ main.result_ = Token_scope]
+	 | (str_p("->*") | ".*")[ main.result_ = Token_ptrmem]
+	 | (str_p("<<=") | ">>=" | "+=" | "-=" | "*=" | "/=" | "%=" | "^="
+	    | "&=" | "|=")
+	 [ main.result_ = Token_assign]
+	 | (str_p("<<") | ">>")[ main.result_ = Token_shift]
+	 | (str_p("==") | "!=")[ main.result_ = Token_eq]
+	 | str_p("<=")[ main.result_ = Token_leq]
+	 | str_p(">=")[ main.result_ = Token_geq]
+	 | str_p("&&")[ main.result_ = Token_and]
+	 | str_p("||")[ main.result_ = Token_or]
+	 | str_p("++")[ main.result_ = Token_incr]
+	 | str_p("--")[ main.result_ = Token_decr]
+	 | str_p("->")[ main.result_ = Token_arrow]
+	 | str_p("##")[ main.result_ = Token_concat]
+	 | str_p("...")[ main.result_ = Token_ellipsis]
+	 )
+	[ self.result_ = construct_<Token>( main.result_, arg1, arg2)];
+    }
+  };
+} operator_g;
+
+Lexer::CharRule gr_charLiteral =
+  !ch_p('L') >> ch_p('\'')
+  >> *((anychar_p - '\'' - '\\')
+       | (ch_p('\\') >> (ch_p('\'') | '\\')))
+  >> '\'';
+Lexer::CharRule gr_numberLiteral = digit_p >> *(alnum_p | '.');
+Lexer::CharRule gr_stringLiteral =
+  ch_p('"') >> *((anychar_p - '"' - '\\') | str_p("\\\"") | "\\\\") >> '"';
+Lexer::CharRule gr_whiteSpaces = *(blank_p | (ch_p('\\') >> eol_p));
+Lexer::CharRule gr_lineComment = (str_p("//") >> (*(anychar_p - eol_p)));
+Lexer::CharRule gr_multiLineComment = confix_p( "/*", *anychar_p, "*/");
+
 Token::Token()
   : m_type( -1 ),
     m_start(),
@@ -252,31 +318,6 @@
     }
 }
 
-bool Lexer::Source::findOperator( Token& p_tk) {
-  int l_tokenType = -1;
-#warning This rule should be global
-  CharRule lr_operator =
-    (str_p("::")[ var( l_tokenType) = Token_scope]
-     | (str_p("->*") | ".*")[ var( l_tokenType) = Token_ptrmem]
-     | (str_p("<<=") | ">>=" | "+=" | "-=" | "*=" | "/=" | "%=" | "^=" | "&="
-	| "|=")[ var( l_tokenType) = Token_assign]
-     | (str_p("<<") | ">>")[ var( l_tokenType) = Token_shift]
-     | (str_p("==") | "!=")[ var( l_tokenType) = Token_eq]
-     | str_p("<=")[ var( l_tokenType) = Token_leq]
-     | str_p(">=")[ var( l_tokenType) = Token_geq]
-     | str_p("&&")[ var( l_tokenType) = Token_and]
-     | str_p("||")[ var( l_tokenType) = Token_or]
-     | str_p("++")[ var( l_tokenType) = Token_incr]
-     | str_p("--")[ var( l_tokenType) = Token_decr]
-     | str_p("->")[ var( l_tokenType) = Token_arrow]
-     | str_p("##")[ var( l_tokenType) = Token_concat]
-     | str_p("...")[ var( l_tokenType) = Token_ellipsis]
-     )
-    [var(p_tk) = construct_<Token>( l_tokenType, arg1, arg2)];
-  parse_info<CharIterator> l_info = parse( lr_operator);
-  return l_info.hit;
-}
-
 Position const& Lexer::getTokenPosition( const Token& token) const
 {
   return token.getStartPosition();
@@ -291,18 +332,17 @@
   QChar ch = m_source.currentChar();
   if( ch.isNull() || ch.isSpace() ){
     /* skip */
-  } else if( m_source.get_startLine() && ch == '#' ){
+  } else if( m_source.get_startLine() && ch == '#') {
 
     m_source.nextChar(); // skip #
     m_source.parse( gr_whiteSpaces); // skip white spaces
     m_source.set_startLine( false);
     
     QString directive;
-    m_source.parse( gr_identifier
-		    [var(directive) = constructQString(arg1, arg2)]); // read the directive
+    m_source.parse( identifier_g[ assign(directive)]); // read the directive
 
     handleDirective( directive );
-  } else if( m_source.get_startLine() && m_skipping[ m_ifLevel ] ){
+  } else if( m_source.get_startLine() && m_skipping[ m_ifLevel ] ) {
     // skip line and continue
     m_source.set_startLine( false);
     bool ppe = m_preprocessorEnabled;
@@ -316,21 +356,25 @@
     m_source.set_startLine( true);
     m_preprocessorEnabled = ppe;
     return;
-  } else if( m_source.readLineComment( m_recordComments, tk)) {
-  } else if( m_source.readMultiLineComment( m_recordComments, tk)) {
-  } else if( m_source.parse(
-			    gr_charLiteral
-			    [var(tk) = construct_<Token>( Token_char_literal,
-							  arg1, arg2)]
-			    ).hit) {
   } else if( m_source.parse
-	     ( gr_stringLiteral
-	       [var(tk) = construct_<Token>( Token_string_literal, arg1, arg2)]
-	       ).hit) {
+	     (
+	      if_p(var( m_recordComments))
+	      [ gr_lineComment | gr_multiLineComment
+		[var( tk) = construct_<Token>(Token_comment, arg1, arg2)]
+		]
+	      .else_p[ gr_lineComment | gr_multiLineComment]
+	      |
+	      gr_charLiteral
+	      [var(tk) = construct_<Token>( Token_char_literal,
+					    arg1, arg2)]
+	      |
+	      gr_stringLiteral
+	      [var(tk) = construct_<Token>( Token_string_literal, arg1, arg2)]
+	      ).hit) {
   } else if( ch.isLetter() || ch == '_' ){
     CharIterator start = m_source.get_ptr();
     QString ide;
-    m_source.parse( gr_identifier[var(ide) = constructQString(arg1, arg2)]);
+    m_source.parse( identifier_g[assign(ide)]);
     int k = Lookup::find( &keyword, ide );
     if( m_preprocessorEnabled && m_driver->hasMacro(ide) &&
 	(k == -1 || !m_driver->macro(ide).body().isEmpty()) ){
@@ -353,8 +397,7 @@
 	CharIterator endIde = m_source.get_ptr();
 
 	m_source.parse( gr_whiteSpaces);
-	if( m_source.currentChar() == '(' ){
-	  m_source.nextChar();
+	if( m_source.parse( ch_p('(')).hit) {
 	  int argIdx = 0;
 	  int argCount = m.argumentList().size();
 	  while( !m_source.currentChar().isNull() && argIdx<argCount ){
@@ -496,7 +539,7 @@
 	     ( gr_numberLiteral
 	       [var(tk) = construct_<Token>( Token_number_literal, arg1, arg2)]
 	       ).hit) {
-  } else if( m_source.findOperator( tk)) {
+  } else if( m_source.parse( operator_g[ assign(tk)]).hit) {
   } else {
     CharIterator l_ptr = m_source.get_ptr();
     m_source.nextChar();
@@ -584,29 +627,6 @@
     return arg.trimmed();
 }
 
-bool Lexer::Source::readLineComment( bool p_recordComments, Token& p_tk)
-{
-  if( p_recordComments)
-    return parse( (str_p("//")
-		   >> (*(anychar_p - eol_p)))
-		  [var(p_tk) = construct_<Token>(Token_comment, arg1, arg2)]
-		  ).hit;
-  else
-    return parse( (str_p("//") >> (*(anychar_p - eol_p)))
-		  ).hit;
-}
-
-bool Lexer::Source::readMultiLineComment( bool p_recordComments, Token& p_tk)
-{
-#warning This rule should be global
-  if( p_recordComments)
-    return parse( confix_p( "/*", *anychar_p, "*/")
-		  [var( p_tk) = construct_<Token>(Token_comment, arg1, arg2)]
-		  ).hit;
-  else
-    return parse( confix_p( "/*", *anychar_p, "*/")).hit;
-}
-
 void Lexer::handleDirective( const QString& directive )
 {
     m_inPreproc = true;
@@ -669,7 +689,7 @@
 {
   m_source.parse( gr_whiteSpaces);
   QString word;
-  m_source.parse( gr_identifier[var(word) = constructQString(arg1, arg2)]);
+  m_source.parse( identifier_g[assign(word)]);
   bool r = m_driver->hasMacro( word );
 
   return r;
@@ -681,8 +701,7 @@
   m_source.parse( gr_whiteSpaces);
 
   QString macroName;
-  m_source.parse( gr_identifier
-		  [var(macroName) = constructQString(arg1, arg2)]);
+  m_source.parse( identifier_g[assign(macroName)]);
   m_driver->removeMacro( macroName );
   m.setName( macroName );
 
@@ -696,8 +715,8 @@
       m_source.parse( gr_whiteSpaces);
 
       QString arg;
-      m_source.parse( (str_p("...") | gr_identifier)
-		      [var(arg) = constructQString( arg1, arg2)]
+      m_source.parse( str_p("...")[var(arg) = constructQString( arg1, arg2)]
+		      | identifier_g[assign(arg)]
 		      );
       m.addArgument( Macro::Argument(arg) );
 
@@ -813,36 +832,54 @@
     }
 }
 
+typedef std::pair<QString, int> Dependency;
+
+struct DependencyClosure
+  : boost::spirit::closure<DependencyClosure, QString, int>
+{
+  member1 m_word;
+  member2 m_scope;
+};
+
+struct header :
+  grammar<header, result_closure<Dependency>::context_t>
+{
+  template < typename ScannerT >
+  struct definition {
+    typedef rule<ScannerT, DependencyClosure::context_t> rule_t;
+    rule_t main;
+
+    rule_t const& start() const {return main;}
+
+    definition( header const& self) {
+      main =
+	( confix_p( ch_p('"') [main.m_scope = (int)Dep_Local],
+		    (*anychar_p) [main.m_word = constructQString( arg1, arg2)],
+		    '"')
+	  |
+	  confix_p( ch_p('<') [main.m_scope = (int)Dep_Global],
+		    (*anychar_p) [main.m_word = constructQString( arg1, arg2)],
+		    '>')
+	  )
+	[self.result_ = construct_<Dependency>( main.m_word, main.m_scope)]
+	;
+    }
+  };
+} header_g;
+
 void Lexer::processInclude() {
-  if( m_skipping[m_ifLevel] )
-    return;
-
-  m_source.parse( gr_whiteSpaces);
-  QString word;
-  int l_scope = -1;
-#warning This rule should be global
-  if( m_source.parse(
-		     confix_p( ch_p('"')[var(l_scope) = (int)Dep_Local],
-			       (*anychar_p)
-			       [var(word) = constructQString( arg1, arg2)],
-			       '"')
-		     |
-		     confix_p( ch_p('<')[var(l_scope) = (int)Dep_Global],
-			       (*anychar_p)
-			       [var(word) = constructQString( arg1,
-							      arg2)],
-			       '>')
-		     ).hit) {
-    m_driver->addDependence( m_driver->currentFileName(),
-			     Dependence(word, l_scope));
-  }
+  if( !m_skipping[m_ifLevel] )
+    m_source.parse( gr_whiteSpaces >>
+		    header_g
+		    [boost::bind( &Lexer::addDependence, this, _1)]
+		    );
 }
 
 void Lexer::processUndef()
 {
   m_source.parse( gr_whiteSpaces);
   QString word;
-  m_source.parse( gr_identifier[var(word) = constructQString(arg1, arg2)]);
+  m_source.parse( identifier_g[assign(word)]);
   m_driver->removeMacro( word );
 }
 
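The newly included boost/spirit/dynamic/if.hpp supplies the if_p/else_p construct that
nextToken() now uses to either record a matched comment as Token_comment or discard it,
depending on m_recordComments. Below is a self-contained sketch of that construct, again
assuming Spirit classic; the flag, rule and variable names are made up for illustration
and are not taken from the commit.

// Sketch of if_p/else_p; recordComments and the comment rule are illustrative.
#include <iostream>
#include <string>
#include <boost/spirit/core.hpp>
#include <boost/spirit/dynamic/if.hpp>
#include <boost/spirit/utility/confix.hpp>
#include <boost/spirit/actor/assign_actor.hpp>
#include <boost/spirit/phoenix/primitives.hpp>

using namespace boost::spirit;
using namespace phoenix;

int main() {
  bool recordComments = true;
  std::string text;

  // Both branches consume a C or C++ comment; only the "then" branch
  // stores the matched text, mirroring if_p(var(m_recordComments)) above.
  rule<> comment =
      if_p(var(recordComments))
        [ (comment_p("//") | comment_p("/*", "*/")) [assign_a(text)] ]
      .else_p
        [ comment_p("//") | comment_p("/*", "*/") ];

  parse_info<> info = parse("/* hello */", comment);
  std::cout << info.hit << " '" << text << "'" << std::endl;
  return 0;
}
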
--- trunk/KDE/kdesdk/umbrello/umbrello/codeimport/kdevcppparser/lexer.h #791447:791448
@@ -223,6 +223,12 @@
 
 private:
   static int toInt( const Token& token );
+
+  void addDependence( std::pair<QString, int> const& p_wordAndScope) const {
+    m_driver->addDependence( m_driver->currentFileName(),
+			     Dependence( p_wordAndScope.first,
+					 p_wordAndScope.second));
+  }
   void tokenize();
   void nextToken( Token& token);
   void skip( int l, int r );
@@ -275,7 +281,6 @@
       return m_ptr != m_endPtr ? *m_ptr : QChar::null;
     }
     bool eof() const {return m_ptr >= m_endPtr;}
-    bool findOperator( Token& p_tk);
     void insert( QString const& p) {
       int l_offset = &*m_ptr - m_source.data();
       m_source.insert( l_offset, p);
@@ -293,9 +298,6 @@
 	break;
       }
     }
-    void nextChar( int n ) {
-      std::advance( m_ptr, n);
-    }
     template <typename _RuleT>
     parse_info<CharIterator> parse( _RuleT const& p_rule) {
       parse_info<CharIterator> l_return =
@@ -304,8 +306,6 @@
 	m_ptr = l_return.stop;
       return l_return;
     }
-    bool readLineComment( bool p_recordComments, Token& p_tk);
-    bool readMultiLineComment( bool p_recordComments, Token& p_tk);
     void reset() {
       m_source.clear();
       m_ptr = CharIterator();

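The header_g grammar delivers its std::pair<QString, int> result to the new private
Lexer::addDependence through boost::bind, which adapts the member function into a unary
callback usable as a semantic action. A minimal sketch of that bind pattern follows,
using a hypothetical stand-in class (std::string replaces QString so the sketch has no
Qt dependency); only the bind mechanics mirror the commit.

// Sketch of the boost::bind member-function callback used with header_g.
#include <iostream>
#include <string>
#include <utility>
#include <boost/bind.hpp>

typedef std::pair<std::string, int> Dependency;

class DependencyCollector {
public:
  // Plays the role of Lexer::addDependence: one argument, the grammar result.
  void addDependence(Dependency const& d) const {
    std::cout << d.first << " (scope " << d.second << ")" << std::endl;
  }
};

// Stand-in for a semantic-action slot: the real code passes the bound
// functor to header_g[...], which calls it with the parsed Dependency.
template <typename Action>
void fakeParseHeader(Action action) {
  action(Dependency("qstring.h", 0));
}

int main() {
  DependencyCollector collector;
  // boost::bind turns the member function plus object pointer into a
  // unary functor; _1 receives the Dependency produced by the grammar.
  fakeParseHeader(boost::bind(&DependencyCollector::addDependence,
                              &collector, _1));
  return 0;
}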


