Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members

xml_lexer.h

Go to the documentation of this file.
00001 
00002 // Common Text Transformation Library
00003 // Copyright (C) 1997-2006 by Igor Kholodov. 
00004 //
00005 // This library is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public
00007 // License as published by the Free Software Foundation; either
00008 // version 2.1 of the License, or (at your option) any later version.
00009 //
00010 // This library is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // Lesser General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU Lesser General Public
00016 // License along with this library; if not, write to the
00017 // Free Software Foundation, Inc.,
00018 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00019 //
00020 // mailto:cttl@users.sourceforge.net
00021 // http://sourceforge.net/projects/cttl/
00023 
00039 // xml_lexer.h
00040 
00041 #ifndef _XML_LEXER_H_INCLUDED_
00042 #define _XML_LEXER_H_INCLUDED_
00043 
00076 template< typename ParserT >
00077 struct lexer : public ParserT {
00078 
00080     typedef typename ParserT::universe_T universe_T;   // universe type exported by the ParserT base class
00081 
          // Strict-edge type nested in universe_T. The constructor argument and the
          // three edge members below all use this type.
00083     typedef typename universe_T::strict_edge_T strict_universe_T;
00084 
          // Reference to the caller-supplied edge passed to the constructor. When
          // DISPOSE_CONSUMED_DATA is defined, xml_node() applies consumed_data.second
          // to begin( true ) in front of every node alternative.
00086     strict_universe_T& consumed_data;
00087 
          // Edge wrapped around the name characters matched inside xml_name(); it is
          // then fed to the xml_name_dictionary expression in the same rule.
00089     strict_universe_T edge_xml_name;
00090 
          // Edge wrapped around the characters between the quotes in xml_value();
          // xml_attr() combines it with vect_xml_text to produce id_attribute_value.
00092     strict_universe_T attribute_value;
00093     
          // All six inode_* scalars are constructed from
          // inode_writer<>( this->vect_xml_tree ). The rules reassign them while
          // filling in the FLD_FIRST_* / FLD_NEXT_* / FLD_LAST_* fields.
          //
          // Text node written by xml_text().
00095     lambda< inode_writer<> >::scalar inode_text;
00096 
          // Element node written by xml_element(); xml_text() and xml_attr() also load
          // the top of element_stack into it to reach the enclosing element.
00098     lambda< inode_writer<> >::scalar inode_element;
00099 
          // Receives a FLD_FIRST_* field of the enclosing node; the rules test it with
          // !inode_first to detect the "no children exist" case.
00101     lambda< inode_writer<> >::scalar inode_first;
00102 
          // Receives a FLD_LAST_* field of the enclosing node; its FLD_NEXT_* field is
          // then pointed at the newly created node.
00104     lambda< inode_writer<> >::scalar inode_last;
00105 
          // Attribute node written by xml_attr().
00107     lambda< inode_writer<> >::scalar inode_attribute;
00108 
          // Assigned from *element_stack in xml_element() once the finished element
          // has been taken off the stack.
00110     lambda< inode_writer<> >::scalar inode_parent;
00111 
          // Stack of size_t values. xml_element() assigns alias::offset( inode_element )
          // to it after creating the element node and later reads it back with
          // *element_stack-- (presumably push and pop -- confirm against the CTTL
          // lambda documentation).
00112     lambda< size_t >::stack element_stack;
00113 
          // Set in xml_name() from the xml_name_dictionary expression; copied into
          // FLD_ELEMENT_NAME by xml_element() and FLD_ATTRIBUTE_NAME by xml_attr().
00115     lambda<>::scalar id_xml_element;
00116 
          // Set in xml_attr() from the vect_xml_text expression; copied into
          // FLD_ATTRIBUTE_VALUE.
00118     lambda<>::scalar id_attribute_value;
00119 
00120 
00122     lexer( strict_universe_T& consumed_data_ )
          // Constructs the lexer around a caller-owned edge.
          //   consumed_data_ : stored by reference in consumed_data; its parent() is
          //                    also passed to new_edge() to build edge_xml_name and
          //                    attribute_value.
          // Every inode_* scalar is initialized from an inode_writer<> over
          // this->vect_xml_tree (reached through this->, so presumably a member of the
          // ParserT base -- confirm).
          // element_stack, id_xml_element and id_attribute_value are not listed here
          // and are therefore default-initialized.
00123         :
00124         consumed_data( consumed_data_ ),
00125         edge_xml_name( new_edge( consumed_data_.parent() ) ),
00126         attribute_value( new_edge( consumed_data_.parent() ) ),
00127         inode_text( inode_writer<>( this->vect_xml_tree ) ),
00128         inode_element( inode_writer<>( this->vect_xml_tree ) ),
00129         inode_first( inode_writer<>( this->vect_xml_tree ) ),
00130         inode_last( inode_writer<>( this->vect_xml_tree ) ),
00131         inode_attribute( inode_writer<>( this->vect_xml_tree ) ),
00132         inode_parent( inode_writer<>( this->vect_xml_tree ) )
00133     {
00134     }
00135 
00137     void operator=( lexer< ParserT > const& ) const
00138     {
              // Empty body: assigning one lexer to another copies nothing.
              // NOTE(review): presumably stubbed out because the class holds a
              // reference member (consumed_data), which cannot be re-bound -- confirm.
00139     }
00140 
00142     size_t xml_grammar( universe_T& universe_ )
00143     {
              // Top-level rule. Returns whatever match() yields for the expression
              // below when evaluated against universe_.
              //
              // error_info is built from this->xml_offset_stack and universe_.first and
              // is not referenced again in this function (presumably it does its work
              // in its constructor/destructor -- confirm).
00144         offset_stack_guard< universe_T > error_info( this->xml_offset_stack, universe_.first );
00145 
              // Reads as: repeated xml_node, followed by end(); if end() does not
              // match, ParserT::error_parser_failed is the alternative.
              // (Operator meanings -- unary * repetition, binary + sequence,
              // | alternative -- come from the CTTL library, not from this file.)
00146         return (
00147             *CTTL_RULE( lexer< ParserT >::xml_node )
00148             +
00149             ( end() | CTTL_RULE( ParserT::error_parser_failed ) )
00150 
00151         ).match( universe_ )
00152         ;
00153     }
00154 
00156     size_t xml_node( universe_T& universe_ )
00157     {
              // Rule for a single node. Tries, in this order: xml_pi, xml_element,
              // xml_comment, xml_text. Returns the result of match() on universe_.
00158         return (
00159 
              // Only compiled when DISPOSE_CONSUMED_DATA is defined: the expression is
              // prefixed with consumed_data.second( begin( true ) ).
              // NOTE(review): presumably moves the end of consumed_data up to the
              // current position so that already-parsed input can be discarded --
              // confirm.
00160 #ifdef DISPOSE_CONSUMED_DATA
00161             consumed_data.second( begin( true ) )
00162             +
00163 #endif // DISPOSE_CONSUMED_DATA
00164 
00165             (
00166                 CTTL_RULE( lexer< ParserT >::xml_pi )
00167                 |
00168                 CTTL_RULE( lexer< ParserT >::xml_element )
00169                 |
00170                 CTTL_RULE( lexer< ParserT >::xml_comment )
00171                 |
00172                 CTTL_RULE( lexer< ParserT >::xml_text )
00173             )
00174 
00175         ).match( universe_ )
00176         ;
00177     }
00178 
00180     size_t xml_name( universe_T& universe_ )
00181     {
              // Rule for an element/attribute/PI name. Unlike the other rules, the
              // expression is matched against a local edge< policy_strict_stream >
              // built from universe_ and universe_.space_policy(), not against
              // universe_ itself.
00182         edge< policy_strict_stream > strict_stream_universe( universe_, universe_.space_policy() );
00183 
              // Expression parts, in order:
              //   1. repeated entity( isspace );
              //   2. edge_xml_name wrapped around +entity( &LITERAL_XML_NAME ), so the
              //      matched name characters are recorded in edge_xml_name;
              //   3. a lambda expression combining id_xml_element.make_reference(),
              //      this->xml_name_dictionary and edge_xml_name (presumably stores
              //      the dictionary id of the name in id_xml_element -- confirm).
00184         return (
00185             *entity( isspace )
00186             +
00187             edge_xml_name( +entity( &LITERAL_XML_NAME ) )
00188             +
00189             // In this implementation xml elements and
00190             // attributes share name dictionary:
00191             *( ++( id_xml_element.make_reference() ^ this->xml_name_dictionary ^ edge_xml_name ) )
00192 
00193         ).match( strict_stream_universe )
00194         ;
00195     }
00196 
00198     size_t xml_comment( universe_T& universe_ )
00199     {
              // Rule for an XML comment: the open literal, any symbols that are not
              // the start of the close literal, then the close literal. If the close
              // literal is absent, ParserT::error_missing_comment_close is the
              // alternative. Returns the result of match() on universe_.
              //
              // error_info is constructed from the offset stack and universe_.first
              // and is not referenced again here.
00200         offset_stack_guard< universe_T > error_info( this->xml_offset_stack, universe_.first );
00201 
              // Each literal is preceded by universe_.first( symbol() + N ), where N
              // equals the length of the literal shown in the trailing comments
              // (4 for the open literal, 3 for the close literal).
              // NOTE(review): the exact effect of this node adaptor is defined by
              // CTTL -- confirm before changing N.
00202         return (
00203             universe_.first( symbol() + 4 )
00204             +
00205             &LITERAL_XML_COMMENT_OPEN    //"<!--"
00206             +
00207             *( -( universe_.first( symbol() + 3 ) + &LITERAL_XML_COMMENT_CLOSE ) + symbol() )
00208             +
00209             universe_.first( symbol() + 3 )
00210             +
00211             ( &LITERAL_XML_COMMENT_CLOSE | CTTL_RULE( ParserT::error_missing_comment_close ) )   // "-->"
00212 
00213         ).match( universe_ )
00214         ;
00215     }
00216 
00218     size_t xml_pi( universe_T& universe_ )
00219     {
              // Rule for a processing instruction: the open literal, a name (or
              // ParserT::error_bad_pi if xml_name does not match), any symbols that
              // are not the start of the close literal, then the close literal (or
              // ParserT::error_missing_pi_close). Returns the result of match() on
              // universe_.
              //
              // error_info is constructed from the offset stack and universe_.first
              // and is not referenced again here.
00220         offset_stack_guard< universe_T > error_info( this->xml_offset_stack, universe_.first );
00221 
              // Both literals are two characters long according to the trailing
              // comments, matching the "+ 2" in each universe_.first( symbol() + 2 ).
              // Note that xml_name overwrites id_xml_element and edge_xml_name as a
              // side effect; this rule does not read them afterwards.
00222         return (
00223             universe_.first( symbol() + 2 )
00224             +
00225             &LITERAL_XML_PI_OPEN    // "<?"
00226             +
00227             ( CTTL_RULE( lexer< ParserT >::xml_name ) | CTTL_RULE( ParserT::error_bad_pi ) )
00228             +
00229             *( -( universe_.first( symbol() + 2 ) + &LITERAL_XML_PI_CLOSE ) + symbol() )
00230             +
00231             universe_.first( symbol() + 2 )
00232             +
00233             ( &LITERAL_XML_PI_CLOSE | CTTL_RULE( ParserT::error_missing_pi_close ) )   // "?>"
00234 
00235         ).match( universe_ )
00236         ;
00237     }
00238 
00240     size_t xml_element( universe_T& universe_ )
00241     {
              // Rule for an XML element. Order of the expression:
              //   '<', xml_name, [action: create element node], repeated xml_attr,
              //   then either the "/>" literal or '>' + repeated xml_node + close tag,
              //   then [action: link the element to its parent].
              // Returns the result of match() on universe_.
              //
              // error_info is constructed from the offset stack and universe_.first
              // and is not referenced again here.
00242         offset_stack_guard< universe_T > error_info( this->xml_offset_stack, universe_.first );
00243         return (
00244             '<'
00245             +
00246             CTTL_RULE( lexer< ParserT >::xml_name )
00247             +
00248             // in-line semantic action to create new xml element node
                  // Steps: apply << ELEMENT_SIZE to inode_element (presumably reserves
                  // the element's fields in the tree -- confirm), store the name id
                  // left by xml_name in FLD_ELEMENT_NAME, and assign the node's offset
                  // to element_stack. The trailing const_scalar( 1 ) is the value of
                  // the whole comma expression.
00249             (
00250                 inode_element << ELEMENT_SIZE,
00251                 inode_element[ FLD_ELEMENT_NAME ] = id_xml_element,
00252                 element_stack = alias::offset( inode_element )
00253                 ,
00254                 const_scalar( 1 )
00255             )
00256             +
00257             *CTTL_RULE( lexer< ParserT >::xml_attr )
00258             +
00259             (
00260                 (
00261                     universe_.first( symbol() + 2 )
00262                     +
00263                     &LITERAL_XML_ELEM_CLOSED    // "/>" closed inode_element
00264                 )
00265                 |
00266                 (
00267                     '>'                         // open inode_element
00268                     +
                          // Child nodes are matched recursively through xml_node, so
                          // the inode_* members are overwritten by nested rules; the
                          // action below reloads inode_element from element_stack.
00269                     *CTTL_RULE( lexer< ParserT >::xml_node )
00270                     +
00271                     ( CTTL_RULE( lexer< ParserT >::xml_closeElement ) | CTTL_RULE( ParserT::error_missing_close_element ) )
00272                 )
00273             )
00274             +
00275             // in-line semantic action to add xml element to the parse tree:
                  // Asserts element_stack is non-empty, reloads inode_element from
                  // *element_stack--, and, only if the stack is still non-empty
                  // afterwards, links the element under the new top of the stack:
                  // as FLD_FIRST_ELEMENT when the parent has none, otherwise through
                  // FLD_NEXT_ELEMENT of the parent's current FLD_LAST_ELEMENT; in both
                  // cases the parent's FLD_LAST_ELEMENT is then updated.
00276             (
00277                 CTTL_LAMBDA_ASSERT( +element_stack ),
00278                 inode_element = *element_stack--,
00279                 +element_stack
00280                 && (
00281                     // if a parent element exists:
00282                     inode_parent = *element_stack,
00283                     inode_first = inode_parent[ FLD_FIRST_ELEMENT ],
00284                     !inode_first
00285                     &&
00286                     (
00287                         // if no children exist,
00288                         inode_parent[ FLD_FIRST_ELEMENT ] = alias::offset( inode_element ),
00289                         const_scalar( 1 )
00290                     ) || (
00291                         // otherwise
00292                         inode_last = inode_parent[ FLD_LAST_ELEMENT ],
00293                         CTTL_LAMBDA_ASSERT( alias::offset( inode_last ) ),
00294                         inode_last[ FLD_NEXT_ELEMENT ] = alias::offset( inode_element ),
00295                         const_scalar( 1 )
00296                     )
00297                     ,
00298                     inode_parent[ FLD_LAST_ELEMENT ] = alias::offset( inode_element )
00299                 )
00300                 ,
00301                 const_scalar( 1 )
00302             )
00303 
00304         ).match( universe_ )
00305         ;
00306     }
00307 
00309     size_t xml_closeElement( universe_T& universe_ )
00310     {
              // Rule for a closing tag: the "</" literal, a name, then '>'.
              // Returns the result of match() on universe_.
              // Nothing in this rule compares the name matched here with the name of
              // the element being closed; xml_name simply overwrites id_xml_element.
00311         return (
00312             universe_.first( symbol() + 2 )
00313             +
00314             &LITERAL_XML_CLOSE_ELEM // "</"
00315             +
00316             CTTL_RULE( lexer< ParserT >::xml_name )
00317             +
00318             '>'
00319 
00320         ).match( universe_ )
00321         ;
00322     }
00323 
00324     static int validate_text( universe_T& universe_ )
00325     {
              // Helper used by xml_text() through CTTL_STATIC_ACTION.
              // Modifies universe_ in place: calls find_class( isspace ) on its first
              // boundary and rfind_class( isspace ) on its second boundary, then
              // returns universe_.length() converted to int.
              // NOTE(review): presumably this trims surrounding whitespace so that
              // whitespace-only text yields 0 -- confirm against the CTTL node API.
00326         universe_.first.find_class( isspace );
00327         universe_.second.rfind_class( isspace );
00328         return universe_.length();
00329     }
00330 
00332     size_t xml_text( universe_T& universe_ )
00333     {
              // Rule for character data between tags. The left operand of '&' consumes
              // symbols as long as the next one is not '<'; the right operand is the
              // semantic action. Returns the result of match() on universe_.
00334         return (
00335             +( -symbol( '<' ) + symbol() )
00336             &
00337             (
00338                 // In-line semantic action to add text node to the current xml element:
                      // The first operand of && runs validate_text on universe_ through
                      // CTTL_STATIC_ACTION; the node-building branch after && is only
                      // evaluated when that operand is true. Because of the trailing
                      // ", const_scalar( 1 )" the action as a whole still evaluates
                      // to 1 either way.
00339                 ++(
00340                     scalar( 0 )
00341                     ^
00342                     CTTL_STATIC_ACTION(
00343                         std::ptr_fun( &lexer< ParserT >::validate_text ),
00344                         &universe_
00345                     )
00346                 )
00347                 &&
00348                 (
                          // Create the text node and store in FLD_TEXT_VALUE the value
                          // of the vect_xml_text expression over universe_ (presumably
                          // the index of the stored text -- confirm).
00349                     inode_text << TEXT_SIZE,
00350                     inode_text[ FLD_TEXT_VALUE ] = ++( scalar( 0 )^this->vect_xml_text^universe_ ),
                          // The assert means this path expects an enclosing element on
                          // element_stack; the text node is linked under that element
                          // using the same first/next/last scheme as in xml_element().
00351                     CTTL_LAMBDA_ASSERT( +element_stack ),
00352                     inode_element = *element_stack,
00353                     inode_first = inode_element[ FLD_FIRST_TEXT ],
00354                     !inode_first
00355                     &&
00356                     (
00357                         // if no children exist,
00358                         inode_element[ FLD_FIRST_TEXT ] = alias::offset( inode_text ), // consider using macro here
00359                         const_scalar( 1 )
00360                     ) || (
                              // otherwise append after the element's current last text
                              // node
00361                         inode_last = inode_element[ FLD_LAST_TEXT ],
00362                         CTTL_LAMBDA_ASSERT( alias::offset( inode_last ) ),
00363                         inode_last[ FLD_NEXT_TEXT ] = alias::offset( inode_text ),
00364                         const_scalar( 1 )
00365                     )
00366                     ,
00367                     inode_element[ FLD_LAST_TEXT ] = alias::offset( inode_text )
00368                 )
00369                 ,
00370                 const_scalar( 1 )
00371 
00372             )
00373 
00374         ).match( universe_ )
00375         ;
00376     }
00377 
00379     size_t xml_attr( universe_T& universe_ )
00380     {
              // Rule for one attribute: name, '=', quoted value (or
              // ParserT::error_bad_attr_format if xml_value does not match), then the
              // semantic action that records the attribute. Returns the result of
              // match() on universe_.
              //
              // error_info is constructed from the offset stack and universe_.first
              // and is not referenced again here.
00381         offset_stack_guard< universe_T > error_info( this->xml_offset_stack, universe_.first );
00382 
00383         return (
00384             CTTL_RULE( lexer< ParserT >::xml_name )
00385             +
00386             '='
00387             +
00388             (
00389                 (
00390                     CTTL_RULE( lexer< ParserT >::xml_value )
00391                     +
                          // After xml_value has set the attribute_value edge, this
                          // expression combines id_attribute_value, vect_xml_text and
                          // that edge (presumably stores the text and leaves its id in
                          // id_attribute_value -- confirm).
00392                     *( ++( id_attribute_value ^ this->vect_xml_text ^ attribute_value ) )
00393                 )
00394                 |
00395                 CTTL_RULE( ParserT::error_bad_attr_format )
00396             )
00397             +
00398             // in-line semantic action to add xml attribute node to the parse subtree
00399             // (attributes and xml elements share symbol table of names).
00400             (
00401                 inode_attribute << ATTRIBUTE_SIZE,
00402                 inode_attribute[ FLD_ATTRIBUTE_NAME ] = id_xml_element,
00403                 inode_attribute[ FLD_ATTRIBUTE_VALUE ] = id_attribute_value,
00404 
00405                 // link with parent element
00406                 CTTL_LAMBDA_ASSERT( +element_stack ),
00407                 inode_element = *element_stack,
00408                 inode_first = inode_element[ FLD_FIRST_ATTRIBUTE ],
00409                 !inode_first
00410                 &&
00411                 (
                          // NOTE(review): unlike the matching branches in
                          // xml_element() and xml_text(), this branch has no trailing
                          // const_scalar( 1 ). If the assignment evaluates to false the
                          // || branch below would also run -- confirm whether that is
                          // intended.
00412                     inode_element[ FLD_FIRST_ATTRIBUTE ] = alias::offset( inode_attribute )
00413                 ) || (
                          // otherwise append after the element's current last attribute
00414                     inode_last = inode_element[ FLD_LAST_ATTRIBUTE ],
00415                     CTTL_LAMBDA_ASSERT( alias::offset( inode_last ) ),
00416                     inode_last[ FLD_NEXT_ATTRIBUTE ] = alias::offset( inode_attribute ),
00417                     const_scalar( 1 )
00418                 )
00419                 ,
00420                 inode_element[ FLD_LAST_ATTRIBUTE ] = alias::offset( inode_attribute ),
00421                 const_scalar( 1 )
00422             )
00423 
00424         ).match( universe_ )
00425         ;
00426     }
00427 
00429     size_t xml_value( universe_T& universe_ )
00430     {
              // Rule for a quoted attribute value. Two alternatives: a value delimited
              // by double quotes, or one delimited by single quotes. In both, the
              // attribute_value edge is wrapped around the symbols between the quotes
              // (everything up to the next matching quote character), so the quotes
              // themselves lie outside the edge. Returns the result of match() on
              // universe_.
00431         return (
00432             (
00433                 '\"'
00434                 +
00435                 attribute_value( *( -symbol( '\"' ) + symbol() ) )
00436                 +
00437                 '\"'
00438             )
00439             |
00440             (
00441                 '\''
00442                 +
00443                 attribute_value( *( -symbol( '\'' ) + symbol() ) )
00444                 +
00445                 '\''
00446             )
00447         ).match( universe_ )
00448         ;
00449     }
00450 };
00451 
00452 #endif //_XML_LEXER_H_INCLUDED_

Generated on Thu Nov 2 17:43:26 2006 for CTTL XML stream parser sample by  doxygen 1.3.9.1