Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

xtl_quote.h

Go to the documentation of this file.
00001 
00002 // Common Text Transformation Library
00003 // Copyright (C) 1997-2006 by Igor Kholodov. 
00004 //
00005 // This library is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public
00007 // License as published by the Free Software Foundation; either
00008 // version 2.1 of the License, or (at your option) any later version.
00009 //
00010 // This library is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // Lesser General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU Lesser General Public
00016 // License along with this library; if not, write to the
00017 // Free Software Foundation, Inc.,
00018 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00019 //
00020 // mailto:cttl@users.sourceforge.net
00021 // http://sourceforge.net/projects/cttl/
00023 
00042 // xtl_quote.h
00043 
00044 #ifndef _CTTL_XTL_QUOTE_H_INCLUDED_
00045 #define _CTTL_XTL_QUOTE_H_INCLUDED_
00046 
00047 namespace cttl_impl {
00048 
// relaxed_bool_T is xtl_bool< true > placed inside the xtl_wrap adaptor.
// Within this file it is used as the LeftT of xtl_quote_bool, as the MiddleT
// named by the default constructors of the ansi/c quote classes, and as the
// MiddleT of the predefined quote typedefs at the end of the file.
// NOTE(review): presumably a lexeme that always succeeds; confirm against
// the definition of xtl_bool.
00054 typedef xtl_wrap< xtl_bool< true > >
00055 relaxed_bool_T;
00056 
00085 template< typename LeftT, typename MiddleT, typename RightT, typename DerivedT >
00086 class xtl_quote_base {
00087 
00088 protected:
00090     LeftT   m_left;
00091 
00093     MiddleT m_middle;
00094 
00096     RightT  m_right;
00097 
00098 public:
00099     // compile-time
00100 
00102     xtl_quote_base( LeftT const& left_, MiddleT const& middle_, RightT const& right_ )
00103     : m_left( left_ ), m_middle( middle_ ), m_right( right_ )
00104     {
00105     }
00106 
00107     // run-time
00108 
00133     template< typename UniverseT >
00134     size_t match( UniverseT& edge_ )
00135     {
00136         // L+M+R_ balanced pair
00137         CTTL_TRACE_LEVEL_MATCH( '\"' );
00138         typename UniverseT::offset_manager_T saved_first_offset( edge_.parent().container(), edge_.first.offset() );
00139         typename UniverseT::offset_manager_T left_offset( edge_.parent().container(), m_left.match( edge_ ) );
00140 
00141         if ( left_offset != UniverseT::string_T::npos ) {
00142             if ( static_cast< DerivedT* >( this )->match_pair( edge_ ) )
00143                 return left_offset;
00144         }
00145                 
00146         edge_.first.offset( saved_first_offset );       // restore universe
00147         CTTL_TRACE_TEXT_RESULT( false, '\"', "xtl_quote_base: opening close failed" );
00148         return UniverseT::string_T::npos;
00149     }
00150     
00175     template< typename UniverseT >
00176     size_t find( UniverseT& edge_ )
00177     {
00178         // !L+M+R_
00179         CTTL_TRACE_LEVEL_FIND( '\"' );
00180         typename UniverseT::offset_manager_T saved_first_offset( edge_.parent().container(), edge_.first.offset() );
00181         typename UniverseT::offset_manager_T left_offset( edge_.parent().container(), m_left.find( edge_ ) );
00182         if ( left_offset != UniverseT::string_T::npos ) {
00183             if ( static_cast< DerivedT* >( this )->match_pair( edge_ ) )
00184                 return left_offset;
00185         }
00186 
00187         edge_.first.offset( saved_first_offset );       // restore universe
00188         CTTL_TRACE_TEXT_RESULT( false, '\"', "xtl_quote_base: opening clause failed" );
00189         return UniverseT::string_T::npos;
00190     }
00191     
00216     template< typename UniverseT >
00217     size_t bang_find( UniverseT& edge_ )
00218     {
00219         // !!L+M+R_
00220         CTTL_TRACE_LEVEL_BANG( '\"' );
00221         typename UniverseT::offset_manager_T saved_first_offset( edge_.parent().container(), edge_.first.offset() );
00222         // if this offset stops progressing, we must bail out of infinite loop:
00223         typename UniverseT::offset_manager_T iteration_offset( edge_.parent().container(), UniverseT::string_T::npos );
00224         typename UniverseT::offset_manager_T left_offset( edge_.parent().container(), UniverseT::string_T::npos );
00225 
00226         while ( ( left_offset = m_left.bang_find( edge_ ) ) != UniverseT::string_T::npos ) {
00227             if ( static_cast< DerivedT* >( this )->match_pair( edge_ ) )
00228                 return left_offset;
00229 
00230             if ( iteration_offset == edge_.first.offset() ) {
00231                 CTTL_TRACE_TEXT( '\"', "!!(left,middle,right): iteration has not made any progress: bailing out" );
00232                 break;  // second, third, etc., iteration hasn't made any progress: bail out
00233             }
00234 
00235             iteration_offset = edge_.first.offset();
00236         }
00237 
00238         edge_.first.offset( saved_first_offset );       // restore universe
00239         CTTL_TRACE_TEXT_RESULT( false, '\"', "xtl_quote_base: LHS bang_find failed" );
00240         return UniverseT::string_T::npos;
00241     }
00242     
00243 };  // xtl_quote_base
00244 
00245 
// xtl_quote_generic: quote lexeme with arbitrary opening (LeftT), interior
// (MiddleT) and closing (RightT) clauses. Its match_pair() keeps a nesting
// counter, so an opening clause found before a closing clause requires one
// more closing clause before the quote is considered balanced.
00268 template< typename LeftT, typename MiddleT, typename RightT >
00269 class xtl_quote_generic : public xtl_quote_base< LeftT, MiddleT, RightT, xtl_quote_generic< LeftT, MiddleT, RightT > > {
00270 public:
00271     // compile-time
00272 
          // Forwards the three clauses to the base class, which stores copies.
00274     xtl_quote_generic( LeftT const& left_, MiddleT const& middle_, RightT const& right_ )
00275     : xtl_quote_base< LeftT, MiddleT, RightT, xtl_quote_generic< LeftT, MiddleT, RightT > >( left_, middle_, right_ )
00276     {
00277     }
00278 
00279     // run-time
00280 
          // Invoked by xtl_quote_base::match/find/bang_find once the opening
          // clause has been accepted. Returns true when a balancing closing
          // clause is found and m_middle matches the text between the opening
          // and that closing clause. When the closing clause is never found the
          // function returns without rewinding edge_; the base class restores
          // edge_.first in that case.
00287     template< typename UniverseT >
00288     bool match_pair( UniverseT& edge_ )
00289     {
00290         // LHS matched
              // Start of the interior: edge_.first as left by the opening clause.
00291         typename UniverseT::offset_manager_T middle_offset_start( edge_.parent().container(), edge_.first.offset() );
00292         size_t level = 1;   // we are inside a quote
00293 
              // current_offset: where the next search for a nested opening clause starts.
              // right_offset: result of the latest m_right.bang_find().
              // right_right_offset: value handed back by edge_.first.offset( current_offset )
              // below; it is later written back as the position "after right".
00294         typename UniverseT::offset_manager_T current_offset( edge_.parent().container(), edge_.first.offset() );
00295         typename UniverseT::offset_manager_T right_offset( edge_.parent().container(), UniverseT::string_T::npos );
00296         typename UniverseT::offset_manager_T right_right_offset( edge_.parent().container(), UniverseT::string_T::npos );
00297 
              // Runs while the quote is still open (level != 0) and another closing
              // clause can be found. The increment expression also executes after
              // `continue`, so current_offset always follows edge_.first.
00298         for (
00299             ;
00300             level && ( ( right_offset = this->m_right.bang_find( edge_ ) ) != UniverseT::string_T::npos );
00301             current_offset = edge_.first.offset()
00302             )
00303         {
00304             // RHS found
00305             // restart left search position
                  // NOTE(review): the setter's return value is presumably the offset
                  // edge_.first had before the call (i.e. just past the closing
                  // clause); confirm against the node offset() implementation.
00306             right_right_offset = edge_.first.offset( current_offset );
00307             size_t left_offset = this->m_left.bang_find( edge_ );
                  // The inner `if` belongs to the outer one; when either condition is
                  // false control falls through to the level decrement below.
00308             if ( left_offset != UniverseT::string_T::npos )
00309                 if ( left_offset < right_offset ) {
00310                     // LHS found earlier, keep searching
00311                     ++level;
00312                     continue;   // ------->
00313                 }
00314 
00315             // restore edge after right pos
00316             edge_.first.offset( right_right_offset );
00317             --level;
00318         }
00319 
              // The loop ended with the quote still open: m_right.bang_find() failed.
00320         if ( level ) {
00321             // RHS was never found - we fail
00322             CTTL_TRACE_TEXT_RESULT( false, '\"', "xtl_quote_generic: closing clause not found" );
00323             return false;
00324         }
00325 
00326         // preserve edge
              // edge_mgr captures the edge as it is now; the edge is then narrowed to
              // [middle_offset_start, right_offset) for the interior clause and put
              // back with edge_mgr.restore() whatever the outcome.
00327         xtl_edge_offset_manager< UniverseT > edge_mgr( edge_ );
00328         edge_.first.offset( middle_offset_start );
00329         edge_.second.offset( right_offset );
00330         if ( this->m_middle.match( edge_ ) != UniverseT::string_T::npos ) {
00331             edge_mgr.restore( edge_ );
00332             return true;
00333         }
00334 
00335         edge_mgr.restore( edge_ );
00336         CTTL_TRACE_TEXT_RESULT( false, '\"', "xtl_quote_generic: middle clause failed" );
00337         return false;
00338     }
00339     
00340 };  // class xtl_quote_generic
00341 
00342 
00359 template< typename MiddleT, typename RightT >
00360 class xtl_quote_bool : public xtl_quote_base< relaxed_bool_T, MiddleT, RightT, xtl_quote_bool< MiddleT, RightT > > {
00361 
00362 public:
00363     // compile-time
00364 
00366     xtl_quote_bool( MiddleT const& middle_, RightT const& right_ )
00367     : xtl_quote_base< relaxed_bool_T, MiddleT, RightT, xtl_quote_bool< MiddleT, RightT > >( true, middle_, right_ )
00368     {
00369     }
00370 
00371     // run-time
00372 
00379     template< typename UniverseT >
00380     bool match_pair( UniverseT& edge_ )
00381     {
00382         // LHS matched
00383         typename UniverseT::offset_manager_T middle_offset_start( edge_.parent().container(), edge_.first.offset() );
00384         typename UniverseT::offset_manager_T right_offset( edge_.parent().container(), this->m_right.bang_find( edge_ ) );
00385 
00386         if ( right_offset != UniverseT::string_T::npos ) {
00387             // RHS matched
00388             // preserve edge
00389             xtl_edge_offset_manager< UniverseT > edge_mgr( edge_ );
00390             edge_.first.offset( middle_offset_start );
00391             edge_.second.offset( right_offset );
00392             if ( this->m_middle.match( edge_ ) != UniverseT::string_T::npos ) {
00393                 edge_mgr.restore( edge_ );
00394                 return true;
00395             }
00396             edge_mgr.restore( edge_ );
00397             CTTL_TRACE_TEXT_RESULT( false, '\"', "quote(true,M,R) middle clause failed" );
00398             return false;
00399         }
00400 
00401         CTTL_TRACE_TEXT_RESULT( false, '\"', "quote(true,M,R) closing clause failed" );
00402         return false;
00403     }
00404     
00405 };  // class xtl_quote_bool
00406 
00407 
00430 template< typename UniverseT, typename MiddleT, typename RightT, typename CharT >
00431 inline bool
00432 xtl_match_quote( UniverseT& edge_, MiddleT& middle_, RightT& right_, CharT const* chars2find_ )
00433 {
00434     typename UniverseT::string_T const& str = edge_.parent().text();
00435 
00436     // Closing quote is specified by the first character
00437     const CharT close_quote = chars2find_[ 0 ];
00438     const CharT back_whack = CharT( '\\' );
00439     const CharT cr = CharT( '\r' );
00440     const CharT lf = CharT( '\n' );
00441 
00442     // LHS matched.
00443     size_t middle_offset_start = edge_.first.offset();
00444     size_t current_offset = edge_.first.offset();
00445 
00446     // Assume we will fail
00447     size_t close_offset = UniverseT::string_T::npos;
00448     for (
00449         size_t temp_offset = str.find_first_of( chars2find_, current_offset );
00450         temp_offset != UniverseT::string_T::npos;
00451         temp_offset = str.find_first_of( chars2find_, temp_offset + 1 )
00452         )
00453     {
00454         if ( edge_.second.offset() <= temp_offset )
00455             // attempt to step beyond current universe
00456             return false;
00457 
00458         // make sure offset is valid, if not, keep retrying
00459         size_t adjusted_offset = edge_.space_policy().lower_bound( temp_offset, edge_.second.offset() );
00460         if ( adjusted_offset != temp_offset ) {
00461             temp_offset = adjusted_offset - 1;
00462             if ( edge_.second.offset() <= temp_offset )
00463                 // attempt to step beyond current universe
00464                 return false;
00465             continue;
00466         }
00467 
00468         // note: if open and close tokens are the same, then close token has higher priority
00469         if ( str[ temp_offset ] == back_whack ) {
00470             // this is C literal, and escape sequence was found inside:
00471             ++temp_offset;  // automatically skip next CTTL_STD_CHAR
00472             // Now we look at one more character; if it's remaining part of <cr><lf> pair,
00473             // eat it and go on:
00474             if ( str[ temp_offset ] == cr )
00475                 ++temp_offset;  // skip cr
00476 
00477             else if ( str[ temp_offset ] == lf )
00478                 ++temp_offset;  // skip lf
00479 
00480         } else if ( str[ temp_offset ] == close_quote ) {
00481             // RHS quote is found:
00482             close_offset = temp_offset;
00483             break;
00484         }
00485 
00486         // If we encounter unexpected cr or lf, halt evaluation of string literal:
00487         if ( ( str[ temp_offset ] == cr ) || ( str[ temp_offset ] == lf ) )
00488             // RHS not found on the same line - fail immediately
00489             return false;
00490     }
00491 
00492     if ( close_offset == UniverseT::string_T::npos )
00493         // RHS was never found
00494         return false;
00495 
00496     edge_.first.offset( close_offset );
00497     // Note: because right side is simply xtl_wrap< xtl_char< CharT > >, no
00498     // offset protection is used for middle_offset_start and right_offset:
00499     typename UniverseT::strict_edge_T strict_universe( edge_ );
00500     size_t right_offset = right_.match( strict_universe );
00501     if ( right_offset == UniverseT::string_T::npos ) {
00502         assert( false );
00503         return false;
00504     }
00505 
00506     xtl_edge_offset_manager< UniverseT > edge_mgr( edge_ ); // preserve edge
00507     edge_.first.offset( middle_offset_start );
00508     edge_.second.offset( right_offset );
00509     if ( middle_.match( edge_ ) != UniverseT::string_T::npos ) {
00510         edge_mgr.restore( edge_ );
00511         return true;
00512     }
00513     edge_mgr.restore( edge_ );
00514     return false;
00515 
00516 }   // xtl_match_quote()
00517 
00518 
00533 template< typename MiddleT, typename CharT >
00534 class xtl_quote_ansi_double_quote : public xtl_quote_base< xtl_wrap< xtl_char< CharT > >, MiddleT, xtl_wrap< xtl_char< CharT > >, xtl_quote_ansi_double_quote< MiddleT, CharT > > {
00535 public:
00536     // compile-time
00537 
00539     xtl_quote_ansi_double_quote()
00540     : xtl_quote_base< xtl_wrap< xtl_char< CharT > >, relaxed_bool_T, xtl_wrap< xtl_char< CharT > >, xtl_quote_ansi_double_quote< relaxed_bool_T, CharT > >( CharT( '\"' ), true, CharT( '\"' ) )
00541     {
00542     }
00543 
00545     xtl_quote_ansi_double_quote( MiddleT const& middle_ )
00546     : xtl_quote_base< xtl_wrap< xtl_char< CharT > >, MiddleT, xtl_wrap< xtl_char< CharT > >, xtl_quote_ansi_double_quote< MiddleT, CharT > >( CharT( '\"' ), middle_, CharT( '\"' ) )
00547     {
00548     }
00549 
00550     // run-time
00551 
00558     template< typename UniverseT >
00559     bool match_pair( UniverseT& edge_ )
00560     {
00561 //      typedef typename UniverseT::char_T char_T;
00562         static const CharT chars2find[] = { '\"', '\n', '\r', 0x00 };
00563         return xtl_match_quote( edge_, this->m_middle, this->m_right, chars2find ); // "\"\n\r"
00564     }
00565     
00566 };  // class xtl_quote_ansi_double_quote
00567 
00568 
00583 template< typename MiddleT, typename CharT >
00584 class xtl_quote_ansi_single_quote : public xtl_quote_base< xtl_wrap< xtl_char< CharT > >, MiddleT, xtl_wrap< xtl_char< CharT > >, xtl_quote_ansi_single_quote< MiddleT, CharT > > {
00585 
00586 public:
00587 
00588     // compile-time
00589 
00591     xtl_quote_ansi_single_quote()
00592     : xtl_quote_base< xtl_wrap< xtl_char< CharT > >, relaxed_bool_T, xtl_wrap< xtl_char< CharT > >, xtl_quote_ansi_single_quote< relaxed_bool_T, CharT > >( CharT( '\'' ), true, CharT( '\'' ) )
00593     {
00594     }
00595 
00597     xtl_quote_ansi_single_quote( MiddleT const& middle_ )
00598     : xtl_quote_base< xtl_wrap< xtl_char< CharT > >, MiddleT, xtl_wrap< xtl_char< CharT > >, xtl_quote_ansi_single_quote< MiddleT, CharT > >( CharT( '\'' ), middle_, CharT( '\'' ) )
00599     {
00600     }
00601 
00602     // run-time
00603 
00610     template< typename UniverseT >
00611     bool match_pair( UniverseT& edge_ )
00612     {
00613 //      typedef typename UniverseT::char_T char_T;
00614         static const CharT chars2find[] = { '\'', '\n', '\r', 0x00 };
00615         return xtl_match_quote( edge_, this->m_middle, this->m_right, chars2find ); // "\'\n\r"
00616     }
00617     
00618 };  // class xtl_quote_ansi_single_quote
00619 
00620 
00635 template< typename MiddleT, typename CharT >
00636 class xtl_quote_c_double_quote : public xtl_quote_base< xtl_wrap< xtl_char< CharT > >, MiddleT, xtl_wrap< xtl_char< CharT > >, xtl_quote_c_double_quote< MiddleT, CharT > > {
00637 
00638 public:
00639 
00640     // compile-time
00641 
00643     xtl_quote_c_double_quote()
00644     : xtl_quote_base<
00645         xtl_wrap< xtl_char< CharT > >,
00646         relaxed_bool_T,
00647         xtl_wrap< xtl_char< CharT > >,
00648         xtl_quote_c_double_quote< xtl_wrap< xtl_bool< true > >, CharT >
00649         >( CharT( '\"' ), true, CharT( '\"' ) )
00650     {
00651     }
00652 
00654     xtl_quote_c_double_quote( MiddleT const& middle_ )
00655     : xtl_quote_base< xtl_wrap< xtl_char< CharT > >, MiddleT, xtl_wrap< xtl_char< CharT > >, xtl_quote_c_double_quote< MiddleT, CharT > >( CharT( '\"' ), middle_, CharT( '\"' ) )
00656     {
00657     }
00658 
00659     // run-time
00660 
00667     template< typename UniverseT >
00668     bool match_pair( UniverseT& edge_ )
00669     {
00670 //      typedef typename UniverseT::char_T char_T;
00671         static const CharT chars2find[] = { '\"', '\n', '\r', '\\', 0x00 };
00672         return xtl_match_quote( edge_, this->m_middle, this->m_right, chars2find ); // "\"\n\r\\"
00673     }
00674     
00675 };  // class xtl_quote_c_double_quote
00676 
00677 
00692 template< typename MiddleT, typename CharT >
00693 class xtl_quote_c_single_quote : public xtl_quote_base< xtl_wrap< xtl_char< CharT > >, MiddleT, xtl_wrap< xtl_char< CharT > >, xtl_quote_c_single_quote< MiddleT, CharT > > {
00694 
00695 public:
00696 
00697     // compile-time
00698 
00700     xtl_quote_c_single_quote()
00701     : xtl_quote_base< xtl_wrap< xtl_char< CharT > >, relaxed_bool_T, xtl_wrap< xtl_char< CharT > >, xtl_quote_c_single_quote< relaxed_bool_T, CharT > >( CharT( '\'' ), true, CharT( '\'' ) )
00702     {
00703     }
00704 
00706     xtl_quote_c_single_quote( MiddleT const& middle_ )
00707     : xtl_quote_base< xtl_wrap< xtl_char< CharT > >, MiddleT, xtl_wrap< xtl_char< CharT > >, xtl_quote_c_single_quote< MiddleT, CharT > >( CharT( '\'' ), middle_, CharT( '\'' ) )
00708     {
00709     }
00710 
00711     // run-time
00712 
00719     template< typename UniverseT >
00720     bool match_pair( UniverseT& edge_ )
00721     {
00722 //      typedef typename UniverseT::char_T char_T;
00723         static const CharT chars2find[] = { '\'', '\n', '\r', '\\', 0x00 };
00724         return xtl_match_quote( edge_, this->m_middle, this->m_right, chars2find ); // "\'\n\r\\"
00725     }
00726     
00727 };  // class xtl_quote_c_single_quote
00728 
// Predefined quote lexemes for the CTTL_STD_CHAR character type. Each one
// uses relaxed_bool_T as its interior clause and is placed inside xtl_wrap.

// Single-quoted literal, no backslash escapes.
00730 typedef xtl_wrap< xtl_quote_ansi_single_quote< relaxed_bool_T, CTTL_STD_CHAR > >
00731 ansi_single_quote_T;
00732 
// Double-quoted literal, no backslash escapes.
00734 typedef xtl_wrap< xtl_quote_ansi_double_quote< relaxed_bool_T, CTTL_STD_CHAR > >
00735 ansi_double_quote_T;
00736 
// Single-quoted literal with backslash escapes.
00738 typedef xtl_wrap< xtl_quote_c_single_quote< relaxed_bool_T, CTTL_STD_CHAR > >
00739 c_single_quote_T;
00740 
// Double-quoted literal with backslash escapes.
00742 typedef xtl_wrap< xtl_quote_c_double_quote< relaxed_bool_T, CTTL_STD_CHAR > >
00743 c_double_quote_T;
00744 
00745 
// Predefined quote lexemes for the CTTL_STD_WCHAR character type. Each one
// uses relaxed_bool_T as its interior clause and is placed inside xtl_wrap.

// Single-quoted literal, no backslash escapes.
00747 typedef xtl_wrap< xtl_quote_ansi_single_quote< relaxed_bool_T, CTTL_STD_WCHAR > >
00748 wchar_ansi_single_quote_T;
00749 
// Double-quoted literal, no backslash escapes.
00751 typedef xtl_wrap< xtl_quote_ansi_double_quote< relaxed_bool_T, CTTL_STD_WCHAR > >
00752 wchar_ansi_double_quote_T;
00753 
// Single-quoted literal with backslash escapes.
00755 typedef xtl_wrap< xtl_quote_c_single_quote< relaxed_bool_T, CTTL_STD_WCHAR > >
00756 wchar_c_single_quote_T;
00757 
// Double-quoted literal with backslash escapes.
00759 typedef xtl_wrap< xtl_quote_c_double_quote< relaxed_bool_T, CTTL_STD_WCHAR > >
00760 wchar_c_double_quote_T;
00761 
00762 }   // namespace cttl_impl
00763 
00764 
00765 #endif // _CTTL_XTL_QUOTE_H_INCLUDED_

Generated on Thu Nov 2 17:44:07 2006 for Common Text Transformation Library by  doxygen 1.3.9.1