Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

xtl_region_map.h

Go to the documentation of this file.
00001 
00002 // Common Text Transformation Library
00003 // Copyright (C) 1997-2006 by Igor Kholodov. 
00004 //
00005 // This library is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public
00007 // License as published by the Free Software Foundation; either
00008 // version 2.1 of the License, or (at your option) any later version.
00009 //
00010 // This library is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // Lesser General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU Lesser General Public
00016 // License along with this library; if not, write to the
00017 // Free Software Foundation, Inc.,
00018 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00019 //
00020 // mailto:cttl@users.sourceforge.net
00021 // http://sourceforge.net/projects/cttl/
00023 
00033 // xtl_region_map.h
00034 
00035 #ifndef _CTTL_XTL_REGION_MAP_H_INCLUDED_
00036 #define _CTTL_XTL_REGION_MAP_H_INCLUDED_
00037 
00038 #include <vector>
00039 #include <map>
00040 #include <string>
00041 
00042 namespace cttl_impl {
00043 
00051 class xtl_region_map
00052 {
00054     std::map< size_t, size_t > m_map_region;
00055 
00056 public:
00058     xtl_region_map()
00059     {
00060     }
00061 
00075     void adjust( size_t after_offset_, int delta_offset_ )
00076     {
00077         if ( !delta_offset_ )
00078             return;
00079 
00080         std::map< size_t, size_t >::iterator from_iter =
00081             m_map_region.lower_bound( after_offset_ );
00082         
00083         if ( from_iter != m_map_region.end() ) {
00084             std::vector< std::pair< size_t, size_t > > adjustment_vector;
00085             std::back_insert_iterator< std::vector< std::pair< size_t, size_t > > >
00086                 vector_back_inserter( adjustment_vector );
00087             
00088             std::copy( from_iter, m_map_region.end(), vector_back_inserter );
00089             m_map_region.erase( from_iter, m_map_region.end() );
00090             
00091             for ( size_t i = 0; i < adjustment_vector.size(); ++i ) {
00092                 if ( adjustment_vector[ i ].first > after_offset_ )
00093                     adjustment_vector[ i ].first += delta_offset_;
00094                 
00095                 if ( adjustment_vector[ i ].second > after_offset_ )
00096                     adjustment_vector[ i ].second += delta_offset_;
00097                 
00098                 m_map_region.insert( adjustment_vector[ i ] );
00099             }
00100         }
00101     }
00102 
00112     void insert( size_t from_offset_, size_t to_offset_ )
00113     {
00114         
00115         if ( from_offset_ == to_offset_ )   // no need to add empty ranges
00116             return;
00117         
00118         size_t invalid_from = 0;
00119         size_t invalid_to = 0;
00120         
00121         std::map< size_t, size_t >::const_iterator iter =
00122             m_map_region.lower_bound( from_offset_ );
00123         
00124         if ( iter != m_map_region.end() ) {
00125             invalid_from = iter->second;
00126             invalid_to = iter->first;
00127             
00128             if ( ( invalid_from <= from_offset_ ) && ( from_offset_ <= invalid_to ) ) {
00129                 // offset is invalid
00130                 from_offset_ = invalid_from;
00131             }
00132         }
00133         
00134         iter = m_map_region.lower_bound( to_offset_ );
00135         
00136         if ( iter != m_map_region.end() ) {
00137             invalid_from = iter->second;
00138             invalid_to = iter->first;
00139             
00140             if ( ( invalid_from <= to_offset_ ) && ( to_offset_ < invalid_to ) ) {
00141                 // offset is invalid
00142                 to_offset_ = invalid_to;
00143             }
00144         }
00145         
00146         // even if no adjustments were made to this region,
00147         // any enclosed ranges should be removed:
00148         erase( from_offset_, to_offset_ );
00149         
00150         m_map_region[ to_offset_ ] = from_offset_;
00151     }
00152     
00154     void erase( size_t from_offset_, size_t to_offset_ )
00155     {
00156         if ( from_offset_ == to_offset_ )   // no need to erase empty ranges
00157             return;
00158         
00159         if ( m_map_region.empty() )
00160             return; // there are no regions - nothing to do
00161         
00162         std::map< size_t, size_t >::iterator from_iter =
00163             m_map_region.lower_bound( from_offset_ );
00164         
00165         if ( from_iter != m_map_region.end() ) {
00166             std::map< size_t, size_t >::iterator to_iter =
00167                 m_map_region.upper_bound( to_offset_ );
00168             
00169             m_map_region.erase( from_iter, to_iter );
00170         }
00171     }
00172 
00179     bool intersection( size_t offset_ ) const
00180     {
00181         if ( m_map_region.empty() )
00182             return false;   // there is no regions
00183         
00184         // Find closest region:
00185         std::map< size_t, size_t >::const_iterator iter =
00186             m_map_region.lower_bound( offset_ );
00187         
00188         // If nothing found,
00189         if ( iter == m_map_region.end() )
00190             return false;   // offset is outside of any region, good to go
00191         
00192         size_t invalid_from = iter->second;
00193         size_t invalid_to = iter->first;
00194         
00195         if ( ( invalid_from <= offset_ ) && ( offset_ < invalid_to ) )
00196             return true;    // offset is inside a region
00197         
00198         return false;   // no intersection with a region was found
00199     }
00200     
00216     size_t find_not_region( size_t offset_, size_t str_length_ ) const
00217     {
00218         if ( offset_ > str_length_ )
00219             return str_length_;
00220         
00221         if ( m_map_region.empty() )
00222             return offset_; // there are no regions
00223         
00224         // Find closest region:
00225         std::map< size_t, size_t >::const_iterator iter =
00226             m_map_region.lower_bound( offset_ );
00227         
00228         // Has map entry been found ?
00229         if ( iter != m_map_region.end() ) {
00230             
00231             size_t invalid_from = iter->second;
00232             size_t invalid_to = iter->first;
00233             
00234             if ( ( invalid_from <= offset_ ) && ( offset_ < invalid_to ) )
00235                 // we are inside a region, so advance to the first
00236                 // character outside of this region
00237                 // ( invalid_to is guaranteed to be valid offset ):
00238                 offset_ = invalid_to;
00239         }
00240 
00241         return offset_;
00242     }
00243 
00252     size_t find_upper_boundary( size_t offset_, size_t str_length_, size_t default_offset_ ) const
00253     {
00254         // regions are ordered by the RHS offsets, so
00255         // the search always starts at the specified position plus one to exclude
00256         // the region if offset_ is already at one of the region's end position.
00257         ++offset_;
00258         if ( offset_ >= str_length_ )
00259             return default_offset_;
00260         
00261         if ( m_map_region.empty() )
00262             return default_offset_; // there are no regions
00263         
00264         // Find closest region:
00265         std::map< size_t, size_t >::const_iterator iter =
00266             m_map_region.lower_bound( offset_ );
00267         
00268         if ( iter != m_map_region.end() )
00269             return iter->second;    // region LHS position
00270 
00271         return default_offset_;
00272     }
00273 
00274 
00281     size_t find_lower_boundary( size_t offset_, size_t str_length_, size_t default_offset_ ) const
00282     {
00283         // regions are ordered by the RHS offsets, so
00284         // the search always starts at the specified position plus one to exclude
00285         // the region if offset_ is already at one of the region's end position.
00286         ++offset_;
00287         if ( offset_ >= str_length_ )
00288             return default_offset_;
00289         
00290         if ( m_map_region.empty() )
00291             return default_offset_; // there are no regions
00292         
00293         // Find closest region:
00294         std::map< size_t, size_t >::const_iterator iter =
00295             m_map_region.lower_bound( offset_ );
00296         
00297         if ( iter != m_map_region.end() )
00298             return iter->first; // region RHS position
00299 
00300         return default_offset_;
00301     }
00302 
00303 
00311     std::pair< size_t, size_t > find_region( size_t offset_, size_t str_length_, size_t default_offset_ ) const
00312     {
00313         // regions are ordered by the RHS offsets, so
00314         // the search always starts at the specified position plus one:
00315         ++offset_;
00316         if ( offset_ >= str_length_ )
00317             return std::make_pair( default_offset_, default_offset_ );
00318         
00319         if ( m_map_region.empty() )
00320             return std::make_pair( default_offset_, default_offset_ );
00321         
00322         // Find closest region:
00323         std::map< size_t, size_t >::const_iterator iter =
00324             m_map_region.lower_bound( offset_ );
00325         
00326         if ( iter != m_map_region.end() )
00327             return std::make_pair( iter->second, iter->first ); // pair( LHS, RHS )
00328 
00329         return std::make_pair( default_offset_, default_offset_ );
00330     }
00331 
00333     void clear()
00334     {
00335         m_map_region.clear();
00336     }
00337     
00339     bool empty() const
00340     {
00341         return m_map_region.empty();
00342     }
00343     
00344 
00346     bool intersection( size_t from_offset_, size_t to_offset_ ) const
00347     {
00348         if ( m_map_region.empty() )
00349             return false;   // there are no regions
00350         
00351         std::map< size_t, size_t >::const_iterator from_iter =
00352             m_map_region.lower_bound( from_offset_ );
00353         
00354         if ( from_iter != m_map_region.end() ) {
00355             //    compare region found:  (from_iter->second)-----(from_iter->first)
00356             // with region in question:        (from_offset)-----(to_offset)
00357             if ( ( from_offset_ <= from_iter->second ) && ( from_iter->second < to_offset_ ) )
00358                 return true;
00359             if ( ( from_iter->second <= from_offset_ ) && ( from_offset_ < from_iter->first ) )
00360                 return true;
00361         }
00362 
00363         std::map< size_t, size_t >::const_iterator to_iter =
00364             m_map_region.upper_bound( to_offset_ );
00365 
00366         if ( to_iter != m_map_region.end() ) {
00367             //    compare region found: (to_iter->second)-----(to_iter->first)
00368             // with region in question:     (from_offset)-----(to_offset)
00369             if ( ( from_offset_ <= to_iter->second ) && ( to_iter->second < to_offset_ ) )
00370                 return true;
00371             if ( ( to_iter->second <= from_offset_ ) && ( from_offset_ < to_iter->first ) )
00372                 return true;
00373         }
00374 
00375         return false;   // no regions found inside specified pair of offsets
00376     }
00377 
00379     bool contains( size_t from_offset_, size_t to_offset_ ) const
00380     {
00381         if ( m_map_region.empty() )
00382             return false;   // there are no regions defined
00383         
00384         std::map< size_t, size_t >::const_iterator from_iter =
00385             m_map_region.lower_bound( from_offset_ );
00386         
00387         if ( from_iter != m_map_region.end() ) {
00388             //                does region (from_iter->second)-------(from_iter->first)
00389             // contain region in question       (from_offset)-------(to_offset) ?
00390             if ( ( from_iter->second <= from_offset_ ) && ( from_offset_ < from_iter->first )
00391                 && ( from_iter->second <= to_offset_ ) && ( to_offset_ <= from_iter->first ) )
00392                 return true;
00393         }
00394 
00395         std::map< size_t, size_t >::const_iterator to_iter =
00396             m_map_region.upper_bound( to_offset_ );
00397 
00398         if ( to_iter != m_map_region.end() ) {
00399             //                does region (to_iter->second)------(to_iter->first)
00400             // contain region in question     (from_offset)------(to_offset) ?
00401             if ( ( to_iter->second <= from_offset_ ) && ( from_offset_ < to_iter->first )
00402             && ( to_iter->second <= to_offset_ ) && ( to_offset_ <= to_iter->first ) )
00403                 return true;
00404         }
00405 
00406         return false;   // specified region contains valid space
00407     }
00408 
00410     template< typename StringT >
00411     StringT text_difference( StringT const& str_, size_t from_offset_, size_t to_offset_ ) const
00412     {
00413         if ( from_offset_ == to_offset_ )
00414             return StringT();
00415 
00416         StringT str_difference;
00417         
00418         std::map< size_t, size_t >::const_iterator map_iter = m_map_region.lower_bound( from_offset_ );
00419         if ( map_iter == m_map_region.end() )
00420             return str_.substr( from_offset_, to_offset_ - from_offset_ );
00421 
00422         size_t current_offset_from = from_offset_;
00423         size_t current_offset_to = to_offset_;
00424 
00425         for ( ; map_iter != m_map_region.end(); ++map_iter ) {
00426             size_t region_from = map_iter->second;
00427             size_t region_to = map_iter->first;
00428 
00429             if ( ( region_from <= current_offset_from ) && ( current_offset_from < region_to ) ) {
00430                 current_offset_from = region_to;
00431                 continue;
00432             }
00433 
00434             if ( ( region_from <= current_offset_to ) && ( current_offset_to < region_to ) )
00435                 current_offset_to = region_from;
00436             
00437             if ( ( current_offset_from <= region_from ) && ( region_from < current_offset_to ) )
00438                 current_offset_to = region_from;
00439             
00440             if ( current_offset_from > current_offset_to  )
00441                 return str_difference;
00442 
00443             if ( current_offset_to > to_offset_ )
00444                 return str_difference;
00445 
00446             str_difference += str_.substr( current_offset_from, current_offset_to - current_offset_from );
00447 
00448             if ( current_offset_to < region_from )
00449                 return str_difference;
00450 
00451             current_offset_from = region_to;
00452             current_offset_to = to_offset_;
00453         }
00454         
00455         if ( current_offset_from < current_offset_to )
00456             str_difference += str_.substr( current_offset_from, current_offset_to - current_offset_from );
00457 
00458         return str_difference;
00459     }
00460 
00461 };  // xtl_region_map
00462 
00463 }   // namespace cttl_impl
00464 
00465 #endif // _CTTL_XTL_REGION_MAP_H_INCLUDED_

Generated on Thu Nov 2 17:44:08 2006 for Common Text Transformation Library by  doxygen 1.3.9.1