Common Text Transformation Library: word

This sample demonstrates semantic action functions organized within a base parser class.

// sample code: word_count.cpp
// demonstrates stateful parser and lexer implementation.

//#define NDEBUG    // must appear before assert.h is included to stop assertions from being compiled 
//#define CTTL_TRACE_EVERYTHING

#include <iostream>
#include "cttl/cttl.h"

using namespace cttl;

template< typename UniverseT >
struct base_parser {
    // Universe types used by the semantic actions: the universe type the
    // parser is instantiated with, and the strict edge type it exposes.
    typedef UniverseT universe_T;
    typedef typename universe_T::strict_edge_T strict_universe_T;

    // Default semantic actions. Each one ignores its argument and returns 0.
    // A derived parser can declare a member with the same name to supply
    // its own behavior for that action.

    size_t replace_words( strict_universe_T& /*universe_*/ ) const { return 0; }

    size_t count_words( strict_universe_T& /*universe_*/ ) const { return 0; }
};

template< typename UniverseT >
struct parser : public base_parser< UniverseT > {

    // The base class depends on a template parameter, so its typedefs are
    // not found by unqualified lookup; they are declared again here.
    typedef UniverseT universe_T;
    typedef typename universe_T::strict_edge_T strict_universe_T;

    // Running tally: starts at the constructor argument and grows by one
    // on every call to count_words().
    int count;

    parser( int count_ ) : count( count_ ) {}

    // Semantic action: bumps the tally and returns the offset reported by
    // the second member of the edge it was given.
    size_t count_words( strict_universe_T& universe_ )
    {
        count += 1;
        return universe_.second.offset();
    }

    // Disabled semantic action, kept for reference. If enabled it would
    // assign "<WORD/>" to the edge it receives.
    /*
    size_t replace_words( strict_universe_T& universe_ )
    {
        universe_ = "<WORD/>";
        return universe_.second.offset();
    }
    */
};

template< typename ParserT >
struct lexer : public ParserT {

    // Universe types, taken from the parser this lexer derives from.
    typedef typename ParserT::universe_T universe_T;
    typedef typename universe_T::strict_edge_T strict_universe_T;

    // Keywords that must not be treated as words. This is an ordinary
    // (per-instance) member, filled in by the constructor.
    std::set< std::string > keywords;

    // Forwards the initial count to the parser and registers the keywords.
    lexer( int count_ = 0 )
        :   ParserT( count_ )
    {
        static const char* const reserved[] = { "abc", "xyz" };
        keywords.insert(
            reserved,
            reserved + sizeof( reserved ) / sizeof( reserved[ 0 ] )
        );
    }

    // Start rule of the grammar: at least one word must be present. Each
    // word is combined with the two semantic actions of the parser,
    // count_words first and replace_words second.
    // Returns whatever match() yields for the given universe.
    size_t start( universe_T& universe_ )
    {
        return (
            +(
                rule( *this, &lexer::word )
                & rule( *this, &ParserT::count_words )
                & rule( *this, &ParserT::replace_words )
            )
        ).match( universe_ );
    }

    // Word rule: a word can be anything made of alphabetic characters,
    // except for one of the registered keywords.
    size_t word( universe_T& universe_ ) const
    {
        return ( isalpha - begin( keywords ) ).match( universe_ );
    }
};


// Entry point: counts the words given on the command line.
//
// Builds an input from the command line arguments, runs the start rule of
// the lexer over it, and prints the resulting word count followed by the
// input text.
//
// Returns 0 on success, 1 when no words were supplied or the parser failed.
int main(int argc, char* argv[])
{
    // argc is allowed to be 0, in which case argv[ 0 ] is a null pointer and
    // argv[ 1 ] does not exist. Test for "fewer than two" rather than
    // "exactly one" so that case never reaches the &argv[ 1 ] below.
    if ( argc < 2 ) {
        const char* const program =
            ( argc > 0 && argv[ 0 ] ) ? argv[ 0 ] : "word_count";

        std::cout
            << "Usage: on the command line, enter some words to count, for example,"
            << std::endl
            << '\t'
            << program
            << " abc def ghi"
            << std::endl
            ;

        return 1;
    }

    // construct input string from the command line arguments
    // (presumably joined with the ' ' passed as the second argument):
    input<> inp( &argv[ 1 ], ' ' );

    // construct universe to be parsed:
    typedef const_edge< policy_space<> > universe_T;

    universe_T universe( new_edge( inp ) );

    // construct the parser; its word count starts at the default of 0:
    lexer< parser< universe_T > > word_parser;

    // count words; a result of std::string::npos is treated as failure:
    if ( word_parser.start( universe ) != std::string::npos ) {
        std::cout << "Word count: " << word_parser.count << std::endl;

    } else {
        std::cout << "*** parser failed ***" << std::endl;
        return 1;
    }

    std::cout << "Input: " << inp.text() << std::endl;
    return 0;
}

Provided that the user passes the command-line arguments "one two three", the program prints the following output:

Word count: 3
Input: one two three

Permission to copy, use, modify, sell and distribute this document is granted provided this copyright notice appears in all copies. This document is provided "as is" without express or implied warranty, and with no claim as to its suitability for any purpose.