Common Text Transformation Library: arithmetics

This is a modified version of the arithmetic expression parser that demonstrates the grammar rule tracing facilities of the library. For more information, see grammar debugging and tracing.

// sample code: arithmetics_traced.cpp
// demonstrates stateful cttl parser

//#define NDEBUG    // must appear before assert.h is included to stop assertions from being compiled 
//#define CTTL_TRACE_EVERYTHING
#define CTTL_TRACE_RULES  //define to turn light tracing on
//#define CTTL_TRACE_TRIVIAL    //define for trace messages only mode

#include <iostream>
#include "cttl/cttl.h"

using namespace cttl;

struct parser {

    /*  Stateful parser for arithmetic expressions.
     *
     *  The rule_* member functions implement the following grammar
     *  production rules (EBNF form):
     *
     * <expr> --> <term> ( '+' <term> | '-' <term> )*
     * <term> --> <factor> ( '*'  <factor> | '/'  <factor> )*
     * <factor> --> <prime> | '(' <expr> ')' | '-' <factor> | '+' <factor>
     * <prime> --> ( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' )+
     *
     *  The remaining member functions are actions referenced from the
     *  grammar: they print a numbered listing of stack-machine style
     *  instructions (PUSH/POP/ADD/...) to std::cout.
     */

    // Number of the most recently printed listing line; it is
    // pre-incremented each time a new line is started.
    int ip_cnt;

    // ip_cnt_ is the starting value of the listing line counter.
    parser( int ip_cnt_ = 0 )
        :   ip_cnt( ip_cnt_ )
    {
    }

    //
    // Grammar rules. Each one builds its grammar expression and returns
    // whatever match() yields for the given universe.
    //

    // <factor> --> <prime> | '(' <expr> ')' | '-' <factor> | '+' <factor>
    // The last alternative, parse_error, is reached only when none of
    // the preceding alternatives matched.
    size_t rule_factor( const_edge<>& universe_ )
    {
        return (
            ( isdigit & CTTL_RULE( parser::numeric_literal ) )
            | ( '(' + CTTL_RULE( parser::rule_expr ) + ')' )
            | ( '-' + ( CTTL_RULE( parser::rule_factor ) & CTTL_RULE( parser::unary_minus ) ) )
            | ( '+' + CTTL_RULE( parser::rule_factor ) )    // unary plus is simply ignored
            | CTTL_RULE( parser::parse_error )
        ).match( universe_ );
    }

    // <term> --> <factor> ( '*'  <factor> | '/'  <factor> )*
    size_t rule_term( const_edge<>& universe_ )
    {
        return (
            CTTL_RULE( parser::rule_factor )
            + *(
                ( ( '*' + CTTL_RULE( parser::rule_factor ) ) & CTTL_RULE( parser::multiply ) )
                |
                ( ( '/' + CTTL_RULE( parser::rule_factor ) ) & CTTL_RULE( parser::divide ) )
            )
        ).match( universe_ );
    }

    // <expr> --> <term> ( '+' <term> | '-' <term> )*
    size_t rule_expr( const_edge<>& universe_ )
    {
        return (
            CTTL_RULE( parser::rule_term )
            + *(
                ( ( '+' + CTTL_RULE( parser::rule_term ) ) & CTTL_RULE( parser::add ) )
                |
                ( ( '-' + CTTL_RULE( parser::rule_term ) ) & CTTL_RULE( parser::subtract ) )
            )
        ).match( universe_ );
    }

    //
    // Listing helpers shared by the actions below.
    //

    // Starts a new listing line: prints a tab followed by the next line
    // number and returns the stream so the caller can append the rest.
    std::ostream& listing_line()
    {
        return std::cout << "\t" << ++ip_cnt;
    }

    // Prints the ";" line that opens every group of instructions.
    void begin_group()
    {
        listing_line() << "\t ;" << std::endl;
    }

    // Prints the sequence common to all binary operators: pop both
    // operands into R1 and R2, print the operator line (instruction_
    // followed by the text of edge_), then push R2.
    // Returns the offset of the end of edge_.
    size_t emit_binary_op( const char* instruction_, const_edge<>& edge_ )
    {
        begin_group();
        listing_line() << "\t POP \t R1" << std::endl;
        listing_line() << "\t POP \t R2" << std::endl;
        listing_line() << instruction_ << edge_.text() << std::endl;
        listing_line() << "\t PUSH \t R2" << std::endl;
        return edge_.second.offset();
    }

    //
    // Actions referenced from the grammar rules. Except for parse_error,
    // each one returns the offset of the end of the edge it was given.
    //

    // Prints a PUSH of the text of edge_.
    size_t numeric_literal( const_edge<>& edge_ )
    {
        begin_group();
        listing_line() << "\t PUSH \t " << edge_.text() << std::endl;
        return edge_.second.offset();
    }

    // Prints the DIV sequence.
    size_t divide( const_edge<>& edge_ )
    {
        return emit_binary_op( "\t DIV \t R2, R1 \t; R2 = R2 ", edge_ );
    }

    // Prints the MUL sequence.
    size_t multiply( const_edge<>& edge_ )
    {
        return emit_binary_op( "\t MUL \t R2, R1 \t; R2 = R2 ", edge_ );
    }

    // Prints the ADD sequence.
    size_t add( const_edge<>& edge_ )
    {
        return emit_binary_op( "\t ADD \t R2, R1 \t; R2 = R2 ", edge_ );
    }

    // Prints the SUB sequence.
    size_t subtract( const_edge<>& edge_ )
    {
        return emit_binary_op( "\t SUB \t R2, R1 \t; R2 = R2 ", edge_ );
    }

    // Prints the negation sequence: pop R1, NEG, push R1.
    size_t unary_minus( const_edge<>& edge_ )
    {
        begin_group();
        listing_line() << "\t POP \t R1" << std::endl;
        listing_line() << "\t NEG \t R1 \t\t; R1 = -(" << edge_.text() << ")" << std::endl;
        listing_line() << "\t PUSH \t R1" << std::endl;
        return edge_.second.offset();
    }

    // Reports a syntax error: prints the text of the parent of edge_ and,
    // on the next line, a caret indented by the starting offset of edge_.
    // Always returns std::string::npos.
    size_t parse_error( const_edge<>& edge_ ) const
    {
        const size_t column = edge_.first.offset();

        std::cout << "*** syntax error ***" << std::endl;
        std::cout << edge_.parent().text() << std::endl;

        // indent the caret so that it lines up with the reported offset
        std::cout << std::string( column, '\x20' );
        std::cout << "^-- at position " << edge_.first.offset() << std::endl;

        return std::string::npos;
    }

};


// Program entry point: parses the arithmetic expression given on the
// command line and prints the generated listing to std::cout.
//
// Returns 0 when the whole input was parsed. Returns 1 when no expression
// was supplied, when the grammar failed to match, or when the parser
// stopped before the end of the input.
int main(int argc, char* argv[])
{
    // argc < 2 (rather than argc == 1) also rejects argc == 0, where
    // &argv[ 1 ] would point past the terminating null entry of argv.
    if ( argc < 2 ) {
        std::cout << "\t usage: enter arithmetic expression to parse" << std::endl;
        return 1;
    }

    // construct input string from arguments on the command line
    // (NOTE(review): ' ' is presumably the delimiter placed between
    // arguments - confirm against the cttl input<> constructor):
    input<> inp( &argv[ 1 ], ' ' );

    // construct universe to be parsed
    const_edge<> universe( new_edge( inp ) );

    // construct the parser:
    parser arithmetic_parser;

    // evaluate arithmetic expression; std::string::npos signals that the
    // grammar did not match
    if ( CTTL_MEMBER_RULE( arithmetic_parser, &parser::rule_expr ).match( universe ) == std::string::npos ) {
        std::cout << "*** parser terminated" << std::endl;
        return 1;   // report the failure to the caller
    }

    // the grammar matched, but a non-empty universe means that some of
    // the input was left unconsumed
    if ( universe.length() ) {
        std::cout
            << std::endl
            << "*** error: parser terminated: ***"
            << std::endl
            << inp.text()
            << std::endl
            ;

        // indent the caret to the offset where the parser stopped
        for ( size_t pos = 0; pos < universe.first.offset(); ++pos )
            std::cout << '\x20';

        std::cout << "^-- at position " << universe.first.offset() << std::endl;
        return 1;   // report the failure to the caller
    }

    return 0;
}

Provided that the user specifies the command-line argument "2*-3", the program generates the following output:

Input:
    2*-3

Output:
--------------------@2*-3-198->parser::rule_expr@0-4
--------------------@2*-3? {;       0-4 
--------------------@2*-3---85->parser::rule_term@0-4
--------------------@2*-3?   {;     0-4 
--------------------@2*-3-----60->parser::rule_factor@0-4
--------------------@2*-3?     {|       0-4 
--------------------@2*-3?      {|      0-4 
--------------------@2*-3?       {|     0-4 
--------------------@2*-3?        {|        0-4 
--------------------@2*-3?         {&       0-4 
-----------------------2@|          $       1-4 
-----------------------@2-----------36->parser::numeric_literal@0-1
    1    ;
    2    PUSH    2
-----------------------@2+++++++++++36<-parser::numeric_literal@0-0 ''
                                   }
                                  }
                                 }
                                }
                               }
---------------------@*-3+++++60<-parser::rule_factor@0-1   '2'
---------------------@*-3?    {*        1-4 
---------------------@*-3?     {|       1-4 
---------------------@*-3?      {&      1-4 
---------------------@*-3?       {;     1-4 
----------------------2*@|        *     2-4 char
----------------------@-3---------64->parser::rule_factor@2-4
----------------------@-3?         {|       2-4 
----------------------@-3?          {|      2-4 
----------------------@-3?           {|     2-4 
----------------------@-3?            {|        2-4 
----------------------@-3?             {&       2-4 
~~~~~~~~~~~~~~~~~~~~~~2*@~              $       2-4 FAIL 
~~~~~~~~~~~~~~~~~~~~~~2*@~              &       2-4 FAIL 
                                       }
----------------------@-3?             {;       2-4 
----------------------@-3?              {;      2-4 
~~~~~~~~~~~~~~~~~~~~~~2*@~               (      2-4 FAIL char
                                        }
                                       }
~~~~~~~~~~~~~~~~~~~~~~2*@~             |        2-4 FAIL 
                                      }
----------------------@-3?            {;        2-4 
---------------------2*-@|             -        3-4 char
-----------------------@3?             {&       3-4 
-----------------------@3---------------43->parser::rule_factor@3-4
-----------------------@3?               {|     3-4 
-----------------------@3?                {|        3-4 
-----------------------@3?                 {|       3-4 
-----------------------@3?                  {|      3-4 
-----------------------@3?                   {&     3-4 
--------------------2*-3@|                    $     4-4 
-----------------------@3---------------------36->parser::numeric_literal@3-4
    3    ;
    4    PUSH    3
-----------------------@3+++++++++++++++++++++36<-parser::numeric_literal@3-3   ''
                                             }
                                            }
                                           }
                                          }
                                         }
------------------------@+++++++++++++++43<-parser::rule_factor@3-4 '3'
-----------------------@3---------------43->parser::unary_minus@3-4
    5    ;
    6    POP     R1
    7    NEG     R1         ; R1 = -(3)
    8    PUSH    R1
-----------------------@3+++++++++++++++43<-parser::unary_minus@3-3 ''
                                       }
                                      }
                                     }
                                    }
                                   }
------------------------@+++++++++64<-parser::rule_factor@2-4   '-3'
                                 }
---------------------@*-3--------66->parser::multiply@1-4
    9    ;
    10   POP     R1
    11   POP     R2
    12   MUL     R2, R1     ; R2 = R2 *-3
    13   PUSH    R2
---------------------@*-3++++++++66<-parser::multiply@1-1   ''
                                }
                               }
------------------------@?     {|       4-4 
------------------------@?      {&      4-4 
------------------------@?       {;     4-4 
~~~~~~~~~~~~~~~~~~~~2*-3@~        L     4-4 FAIL empty universe
                                 }
~~~~~~~~~~~~~~~~~~~~2*-3@~       &      4-4 FAIL 
                                }
------------------------@?      {&      4-4 
------------------------@?       {;     4-4 
~~~~~~~~~~~~~~~~~~~~2*-3@~        L     4-4 FAIL empty universe
                                 }
~~~~~~~~~~~~~~~~~~~~2*-3@~       &      4-4 FAIL 
                                }
~~~~~~~~~~~~~~~~~~~~2*-3@~      |       4-4 FAIL 
                               }
                              }
                             }
------------------------@+++85<-parser::rule_term@0-4   '2*-3'
------------------------@?  {*      4-4 
------------------------@?   {|     4-4 
------------------------@?    {&        4-4 
------------------------@?     {;       4-4 
~~~~~~~~~~~~~~~~~~~~2*-3@~      L       4-4 FAIL empty universe
                               }
~~~~~~~~~~~~~~~~~~~~2*-3@~     &        4-4 FAIL 
                              }
------------------------@?    {&        4-4 
------------------------@?     {;       4-4 
~~~~~~~~~~~~~~~~~~~~2*-3@~      L       4-4 FAIL empty universe
                               }
~~~~~~~~~~~~~~~~~~~~2*-3@~     &        4-4 FAIL 
                              }
~~~~~~~~~~~~~~~~~~~~2*-3@~    |     4-4 FAIL 
                             }
                            }
                           }
------------------------@+198<-parser::rule_expr@0-4    '2*-3'

Permission to copy, use, modify, sell and distribute this document is granted provided this copyright notice appears in all copies. This document is provided "as is" without express or implied warranty, and with no claim as to its suitability for any purpose.