/////////////////////////////////////////////////////////////////////////////// // // Basic Parser Grammar for Spirit (http://spirit.sourceforge.net/) // // Written by: Willem Hengeveld itsme@xs4all.nl // /////////////////////////////////////////////////////////////////////////////// // // problems: // expression with multiple identical operators not processed correctly // relational operators not processed correctly // a = 1 = 2 // is parsed as assignment(identifier, expression(2)) // // try tree_parse_info<> info = pt_parse(first, expression); // instead of just 'parse' // // todo: add named parameters. // #define BOOST_SPIRIT_DEBUG ///$$$ DEFINE THIS WHEN DEBUGGING $$$/// #include #include #include #include #include /////////////////////////////////////////////////////////////////////////////// using namespace std; using namespace boost::spirit; #define BOOST_SPIRIT_ASSERT_EXCEPTION basic_exception class basic_exception { public: basic_exception(const char *msg) : m_msg(msg) { } std::string m_msg; }; /////////////////////////////////////////////////////////////////////////////// // // Our Basic grammar // /////////////////////////////////////////////////////////////////////////////// struct basic_grammar : public grammar { basic_grammar() {} template struct definition { definition(basic_grammar const& /*self*/) { #ifdef BOOST_SPIRIT_DEBUG debug(); // define the debug names #endif //----------------------------------------------------------------- // KEYWORDS //----------------------------------------------------------------- keywords = "and", "as", "at", "bit", "bit0", "bit33", "byte", "byte0", "call", "case", "const", "dim", "div", "do", "double", "dword", "eeprom", "eepron", "else", "elseif", "end", "exit", "for", "function", "goto", "if", "in", "integer", "iopin", "loop", "mod", "next", "nil", "not", "on", "or", "qword", "ram", "rem", "repeat", "rom", "select", "signed", "single", "step", "stream", "string", "sub", "then", "to", "until", "wend", "while", "word", "xor", "xram"; //----------------------------------------------------------------- // OPERATORS //----------------------------------------------------------------- chlit<> PLUS('+'); chlit<> MINUS('-'); chlit<> STAR('*'); chlit<> SLASH('/'); chlit<> ASSIGN('='); chlit<> COMMA(','); chlit<> AMPERSAND('&'); chlit<> SEMI(';'); chlit<> COLON(':'); chlit<> EQUAL('='); chlit<> SQUOTE('\''); chlit<> DQUOTE('\"'); strlit<> NOT_EQUAL("<>"); chlit<> LT('<'); strlit<> LE("<="); strlit<> GE(">="); chlit<> GT('>'); chlit<> LPAREN('('); chlit<> RPAREN(')'); chlit<> LBRACK('['); chlit<> RBRACK(']'); chlit<> DOT('.'); strlit<> LAND("&&"); strlit<> LOR("||"); strlit<> LNOT("!"); //----------------------------------------------------------------- // TOKENS //----------------------------------------------------------------- typedef inhibit_case > token_t; token_t IN_ = as_lower_d["in"]; token_t ON = as_lower_d["on"]; token_t DIM = as_lower_d["dim"]; token_t REM = as_lower_d["rem"]; token_t DIV = as_lower_d["div"]; token_t MOD = as_lower_d["mod"]; token_t AND = as_lower_d["and"]; token_t OR = as_lower_d["or"]; token_t NOT = as_lower_d["not"]; token_t XOR = as_lower_d["xor"]; token_t NIL = as_lower_d["nil"]; token_t END = as_lower_d["end"]; token_t GOTO = as_lower_d["goto"]; token_t EXIT = as_lower_d["exit"]; token_t IF = as_lower_d["if"]; token_t THEN = as_lower_d["then"]; token_t ELSE = as_lower_d["else"]; token_t ELSEIF = as_lower_d["elseif"]; token_t SELECT = as_lower_d["select"]; token_t CASE = as_lower_d["case"]; token_t WHILE = as_lower_d["while"]; token_t WEND = as_lower_d["wend"]; token_t REPEAT = as_lower_d["repeat"]; token_t UNTIL = as_lower_d["until"]; token_t FOR = as_lower_d["for"]; token_t DO = as_lower_d["do"]; token_t TO = as_lower_d["to"]; token_t STEP = as_lower_d["step"]; token_t NEXT = as_lower_d["next"]; token_t LOOP = as_lower_d["loop"]; token_t CONST_ = as_lower_d["const"]; token_t SUB = as_lower_d["sub"]; token_t FUNCTION = as_lower_d["function"]; token_t EEPROM = as_lower_d["eeprom"]; token_t RAM = as_lower_d["ram"]; token_t ROM = as_lower_d["rom"]; token_t XRAM = as_lower_d["xram"]; token_t AS = as_lower_d["as"]; token_t AT = as_lower_d["at"]; token_t BIT = as_lower_d["bit"]; token_t BIT0 = as_lower_d["bit0"]; token_t BIT33 = as_lower_d["bit33"]; token_t BYTE = as_lower_d["byte"]; token_t BYTE0 = as_lower_d["byte0"]; token_t DOUBLE = as_lower_d["double"]; token_t INTEGER = as_lower_d["integer"]; token_t DWORD = as_lower_d["dword"]; token_t EEPRON = as_lower_d["eepron"]; token_t IOPIN = as_lower_d["iopin"]; token_t QWORD = as_lower_d["qword"]; token_t SIGNED = as_lower_d["signed"]; token_t SINGLE = as_lower_d["single"]; token_t STREAM = as_lower_d["stream"]; token_t STRING = as_lower_d["string"]; token_t WORD = as_lower_d["word"]; token_t CALL = as_lower_d["call"]; token_t BITWISEAND = AND; token_t BITWISENOT = NOT; token_t BITWISEOR = OR; token_t BITWISEXOR = XOR; token_t LOGICALAND = AND; token_t LOGICALNOT = NOT; token_t LOGICALOR = OR; //----------------------------------------------------------------- // Start grammar definition //----------------------------------------------------------------- identifier = as_lower_d[ lexeme_d[ (alpha_p >> *(alnum_p | '_')) - (keywords >> anychar_p - (alnum_p | '_')) ] ]; // constants // no escaping. string_constant = lexeme_d[ DQUOTE >> *( anychar_p - DQUOTE ) >> DQUOTE ]; unsignedInteger = uint_p ; unsignedReal = ureal_p ; unsignedNumber = longest_d[ unsignedReal | unsignedInteger ] ; // this is determined later to be a constant string value. constant_string = string_expression ; // this is determined later to be a constant numeric value. constant_number = numeric_expression ; // this is determined later to be a constant real value. constant_real = constant_number ; // this is determined later to be a constant integer value. constant_integer = constant_number ; constant_value = constant_number | constant_string | constantIdentifier ; parentised_constant_list = LPAREN >> !(constant_string % COMMA) >> RPAREN ; constantIdentifier = identifier ; variableIdentifier = identifier ; fieldIdentifier = identifier ; fieldDesignator = variableIdentifier >> DOT >> fieldIdentifier ; variable = longest_d[ fieldDesignator | variableIdentifier | arrayOrFunctionCall ] ; unaryOperator = PLUS | MINUS | BITWISENOT ; addingOperator = PLUS | MINUS | BITWISEOR | BITWISEXOR ; multiplyingOperator = STAR | SLASH | DIV | MOD | BITWISEAND ; arrayOrFunctionCall = identifier >> LPAREN >> !expression_list >> RPAREN ; numeric_atom = LPAREN >> numeric_expression >> RPAREN | variable | unsignedNumber ; numeric_factor = !unaryOperator >> numeric_atom ; numeric_term = numeric_factor % multiplyingOperator ; numeric_expression = numeric_term % addingOperator ; // string expression string_term = arrayOrFunctionCall | string_constant ; string_expression = string_term % AMPERSAND ; // boolean expression relationalOperator = EQUAL | NOT_EQUAL | GE | LE | LT | GT ; // todo: add expresson 'IN' array_expression boolean_atom = expression >> relationalOperator >> expression | LPAREN >> boolean_expression >> RPAREN ; boolean_factor = !LOGICALNOT >> boolean_atom ; boolean_term = boolean_factor % LOGICALAND ; boolean_expression = boolean_term % LOGICALOR ; // generic expression expression = numeric_expression | string_expression | boolean_expression ; expression_list = expression % COMMA ; // statements subroutineIdentifier = identifier ; subroutineCall = subroutineIdentifier >> !expression_list | CALL >> subroutineIdentifier >> ( LPAREN >> !expression_list >> RPAREN | empty ) ; labelIdentifier = identifier ; labelStatement = labelIdentifier >> COLON ; assignmentStatement = variable >> ASSIGN >> expression | arrayOrFunctionCall >> ASSIGN >> expression ; gotoStatement = GOTO >> labelIdentifier ; emptyStatement = empty ; empty = epsilon_p ; ifStatement = IF >> expression >> THEN >> statement_separator >> multipleStatements >> *( ELSEIF >> expression >> THEN >> statement_separator >> multipleStatements) >> !( ELSE >> statement_separator >> multipleStatements) >> END >> IF | IF >> expression >> THEN >> statement ; caseStatement = SELECT >> CASE >> expression >> statement_separator >> *( CASE >> expression_list >> statement_separator >> multipleStatements) >> !( CASE >> ELSE >> statement_separator >> multipleStatements) >> END >> SELECT ; whileStatement = WHILE >> expression >> statement_separator >> multipleStatements >> WEND ; doloopStatement = DO >> !( ( WHILE | UNTIL ) >> expression ) >> statement_separator >> multipleStatements >> LOOP >> !( ( WHILE | UNTIL ) >> expression ) ; forStatement = FOR >> identifier >> ASSIGN >> expression >> TO >> expression >> !( STEP >> expression ) >> statement_separator >> multipleStatements >> NEXT ; // todo: this is probably not correct. onStatement = ON >> identifier >> GOTO >> identifier ; typeIdentifier = BIT | !SIGNED >> BYTE | !SIGNED >> WORD | !SIGNED >> DWORD | !SIGNED >> QWORD | SINGLE | DOUBLE | STRING | IOPIN | STREAM | INTEGER ; // location defaults to RAM variableLocation = EEPROM|RAM|ROM|XRAM; parentised_integer_list = LPAREN >> !(constant_integer % COMMA) >> RPAREN ; arrayIdentifier = identifier ; constantArrayIdentifier = identifier ; // type defaults to word dataDefinition = CONST_ >> constantIdentifier >> !( AS >> typeIdentifier ) >> EQUAL >> constant_value | CONST_ >> constantArrayIdentifier >> parentised_integer_list >> !( AS >> typeIdentifier ) >> EQUAL >> parentised_constant_list | DIM >> variableIdentifier >> !( AS >> typeIdentifier ) >> !( AT >> constant_number ) >> !variableLocation | DIM >> arrayIdentifier >> parentised_integer_list >> !( AS >> typeIdentifier ) >> !( AT >> constant_number ) >> !variableLocation ; variable_list = identifier % COMMA ; functionIdentifier = identifier ; functionDefinition = FUNCTION >> functionIdentifier >> LPAREN >> variable_list >> RPAREN >> statement_separator >> multipleStatements >> END >> FUNCTION ; subroutineDefinition = SUB >> subroutineIdentifier >> LPAREN >> variable_list >> RPAREN >> statement_separator >> multipleStatements >> END >> SUB ; remStatement = REM >> *(anychar_p - eol_p) ; functionCall = arrayOrFunctionCall ; statement = ifStatement | caseStatement | whileStatement | doloopStatement | forStatement | onStatement | assignmentStatement | arrayOrFunctionCall | subroutineCall | gotoStatement | dataDefinition | remStatement | labelStatement | emptyStatement ; multipleStatements = *( statement >> statement_separator ) ; end_of_line = eol_p | SQUOTE >> *(anychar_p - eol_p) >> eol_p ; statement_separator = COLON | end_of_line ; program = ( subroutineDefinition | functionDefinition | statement ) % statement_separator ; //----------------------------------------------------------------- // End grammar definition //----------------------------------------------------------------- } #ifdef BOOST_SPIRIT_DEBUG void debug() { BOOST_SPIRIT_DEBUG_RULE(identifier); BOOST_SPIRIT_DEBUG_RULE(program); BOOST_SPIRIT_DEBUG_RULE(variableLocation); BOOST_SPIRIT_DEBUG_RULE(dataDefinition); BOOST_SPIRIT_DEBUG_RULE(parentised_integer_list); BOOST_SPIRIT_DEBUG_RULE(parentised_constant_list); BOOST_SPIRIT_DEBUG_RULE(constant_value); BOOST_SPIRIT_DEBUG_RULE(constant_number); BOOST_SPIRIT_DEBUG_RULE(constant_string); BOOST_SPIRIT_DEBUG_RULE(unsignedInteger); BOOST_SPIRIT_DEBUG_RULE(unsignedReal); BOOST_SPIRIT_DEBUG_RULE(constantIdentifier); BOOST_SPIRIT_DEBUG_RULE(typeIdentifier); BOOST_SPIRIT_DEBUG_RULE(statement); BOOST_SPIRIT_DEBUG_RULE(labelIdentifier); BOOST_SPIRIT_DEBUG_RULE(labelStatement); BOOST_SPIRIT_DEBUG_RULE(assignmentStatement); BOOST_SPIRIT_DEBUG_RULE(variable); BOOST_SPIRIT_DEBUG_RULE(variableIdentifier); BOOST_SPIRIT_DEBUG_RULE(fieldDesignator); BOOST_SPIRIT_DEBUG_RULE(expression); BOOST_SPIRIT_DEBUG_RULE(relationalOperator); BOOST_SPIRIT_DEBUG_RULE(addingOperator); BOOST_SPIRIT_DEBUG_RULE(multiplyingOperator); BOOST_SPIRIT_DEBUG_RULE(onStatement); BOOST_SPIRIT_DEBUG_RULE(subroutineIdentifier); BOOST_SPIRIT_DEBUG_RULE(functionIdentifier); BOOST_SPIRIT_DEBUG_RULE(gotoStatement); BOOST_SPIRIT_DEBUG_RULE(emptyStatement); BOOST_SPIRIT_DEBUG_RULE(empty); BOOST_SPIRIT_DEBUG_RULE(ifStatement); BOOST_SPIRIT_DEBUG_RULE(remStatement); BOOST_SPIRIT_DEBUG_RULE(caseStatement); BOOST_SPIRIT_DEBUG_RULE(whileStatement); BOOST_SPIRIT_DEBUG_RULE(doloopStatement); BOOST_SPIRIT_DEBUG_RULE(forStatement); BOOST_SPIRIT_DEBUG_RULE(subroutineDefinition); BOOST_SPIRIT_DEBUG_RULE(functionDefinition); BOOST_SPIRIT_DEBUG_RULE(expression_list); BOOST_SPIRIT_DEBUG_RULE(arrayOrFunctionCall); BOOST_SPIRIT_DEBUG_RULE(functionCall); BOOST_SPIRIT_DEBUG_RULE(subroutineCall); BOOST_SPIRIT_DEBUG_RULE(multipleStatements); BOOST_SPIRIT_DEBUG_RULE(numeric_expression); BOOST_SPIRIT_DEBUG_RULE(unaryOperator); BOOST_SPIRIT_DEBUG_RULE(string_expression); BOOST_SPIRIT_DEBUG_RULE(unsignedNumber); BOOST_SPIRIT_DEBUG_RULE(variable_list); BOOST_SPIRIT_DEBUG_RULE(end_of_line); BOOST_SPIRIT_DEBUG_RULE(statement_separator); BOOST_SPIRIT_DEBUG_RULE(arrayIdentifier); BOOST_SPIRIT_DEBUG_RULE(boolean_atom); BOOST_SPIRIT_DEBUG_RULE(boolean_expression); BOOST_SPIRIT_DEBUG_RULE(boolean_factor); BOOST_SPIRIT_DEBUG_RULE(boolean_term); BOOST_SPIRIT_DEBUG_RULE(constantArrayIdentifier); BOOST_SPIRIT_DEBUG_RULE(constant_integer); BOOST_SPIRIT_DEBUG_RULE(constant_real); BOOST_SPIRIT_DEBUG_RULE(fieldIdentifier); BOOST_SPIRIT_DEBUG_RULE(numeric_atom); BOOST_SPIRIT_DEBUG_RULE(numeric_factor); BOOST_SPIRIT_DEBUG_RULE(numeric_term); BOOST_SPIRIT_DEBUG_RULE(string_constant); BOOST_SPIRIT_DEBUG_RULE(string_term); } #endif rule const& start() const { return program; } symbols<> keywords; rule identifier, program, variableLocation, dataDefinition, parentised_integer_list, parentised_constant_list, constant_value, constant_number, constant_string, unsignedInteger, unsignedReal, constantIdentifier, typeIdentifier, statement, labelIdentifier, labelStatement, assignmentStatement, variable, variableIdentifier, fieldDesignator, expression, relationalOperator, addingOperator, multiplyingOperator, onStatement, subroutineIdentifier, functionIdentifier, gotoStatement, emptyStatement, empty, ifStatement, remStatement, caseStatement, whileStatement, doloopStatement, forStatement, subroutineDefinition, functionDefinition, expression_list, arrayOrFunctionCall, functionCall, subroutineCall, multipleStatements, numeric_expression, unaryOperator, string_expression, unsignedNumber, variable_list, statement_separator, end_of_line, arrayIdentifier, boolean_atom, boolean_expression, boolean_factor, boolean_term, constantArrayIdentifier, constant_integer, constant_real, fieldIdentifier, numeric_atom, numeric_factor, numeric_term, string_constant, string_term; }; }; /////////////////////////////////////////////////////////////////////////////// // // The Basic White Space Skipper // /////////////////////////////////////////////////////////////////////////////// struct basic_skipper : public grammar { basic_skipper() {} template struct definition { definition(basic_skipper const& /*self*/) { chlit<> SPACE(' '); chlit<> TAB('\t'); chlit<> UNDERSCORE('_'); skip = SPACE | TAB | UNDERSCORE >> eol_p ; #ifdef BOOST_SPIRIT_DEBUG BOOST_SPIRIT_DEBUG_RULE(skip); #endif } rule skip; rule const& start() const { return skip; } }; }; /////////////////////////////////////////////////////////////////////////////// // // Parse a file // /////////////////////////////////////////////////////////////////////////////// static void parse(char const* filename) { ifstream in(filename); if (!in) { cerr << "Could not open input file: " << filename << endl; return; } in.unsetf(ios::skipws); // Turn of white space skipping on the stream vector vec; std::copy( istream_iterator(in), istream_iterator(), std::back_inserter(vec)); vector::const_iterator first = vec.begin(); vector::const_iterator last = vec.end(); basic_skipper skip_p; basic_grammar p; #ifdef BOOST_SPIRIT_DEBUG BOOST_SPIRIT_DEBUG_NODE(skip_p); BOOST_SPIRIT_DEBUG_NODE(p); #endif parse_info::const_iterator> info = parse(first, last, p, skip_p); if (info.full) { cout << "\t\t" << filename << " Parses OK\n\n\n"; } else { cerr << "---PARSING FAILURE in " << filename << "\n"; cerr << string(info.stop, last); } } /////////////////////////////////////////////////////////////////////////////// // // Main program // /////////////////////////////////////////////////////////////////////////////// int main(int argc, char* argv[]) { cout << "/////////////////////////////////////////////////////////\n\n"; cout << "\t\tBasic Grammar For Spirit...\n\n"; cout << "/////////////////////////////////////////////////////////\n\n"; if (argc > 1) { for (int i = 1; i < argc; ++i) { cout << argv[i] << endl; try { parse(argv[i]); } catch( basic_exception e ) { cout << e.m_msg; } } } else { cerr << "---NO FILENAME GIVEN---" << endl; } return 0; }