set up grammar structure
This commit is contained in:
@ -1,10 +1,22 @@
|
||||
lexer grammar MXLexer;
|
||||
|
||||
// Keywords
|
||||
INT: 'int';
|
||||
VOID: 'void';
|
||||
BOOL: 'bool';
|
||||
INT: 'int';
|
||||
STRING: 'string';
|
||||
NEW: 'new';
|
||||
CLASS: 'class';
|
||||
NULL: 'null';
|
||||
TRUE: 'true';
|
||||
FALSE: 'false';
|
||||
THIS: 'this';
|
||||
IF: 'if';
|
||||
ELSE: 'else';
|
||||
FOR: 'for';
|
||||
WHILE: 'while';
|
||||
BREAK: 'break';
|
||||
CONTINUE: 'continue';
|
||||
RETURN: 'return';
|
||||
|
||||
// Operators
|
||||
@ -12,21 +24,57 @@ PLUS: '+';
|
||||
MINUS: '-';
|
||||
MULTIPLY: '*';
|
||||
DIVIDE: '/';
|
||||
MOD: '%';
|
||||
// NOTE: GN matches '>=' and GE matches '>' (GE is strictly-greater here, not greater-or-equal).
GN: '>=';
|
||||
GE: '>';
|
||||
// NOTE: LN matches '<=' and LE matches '<' (LE is strictly-less here, not less-or-equal).
LN: '<=';
|
||||
LE: '<';
|
||||
NE: '!=';
|
||||
EQ: '==';
|
||||
LAND: '&&';
|
||||
LOR: '||';
|
||||
LNOT: '!';
|
||||
ARS: '>>';
|
||||
ALS: '<<';
|
||||
BAND: '&';
|
||||
BOR: '|';
|
||||
BXOR: '^';
|
||||
BNOT: '~';
|
||||
ASSIGN: '=';
|
||||
SELF_PLUS: '++';
|
||||
SELF_MINUS: '--';
|
||||
DOT: '.';
|
||||
QUESTION_MARK: '?';
|
||||
COLON: ':';
|
||||
|
||||
// Punctuation
|
||||
LPAREN: '(';
|
||||
RPAREN: ')';
|
||||
LBRACKET: '[';
|
||||
RBRACKET: ']';
|
||||
LBRACE: '{';
|
||||
RBRACE: '}';
|
||||
SEMICOLON: ';';
|
||||
COMMA: ',';
|
||||
// Format-string tokens. Each token is a run of literal text delimited by f" , " or $.
// FORMAT_STRING_WHOLE: starts with f" and ends with " (no $ delimiter in between).
FORMAT_STRING_WHOLE: 'f"' FORMAT_STR_INTERNAL '"';
|
||||
// FORMAT_STRING_HEAD: starts with f" and ends at a $.
FORMAT_STRING_HEAD: 'f"' FORMAT_STR_INTERNAL '$';
|
||||
// FORMAT_STRING_TAIL: starts at a $ and ends with the closing ".
FORMAT_STRING_TAIL: '$' FORMAT_STR_INTERNAL '"';
|
||||
// FORMAT_STRING_BODY: literal text between two $ delimiters.
FORMAT_STRING_BODY: '$' FORMAT_STR_INTERNAL '$';
|
||||
// Literal text of a format string: zero or more (non-greedy) of '$$', an ESC
// sequence, or any character other than '$', '"' and '\'.
fragment FORMAT_STR_INTERNAL : (DOLLAR_IN_FORMAT_STR | ESC | ~[$"\\])*?;
|
||||
// NOTE(review): FORMAT_STR_BEG, FORMAT_STR_END and DOLLAR are not referenced by
// any rule visible here; the token rules above spell the literals inline.
fragment FORMAT_STR_BEG: 'f"';
|
||||
fragment FORMAT_STR_END: '"';
|
||||
// Two consecutive dollar signs, accepted inside the literal text of a format string.
fragment DOLLAR_IN_FORMAT_STR: '$$';
|
||||
fragment DOLLAR: '$';
|
||||
|
||||
// Identifiers
|
||||
ID: [a-zA-Z_][a-zA-Z_0-9]*;
|
||||
ID: [a-zA-Z][a-zA-Z_0-9]*;
|
||||
|
||||
// Literals
|
||||
INT_LITERAL: [0-9]+;
|
||||
STRING_LITERAL: '"' (ESC|~["\\])*? '"';
|
||||
// Escape sequences accepted inside string and format-string literals:
// an escaped double quote (\"), an escaped backslash (\\) and the newline
// escape (\n). A bare backslash is excluded from literal bodies, so every
// supported escape has to be listed here.
fragment ESC : '\\"' | '\\\\' | '\\n';
|
||||
|
||||
// Whitespace and comments
|
||||
WS: [ \t\r\n]+ -> skip;
|
||||
COMMENT: '//' ~[\r\n]* -> skip;
|
||||
// Single-line comment: '//' followed by everything up to, but not including,
// the end of the line. The line break itself is left for WS to skip, so a
// comment on the last line of a file with no trailing newline is still matched.
LINE_COMMENT : '//' ~[\r\n]* -> skip;
|
||||
BLOCK_COMMENT: '/*' .*? '*/' -> skip;
|
@ -3,39 +3,74 @@ parser grammar MXParser;
|
||||
options { tokenVocab=MXLexer; }
|
||||
|
||||
mxprog
|
||||
: function* EOF
|
||||
: (class_def|define_statement|function_def)+ EOF
|
||||
;
|
||||
|
||||
function
|
||||
: type ID LPAREN RPAREN block
|
||||
// Function definition: return type, name, parenthesised parameter list, body.
// The return type and each parameter type may carry '[]' dimensions, in the
// same way as define_statement and class_var_def declare array-typed variables.
function_def
    : type (LBRACKET RBRACKET)* ID
      LPAREN
          ( (type (LBRACKET RBRACKET)* ID) (COMMA type (LBRACKET RBRACKET)* ID)* )?
      RPAREN
      suite
    ;
|
||||
|
||||
// Class definition: 'class' Name '{' members '}' ';' where a member is a
// field declaration, a constructor or a method.
class_def
    : CLASS ID
      LBRACE
          ( class_var_def
          | class_constructor
          | function_def
          )*
      RBRACE
      SEMICOLON
    ;
|
||||
// Field declaration inside a class: a type, optional '[]' dimensions and one
// or more comma-separated names. No initialiser is accepted here.
class_var_def
    : type (LBRACKET RBRACKET)* ID (COMMA ID)* SEMICOLON
    ;
|
||||
// Constructor: a name, an empty parameter list and a body.
class_constructor
    : ID LPAREN RPAREN suite
    ;
|
||||
// Brace-delimited sequence of zero or more statements.
suite
    : LBRACE statement* RBRACE
    ;
|
||||
// A single statement.
statement
    : SEMICOLON                                             // empty statement
    | define_statement                                      // variable declaration
    | expr SEMICOLON                                        // expression statement
    | IF LPAREN expr RPAREN statement (ELSE statement)?
    | WHILE LPAREN expr RPAREN statement
    // for ( init  cond? ;  step? ) body
    // NOTE(review): the step clause also accepts define_statement, which ends
    // with its own SEMICOLON, so that form needs a ';' right before ')' —
    // confirm this is intended.
    | FOR LPAREN (define_statement | (expr SEMICOLON) | SEMICOLON) expr? SEMICOLON (define_statement | expr)? RPAREN statement
    | (BREAK | CONTINUE | (RETURN expr?)) SEMICOLON
    | suite                                                 // nested block
    ;
|
||||
// Variable declaration: a type, optional '[]' dimensions, then one or more
// comma-separated names, each with an optional '= expr' initialiser.
define_statement
    : type (LBRACKET RBRACKET)*
      ID (ASSIGN expr)?
      (COMMA ID (ASSIGN expr)?)*
      SEMICOLON
    ;
|
||||
// Expressions. The rule is directly left-recursive; ANTLR derives operator
// precedence from alternative order (earlier alternatives bind tighter).
// Binary operators are left-associative unless marked <assoc=right>.
expr
    : basic_expr
    | LPAREN expr RPAREN
    | expr DOT (ID | (ID LPAREN (expr (COMMA expr)*)? RPAREN))   // member access / method call
    | expr (LBRACKET expr RBRACKET)+                              // indexing
    | expr (SELF_PLUS | SELF_MINUS)                               // postfix ++ / --
    | (SELF_PLUS | SELF_MINUS) expr                               // prefix ++ / --
    | MINUS expr
    | LNOT expr
    | BNOT expr
    | expr (MULTIPLY | DIVIDE | MOD) expr
    | expr (PLUS | MINUS) expr
    | expr (ARS | ALS) expr
    | expr (GN | GE | LN | LE) expr
    | expr (NE | EQ) expr
    | expr BAND expr
    | expr BXOR expr
    | expr BOR expr
    | expr LAND expr
    | expr LOR expr
    // The conditional operator must group to the right so that
    // `a ? b : c ? d : e` parses as `a ? b : (c ? d : e)`.
    | <assoc=right> expr QUESTION_MARK expr COLON expr
    | <assoc=right> expr ASSIGN expr
    // NOTE(review): the NEW forms only accept an ID as the type name; built-in
    // type keywords are lexed as their own tokens (INT, BOOL, STRING), so e.g.
    // `new int[3]` is not matched here — confirm whether that is intended.
    | NEW ID
    | NEW ID LPAREN RPAREN
    | NEW ID (LBRACKET expr RBRACKET)* (LBRACKET RBRACKET)* constant?
    ;
|
||||
|
||||
// Primary expressions: 'this', a plain name, a call by name, a formatted
// string, or a constant.
basic_expr
    : THIS
    | ID
    | ID LPAREN (expr (COMMA expr)*)? RPAREN
    | formatted_string
    | constant
    ;
|
||||
|
||||
// Formatted string: either a single WHOLE token, or HEAD expr (BODY expr)* TAIL
// where each expr sits between two literal-text tokens.
formatted_string
    : FORMAT_STRING_WHOLE
    | (FORMAT_STRING_HEAD expr (FORMAT_STRING_BODY expr)*? FORMAT_STRING_TAIL)
    ;
|
||||
// Literal constants, including a brace-enclosed, comma-separated (possibly
// empty, possibly nested) list of constants.
constant
    : TRUE
    | FALSE
    | INT_LITERAL
    | STRING_LITERAL
    | NULL
    | LBRACE (constant (COMMA constant)*)? RBRACE
    ;
|
||||
// A type name: one of the built-in type keywords, or an identifier naming a
// user-defined class (declared with class_def). Without the ID alternative no
// variable, parameter or return value of a class type could be declared.
type
    : INT
    | BOOL
    | STRING
    | VOID
    | ID
    ;
|
||||
|
||||
block
|
||||
: LBRACE statement* RBRACE
|
||||
;
|
||||
|
||||
statement
|
||||
: expression SEMICOLON
|
||||
| returnStmt
|
||||
| ifStmt
|
||||
;
|
||||
|
||||
expression
|
||||
: INT_LITERAL
|
||||
| ID
|
||||
| expression (PLUS | MINUS | MULTIPLY | DIVIDE) expression
|
||||
| LPAREN expression RPAREN
|
||||
;
|
||||
|
||||
returnStmt
|
||||
: RETURN expression? SEMICOLON
|
||||
;
|
||||
|
||||
ifStmt
|
||||
: IF LPAREN expression RPAREN statement (ELSE statement)?
|
||||
;
|
||||
;
|
@ -9,5 +9,7 @@ OUTPUT_DIR="../src/semantic/antlr-generated"
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
# Run ANTLR to generate lexer and parser
|
||||
antlr4 -Dlanguage=Cpp -no-listener -visitor MXLexer.g4 MXParser.g4 -o "$OUTPUT_DIR"
|
||||
# Delete the generated MXParserBaseVisitor source and header from the output
# directory (presumably the project provides its own visitor — confirm).
rm "$OUTPUT_DIR/MXParserBaseVisitor.cpp"
|
||||
rm "$OUTPUT_DIR/MXParserBaseVisitor.h"
|
||||
# Return to the original directory
|
||||
cd - > /dev/null
|
||||
|
Reference in New Issue
Block a user