set up grammar structure

This commit is contained in:
2024-08-06 15:03:51 +00:00
parent 9245c4fae7
commit a22a4ba594
14 changed files with 3243 additions and 852 deletions

View File

@ -1,10 +1,22 @@
lexer grammar MXLexer;
// Keywords
// These rules are declared ahead of ID: when a keyword and ID match the same
// text with the same length, ANTLR picks the rule that appears first.
// Each token is defined exactly once (a second definition of the same token
// name is a redefinition error).
// -- built-in type names
VOID: 'void';
BOOL: 'bool';
INT: 'int';
STRING: 'string';
// -- objects and classes
NEW: 'new';
CLASS: 'class';
NULL: 'null';
// -- boolean literals and the receiver keyword
TRUE: 'true';
FALSE: 'false';
THIS: 'this';
// -- control flow
IF: 'if';
ELSE: 'else';
FOR: 'for';
WHILE: 'while';
BREAK: 'break';
CONTINUE: 'continue';
RETURN: 'return';
// Operators
@ -12,21 +24,57 @@ PLUS: '+';
// Arithmetic operators.
MINUS: '-';
MULTIPLY: '*';
DIVIDE: '/';
MOD: '%';
// Relational operators.
// NOTE(review): GN/LN are the non-strict forms ('>=', '<=') while GE/LE are the
// strict ones ('>', '<') -- the reverse of the common reading of "GE"/"LE".
GN: '>=';
GE: '>';
LN: '<=';
LE: '<';
// Equality operators.
NE: '!=';
EQ: '==';
// Logical operators.
LAND: '&&';
LOR: '||';
LNOT: '!';
// Shift operators ('>>' and '<<').
ARS: '>>';
ALS: '<<';
// Bitwise operators.
BAND: '&';
BOR: '|';
BXOR: '^';
BNOT: '~';
// Assignment, increment/decrement, member access and the two halves of '?:'.
// The lexer always takes the longest match, so '==' / '++' / '--' are not
// split into shorter tokens.
ASSIGN: '=';
SELF_PLUS: '++';
SELF_MINUS: '--';
DOT: '.';
QUESTION_MARK: '?';
COLON: ':';
// Punctuation
LPAREN: '(';
RPAREN: ')';
LBRACKET: '[';
RBRACKET: ']';
LBRACE: '{';
RBRACE: '}';
SEMICOLON: ';';
COMMA: ',';
// Format-string pieces. A format string is cut into tokens at each single '$':
//   WHOLE : f"text"      -- no '$'-delimited part at all
//   HEAD  : f"text$      -- from the opening f" up to the first '$'
//   BODY  : $text$       -- text between two '$'
//   TAIL  : $text"       -- from a '$' to the closing quote
FORMAT_STRING_WHOLE: 'f"' FORMAT_STR_INTERNAL '"';
FORMAT_STRING_HEAD: 'f"' FORMAT_STR_INTERNAL '$';
FORMAT_STRING_TAIL: '$' FORMAT_STR_INTERNAL '"';
FORMAT_STRING_BODY: '$' FORMAT_STR_INTERNAL '$';
// Literal text inside a format string: a doubled '$$', an ESC sequence, or any
// character other than '$', '"' and backslash. Matched non-greedily.
fragment FORMAT_STR_INTERNAL : (DOLLAR_IN_FORMAT_STR | ESC | ~[$"\\])*?;
// NOTE(review): FORMAT_STR_BEG, FORMAT_STR_END and DOLLAR are not referenced by
// any rule in the visible part of this grammar; the rules above spell the
// literals out inline.
fragment FORMAT_STR_BEG: 'f"';
fragment FORMAT_STR_END: '"';
// '$$' is consumed as part of the literal text rather than as a delimiter.
fragment DOLLAR_IN_FORMAT_STR: '$$';
fragment DOLLAR: '$';
// Identifiers
// An identifier starts with a letter; letters, digits and underscores may
// follow. Defined once only -- a second ID rule is a redefinition error.
ID: [a-zA-Z][a-zA-Z_0-9]*;
// Literals
// Integer literal: one or more decimal digits.
INT_LITERAL: [0-9]+;
// String literal: double-quoted; the body is made of ESC sequences and any
// character other than '"' and backslash, so a backslash is only legal as the
// start of an ESC sequence.
STRING_LITERAL: '"' (ESC|~["\\])*? '"';
// Escape sequences: \" (quote), \\ (backslash) and \n (newline). Without the
// \n alternative a string such as "line\n" could not be tokenized at all,
// because the string body rejects a bare backslash.
fragment ESC : '\\"' | '\\\\' | '\\n';
// Whitespace and comments
// Blanks, tabs and line breaks separate tokens and are dropped.
WS: [ \t\r\n]+ -> skip;
// '//' comment: runs up to, but not including, the end of the line. Stopping
// before the line break (instead of requiring one) lets a comment on the very
// last line of a file without a trailing newline be skipped too; the line
// break itself is then consumed by WS.
LINE_COMMENT: '//' ~[\r\n]* -> skip;
// '/* ... */' comment: non-greedy, so it ends at the first '*/'.
BLOCK_COMMENT: '/*' .*? '*/' -> skip;

View File

@ -3,39 +3,74 @@ parser grammar MXParser;
options { tokenVocab=MXLexer; }
// Entry rule: a program is a non-empty sequence of class definitions,
// global variable definitions and function definitions, followed by EOF.
mxprog
    : (class_def | define_statement | function_def)+ EOF
    ;

// Function definition: return type, name, an optional comma-separated list of
// `type name` parameters, and a braced body.
// An empty parameter list is covered by the optional group, so a separate
// zero-argument rule is not needed.
function_def
    : type ID LPAREN ((type ID) (COMMA type ID)*)? RPAREN suite
    ;
// Class definition: `class Name { members };` -- the closing brace must be
// followed by a semicolon. Members are fields, constructors and methods in any
// order.
class_def : CLASS ID LBRACE (class_var_def|class_constructor|function_def)* RBRACE SEMICOLON;
// Field declaration: a type, zero or more `[]` pairs, then one or more
// comma-separated names. No initializer is accepted here.
class_var_def : type (LBRACKET RBRACKET)* ID (COMMA ID )* SEMICOLON;
// Constructor: a name followed by an empty parameter list and a body. The
// grammar itself does not check that the name equals the class name.
class_constructor : ID LPAREN RPAREN suite;
// Braced block of zero or more statements.
suite : LBRACE statement* RBRACE;
// A single statement. Alternatives, in order:
//   empty statement, variable definition, expression statement, if/else,
//   while, for, break/continue/return, nested block.
statement
: SEMICOLON
| define_statement
| expr SEMICOLON
// `else` is optional; ANTLR binds a dangling else to the nearest `if`.
| IF LPAREN expr RPAREN statement (ELSE statement)?
| WHILE LPAREN expr RPAREN statement
// for (init cond; update) body
//   init   : a definition, an expression followed by ';', or a lone ';'
//            (each form supplies the first ';' itself)
//   cond   : optional expression, then ';'
//   update : optional
// NOTE(review): the update slot also admits define_statement, which ends with
// its own SEMICOLON, so `for (;; int i;) ...` is accepted -- confirm intended.
| FOR LPAREN (define_statement|(expr SEMICOLON)|SEMICOLON) expr? SEMICOLON (define_statement|expr)? RPAREN statement
// Jump statements; `return` may carry a value.
| (BREAK|CONTINUE|(RETURN expr?)) SEMICOLON
| suite
;
// Variable definition: a type, zero or more `[]` pairs, then one or more
// comma-separated names, each with an optional `= expr` initializer, ending
// with ';'. The `[]` pairs are written once, before the first name.
define_statement : type (LBRACKET RBRACKET)* ID (ASSIGN expr)? (COMMA ID (ASSIGN expr)?)* SEMICOLON;
// Expressions. For the left-recursive alternatives, ANTLR derives operator
// precedence from alternative order: earlier alternatives bind tighter.
// Binary operators are left-associative unless marked <assoc=right>.
expr
    // -- primaries
    : basic_expr
    | LPAREN expr RPAREN
    // -- postfix: member access / method call, indexing, x++ / x--
    | expr DOT (ID | (ID LPAREN (expr (COMMA expr)*)? RPAREN))
    | expr (LBRACKET expr RBRACKET)+
    | expr (SELF_PLUS | SELF_MINUS)
    // -- prefix: ++x / --x, unary minus, logical not, bitwise not
    | (SELF_PLUS | SELF_MINUS) expr
    | MINUS expr
    | LNOT expr
    | BNOT expr
    // -- binary operators, tightest first
    | expr (MULTIPLY | DIVIDE | MOD) expr
    | expr (PLUS | MINUS) expr
    | expr (ARS | ALS) expr
    | expr (GN | GE | LN | LE) expr
    | expr (NE | EQ) expr
    | expr BAND expr
    | expr BXOR expr
    | expr BOR expr
    | expr LAND expr
    | expr LOR expr
    // -- conditional: right-associative so that `a ? b : c ? d : e` groups as
    //    `a ? b : (c ? d : e)` rather than `(a ? b : c) ? d : e`
    | <assoc=right> expr QUESTION_MARK expr COLON expr
    // -- assignment: right-associative, `a = b = c` is `a = (b = c)`
    | <assoc=right> expr ASSIGN expr
    // -- object / array creation
    // NOTE(review): only an identifier may follow NEW; the built-in type
    // keywords are separate tokens, so `new int[n]` is not covered here --
    // confirm whether that is intended.
    | NEW ID
    | NEW ID LPAREN RPAREN
    // sized dimensions first, then unsized `[]` pairs, then an optional
    // constant
    | NEW ID (LBRACKET expr RBRACKET)* (LBRACKET RBRACKET)* constant?
    ;
// Atomic expressions: `this`, a name, a free-function call with an optional
// comma-separated argument list, a format string, or a constant.
basic_expr
: THIS
| ID
| ID LPAREN (expr (COMMA expr)*)? RPAREN
| formatted_string
| constant
;
// Format string: either a single WHOLE token (no embedded expression), or
// HEAD expr (BODY expr)* TAIL, where each expr sits between two text pieces.
formatted_string : FORMAT_STRING_WHOLE | (FORMAT_STRING_HEAD expr (FORMAT_STRING_BODY expr)*? FORMAT_STRING_TAIL);
// Constants: boolean, integer, string and null literals, plus a brace-enclosed,
// comma-separated list of constants. The list may be empty (`{}`) and may nest,
// since each element is itself a constant.
constant : TRUE
|FALSE
|INT_LITERAL
|STRING_LITERAL
|NULL
|LBRACE (constant (COMMA constant)*)? RBRACE;
// A type name: one of the built-in types, or an identifier naming a class.
// The ID alternative is what allows variables, fields, parameters and return
// values to have a class type; array-ness is expressed by the `[]` pairs that
// the using rules place after `type`.
type
    : INT
    | BOOL
    | STRING
    | VOID
    | ID
    ;
// Braced list of statements.
// NOTE(review): this has the same shape as `suite` (LBRACE statement* RBRACE);
// in the visible grammar only the `function` rule refers to `block` -- looks
// like a leftover that can go once `function` is gone; confirm.
block
: LBRACE statement* RBRACE
;
// NOTE(review): a rule named `statement` is already defined earlier in this
// grammar, and ANTLR rejects a second definition of the same rule name. The
// earlier rule already covers expression statements, `return` and `if`, so
// this definition looks redundant -- confirm and remove.
statement
: expression SEMICOLON
| returnStmt
| ifStmt
;
// Minimal expression rule: integer literal, name, binary arithmetic, or a
// parenthesized expression.
// All four operators share a single alternative, so they have the same
// precedence here (`*` does not bind tighter than `+`).
// NOTE(review): overlaps with the richer `expr` rule defined earlier in this
// grammar; presumably superseded by it -- confirm.
expression
: INT_LITERAL
| ID
| expression (PLUS | MINUS | MULTIPLY | DIVIDE) expression
| LPAREN expression RPAREN
;
// `return;` or `return <expression>;`.
// NOTE(review): the earlier `statement` rule has its own inline RETURN
// alternative; this rule is only reachable through the second `statement`
// definition -- confirm whether it is still needed.
returnStmt
: RETURN expression? SEMICOLON
;
// `if (cond) stmt` with an optional `else stmt`.
// NOTE(review): the earlier `statement` rule has its own inline IF
// alternative; this rule is only reachable through the second `statement`
// definition -- confirm whether it is still needed.
ifStmt
: IF LPAREN expression RPAREN statement (ELSE statement)?
;
// NOTE(review): the ';' below follows an already-terminated rule and belongs to
// no rule; it looks like a stray terminator.
;

View File

@ -9,5 +9,7 @@ OUTPUT_DIR="../src/semantic/antlr-generated"
# Make sure the destination for the generated sources exists.
mkdir -p "$OUTPUT_DIR"
# Generate the C++ lexer and parser: visitor classes on, listener classes off.
antlr4 \
    -Dlanguage=Cpp \
    -no-listener \
    -visitor \
    MXLexer.g4 MXParser.g4 \
    -o "$OUTPUT_DIR"
# Delete the generated base-visitor pair (source file first, then header).
for suffix in cpp h; do
    rm "$OUTPUT_DIR/MXParserBaseVisitor.$suffix"
done
# Go back to the previous directory; `cd -` prints the path, which is discarded.
cd - > /dev/null