实现一个简单的解释器
功能
- 支持整形变量定义,
int a - 支持赋值语句
a = expression - 表达式可以使用变量
- 支持AST的打印
文法
BNF规则 描述文法
program::= statement+
statement::= intDeclaration | expressionStatement | assignStatement
intDeclaration::= INT ID ('=' expression)? ';'
expressionStatement::= expression ';'
assignStatement::= ID '=' expression ';'
expression::= expression1 ('+' expression1 | '-' expression1)*
expression1::= expression2 ('*' expression2 | '/' expression2)*
expression2::= ID | NUM | '(' expression ')'
设计
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-fgk1TZow-1647876869933)(C:\Users\20580\AppData\Roaming\Typora\typora-user-images\image-20220318170659566.png)]
实现
词法分析模块
词法分析的过程是将字符流 识别出来变成一个个的词素Token ,这个Token的定义,组织为
/* 词法单元模块 */
using Name = std::string;
// Token的枚举
enum TokenID{
INT, // int
ID, // id
NUM, // number
OTHER, // Other Char + - * / = ;
UNDEF // un define
};
// Token的定义,Name为Token的字符值,id为Token的编号
struct Token{
Name value;
TokenID id;
Token():id(UNDEF),value(""){};
};
在项目中,笔者写了三个接口,分别为:
read : 从字符流中读取一个Token
peek : 从字符流中前看一个Token
lookAhead :从字符流中前看n个Token
他的源代码为
// 读取一个Token
Token read(){
Token token;
while (*p != '\0'){
char character = *p;
if(char_set.find(character) != char_set.cend()){
p++;
token.id = OTHER;
token.value.push_back(character);
return token;
}else{
if(character == ' ' || character == '\r' || character == '\n' || character == '\t'){
p++;
continue;
}
// 识别数字
if(isDig(character)){
token.id = NUM;
readDig(token);
return token;
}
// 识别int 和 id
readIdOrInt(token);
return token;
}
}
token.id = UNDEF;
return token;
}
// 预读n个token
std::vector<Token> lookAhead(int n){
char *peek = p;
std::vector<Token> tokens = {};
for (int i = 0; i < n; ++i){
if(*p == '\0')
break;
tokens.push_back(read());
}
p = peek;
return tokens;
}
// 预读一个token
Token peek(){
char *peek = p; // p指向字符流的指针
Token t = read();
p = peek;
return t;
}
语法分析模块生成AST
根据上面的文法描述,从顶层文法一层一层往下递归,来实现整个语法分析的过程,并且递归的构造AST,根据AST结点类型的不同,对于AST的组织也是仅仅使用了一个简单的定义,并且AST的结点类型使用一个枚举类型来声明,AST的定义为
/*语法分析,AST生成模块*/
enum ASTNodeType{
IntDeclaration,
AssignStatement,
ExpressionStatement,
AddNode,
SubNode,
MulNode,
DivNode,
ELENode,
};
class SimpleASTNode{
public:
SimpleASTNode(ASTNodeType type):ast_type(type),childs{}, token(){}
void addChild(SimpleASTNode *node){
childs.push_back(node);
}
// eval AST 执行AST, 打印结果
int eval(){
switch (ast_type) {
case IntDeclaration:{
printf("Decl var: %s\n", token.value.c_str());
var_map[token.value] = 0;
if(!childs.empty()){
childs[0]->eval();
}
break;
}
case AssignStatement:{
if(var_map.find(token.value) == var_map.cend()){
printf("[ERROR]:Var %s is not define\n", token.value.c_str());
throw std::exception();
}else{
int value = childs[0]->eval();
var_map[token.value] = value;
printf("Assign %s = %d\n", token.value.c_str(), value);
}
break;
};
case ExpressionStatement:{
int value = childs[0]->eval();
printf("expression res: %d\n", value);
break;
}
case AddNode:{
return childs[0]->eval() + childs[1]->eval();
};
case SubNode:{
return childs[0]->eval() - childs[1]->eval();
};
case MulNode:{
return childs[0]->eval() * childs[1]->eval();
};
case DivNode:{
return childs[0]->eval() / childs[1]->eval();
};
case ELENode:{
if(token.id == NUM)
return atoi(token.value.c_str());
else if(token.id == ID){
if(var_map.find(token.value) == var_map.cend()){
printf("[ERROR]:Var %s is not define\n", token.value.c_str());
throw std::exception();
}
return var_map[token.value];
}else{
printf("[ERROR]:Unknown error!\n");
throw std::exception();
}
};
default:{
std::cerr<<"[ERROR]:Unknown AST type"<<std::endl;
return -1;
}
}
return 0;
}
//printAST 打印AST
void printAST(std::string tab){
switch(ast_type){
case IntDeclaration:{
printf("%sIntDeclarationNode:%s\n", tab.c_str(), token.value.c_str());
if(!childs.empty()){
tab.append(" ");
childs[0]->printAST(tab);
}
return;
}
case AssignStatement:{
printf("%sAssignNode:%s\n", tab.c_str(), token.value.c_str());
tab.append(" ");
childs[0]->printAST(tab);
return;
}
case ExpressionStatement:{
printf("%sExpressionNode:\n", tab.c_str());
tab.append(" ");
childs[0]->printAST(tab);
return;
}
case AddNode:{
printf("%sAddNode:\n", tab.c_str());
tab.append(" ");
childs[0]->printAST(tab);
childs[1]->printAST(tab);
return;
}
case SubNode:{
printf("%sSubNode:\n", tab.c_str());
tab.append(" ");
childs[0]->printAST(tab);
childs[1]->printAST(tab);
return;
}
case MulNode:{
printf("%sMulNode:\n", tab.c_str());
tab.append(" ");
childs[0]->printAST(tab);
childs[1]->printAST(tab);
return;
}
case DivNode:{
printf("%sDivNode:\n", tab.c_str());
tab.append(" ");
childs[0]->printAST(tab);
childs[1]->printAST(tab);
return;
}
case ELENode:{
if(token.id == ID){
printf("%sIDNode:%s\n", tab.c_str(), token.value.c_str());
}else if(token.id == NUM){
printf("%sNUMNode:%s\n", tab.c_str(), token.value.c_str());
}
return;
}
}
}
void setToken(Token tok){
token.id = tok.id;
token.value = tok.value;
}
private:
ASTNodeType ast_type;
Token token;
std::vector<SimpleASTNode*> childs;
};
基于上面的AST构造,依照文法, 从顶层往下层构造AST,每一个非终结符使用一个函数构造
// 表达式求值
SimpleASTNode* expression();
SimpleASTNode* expression1();
SimpleASTNode* expression2();
/**
* 文法描述,采用扩展BNF写法
* program::= statement+
* statement::= intDeclaration | expressionStatement | assignStatement
* intDeclaration::= INT ID ('=' expression)? ';'
* expressionStatement::= expression ';'
* assignStatement::= ID '=' expression ';'
* expression::= expression1 ('+' expression1 | '-' expression1)*
* expression1::= expression2 ('*' expression2 | '/' expression2)*
* expression2::= ID | NUM | '(' expression ')'
*/
SimpleASTNode* expression2(){
SimpleASTNode *node = nullptr;
char *loc = p;
Token token = peek();
if(token.id == ID || token.id == NUM){
read();
if(token.id == ID){
Token t = peek();
if(t.value == "="){
p = loc;
return nullptr;
}
}
node = new SimpleASTNode(ELENode);
node->setToken(token);
return node;
}else if(token.value == "("){
read();
node = expression();
if(node == nullptr){
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}
token = peek();
if(token.value != ")"){
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}
read();
}
return node;
}
SimpleASTNode* expression1(){
//expression1::= expression2 (* expression2 | / expression2)*
SimpleASTNode *node = expression2();
if(node == nullptr)
return node;
Token t = peek();
SimpleASTNode *p = node;
SimpleASTNode *root;
while (t.id == OTHER && (t.value == "*" || t.value == "/")){
if(t.value == "*"){
root = new SimpleASTNode(MulNode);
}else{
root = new SimpleASTNode(DivNode);
}
root->addChild(p);
read();
node = expression2();
if(node == nullptr){
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}
root->addChild(node);
p = root;
t = peek();
}
return p;
}
SimpleASTNode* expression(){
// expression::= expression1 (+ expression1 | - expression1)*
SimpleASTNode *node = expression1();
if(node == nullptr)
return node;
Token t = peek();
SimpleASTNode *p = node;
SimpleASTNode *root;
if(t.value == "="){
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}
while (t.id == OTHER && (t.value == "+" || t.value == "-")){
if(t.value == "+"){
root = new SimpleASTNode(AddNode);
}else{
root = new SimpleASTNode(SubNode);
}
root->addChild(p);
read();
node = expression1();
if(node == nullptr){
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}
root->addChild(node);
p = root;
t = peek();
}
return p;
}
//
SimpleASTNode* intDeclaration(){
SimpleASTNode *node = nullptr;
Token token = peek();
if(token.id == INT){
read(); // 消耗一个字符
node = new SimpleASTNode(IntDeclaration);
token = read();
if(token.id != ID){
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}else{
node->setToken(token);
Token tokens = token;
token = peek();
if(token.id == OTHER && token.value == "="){
// 添加子节点
read();
SimpleASTNode *childs = new SimpleASTNode(AssignStatement);
childs->setToken(tokens);
SimpleASTNode *child = expression();
if(child != nullptr){
node->addChild(childs);
childs->addChild(child);
}else{
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}
}else if(token.value != "" && token.value != ";"){
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}
token = read();
if(token.value != ";"){
printf("[ERROR]:Statement '%s' not have ';'\n", statement);
throw std::exception();
}
}
}
return node;
}
SimpleASTNode* expressionStatement(){
char *loc = p;
SimpleASTNode *expr = new SimpleASTNode(ExpressionStatement);
SimpleASTNode *node = expression();
if(node != nullptr){
Token token = read();
if(token.value != ";"){
printf("[ERROR]:Statement '%s' not have ';'\n", statement);
throw std::exception();
}
expr->addChild(node);
return expr;
}else{
p = loc;
}
return node;
}
SimpleASTNode* assignmentStatement(){
SimpleASTNode *node = nullptr;
Token token = peek();
if(token.id == ID){
read();
node = new SimpleASTNode(AssignStatement);
node->setToken(token);
token = peek();
if(token.value == "="){
read();
SimpleASTNode *child = expression();
if(p == nullptr){
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}else{
node->addChild(child);
token = read();
if(token.value != ";"){
printf("[ERROR]:Statement '%s' not have ';'\n", statement);
throw std::exception();
}
}
}else{
printf("[ERROR]:Undefined Statement\n");
throw std::exception();
}
}
return node;
}
主程序
/*主函数*/
int main(int argc, char *argv[]) {
bool verbose = false;
if(argc >= 2){
verbose = true;
}
statement = (char*)calloc(256,sizeof(char));
printf("======== Simple Script Hello! ========\n");
while(true){
printf("@>>>");
std::cin.getline(statement, 256);
p = statement;
if(strcmp(statement, "exit") == 0){
printf("======== Simple Script Bye! ========\n");
break;
}
SimpleASTNode *root = nullptr;
try{
root = intDeclaration();
if(root != nullptr){
root->eval();
if(verbose){
printf("AST:\n");
root->printAST(" ");
}
continue;
}
root = expressionStatement();
if(root != nullptr){
root->eval();
if(verbose){
printf("AST:\n");
root->printAST(" ");
}
continue;
}
root = assignmentStatement();
if(root != nullptr){
root->eval();
if(verbose){
printf("AST:\n");
root->printAST(" ");
};
continue;
}
}catch(...){
continue;
}
}
return 0;
}
最后结果
源码
arqqqq/SimpleScript: A simple script executor (github.com)
|