IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> C++知识库 -> 编译原理》实验一:词法分析器 C++ 版 -> 正文阅读

[C++知识库]《编译原理》实验一:词法分析器 C++ 版

《编译原理》实验一:词法分析器 C++ 版

考虑如下C语言子集:

单词	类别编码	助记符	值
break	1	BREAK	_
char	2	CHAR	_
do	3	DO	_
double	4	DOUBLE	_
else	5	ELSE	_
if	6	IF	_
int	7	INT	_
return	8	RETURN	_
void	9	VOID	_
while	10	WHILE	_
标识符	11	ID	构成标识符的字符串
常数	12	NUM	数值
字符串	13	STRING	字符串
+	14	ADD	_
-	15	SUB	_
*	16	MUL	_
/	17	DIV	_
>	18	GT	_
>=	19	GE	_
<	20	LT	_
<=	21	LE	_
==	22	EQ	_
!=	23	NE	_
=	24	ASSIGN	_
{	25	LB	_
}	26	RB	_
(	27	LR	_
)	28	RR	_
,	29	COMMA	_
;	30	SEMI	_
单词的正则定义如下
D = [0-9]
L = [a-zA-Z_]
H = [a-fA-F0-9]
E = [Ee][+-]?{D}+
FS = (f|F|l|L)
IS = (u|U|l|L)*
标识符
id = {L}({L}|{D})*
常数
num:
0[xX]{H}+{IS}?
| 0{D}+{IS}?
| {D}+{IS}?
| L?'(\\.|[^\\'])+'
| {D}+{E}{FS}?
| {D}*"."{D}+({E})?{FS}?
| {D}+"."{D}*({E})?{FS}?
字符串
string = L?\"(\\.|[^\\"])*\"

对给定的源程序进行词法分析,每个单词一行,以二元组的形式输出结果。
例如,下面的源程序代码

void main()
{
	double sum = 0.0;
	double x = 1.0;
	while (x <= 100) sum = sum + x;
	printf("sum = %f\n", sum);
}

词法分析的结果为

(VOID, _)
(ID, "main")
(LR, _)
(RR, _)
(LB, _)
(DOUBLE, _)
(ID, "sum")
(ASSIGN, _)
(NUM, 0.0)
(SEMI, _)
(DOUBLE, _)
(ID, "x")
(ASSIGN, _)
(NUM, 1.0)
(SEMI, _)
(WHILE, _)
(LR, _)
(ID, "x")
(LE, _)
(NUM, 100)
(RR, _)
(ID, "sum")
(ASSIGN, _)
(ID, "sum")
(ADD, _)
(ID, "x")
(SEMI, _)
(ID, "printf")
(LR, _)
(STRING, "sum = %f\n")
(COMMA, _)
(ID, "sum")
(RR, _)
(SEMI, _)
(RB, _)

编写C++代码

#include <iostream>
#include <map>
#include <algorithm>
#include <string>
#include<Windows.h>
using namespace std;

string in_str;				// the whole source text to be analysed
int index;					// position in in_str of the next character getChar() will read
char character;				// the most recently read character
string token;				// characters collected so far for the token being recognised
map<string, int> Symbol;	// identifier table: identifier text -> position in the table
map<string, int> Digit;		// constant table: constant text -> position in the table
map<string, int> String;		// string-literal table: string text -> position in the table
map<string, int>::iterator ite;	// shared iterator used by the table lookups and by show_table()
const int len = 100;
//string Reserve[len];		// (earlier, smaller declaration of the reserved-word table)
string Reserve[3 * len];	// lexeme / mnemonic table filled by init_Reserve(); slot 0 stays empty


// One result of the lexical analyser.
//   type  - class code of the token (0 is used by the analyser for "nothing to report")
//   index - position of the token's text in its table, where one applies
//   value - the token's text, or "_" when the token carries no value
struct Binary {
	int type = 0;
	int index = 0;
	string value = "_";

	Binary(int c, int i, string v = "_")
		: type(c), index(i), value(v)
	{
	}
};


// Fills the Reserve table.
//   Reserve[1..30]  : lexemes (keywords, the names id/num/string, operators, delimiters)
//   Reserve[31..60] : the mnemonic of the lexeme stored 30 slots earlier
//   Reserve[61]     : a double-quote character
// Slot 0 is left untouched.
void init_Reserve()
{
	static const char* const entries[] = {
		// lexemes, slots 1..30
		"break", "char", "do", "double", "else",
		"if", "int", "return", "void", "while",
		"id", "num", "string",
		"+", "-", "*", "/",
		">", ">=", "<", "<=", "==", "!=", "=",
		"{", "}", "(", ")", ",", ";",
		// mnemonics, slots 31..60
		"BREAK", "CHAR", "DO", "DOUBLE", "ELSE",
		"IF", "INT", "RETURN", "VOID", "WHILE",
		"ID", "NUM", "STRING",
		"ADD", "SUB", "MUL", "DIV",
		"GT", "GE", "LT", "LE", "EQ", "NE", "ASSIGN",
		"LB", "RB", "LR", "RR", "COMMA", "SEMI",
		// slot 61
		"\""
	};
	const int count = static_cast<int>(sizeof(entries) / sizeof(entries[0]));

	for (int k = 0; k < count; ++k) {
		Reserve[k + 1] = entries[k];	// the table is 1-based
	}
}


// Reads the next input character into `character` and advances `index`.
// Once the input is exhausted `character` becomes '\0' instead of indexing
// in_str out of range, so scanning loops can detect the end of the input.
// `index` is still advanced so that a following retract() stays balanced.
void getChar() {
	if (index >= 0 && static_cast<string::size_type>(index) < in_str.size())
		character = in_str[index];
	else
		character = '\0';
	index++;
}

// Skips blanks: keeps reading while the current character is whitespace, so
// on return `character` holds the first non-blank character.
// Tabs and line breaks are treated as blanks too, not only ' '; otherwise
// they would reach the token switch and be reported as lexical errors.
void get_no_blank()
{
	while (character == ' ' || character == '\t' || character == '\n' ||
	       character == '\r' || character == '\v' || character == '\f') {
		getChar();
	}
}

// Appends the current character to the token being built.
void concat() {
	token.push_back(character);
}

// Un-reads one character: steps the read position back by one and resets
// the current character to a blank.
void retract()
{
	--index;
	character = ' ';
}

// True when the current character is an ASCII letter (A-Z or a-z).
bool is_letter()
{
	const bool upper = ('A' <= character) && (character <= 'Z');
	const bool lower = ('a' <= character) && (character <= 'z');
	return upper || lower;
}

// True when the current character is a decimal digit (0-9).
bool is_digit()
{
	return ('0' <= character) && (character <= '9');
}

// True when the current character is a double quote.
bool is_string()
{
	return character == '"';
}

// Number of '.' characters counted by is_dotOnce() since the counter was last reset.
int dot_Sum = 0;

// Counts the current character if it is a '.', then reports whether the
// running count is still 0 or 1 (i.e. at most one dot has been seen).
bool is_dotOnce()
{
	dot_Sum += (character == '.') ? 1 : 0;
	return dot_Sum == 0 || dot_Sum == 1;
}



// Looks the current token up among the keywords.
// Returns the keyword's class code (its slot in Reserve, 1..10) or -1 when
// the token is not a keyword.
// Only the keyword slots are searched: Reserve also stores the names
// "id"/"num"/"string", the operators and the upper-case mnemonics, and
// matching those would turn ordinary identifiers such as `id` or `BREAK`
// into bogus reserved words.
int reserve()
{
	const int first_keyword = 1;	// Reserve[1]  == "break"
	const int last_keyword = 10;	// Reserve[10] == "while"

	for (int i = first_keyword; i <= last_keyword; i++)
		if (Reserve[i] == token)
			return i;
	return -1;
}

// Looks the current token up in the identifier table, adding it when absent.
// Returns the identifier text; its table position is Symbol[text].
string symbol()
{
	ite = Symbol.find(token);
	if (ite != Symbol.end()) {
		return ite->first;
	}

	// Take the size BEFORE inserting. In `Symbol[token] = Symbol.size()` the
	// order of the insertion and the size() call is unspecified before C++17,
	// so the stored position could be off by one depending on the compiler.
	const int position = static_cast<int>(Symbol.size());
	Symbol[token] = position;
	return token;
}

// Looks the current token up in the constant table, adding it when absent.
// Returns the constant text; its table position is Digit[text].
string constant()
{
	ite = Digit.find(token);
	if (ite != Digit.end()) {
		return ite->first;
	}

	// Take the size BEFORE inserting. In `Digit[token] = Digit.size()` the
	// order of the insertion and the size() call is unspecified before C++17,
	// so the stored position could be off by one depending on the compiler.
	const int position = static_cast<int>(Digit.size());
	Digit[token] = position;
	return token;
}

// Looks the current token up in the string-literal table, adding it when absent.
// Returns the string text; its table position is String[text].
string _string()
{
	ite = String.find(token);
	if (ite != String.end()) {
		return ite->first;
	}

	// Take the size BEFORE inserting. In `String[token] = String.size()` the
	// order of the insertion and the size() call is unspecified before C++17,
	// so the stored position could be off by one depending on the compiler.
	const int position = static_cast<int>(String.size());
	String[token] = position;
	return token;
}

// Reports the current token as a lexical error on standard output and
// returns the "nothing to report" result (type 0).
Binary error()
{
	const Binary nothing(0, 0);
	cout << token << "单词错误!" << endl;
	return nothing;
}

//词法分析函数,逐个识别单词
Binary LexAnalyze() 
{
	token = "";
	getChar();
	get_no_blank();
	string val;
	int num = -1;
	dot_Sum = 0; 
	//char temp = getchar();

	switch (character) {
	case'a':
	case'b':
	case'c':
	case'd':
	case'e':
	case'f':
	case'g':
	case'h':
	case'i':
	case'j':
	case'k':
	case'l':
	case'm':
	case'n':
	case'o':
	case'p':
	case'q':
	case'r':
	case's':
	case't':
	case'u':
	case'v':
	case'w':
	case'x':
	case'y':
	case'z':
	case'A':
	case'B':
	case'C':
	case'D':
	case'E':
	case'F':
	case'G':
	case'H':
	case'I':
	case'J':
	case'K':
	case'L':
	case'M':
	case'N':
	case'O':
	case'P':
	case'Q':
	case'R':
	case'S':
	case'T':
	case'U':
	case'V':
	case'W':
	case'X':
	case'Y':
	case'Z':
		while (is_letter() || is_digit() || character=='_') { //为字母 数字 下划线
			concat();		//追加到token末尾
			getChar();		//读取下一个字符
		}
		retract();			//回退一个字符
		num = reserve();	//查看保留字表
		if (num != -1) {
			return Binary(num, 1);
		}
		else {
			val = symbol();	//查看标识符表
			return Binary(1, Symbol[val], val);
		}
		break;

		
	case'0':
		//dot_Sum = -1;
		concat();
		getChar();
		if (character == 'x' || character == 'X')	//十六进制
		{
			concat();		//追加到token末尾
			getChar();		//读取下一个字符
			while (is_letter() || is_digit()) { //为字母 数字 
				concat();		//追加到token末尾
				getChar();		//读取下一个字符
			}
			retract();			//回退一个字符
			val = constant();
			return Binary(2, Digit[val], val);
		}
		
		else if (is_dotOnce() || is_digit())
		{
			concat();		//追加到token末尾
			getChar();
			while (is_digit() && is_dotOnce()) {	//为数字
				concat();
				getChar();
			}
			retract();
			val = constant();	//查看常数表
			return Binary(2, Digit[val], val);
			//break;
		}
		else
			retract();
		break;

	case'1':
	case'2':
	case'3':
	case'4':
	case'5':
	case'6':
	case'7':
	case'8':
	case'9':
		concat();		//追加到token末尾
		getChar();
		while (is_digit() || character == '.' && is_dotOnce() || is_letter())
		{
			if (character == 'e' || character == 'E') {
				concat();
				getChar();
				if (character == '-' || is_digit()) {
					concat();
					getChar();
					continue;
				}
			}
			concat();
			getChar();
		}
		retract();
		val = constant();	//查看常数表
		return Binary(2, Digit[val], val);
		/*
		if (is_dotOnce() || is_digit())
		{
			concat();		//追加到token末尾
			getChar();
			while (is_digit() && is_dotOnce() || is_letter()) 
			{	
				if (character == 'e' || character == 'E') {
					concat();
					getChar();
					if (character == '-' || is_digit()) {
						concat();
						getChar();
						continue;
					}
				}
				concat();
				getChar();
			}
			retract();
			val = constant();	//查看常数表
			return Binary(2, Digit[val], val);
		}
		*/
		break;


	case'<':
		getChar();
		if (character == '=') 
			return Binary(21, 0);	//返回<=符号
		else {
			retract();
			return Binary(20, 0);	//返回<符号
		}
		break;

	case'>':
		getChar();
		if (character == '=')
			return Binary(19, 0);	//返回>=符号
		else {
			retract();
			return Binary(18, 0);	//返回>符号
		}
		break;

	case'=':
		getChar();
		if (character == '=') 
			return Binary(22, 0);	//返回==符号
		else {
			retract();
			return Binary(24, 0);	//返回=符号
		}
		break;

	case'!':
		getChar();
		if (character == '=')
			return Binary(23, 0);
		else 
			return error();
		break;

	case'+':
		return Binary(14, 0);
		break;

	case'-':
		return Binary(15, 0);
		break;

	case'*':
		return Binary(16, 0);
		break;

	case'/':
		
		getChar();
		if (character == '/') // 单行注释
		{
			concat();
			getChar();
			//temp = getchar();
			while (! '\n') {
				concat();
				getChar(); 
				//temp = getchar();
			}
			return Binary(0, 0);
		}
		else if(character == '*')  // 块注释
		{
			concat();
			getChar();
			while (character != '*') {
				concat();
				getChar();
			}
			concat();
			getChar();
			if (character == '/') {
				return Binary(0, 0);
				//break;
			}	
			else
				error();
		}
		else {
			retract();
			return Binary(17, 0);  // 返回除号
			//break;
		}
		break;

	case'{':
		return Binary(25, 0);
		break;

	case'}':
		return Binary(26, 0);
		break;
	case'(':
		return Binary(27, 0);
		break;

	case')':
		return Binary(28, 0);
		break;

	case',':
		return Binary(29, 0);
		break;

	case';':
		return Binary(30, 0);
		break;

	case'"':
		getChar();
		while (character != '"') { // 字符串(“”)
			concat();
			getChar();
			
		}
		val = _string();
		return Binary(3, String[val], val);
		break;


	default:
		return error();
	}
}


void show_table()
{
	/*
	cout << "==================" << "保留字" << "==================" << endl;
	cout << "保留字符\t类别编码" << endl;
	for (int i = 0; i < len; i++) {
		if (Reserve[i] != "") {
			if (Reserve[i].size() >= 8)
				cout << Reserve[i] << "\t" << i << endl;
			else
				cout << Reserve[i] << "\t\t" << i << endl;
		}
	}
	*/
	cout << "\n==================" << "标识符" << "==================" << endl;
	cout << "标识符\t\t类别编码\t表中位置" << endl;
	for (ite = Symbol.begin(); ite != Symbol.end(); ite++) {
		if (ite->first.size() >= 8)
			cout << ite->first << "\t1\t\t" << ite->second << endl;
		else
			cout << ite->first << "\t\t1\t\t" << ite->second << endl;
	}

	cout << "\n==================" << "常数表" << "==================" << endl;
	cout << "常量值\t\t类别编码\t表中位置" << endl;
	for (ite = Digit.begin(); ite != Digit.end(); ite++) {
		cout << ite->first << "\t\t2\t\t" << ite->second << endl;
	}

	cout << "\n=================" << "字符串表" << "==================" << endl;
	cout << "字符串值\t类别编码\t表中位置" << endl;
	for (ite = String.begin(); ite != String.end(); ite++) {
		cout << ite->first << "\t\t2\t\t" << ite->second << endl;
	}

}



int main() 
{
	init_Reserve();		//表初始化
	Symbol.clear();		//标识符集初始化
	Digit.clear();		//常数集初始化
	index = 0;
	character = ' ';
	token = "";

	//输入
	cout << "输入待词法分析的源程序代码:@代表输入结束\n" << endl;
	string in;
	while (cin >> in && in != "@") {
		in_str = in_str + " " + in;
	}
	

	//输出
	Binary word(0, 0, "_");	//识别二元组初始化
	cout << "\n------------------------识别结果------------------------" << endl;
	//循环进行词法分析直到识别所有单词符号
	while (index < in_str.size())
	{
		word = LexAnalyze();
		
		if (word.type != 0)
		{
			if (word.type == 1) {
				cout << "(" << Reserve[41] << "," <<"\""<< word.value<< "\""<< ")" << endl;
				continue;
			}
			if (word.type == 2) {
				cout << "(" << Reserve[42] << "," << word.value << ")" << endl;
				continue;
			}
			if (word.type == 3) {
				cout << "(" << Reserve[43] << "," << "\"" << word.value << "\"" << ")" << endl;
				continue;
			}
			cout << "(" << Reserve[word.type + 30] << "," << word.value << ")" << endl;
		}
			
	}

	cout << "\n------------------------词汇表展示------------------------\n" << endl;
	show_table();

	return 0;

}


// 注释的识别好像未完成?记不得了

/*
 

  void main()
{
double sum = 0.0;
double x = 1.0;
while (x <= 100) sum = sum + x;
printf("sum = %f\n", sum);
}
@

void main()
{
double sum = 0;
double x = 1;
while (x <= 100) sum = sum + x;
printf("sum = %f\n", sum);
}
@


void main()
{
	// compute 1 + 2 + … + 100 
double sum = 0.0;
double x = 1.0;
while (x <= 100) sum = sum + x;
printf("sum = %f\n", sum);
}
@





*/


/* A test C program for scanner (这个是老师给的最终测试案例,若有不能识别的标识符等,请自行添加代码)
int main() {
	double W, b;
	double Y_predicted;
	int passenger_id, survived, pclass;
	W = 0.0;
	b = 0.005;
	Y_predicted = 1;
	passenger_id = 1000L;
	survived = 505u;
	pclass = L'\10a0cc';
	if (passenger_id >= 100)
		W += 5.6372e-10;
	else
		b = 9.78f - 0.005 * W;
	if (Y_predicted < 1)
		passenger_id = 0X654E;
	else
		passenger_id = 0X054EL;
	survived = 2 ^ pclass;
	print("end");
}
@
*/




运行结果

在这里插入图片描述

  C++知识库 最新文章
【C++】友元、嵌套类、异常、RTTI、类型转换
通讯录的思路与实现(C语言)
C++PrimerPlus 第七章 函数-C++的编程模块(
Problem C: 算法9-9~9-12:平衡二叉树的基本
MSVC C++ UTF-8编程
C++进阶 多态原理
简单string类c++实现
我的年度总结
【C语言】以深厚地基筑伟岸高楼-基础篇(六
c语言常见错误合集
上一篇文章      下一篇文章      查看所有文章
加:2022-05-01 15:31:12  更:2022-05-01 15:32:02 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年11日历 -2024/11/23 22:28:45-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码