1.目标:
用C语言编写一个C语言源程序的词法分析器(标题收回)
2.要求
[1]
基本要求:识别关键字、运算符、界限符、常量(布尔型、整型)、标识符;
[2]
扩展要求:常量(浮点型)、注释、错误处理。
3.对照表
?
4.代码(废话少说)
头文件:
#include "gets.h"
是我自己无聊时写的,我经常要用的一些功能的结合体,本文除了文件读取时的getfdstr语句以外没有其他的有关该头文件的语句。
/*
 * Reads the text file `filename` and stores its characters in `data` as a
 * NUL-terminated string. Whitespace is dropped, because formatted extraction
 * with `cin >> char` skips it.
 *
 * The file is attached to stdin with freopen, so stdin stays redirected after
 * the call.
 *
 * filename: path of the file to read.
 * data:     destination buffer; at most Max bytes (including the terminator)
 *           are written, so it must be at least Max bytes long. Input beyond
 *           Max - 1 characters is silently dropped.
 * Returns the first stored character, or '\0' when the file cannot be opened
 * or holds no non-whitespace characters.
 */
char getfdstr(char filename[],char data[])
{
    int length = 0;
    char w;

    data[0] = '\0';
    if (freopen(filename,"r",stdin) == NULL)
        return '\0';

    // Write straight into the caller's buffer: no temporary heap copy to
    // size, overflow or leak.
    while (length < Max - 1 && cin >> w)
    {
        data[length] = w;
        length++;
    }
    data[length] = '\0';
    return data[0];
}
正片开始
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <conio.h>
#include <windows.h>
#include "gets.h"
#define Max 3068
using namespace std;
// Character classes returned by wordtype() (the first four values) and the
// error categories passed to error(). SYMBOLERROR is also the value
// isSymbol() returns for an unknown symbol.
enum
{
UNDERLINE = 0,// underscore
LETTER = 1,// letter
NUMBER = 2,// digit
SYMBOL = 3,// symbol
SYMBOLERROR = 4,// symbol error
IDENTIFIERERROR = 5// identifier error
};// named constants instead of bare numbers, purely for readability
// Shared scanner state. The flags are set by the scanning helpers and read
// by Getword().
char data[Max];// input text; main() fills it through getfdstr() and the scanners index into it
char letter[Max];// not referenced by the code visible in this section
bool isfloat = false;// floating-point flag: set by Number() when it consumes a '.'
bool isbool = false;// boolean flag: set by identify() when the word is a boolean literal
bool notes = false;// comment flag: set by symbolstr() on "/*" and cleared on "*/"
bool isvariable = false;// identifier flag: set by identify() when the word contains '_'
bool isiderror = false;// malformed-identifier flag: set by Number() when a letter or '_' follows digits
// Keyword tables. The three arrays are parallel: key[i] is the keyword
// spelling, keyNum[i] its category code (1..32) and keyword[i] its mnemonic.
string key[32]={"char","double","enum","float","int","long","short","signed",
"struct","union","unsigned","void","for","do","while","break","continue",
"if","else","goto","switch","case","default","return","auto","extern","register",
"static","const","sizeof","typedef","volatile"};
int keyNum[32]={1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
// The comma after "REGISTER" matters: without it the adjacent literals are
// concatenated and every later mnemonic shifts down by one slot.
string keyword[32]={"CHAR","DOUBLE","ENUM","FLOAT","INT","LONG","SHORT","SIGNED",
"STRUCT","UNION","UNSIGNED","VOID","FOR","DO","WHILE","BREAK","CONTINUE",
"IF","ELSE","GOTO","SWITCH","CASE","DEFAULT","RETURN","AUTO","EXTERN","REGISTER",
"STATIC","CONST","SIZEOF","TYPEDEF","VOLATILE"};
// keywords with their category codes and mnemonics
// Symbol tables. The three arrays are parallel: symbol[i] is the spelling,
// symbolNum[i] its category code and symbolword[i] its mnemonic. Code 65 does
// not appear here; isSymbol() and Symbolword() handle the double quote (code
// 65) separately.
string symbol[33]={"+","-","*","/","%","++","--",">","<","==",
"!=",">=","<=","&&","||","!","=","+=","-=","*=","/=","%=",
",","(",")","[","]","{","}",";","/*","*/","'"};
int symbolNum[33]={33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66};
string symbolword[33]={"PLUS","MINUS","MULTI","RDIV","MODULO","INC","DEC","GT","LT","EQ","NEQ",
"GE","LE","AND","OR","NOT","ASSIGN","PLUS_A","MINUS_A","MUL_A","DIV_A","MOD_A","COMMA",
"LR_BRAC","RR_BRAC","LS_BRAC","RS_BRAC","L_PRA","R_PRA","SEMIC","L_ANNO","R_ANNO","QMARK"};
// symbols with their category codes and mnemonics
int num;// index into data of the next character to scan; each scanner stores its end position here
int length;// strlen(data), set in main()
// Forward declarations for the functions defined after main().
//void print(string s,string z,int n);
void Getword();// tokenizes data and writes the token list to the console and output.txt
void error(FILE *fp,int type,string word);// reports a lexical error for word
string identify(char s,int n);// scans a word that starts with a letter
string Number(char s,int n);// scans a numeric literal
string symbolstr(char s,int n);// scans a one- or two-character symbol
string variable(char s,int n);// scans an identifier that starts with an underscore
string Keyword(int n);// maps a keyword category code to its mnemonic
string Symbolword(int n);// maps a symbol category code to its mnemonic
bool isNum(char s);// is the character a digit
bool isLetter(char s);// is the character a letter
bool issymbol(char s);// is the character neither a letter nor a digit
bool isBool(string s);// the table has no bool keyword but does have a boolean type, hence this extra check
int wordtype(char str);// classifies a character
int iskeyword(string s);// returns the keyword category code, or 0 if s is not a keyword
int isSymbol(string s);// returns the symbol category code, or SYMBOLERROR if s is unknown
// Entry point: loads input.txt into data, tokenizes it with Getword(), then
// waits for a key press so the console window stays open.
int main()
{
    /* A prompt plus scanf could be added here to read the file name into filename. */
    char filename[Max] = "input.txt";

    // getfdstr comes from the author's own header: it reads the file and
    // stores its contents, with the spaces removed, in data.
    getfdstr(filename,data);
    length = strlen(data);

    Getword();
    getch();
    return 0;
}
//下面就慢慢看吧,有点长懒得解释
// Writes one token record, formatted as "<lexeme> (<tag>,<code>) <kind>", to
// both the console and the output file.
static void emitToken(FILE *fp,const string &lexeme,const string &tag,int code,const char *kind)
{
    printf("%s (%s,%d) %s\n",lexeme.c_str(),tag.c_str(),code,kind);
    fprintf(fp,"%s (%s,%d) %s\n",lexeme.c_str(),tag.c_str(),code,kind);
}

/*
 * Tokenizes data[0..length) and writes one record per token to the console
 * and to output.txt.
 *
 * Each character is classified with wordtype() and handed to the matching
 * scanner (variable, identify, Number, symbolstr). The scanner returns the
 * lexeme and stores the position of the next unread character in num.
 * Tokens scanned while `notes` is set (inside a comment) are consumed but not
 * reported, apart from the comment opener itself.
 *
 * Terminates the process with a non-zero status when output.txt cannot be
 * opened.
 */
void Getword()
{
    // In symbolNum the codes 33..54 are the operators ("+" through "%=");
    // every other known symbol is reported as a delimiter.
    const int firstOperatorCode = 33;
    const int lastOperatorCode = 54;

    bool openerPrinted = false;// has the "/*" of the current comment been reported yet

    FILE *fp = fopen("output.txt","w");
    if (fp == NULL)
    {
        printf("文件打开失败!\n");
        system("pause");
        exit(1);// failure must not look like success to the caller
    }

    for (num = 0; num < length;)
    {
        const char str = data[num];
        string word;
        int code = 0;

        switch (wordtype(str))
        {
        case UNDERLINE:
            word = variable(str,num);
            if (!notes)// identifiers inside a comment are not tokens
                emitToken(fp,word,word,70,"标识符");
            break;

        case LETTER:
            word = identify(str,num);
            if (notes)
                break;
            // Check for a keyword first, regardless of isvariable:
            // identify() stops as soon as it has matched one.
            code = iskeyword(word);
            if (code != 0)
                emitToken(fp,word,Keyword(code),code,"关键字");
            else if (!word.compare("bool"))
                emitToken(fp,word,"BOOL",67,"关键字");
            else if (isbool)
                emitToken(fp,word,"CONST_BOOL",67,"布尔型");
            else if (isvariable)
                // NOTE(review): words containing '_' are tagged "IDE" while
                // other identifiers repeat the lexeme as the tag; confirm
                // which form the token table expects.
                emitToken(fp,word,"IDE",70,"标识符");
            else
                emitToken(fp,word,word,70,"标识符");
            break;

        case NUMBER:
            word = Number(str,num);
            if (notes)
                break;
            if (isiderror)
                error(fp,IDENTIFIERERROR,word);
            else if (isfloat)
                emitToken(fp,word,"CONST _FLOAT",69,"浮点型");
            else
                emitToken(fp,word,"CONST _INT",68,"整型");
            break;

        case SYMBOL:
            word = symbolstr(str,num);
            code = isSymbol(word);
            if (notes)
            {
                // The first symbol seen with notes set is the "/*" that
                // opened the comment; everything after it is skipped.
                if (!openerPrinted)
                {
                    emitToken(fp,word,Symbolword(code),code,"界限符");
                    openerPrinted = true;
                }
                break;
            }
            if (code == SYMBOLERROR)
            {
                error(fp,SYMBOLERROR,word);
                break;
            }
            if (code >= firstOperatorCode && code <= lastOperatorCode)
            {
                emitToken(fp,word,Symbolword(code),code,"运算符");
                break;
            }
            if (!word.compare("*/"))
            {
                printf("—————内容被注释—————\n");
                fprintf(fp,"—————内容被注释—————\n");
                openerPrinted = false;
            }
            emitToken(fp,word,Symbolword(code),code,"界限符");
            break;
        }

        // These flags describe only the token just scanned. Clearing them
        // here keeps one token (including one skipped inside a comment) from
        // changing how the next one is scanned or reported.
        isvariable = false;
        isbool = false;
        isfloat = false;
        isiderror = false;
    }
    fclose(fp);
}
// Classifies a character as LETTER (a-z, A-Z), NUMBER (0-9), UNDERLINE ('_')
// or, for anything else, SYMBOL.
int wordtype(char str)
{
    const bool lower = str >= 'a' && str <= 'z';
    const bool upper = str >= 'A' && str <= 'Z';
    const bool digit = str >= '0' && str <= '9';

    if (lower || upper)
        return LETTER;
    if (digit)
        return NUMBER;
    return str == '_' ? UNDERLINE : SYMBOL;
}
string identify(char s,int n)
{
int j = n+1;
int flag = 1;
string temp1(sizeof(s),s);
while(flag)
{
if(!isvariable)
{
if (!isNum(data[j])&&isLetter(data[j])&&!issymbol(data[j]))
{
string temp2(sizeof(data[j]),data[j]);
temp1.append(temp2);
if (iskeyword(temp1))
{
j++;
num = j;
return temp1;
}
else if(isBool(temp1))
{
j++;
num = j;
isbool = true;
return temp1;
}
else if(!temp1.compare("bool"))
{
j++;
num = j;
return temp1;
}
j++;
}
else if (data[j] == '_')
{
string temp2(sizeof(data[j]),data[j]);
temp1.append(temp2);
j++;
num = j;
isvariable = true;
}
else
{
flag = 0;
}
}
else
{
if (((!isNum(data[j])||!isLetter(data[j]))&&!issymbol(data[j]))||data[j]=='_')
{
string temp2(sizeof(data[j]),data[j]);
temp1.append(temp2);
j++;
num = j;
}
else
{
return temp1;
}
}
}
num = j;
return temp1;
}
// Returns true when s is one of the characters '0'..'9'.
bool isNum(char s)
{
    return s >= '0' && s <= '9';
}
// Returns true when s is an ASCII letter (a-z or A-Z).
bool isLetter(char s)
{
    const bool lower = s >= 'a' && s <= 'z';
    const bool upper = s >= 'A' && s <= 'Z';
    return lower || upper;
}
// Looks s up in the key table and returns the matching category code from
// keyNum, or 0 when s is not a keyword.
int iskeyword(string s)
{
    int idx = 0;
    while (idx < 32)
    {
        if (key[idx] == s)
            return keyNum[idx];
        ++idx;
    }
    return 0;
}
/*
 * Scans the numeric literal that starts with digit `s` at data[n] and returns
 * its text; num is set to the index of the first character after it.
 *
 * Digits are always accepted. The first '.' is accepted and sets isfloat.
 * A letter or '_' is accepted too, but sets isiderror; from then on every
 * letter, digit and underscore is swallowed, so the whole malformed word is
 * returned as one lexeme.
 */
string Number(char s,int n)
{
    string text(1,s);
    bool seenDot = false;
    int pos = n + 1;

    for (;;)
    {
        const char c = data[pos];
        bool accept = false;

        if (isiderror)
        {
            // Already malformed: take anything that can belong to a word.
            accept = !issymbol(c) || c == '_';
        }
        else if (isNum(c))
        {
            accept = true;
        }
        else if (c == '.' && !seenDot)
        {
            seenDot = true;
            isfloat = true;
            accept = true;
        }
        else if (isLetter(c) || c == '_')
        {
            isiderror = true;
            accept = true;
        }

        if (!accept)
            break;
        text.push_back(c);
        ++pos;
    }

    num = pos;
    return text;
}
string symbolstr(char s,int n)
{
int j = n+1;
string str(sizeof(data[j]),data[j]);
string temp(sizeof(s),s);
if(!temp.compare(">")||!temp.compare("<")||!temp.compare("=")||!temp.compare("+")||!temp.compare("-")||
!temp.compare("!")|!temp.compare("*")||!temp.compare("/")||!temp.compare("%"))
{
if(!str.compare("="))
{
string temp2(sizeof(data[j]),data[j]);
temp.append(temp2);
j++;
}
}
if(!temp.compare("+"))
{
if(!str.compare("+"))
{
string temp2(sizeof(data[j]),data[j]);
temp.append(temp2);
j++;
}
}
if(!temp.compare("-"))
{
if(!str.compare("-"))
{
string temp2(sizeof(data[j]),data[j]);
temp.append(temp2);
j++;
}
}
if(!temp.compare("&"))
{
if(!str.compare("&"))
{
string temp2(sizeof(data[j]),data[j]);
temp.append(temp2);
j++;
}
}
if(!temp.compare("|"))
{
if(!str.compare("|"))
{
string temp2(sizeof(data[j]),data[j]);
temp.append(temp2);
j++;
}
}
if(!temp.compare("/"))
{
if(!str.compare("*"))
{
string temp2(sizeof(data[j]),data[j]);
temp.append(temp2);
j++;
notes = true;
}
}
if(!temp.compare("*"))
{
if(!str.compare("/"))
{
string temp2(sizeof(data[j]),data[j]);
temp.append(temp2);
j++;
notes = false;
}
}
num=j;
return temp;
}
// Returns the category code of symbol s: 65 for a double quote, the matching
// symbolNum entry for anything listed in the symbol table, and SYMBOLERROR
// otherwise.
int isSymbol(string s)
{
    if (s == "\"")
        return 65;

    int idx = 0;
    while (idx < 33 && s != symbol[idx])
        ++idx;

    if (idx < 33)
        return symbolNum[idx];
    return SYMBOLERROR;
}
// Returns true when s is neither an ASCII letter nor a digit. This includes
// '_' and the NUL terminator.
bool issymbol(char s)
{
    const bool lower = s >= 'a' && s <= 'z';
    const bool upper = s >= 'A' && s <= 'Z';
    const bool digit = s >= '0' && s <= '9';
    return !(lower || upper || digit);
}
// Returns the mnemonic for keyword category code n (1..32), or an empty
// string when n is outside that range.
string Keyword(int n)
{
    // Codes start at 1; accepting 0 would read keyword[-1].
    if (n >= 1 && n <= 32)
        return keyword[n - 1];
    return string();
}
// Returns the mnemonic for symbol category code n: a double-quote character
// for 65, the matching symbolword entry for 33..64 and 66, and an empty
// string for any other code.
string Symbolword(int n)
{
    if (n == 65)
        return string(1,'"');
    if (n >= 33 && n < 65)
        return symbolword[n - 33];
    // Code 66 is the last table entry. The table skips code 65, so its index
    // is 32, not 33.
    if (n == 66)
        return symbolword[32];
    return string();
}
// Returns true when s is exactly one of the boolean literals "true", "false",
// "TRUE" or "FALSE".
bool isBool(string s)
{
    static const char *const literals[] = {"true","false","TRUE","FALSE"};
    for (int i = 0; i < 4; ++i)
    {
        if (s == literals[i])
            return true;
    }
    return false;
}
// Scans the identifier that starts with `s` at data[n]: every following
// letter, digit or underscore is appended. Returns the identifier and stores
// the index of the first character after it in num.
string variable(char s,int n)
{
    string name(1,s);
    int pos = n + 1;

    for (;; ++pos)
    {
        const char c = data[pos];
        const bool wordChar = !issymbol(c) || c == '_';
        if (!wordChar)
            break;
        name.push_back(c);
    }

    num = pos;
    return name;
}
// Reports a lexical error for `word` on the console and in the file fp.
// `type` selects the message: SYMBOLERROR or IDENTIFIERERROR. Any other value
// prints nothing.
void error(FILE *fp,int type,string word)
{
    const char *message = NULL;
    if (type == SYMBOLERROR)
        message = "ERROR! ERRORTYPE:SymbolError! %s\n";
    else if (type == IDENTIFIERERROR)
        message = "ERROR! ERRORTYPE:IdentifierError! %s\n";

    if (message == NULL)
        return;

    printf(message,word.c_str());
    fprintf(fp,message,word.c_str());
}
5.结果截图
输入:
控制台输出:
文件输出:
?
?
到这功能就差不多完成了,C语言我也没学多久,所以可能有些地方有疏漏,希望大家多多指正。
?
参考文章:
(13条消息) 词法分析器(分析C语言)_flamingobaby的博客-CSDN博客_词法分析c语言
|