功能及代码结构
为实现编译器前端,需要对文法进行分析。该部分实现从文件中读入文法(方便修改),用合适的数据结构表示并求解各个非终结符号的First集和Follow集 仓库:https://github.com/xs1317/Complier 文件目录
- Tools.h 文法处理头文件
- Tools.cpp 文法处理代码
- WFdata 文法相关信息
- Grammar.txt 文法产生式
- First.txt 各个非终结符号的First集
- Fllow.txt 各个非终结符号的Follow集
- ProductionsId.txt 产生式及编号
数据结构表示
production类存储了产生式的编号、长度、左部、右部。其中编号主要用于语义分析阶段根据产生式序号调用语义动作。由于右部是有序的且有时需要进行随机访问,所以用vector记录。
WF类存储了所有的文法信息。终结符号Vt,非终结符号Vn,用set< string >存储。每个非终结符号的First集和Follow集,因为在使用时需要按Vn的名称进行访问,所以以map<string,set< string >>进行存储。文法的所有产生式,有两种存储方式:可以按左部访问的split_productions,可以按下表进行访问的vector,这两个结构的的内容实质上是相同的。
为了方便其他文件对文法信息的读取,声明了一个变量WFdata。
class production
{
public:
int length;
int id;
string left;
vector<string> right;
production(int i, vector<string>r, string l);
production();
string toString();
bool operator==(const production& p);
};
class WF
{
public:
set<string> Vt;
set<string> Vn;
map<string, set<string>> First;
map<string, set<string>> Follow;
map<string, list<production>> split_productions;
vector<production> productions;
string FirstOutPath = "First.txt";
string FollowOutPath = "Follow.txt";
string fileName = "Grammar.txt";
string outProductionPath = "ProductionsID.txt";
set<string> nullSymbol{ "S","statements","declaration","M","N" };
int number = 0;
WF();
void getWFinfo();
void init();
void outProductions();
bool followEqual(map<string, set<string>> oldM, map<string, set<string>> newM);
void getFirst();
void getFollow();
set<string> getSymbolsFirst(list<string> symbols);
bool isVn(string s);
bool isVt(string s);
void getOneFirst(string s);
bool nullable(string symbol);
};
extern WF wfdata;
读入文法信息
- 为了方便对输入字符串的处理,实现了一个字符串分割和一个字符串删除函数
list<string> split(string str, string pattern)
{
string::size_type pos;
list<string> result;
str += pattern;
int size = str.size();
for (int i = 0; i < size; i++)
{
pos = str.find(pattern, i);
if (pos < size)
{
string s = str.substr(i, pos - i);
if (s != "" && s != " ")
result.push_back(s);
i = pos + pattern.size() - 1;
}
}
return result;
}
string deletmember(string& s, char m) {
string::iterator it = s.begin();
for (it = s.begin(); it != s.end();)
{
if (*it == m)it = s.erase(it);
else it++;
}
return s;
}
- 下面这一段代码负责从文件中读入文法信息,用合适的数据结构表示,并记录所有的终结符号和非终结符号。
void WF::getWFinfo()
{
string line;
ifstream in(fileName);
while (getline(in, line))
{
int position = line.find("->");
if (position != -1)
{
string left = line.substr(0, position);
deletmember(left, ' ');
string right = line.substr(position + 2);
deletmember(right, '\t');
if (Vn.find(left) == Vn.end())
Vn.insert(left);
list<string> splitRight = split(right, " ");
list<string>::iterator it1;
for (it1 = splitRight.begin(); it1 != splitRight.end(); it1++)
{
string temp = *it1;
if (temp[0] == '\'' || temp == "integer" || temp == "character" || temp == "ID")
{
string tempa = deletmember(temp, '\'');
*it1 = tempa;
if (Vt.find(tempa) == Vt.end())
Vt.insert(tempa);
}
else
{
if (Vn.find(left) != Vn.end())
Vn.insert(temp);
}
}
vector<string> sr(splitRight.begin(), splitRight.end());
production P = production(number++, sr, left);
productions.push_back(P);
split_productions[left].push_back(P);
}
}
Vt.insert("!EOF!");
}
- 为了方便判断某个符号是终结符号还是非终结符号,编写了两个函数
bool WF::isVn(string s)
{
if (Vn.find(s) != Vn.end())
return true;
return false;
}
bool WF::isVt(string s)
{
if (Vt.find(s) != Vt.end())
return true;
return false;
}
求解First集
网上大部分的方法不支持求解含有左递归文法的First集求解,这里给出一个思路。nullable用于判断当前文法符号是否可以推出空,但本程序并没有实现,而是直接将文法所有可以推出空的符号记录了下来。
bool WF::nullable(string symbol)
{
if (nullSymbol.find(symbol) != nullSymbol.end())
return true;
else
return false;
}
void WF::getOneFirst(string s)
{
list<production> productions = split_productions.at(s);
for (list<production>::iterator pit = productions.begin(); pit != productions.end(); pit++)
{
production ptemp = *pit;
for (vector<string>::iterator sit = ptemp.right.begin(); sit != ptemp.right.end(); sit++)
{
string stemp = *sit;
if (isVt(stemp))
{
if (First[s].find(stemp) == First[s].end())
First[s].insert(stemp);
break;
}
else if (isVn(stemp))
{
if (stemp != s)
{
getOneFirst(stemp);
First[s].insert(First[stemp].begin(), First[stemp].end());
if (nullable(stemp))
continue;
else
break;
}
else if (stemp == s && !nullable(stemp))
{
break;
}
else if (stemp == s && nullable(stemp))
{
continue;
}
}
}
}
}
void WF::getFirst()
{
ofstream outFirst(FirstOutPath);
for (set<string>::iterator sit = Vn.begin(); sit != Vn.end(); sit++)
{
getOneFirst(*sit);
}
cout << "文法各个非终结符号的First集如下所示(其中 '#' 表示空):\r\n";
for (auto it = First.begin(); it != First.end(); it++)
{
string key = it->first;
set<string> F = it->second;
string outS = "First(" + key + ")={";
auto it2 = F.begin();
outS += " '" + *it2 + "'";
it2++;
for (; it2 != F.end(); it2++)
{
outS = outS + ", '" + *it2 + "' ";
}
outS = outS + " }\r\n";
cout << outS;
outFirst << outS;
}
}
求解Follow集
根据龙书的算法流程,求解Follow集前需要先求解First集,并且实现一串符号的First集求解,算法流程如下。
- 工具函数:getNextSymbols用于获取某个符号的所有后续串,getSymbolsFirst用于获取某一个串的First集
list<string> getNextSymbols(vector<string> src, vector<string>::iterator it, vector<string>::iterator end)
{
list<string> result;
for (; it != end; it++)
{
result.push_back(*it);
}
return result;
}
set<string> WF::getSymbolsFirst(list<string> symbols)
{
set<string> result;
if (symbols.size() == 0)
{
result.insert("#");
return result;
}
else
{
for (auto it = symbols.begin(); it != symbols.end(); it++)
{
string sym = *it;
if (isVt(sym))
{
result.insert(sym);
return result;
}
else if (isVn(sym))
{
if (First[sym].find("#") == First[sym].end())
{
result.insert(First[sym].begin(), First[sym].end());
return result;
}
else
{
for (auto itIn = First[sym].begin(); itIn != First[sym].end(); itIn++)
{
string f = *itIn;
if (f != "#")
{
result.insert(f);
}
}
auto itJudgeEnd = it;
itJudgeEnd++;
if (itJudgeEnd == symbols.end())
{
result.insert("#");
}
}
}
}
}
return result;
}
- 根据龙书算法流程,要不断修改Follow集直至Follow集不再变化,函数followEqual用于实现该过程
bool WF::followEqual(map<string, set<string>> oldM, map<string, set<string>> newM)
{
if (oldM.size() == newM.size())
{
for (auto it1 = newM.begin(); it1 != newM.end(); it1++)
{
string key = it1->first;
set<string> value = it1->second;
if (oldM.find(key) == oldM.end())
{
return false;
}
else
{
if (value.size() != oldM[key].size())
{
return false;
}
}
}
return true;
}
else
{
return false;
}
}
void WF::getFollow()
{
Follow["S"].insert("!EOF!");
map<string, set<string>> oldFollow = Follow;
do
{
oldFollow = Follow;
for (auto itPr = split_productions.begin(); itPr != split_productions.end(); itPr++)
{
string left = itPr->first;
list<production> rights = itPr->second;
for (production Pright : rights)
{
for (vector<string>::iterator itR = Pright.right.begin(); itR != Pright.right.end(); itR++)
{
string Nowsymbol = *itR;
if (isVt(Nowsymbol))
{
continue;
}
else if (isVn(Nowsymbol))
{
vector<string>::iterator itRnext = itR;
itRnext++;
if (itRnext == Pright.right.end())
{
Follow[Nowsymbol].insert(Follow[left].begin(), Follow[left].end());
}
else
{
list<string>nextSymbols = getNextSymbols(Pright.right, itRnext, Pright.right.end());
set<string>nextFirst = getSymbolsFirst(nextSymbols);
if (nextFirst.find("#") == nextFirst.end())
{
Follow[Nowsymbol].insert(nextFirst.begin(), nextFirst.end());
}
else
{
for (string nf : nextFirst)
{
if (nf != "#")
Follow[Nowsymbol].insert(nf);
}
Follow[Nowsymbol].insert(Follow[left].begin(), Follow[left].end());
}
}
}
}
}
}
} while (!followEqual(oldFollow, Follow));
ofstream outFollow(FollowOutPath);
cout << "文法各个非终结符号的Follow集如下所示(其中 '!EOF!' 表示终结符):\r\n";
for (auto it = Follow.begin(); it != Follow.end(); it++)
{
string key = it->first;
set<string> F = it->second;
string outS = "Follow(" + key + ")={";
auto it2 = F.begin();
outS += " '" + *it2 + "'";
it2++;
for (; it2 != F.end(); it2++)
{
outS = outS + ", '" + *it2 + "' ";
}
outS = outS + " }\r\n";
cout << outS;
outFollow << outS;
}
}
|