// WUST, Mr. Xiong's Compiler Principles course, Lab 1: lexical analysis

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
// Reserved words of the language. The entries are in ascending strcmp order,
// which the binary search in getsym relies on.
char baseword[13][14] = {"begin", "call", "const", "do", "end", "if", "odd", "procedure", "read", "then", "var", "while", "write"};
// Extra lab-specific words. In the visible code only entry [2] is compared
// against (in the number branch of getsym).
char baseword2[4][14] = {"学号", "姓名", "202013407379", "易助平"};
char str[1000]; // Input chunk filled by scanf in main
int strIndex; // Index in str of the next character getch will read
char ch; // Character most recently read by getch; 0 at the end of str
char temp[20]; // Scratch buffer holding the text of the token being scanned
int tempIndex; // Number of characters currently stored in temp
int baselenghth = 13; // Number of entries in baseword (upper bound for the search)
int resultType[1000];  // Category of each recorded token, as an index into type
char result[1000][20]; // Text of each recorded token
// Printable category names, indexed by resultType values:
// 0 keyword, 1 operator, 2 identifier, 3 delimiter, 4 number, 5 illegal.
char type[6][10] = {"基本字", "运算符", "标识符", "界符", "数字", "非法"};
bool endflag = false; // Set once a '.' token is seen; main stops reading input
int num = 0;          // Number of tokens recorded so far
// Forward declarations: both functions are defined after main.
void getsym();
void getch();
/* Prints a header line, then one "( word  category )" row for each of the
   num recorded tokens, looking the category name up in type[]. */
void print()
{
    printf("\t\t\t单词\t\t\t\t类型\t\t\t\t\n");

    int row = 0;
    while (row < num)
    {
        const char *word = result[row];
        const char *category = type[resultType[row]];
        printf("(\t\t\t%s\t\t\t\t%s\t\t\t)\n", word, category);
        row++;
    }
}
/* Reports whether byte c lies above the 7-bit ASCII range (0x80..0xFF).
   getsym uses this to detect the bytes of multi-byte (non-ASCII) text. */
bool check(unsigned char c)
{
    const unsigned char asciiMax = 0x7F;

    if (c > asciiMax)
        return true;
    return false;
}
/*
 * Entry point. Reads the program text one whitespace-delimited chunk at a
 * time, runs getsym over each chunk until its terminating NUL is reached,
 * and stops early once endflag has been set. Finally prints the token table.
 *
 * Returns 0.
 */
int main()
{
    printf("输入plo程序:\n");
    /* The field width (999) leaves room for the NUL in str[1000], so an
       over-long chunk is split across reads instead of overflowing str.
       Comparing against 1 stops on EOF and on any failed conversion. */
    while (scanf("%999s", str) == 1)
    {
        /* %s stops at whitespace, so str holds exactly one chunk. */
        strIndex = 0;
        getch();
        while (ch)
        {
            getsym();
        }
        if (endflag)
            break;
    }
    printf("-----------________词法分析结果(丐版)__________------------\n");

    print();

    return 0;
}
/* Loads the character at position strIndex of str into ch, then advances
   strIndex by one. */
void getch()
{
    const int pos = strIndex;

    strIndex = pos + 1;
    ch = str[pos];
}
void getsym()
{
    if (isalpha(ch))
    {
        tempIndex = 0;
        do
        {
            temp[tempIndex++] = ch;
            getch();
        } while (isalpha(ch) || isdigit(ch));
        //字母开头
        temp[tempIndex] = 0;
        strcpy(result[num], temp);

        /* 二分查找字符 */
        int i = 0;               //起始
        int j = baselenghth - 1; //终
        int k;
        do
        {
            k = ( i + j) / 2;
            if (strcmp(temp, baseword[k]) <= 0)
                j = k - 1;
            if (strcmp(temp, baseword[k]) >= 0)
                i = k + 1;

        } while (i <= j);
        if (i - 1 > j)
            resultType[num++] = 0; //基本字
        else
            resultType[num++] = 2; //标识符
    }
    else if (ch == '.')
    {
        result[num][0] = ch;
        result[num][1] = 0;
        resultType[num++] = 3; //界符
        endflag = 1;
        getch();
    }
    else if (isdigit(ch))
    {
        bool legalflag = 1;
        tempIndex = 0;
        do
        {
            temp[tempIndex++] = ch;
            getch();
        } while (isdigit(ch));
        if (isalpha(ch))
        {
            do
            {
                legalflag = 0;
                temp[tempIndex++] = ch;
                getch();
            } while (isalpha(ch));
        }

        temp[tempIndex] = 0;
        strcpy(result[num], temp);
        if (legalflag)
        {
            if (strcmp(temp, baseword2[2]) == 0)
                resultType[num++] = 0; //学号，基本字
            else
                resultType[num++] = 4; //数字
        }
        else
        {
            resultType[num++] = 5;
        }
    }
    else if (ch == '>' || ch == '<')
    {
        result[num][0] = ch;
        result[num][1] = 0;
        getch();
        if (ch == '=')
        {
            result[num][1] = ch;
            result[num][2] = 0;
        }
        resultType[num++] = 1; //运算符
    }
    else if (ch == '=')
    {
        result[num][0] = ch;
        result[num][1] = 0;

        resultType[num++] = 1;
        getch();
    }
    else if (ch == '*' || ch == '-' || ch == '+' || ch == '/' || ch == '#') //
    {
        result[num][0] = ch;
        result[num][1] = 0;
        resultType[num++] = 1; //运算符
        getch();
    }
    else if (ch == ':')
    {
        result[num][0] = ch;
        getch();
        if (ch == '=')
        {
            result[num][1] = '=';
            result[num][2] = 0;
            resultType[num++] = 1; //运算符
            getch();
        }
        else
        {
            result[num][1] = 0;
            resultType[num++] = 5; //非法
        }
        // getch();
    }
    else if (ch == '(' || ch == ')' || ch == ',' || ch == ';')
    {

        result[num][0] = ch;
        result[num][1] = 0;
        resultType[num++] = 3; //界符
        getch();
    }

    else if (check(ch))
    {
        int chineseL = 0;

        do
        {
            result[num][chineseL++] = ch;
            getch();
        } while (check(ch));
        result[num][chineseL] = 0;
        resultType[num++] = 0;
        // getch();
    }
}