I am working my way through two compiler textbooks: “Design of Compilers: Techniques of Programming Language Translation” by Karen A. Lemone and “Modern Compiler Implementation in Java” by Andrew W. Appel. My first exercise is a parser that processes assignment statements one line at a time.
Here are my source code and my translation structures:
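Source code: X1:=a+bb*12; X2:=a/2+bb*12;
Translation structures for the first statement: Identifiers: X1 a bb Literals: 12 Operators: := + * Punctuation: ;
Translation structures for the second statement: Identifiers: X2 a bb Literals: 2 12 Operators: := / + * Punctuation: ;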
#pragma once
#include "RegularExpAssignStm.h"
#include <string>
#include <vector>
using namespace std;
/// Token tables, token stores and scanning helpers for assignment statements
/// of the form `X1:=a+bb*12;`.
///
/// The scanning functions append every token they recognise to the public
/// vectors declared below, in the order the tokens are encountered.
class RegularExpAssignStm
{
public:
    // ---- Fixed lookup tables -------------------------------------------

    /// Punctuation tokens; Parse() consults entry 0 (";").
    std::string punctuation[3]{ ";", "(", ")" };

    /// Upper-case letters accepted by IdContains().
    std::string upperCase{ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" };

    /// Lower-case letters accepted by IdContains().
    std::string lowerCase{ "abcdefghijklmnopqrstuvwxyz" };

    /// Decimal digits: accepted by IdContains(), rejected as the first
    /// character of an identifier, and used to detect literals.
    std::string dig{ "0123456789" };

    /// Operator tokens. Entries 0-3 are the arithmetic operators and
    /// entry 4 is the assignment operator used by Parse().
    std::string ops[5]{ "+", "-", "*", "/", ":=" };

    // ---- Tokens collected while scanning ---------------------------------

    std::vector<std::string> identifier;  ///< Filled by GetIdentifier().
    std::vector<std::string> liter;       ///< Filled by GetLiteral().
    std::vector<std::string> oper;        ///< Filled by Parse().
    std::vector<std::string> punc;        ///< Filled by Parse().

    /// Creates a scanner with the default tables and empty token stores.
    RegularExpAssignStm() {}

    /// True when `key` is a letter or digit from the tables above.
    bool IdContains(char key);

    /// Looks up the single character `key[pos]` among the characters of
    /// `match`; returns its index in `match` or 4294967295 when absent.
    size_t Search(size_t pos, std::string key, std::string match);

    /// Looks up `key` among the characters of `match[index]`; returns its
    /// index in that string or 4294967295 when absent.
    size_t SingleCharSearch(char key, size_t index, std::string match[]);

    /// Tries to read an identifier from the front of `assignStm` and, on
    /// success, appends it to `identifier`.
    bool GetIdentifier(std::string assignStm);

    /// Tries to read a numeric literal from the front of `assignStm` and
    /// stores it in `liter`.
    bool GetLiteral(std::string assignStm);

    /// Scans one assignment statement, collecting its tokens.
    bool Parse(std::string assignStm);
};
#include "pch.h"
#include "RegularExpAssignStm.h"
#include <functional>
#include <iostream>
#include <string>
using namespace std;
bool RegularExpAssignStm::IdContains(char key)
{
bool dg = false, uc = false, lc = false;
for (size_t i = 0; !uc && i < upperCase.size(); i++)
uc = key == upperCase[i];
if (uc)
return true;
for (size_t i = 0; !lc && i < lowerCase.size(); i++)
lc = key == lowerCase[i];
if (lc)
return true;
for (size_t i = 0; !dg && i < dig.size(); i++)
dg = key == dig[i];
if (dg)
return true;
return false;
}
/// Looks up the single character `key[pos]` among the characters of `match`.
///
/// Note that this is a character-set membership test, not a substring
/// search: only one character of `key` is examined.
///
/// @param pos    Index into `key` of the character to look up.
/// @param key    Text that supplies the character.
/// @param match  Set of characters to search.
/// @return Index within `match` of the first occurrence of `key[pos]`, or
///         4294967295 when the character is absent or `pos` is not a valid
///         index into `key`.
size_t RegularExpAssignStm::Search(size_t pos, string key, string match)
{
    // Existing callers compare the result against this literal value, so it
    // is kept as-is rather than replaced with string::npos (which differs
    // on platforms where size_t is wider than 32 bits).
    const size_t notFound = 4294967295;

    // Guard the subscript: reading past the end of `key` is undefined.
    if (pos >= key.size())
        return notFound;

    const size_t hit = match.find(key[pos]);
    return hit == string::npos ? notFound : hit;
}
/// Looks up `key` among the characters of one string in a string array.
///
/// @param key    Character to find.
/// @param index  Which element of `match` to search.
/// @param match  Array of strings; the caller guarantees `index` is valid.
/// @return Position of the first occurrence of `key` inside `match[index]`,
///         or 4294967295 when that string does not contain `key`.
size_t RegularExpAssignStm::SingleCharSearch(
    char key, size_t index, string match[])
{
    const string& candidates = match[index];
    const size_t where = candidates.find(key);

    // Translate the library's "absent" marker into the value callers test for.
    if (where == string::npos)
        return 4294967295;
    return where;
}
/// Tries to read an identifier from the front of `assignStm`.
///
/// An identifier is the longest leading run of characters accepted by
/// IdContains(), provided the first character is not one of `dig`.
///
/// @param assignStm  Remaining statement text, starting at the candidate.
/// @return true when a non-empty identifier was found; in that case it has
///         been appended to `identifier`. Nothing is stored on failure.
bool RegularExpAssignStm::GetIdentifier(string assignStm)
{
    // Identifiers may not begin with a digit.
    if (dig.find(assignStm[0]) != string::npos)
        return false;

    // Measure the leading run of identifier characters.
    size_t length = 0;
    while (length < assignStm.size() && IdContains(assignStm[length]))
        ++length;

    if (length == 0)
        return false;

    identifier.push_back(assignStm.substr(0, length));
    return true;
}
bool RegularExpAssignStm::GetLiteral(string assignStm)
{
bool start = false;
string litStr;
for (size_t i = 0; !start && i < assignStm.size(); i++)
{
if (assignStm[0] == dig[i])
start = true;
}
if (start)
{
liter.push_back("");
for (size_t i = 0; i < assignStm.size(); i++)
{
if (assignStm[i] >= '0' && assignStm[i] <= '9')
liter[liter.size() - 1].push_back(assignStm[i]);
else
return liter.size() > 0;
}
}
return false;
}
bool RegularExpAssignStm::Parse(string assignStm)
{
if (GetIdentifier(assignStm))
assignStm.erase(0, identifier[identifier.size() - 1].size());
else
return false;
size_t assignOpPos = Search(0, assignStm, ops[4]);
if (assignOpPos != 4294967295)
{
assignStm.erase(0, ops[4].size());
oper.push_back(ops[4]);
}
else
return false;
while (true)
{
if (GetLiteral(assignStm))
{
assignStm.erase(0, liter[liter.size() - 1].size());
if (assignStm.size() <= 0)
return false;
}
else if (GetIdentifier(assignStm) &&
identifier[identifier.size() - 1].size() != 0)
{
assignStm.erase(0, identifier[identifier.size() - 1].size());
if (assignStm.size() <= 0)
return false;
}
size_t plusPos, minusPos, timesPos, divPos;
plusPos = SingleCharSearch(assignStm[0], 0, ops);
minusPos = SingleCharSearch(assignStm[0], 1, ops);
timesPos = SingleCharSearch(assignStm[0], 2, ops);
divPos = SingleCharSearch(assignStm[0], 3, ops);
if (plusPos != 4294967295)
{
oper.push_back(ops[0]);
assignStm.erase(0, 1);
}
else if (minusPos != 4294967295)
{
oper.push_back(ops[1]);
assignStm.erase(0, 1);
}
else if (timesPos != 4294967295)
{
oper.push_back(ops[2]);
assignStm.erase(0, 1);
}
else if (divPos != 4294967295)
{
oper.push_back(ops[3]);
assignStm.erase(0, 1);
}
else
return false;
if (assignStm.size() <= 0)
return false;
if (GetLiteral(assignStm))
{
assignStm.erase(0, liter[liter.size() - 1].size());
if (assignStm.size() <= 0)
return false;
}
else if (GetIdentifier(assignStm) &&
identifier[identifier.size() - 1].size() != 0)
{
assignStm.erase(0, identifier[identifier.size() - 1].size());
if (assignStm.size() <= 0)
return false;
}
size_t puns = SingleCharSearch(assignStm[0], 0, punctuation);
if (puns != 4294967295)
{
punc.push_back(punctuation[puns]);
assignStm.erase(0, punc[punc.size() - 1].size());
if (assignStm.size() <= 0)
return false;
}
}
return true;
}