An Assignment Statement Syntactic Scanner for a Language resembling ADA or Pascal Implemented by James Pate Williams, Jr.

I am working my way through two compiler textbooks: “Design of Compilers: Techniques of Programming Language Translation” by Karen A. Lemone and “Modern Compiler Implementation in Java” by Andrew W. Appel. My first exercise is a parser that processes assignment statements one line at a time.

Here are my source code and my translation structures:

X1:=a+bb*12;
X2:=a/2+bb*12;

Identifiers:
X1
a
bb
Literals:
12
Operators:
:=
+
*
Punctuation:
;

Identifiers:
X2
a
bb
Literals:
2
12
Operators:
:=
/
+
*
Punctuation:
;
#pragma once
#include "RegularExpAssignStm.h"
#include <string>
#include <vector>
using namespace std;

// Token scanner for one-line assignment statements such as "X1:=a+bb*12;".
// The scanning methods append every token they recognise to one of the four
// public vectors (identifier, liter, oper, punc) in the order encountered.
class RegularExpAssignStm
{
public:
	// Punctuation marks known to the scanner.
	string punctuation[3] = { ";", "(", ")" };

	// Character classes from which identifiers and literals are built.
	string upperCase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	string lowerCase = "abcdefghijklmnopqrstuvwxyz";
	string dig = "0123456789";

	// Operators; the assignment operator ":=" is stored last (index 4).
	string ops[5] = { "+", "-", "*", "/", ":=" };

	// Tokens collected so far, grouped by category.
	vector<string> identifier;
	vector<string> liter;
	vector<string> oper;
	vector<string> punc;

	// Starts with the default tables above and empty token vectors.
	RegularExpAssignStm() {}

	// True when key is an upper-case letter, a lower-case letter or a digit.
	bool IdContains(char key);

	// Looks up the character key[pos] inside match and returns its index
	// there, or 4294967295 when it does not occur.
	size_t Search(size_t pos, string key, string match);

	// Looks up key inside match[index] and returns its index within that
	// string, or 4294967295 when it does not occur.
	size_t SingleCharSearch(char key, size_t index, string match[]);

	// Reads an identifier from the front of assignStm and, when one is
	// present, appends it to identifier.
	bool GetIdentifier(string assignStm);

	// Reads a digit-only literal from the front of assignStm and appends it
	// to liter.
	bool GetLiteral(string assignStm);

	// Scans a complete assignment statement, filling the token vectors.
	bool Parse(string assignStm);
};

#include "pch.h"
#include "RegularExpAssignStm.h"
#include <functional>
#include <iostream>
#include <string>
using namespace std;

bool RegularExpAssignStm::IdContains(char key)
{
	bool dg = false, uc = false, lc = false;

	for (size_t i = 0; !uc && i < upperCase.size(); i++)
		uc = key == upperCase[i];

	if (uc)
		return true;

	for (size_t i = 0; !lc && i < lowerCase.size(); i++)
		lc = key == lowerCase[i];

	if (lc)
		return true;

	for (size_t i = 0; !dg && i < dig.size(); i++)
		dg = key == dig[i];

	if (dg)
		return true;

	return false;
}

size_t RegularExpAssignStm::Search(size_t pos, string key, string match)
{
	bool found = false;
	size_t i;

	for (i = 0; !found && i < match.size(); i++)
		found = key[pos] == match[i];

	if (!found)
		i = 4294967295;
	else
		i--;

	return i;
}

size_t RegularExpAssignStm::SingleCharSearch(
	char key, size_t index, string match[])
{
	bool found = false;
	size_t i;

	for (i = 0; !found && i < match[index].size(); i++)
		found = key == match[index].c_str()[i];

	if (!found)
		i = 4294967295;
	else
		i--;

	return i;
}

// Reads an identifier from the front of assignStm.
//
// An identifier is the longest leading run of characters accepted by
// IdContains, and it must not begin with a digit. When one is found it is
// appended to the identifier vector.
//
// Returns true when an identifier was recorded, false otherwise.
bool RegularExpAssignStm::GetIdentifier(string assignStm)
{
	// A leading digit rules the text out as an identifier.
	if (dig.find(assignStm[0]) != string::npos)
		return false;

	// Measure the leading run of identifier characters.
	size_t length = 0;

	while (length < assignStm.size() && IdContains(assignStm[length]))
		++length;

	if (length == 0)
		return false;

	identifier.push_back(assignStm.substr(0, length));
	return true;
}

// Reads an unsigned integer literal from the front of assignStm.
//
// A literal is the longest leading run of characters found in dig. When at
// least one digit is present the literal is appended to the liter vector.
//
// Returns true when a literal was recorded, false when assignStm does not
// start with a digit (in which case liter is left untouched).
//
// Fixes relative to the earlier version:
//  - the start-digit test walked dig using assignStm.size() as its bound, so
//    short inputs such as "9;" were not recognised and long inputs indexed
//    past the end of dig;
//  - a literal reaching the end of the input was stored in liter but the
//    function still reported failure.
bool RegularExpAssignStm::GetLiteral(string assignStm)
{
	// Measure the leading run of digits.
	size_t length = 0;

	while (length < assignStm.size() &&
		dig.find(assignStm[length]) != string::npos)
		++length;

	if (length == 0)
		return false;

	liter.push_back(assignStm.substr(0, length));
	return true;
}

// Scans one assignment statement of the form
//
//     identifier ":=" operand { ("+" | "-" | "*" | "/") operand } ";"
//
// where an operand is an integer literal or an identifier. No whitespace is
// skipped. Every recognised token is appended, in order of appearance, to
// identifier, liter, oper or punc; tokens recorded before a failure is
// detected stay in those vectors.
//
// Returns true when the whole of assignStm matches the form above and
// nothing follows the terminating ";", false otherwise.
//
// Fixes relative to the earlier version:
//  - success was never reported: every exit inside the loop returned false,
//    including the one taken after the final ";";
//  - the ":=" test only looked at whether the first character was ':' or
//    '=' and then discarded two characters regardless;
//  - a statement without any binary operator (e.g. "X:=a;") was rejected;
//  - a missing operand was silently skipped.
//
// NOTE: "(" and ")" are listed in punctuation but are not recognised here;
// only the terminator punctuation[0] is.
bool RegularExpAssignStm::Parse(string assignStm)
{
	// Value SingleCharSearch returns when the character is not found.
	const size_t notFound = 4294967295;

	// Consumes one operand (literal first, then identifier) from the front
	// of assignStm; the Get* helpers record the token themselves.
	auto takeOperand = [this, &assignStm]() -> bool
	{
		if (GetLiteral(assignStm))
			assignStm.erase(0, liter.back().size());
		else if (GetIdentifier(assignStm))
			assignStm.erase(0, identifier.back().size());
		else
			return false;

		return true;
	};

	// Left-hand side: the assignment target.
	if (!GetIdentifier(assignStm))
		return false;

	assignStm.erase(0, identifier.back().size());

	// The complete ":=" must follow, not just one of its characters.
	const string& assignOp = ops[4];

	if (assignStm.compare(0, assignOp.size(), assignOp) != 0)
		return false;

	assignStm.erase(0, assignOp.size());
	oper.push_back(assignOp);

	// Right-hand side: first operand.
	if (!takeOperand())
		return false;

	// Then any number of "operator operand" pairs up to the terminator.
	while (!assignStm.empty())
	{
		const char next = assignStm[0];

		if (SingleCharSearch(next, 0, punctuation) != notFound)
		{
			punc.push_back(punctuation[0]);
			assignStm.erase(0, punctuation[0].size());

			// Anything after the terminator makes the statement invalid.
			return assignStm.empty();
		}

		// ops[0..3] are the single-character binary operators.
		bool isOperator = false;

		for (size_t k = 0; !isOperator && k < 4; k++)
		{
			if (SingleCharSearch(next, k, ops) != notFound)
			{
				oper.push_back(ops[k]);
				isOperator = true;
			}
		}

		if (!isOperator)
			return false;

		assignStm.erase(0, 1);

		if (!takeOperand())
			return false;
	}

	// Input ended before the terminating ";" was seen.
	return false;
}
Unknown's avatar

Author: jamespatewilliamsjr

My whole legal name is James Pate Williams, Jr. I was born in LaGrange, Georgia, approximately 70 years ago. I barely graduated from LaGrange High School with low marks in June 1971. Later in June 1979, I graduated from LaGrange College with a Bachelor of Arts in Chemistry with a little over a 3 out of 4 Grade Point Average (GPA). In the Spring Quarter of 1978, I taught myself how to program a Texas Instruments desktop programmable calculator and in the Summer Quarter of 1978 I taught myself Dayton BASIC (Beginner's All-purpose Symbolic Instruction Code) on LaGrange College's Data General Eclipse minicomputer. I took courses in BASIC in the Fall Quarter of 1978 and FORTRAN IV (Formula Translator IV) in the Winter Quarter of 1979. Professor Kenneth Cooper, a genius poly-scientist, taught me a course in the Intel 8085 microprocessor architecture and assembly and machine language. We would hand assemble our programs and insert the resulting machine code into our crude wooden box computer which was designed and built by Professor Cooper. From 1990 to 1994 I earned a Bachelor of Science in Computer Science from LaGrange College. I had a 4 out of 4 GPA in the period 1990 to 1994. I took courses in C, COBOL, and Pascal during my BS work. After graduating from LaGrange College a second time in May 1994, I taught myself C++. In December 1995, I started using the Internet and taught myself client-server programming. I created a website in 1997 which had C and C# implementations of algorithms from the "Handbook of Applied Cryptography" by Alfred J. Menezes, et al., and some other cryptography and number theory textbooks and treatises.

Leave a comment