An Assignment Statement Syntactic Scanner for a Language resembling ADA or Pascal Implemented by James Pate Williams, Jr.

I am working my way through two compiler textbooks: “Design of Compilers: Techniques of Programming Language Translation” by Karen A. Lemone and “Modern Compiler Implementation in Java” by Andrew W. Appel. My first exercise is a parser that scans assignment statements one line at a time.

Here are my source code and my translation structures:

X1:=a+bb*12;
X2:=a/2+bb*12;

Identifiers:
X1
a
bb
Literals:
12
Operators:
:=
+
*
Punctuation:
;

Identifiers:
X2
a
bb
Literals:
2
12
Operators:
:=
/
+
*
Punctuation:
;
#pragma once
#include "RegularExpAssignStm.h"
#include <string>
#include <vector>
using namespace std;

// Scanner for a single assignment statement such as "X1:=a+bb*12;".
// The tokens it recognises are appended to the public vectors below so the
// caller can inspect them after calling Parse.
class RegularExpAssignStm
{
public:
	// Punctuation symbols known to the scanner.
	string punctuation[3] = { ";", "(", ")" };
	// Character sets that IdContains accepts as identifier characters.
	string upperCase =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	string lowerCase =
		"abcdefghijklmnopqrstuvwxyz";
	string dig = "0123456789";
	// Operator tokens; Parse treats entries 0-3 as the arithmetic operators
	// and entry 4 as the assignment operator.
	string ops[5] = { "+", "-", "*", "/", ":="};
	// Tokens collected while scanning, in the order they were found.
	vector<string> identifier;
	vector<string> liter;
	vector<string> oper;
	vector<string> punc;

	RegularExpAssignStm() {	};
	// True when key occurs in upperCase, lowerCase or dig.
	bool IdContains(char key);
	// Index within match of the character key[pos], or 4294967295 when
	// that character does not occur in match.
	size_t Search(size_t pos, string key, string match);
	// Index of key within the string match[index], or 4294967295 when
	// key does not occur in that string.
	size_t SingleCharSearch(char key, size_t index, string match[]);
	// Tries to read an identifier from the front of assignStm and, on
	// success, appends it to the identifier vector.
	bool GetIdentifier(string assignStm);
	// Tries to read a run of decimal digits from the front of assignStm
	// into the liter vector.
	bool GetLiteral(string assignStm);
	// Scans a whole assignment statement, filling identifier, liter, oper
	// and punc along the way.
	bool Parse(string assignStm);
};

#include "pch.h"
#include "RegularExpAssignStm.h"
#include <functional>
#include <iostream>
#include <string>
using namespace std;

bool RegularExpAssignStm::IdContains(char key)
{
	bool dg = false, uc = false, lc = false;

	for (size_t i = 0; !uc && i < upperCase.size(); i++)
		uc = key == upperCase[i];

	if (uc)
		return true;

	for (size_t i = 0; !lc && i < lowerCase.size(); i++)
		lc = key == lowerCase[i];

	if (lc)
		return true;

	for (size_t i = 0; !dg && i < dig.size(); i++)
		dg = key == dig[i];

	if (dg)
		return true;

	return false;
}

// Looks up the single character key[pos] in the character set match.
//
// pos:   index of the character of key to look up.
// key:   text that supplies the character.
// match: set of candidate characters.
// Returns the zero-based index of the first character of match equal to
// key[pos], or 4294967295 when there is none.
size_t RegularExpAssignStm::Search(size_t pos, string key, string match)
{
	// key[pos] is only read while match still has candidates to compare.
	for (size_t slot = 0; slot < match.size(); ++slot)
	{
		if (match[slot] == key[pos])
			return slot;
	}

	// Sentinel that callers compare against to detect "not found".
	return 4294967295;
}

size_t RegularExpAssignStm::SingleCharSearch(
	char key, size_t index, string match[])
{
	bool found = false;
	size_t i;

	for (i = 0; !found && i < match[index].size(); i++)
		found = key == match[index].c_str()[i];

	if (!found)
		i = 4294967295;
	else
		i--;

	return i;
}

// Reads an identifier from the front of assignStm.
//
// An identifier is the longest leading run of characters accepted by
// IdContains, and it may not begin with a digit.
//
// assignStm: remaining statement text, examined from index 0.
// Returns true and appends the identifier to the identifier vector when at
// least one character was read; returns false, recording nothing, otherwise.
bool RegularExpAssignStm::GetIdentifier(string assignStm)
{
	// A leading digit means this token is not an identifier.
	if (dig.find(assignStm[0]) != string::npos)
		return false;

	// Measure the leading run of identifier characters.
	size_t length = 0;

	while (length < assignStm.size() && IdContains(assignStm[length]))
		++length;

	if (length == 0)
		return false;

	identifier.push_back(assignStm.substr(0, length));
	return true;
}

// Reads an unsigned integer literal from the front of assignStm.
//
// A literal is the longest leading run of decimal digits (characters of dig).
//
// assignStm: remaining statement text, examined from index 0.
// Returns true and appends the digit run to the liter vector when assignStm
// starts with a digit; returns false, recording nothing, otherwise.
bool RegularExpAssignStm::GetLiteral(string assignStm)
{
	// Test the first character against every digit. The probe must range over
	// dig, not over the statement: bounding it by the statement length misses
	// '9' in a short input such as "9;" and reads past the end of dig for
	// long inputs.
	if (assignStm.empty() || dig.find(assignStm[0]) == string::npos)
		return false;

	// Extend over the following digits; the run may reach the end of the text.
	size_t length = 1;

	while (length < assignStm.size() &&
		dig.find(assignStm[length]) != string::npos)
		++length;

	// Record the literal only once it is complete, and report success even
	// when nothing follows it, so that the caller never sees a recorded
	// literal paired with a false result.
	liter.push_back(assignStm.substr(0, length));
	return true;
}

// Scans one assignment statement and records its tokens in the identifier,
// liter, oper and punc vectors.
//
// The statement must start with an identifier followed by ":=". After that,
// every pass of the loop accepts, in this order: an optional operand (literal
// or identifier), one mandatory arithmetic operator (+, -, *, /), another
// optional operand and an optional ';'. Because the operator is mandatory, a
// statement without one, such as "X:=a;", is rejected. White space is not
// skipped.
//
// assignStm: the statement text; taken by value and consumed from the front.
// Returns true when the text is exhausted immediately after a ';'. Returns
// false as soon as an expected token is missing or the text runs out before
// a ';'. Tokens recorded before a failure remain in the vectors.
bool RegularExpAssignStm::Parse(string assignStm)
{
	// Left-hand side: the statement has to open with an identifier.
	if (!GetIdentifier(assignStm))
		return false;

	assignStm.erase(0, identifier.back().size());

	// The complete two-character ":=" must follow. Testing only the first
	// character against the set {':', '='} would let "X1=5" or "X1:5" through
	// and then erase a character that is not part of the operator.
	const string& assignOp = ops[4];

	if (assignStm.compare(0, assignOp.size(), assignOp) != 0)
		return false;

	assignStm.erase(0, assignOp.size());
	oper.push_back(assignOp);

	// Consumes a leading literal or identifier when one is present. Returns
	// false only if an operand was consumed and nothing is left behind it,
	// i.e. the statement ended without its terminating ';'.
	auto takeOperand = [this, &assignStm]() -> bool
	{
		size_t consumed = 0;

		if (GetLiteral(assignStm))
			consumed = liter.back().size();
		else if (GetIdentifier(assignStm))
			consumed = identifier.back().size();
		else
			return true;	// no operand here; the next token decides

		assignStm.erase(0, consumed);
		return !assignStm.empty();
	};

	// ops[0..3] are the arithmetic operators; ops[4] is ":=".
	const size_t arithmeticOps = 4;

	while (true)
	{
		if (!takeOperand())
			return false;

		// An arithmetic operator is mandatory at this point. On empty text
		// no operator compares equal, so the pass fails below.
		bool sawOperator = false;

		for (size_t k = 0; k < arithmeticOps && !sawOperator; k++)
		{
			if (assignStm.compare(0, ops[k].size(), ops[k]) == 0)
			{
				oper.push_back(ops[k]);
				assignStm.erase(0, ops[k].size());
				sawOperator = true;
			}
		}

		// Fail on a missing operator or on text that stops right after one.
		if (!sawOperator || assignStm.empty())
			return false;

		if (!takeOperand())
			return false;

		// Only ';' is looked for here; "(" and ")" are not handled.
		const string& terminator = punctuation[0];

		if (assignStm.compare(0, terminator.size(), terminator) == 0)
		{
			punc.push_back(terminator);
			assignStm.erase(0, terminator.size());

			// A ';' with nothing after it completes the statement.
			if (assignStm.empty())
				return true;
		}
	}
}

Create an Index Using C++ and the Map Data Structure Designed and Implemented by James Pate Williams, Jr.

I recall that way back in the early to mid-1980s I had the pleasure of perusing a copy of the source code for a Pascal compiler. It was probably created directly under the inventor Niklaus Wirth in Switzerland. I partially implemented a Pascal emulator for a Data General Eclipse minicomputer.

Here are some of the phases required for the creation of a Pascal computer program:

  1. Parse the source code.
  2. Create a symbol table.
  3. Interpret the symbols.
  4. Create P-Code for the interpreter.

Running the interpreter code involves translation of the P-Code to a computer readable bit string. Every computer scientist should, at some time in her/his formal education, implement an assembler and a compiler.

Yesterday, April 11, 2023, I created a word index C++ application that takes a text file, parses the words, and creates an index also known as an English language symbol table. The app utilizes a C++ map that consists of integer keys and a node containing information about the words and their order in the text file. Below are the indexable text file and the symbol table (index).

This is a test of my index generator. The text file has

two lines. The second line is dummy definitions.

This is a test of my index generator. The text file has
two lines. The second line is dummy definitions.
The first number is the line number and the second the position within a line.

The         1             39          
The         2             12          
This        1             1            
a              1             9            
definitions           2             37          
dummy 2             31          
file          1             48          
generator            1             28          
has         1             53          
index     1             22          
is             2             28          
is             1             6            
line         2             23          
lines       2             5            
my          1             19          
of            1             16          
second  2             16          
test        1             11          
text        1             43          
two        2             1