// ID3MetadataParser.cpp (c) December 2025
// by James Pate Williams, Jr.
#include "pch.h"
// Status codes written to the `errorCode` out-parameters of the parsing
// routines in this file.
//
// NOTE(review): the visible code never references NO_ERROR. The parsers
// report success by assigning the literal 0, which is numerically equal to
// FILE_EOF, so "ok" and "end of file" cannot be told apart from the code
// value alone.
// NOTE(review): NO_ERROR is also a macro name used by the Windows SDK error
// headers; if pch.h pulls those in, this definition may clash - confirm.
#define FILE_EOF 0
#define NO_ERROR 1
#define EMPTY_FILE 2
#define INVALID_LINE 3
// Name line ("#name: ...") problems.
#define MISSING_NAME 4
#define INVALID_NAME 5
// Description line ("#description: ...") problems.
#define INVALID_DESCRIPTION 6
#define MISSING_DESCRIPTION 7
// Type line ("#type: ...") problems.
#define INVALID_TYPE 8
#define MISSING_TYPE 9
#define INVALID_RANGE 10
#define INVALID_CATEGORICAL 11
#define INVALID_DOUBLE 12
#define INVALID_FLOAT 13
#define INVALID_INTEGER 14
// NOTE(review): the two ROLE codes are not used anywhere in the visible code.
#define INVALID_ROLE 15
#define MISSING_ROLE 16
// Kinds of attribute a metadata record may declare.
// The enumerators keep their implicit values 0..3.
enum AttributeType {
    categorical,  // 0
    integer,      // 1
    doubleReal,   // 2
    FloatReal     // 3 (spelled with a capital F, unlike its siblings)
};
// A categorical attribute: its name, free-text description and the list of
// single-character category labels.
struct tagCategoricalAttribute {
    std::string name{ "" };
    std::string description{ "" };
    std::vector<char> category;
};
using CategoricalAttribute = tagCategoricalAttribute;
using PCategoricalAttribute = tagCategoricalAttribute*;
// An integer attribute: its name, free-text description and inclusive
// value bounds. Both bounds default to -1.
struct tagIntegerAttribute {
    std::string name{ "" };
    std::string description{ "" };
    int loValue{ -1 };
    int hiValue{ -1 };
};
using IntegerAttribute = tagIntegerAttribute;
using PIntegerAttribute = tagIntegerAttribute*;
// A double-precision attribute: its name, free-text description and value
// bounds. Both bounds default to -1.0.
struct tagDoubleAttribute {
    std::string name{ "" };
    std::string description{ "" };
    double loValue{ -1.0 };
    double hiValue{ -1.0 };
};
using DoubleAttribute = tagDoubleAttribute;
using PDoubleAttribute = tagDoubleAttribute*;
// A single-precision attribute: its name, free-text description and value
// bounds. Both bounds default to -1.0f.
struct tagFloatAttribute {
    std::string name{ "" };
    std::string description{ "" };
    float loValue{ -1.0f };
    float hiValue{ -1.0f };
};
using FloatAttribute = tagFloatAttribute;
using PFloatAttribute = tagFloatAttribute*;
// Parses a "#name: feature <name>" or "#name: target <name>" metadata line.
//
// A valid name starts with an ASCII letter and continues, up to the end of
// the line, with ASCII letters, digits or spaces.
//
// Parameters:
//   line      - NUL-terminated metadata line.
//   length    - length of `line`; copied into `index` on success.
//   errorCode - out: 0 on success, MISSING_NAME when neither prefix occurs in
//               the line, INVALID_NAME when the name is empty, does not start
//               with a letter, or contains a disallowed character.
//   index     - out: `length` on success; on INVALID_NAME the offset of the
//               offending character. Untouched on MISSING_NAME.
//   feature   - out: true for a "feature" line, false for a "target" line.
//               Untouched on MISSING_NAME.
//   name      - out: replaced with the parsed name on success only.
//
// Returns true on success, false otherwise.
//
// The previous implementation never advanced past a character that was
// neither a letter nor a space (for example a digit), so such a line made it
// spin forever; it also reported success with an empty name when the first
// character was not a letter. Both cases now yield INVALID_NAME.
static bool parseName(
    char line[],
    int length,
    int& errorCode,
    int& index,
    bool& feature,
    std::string& name)
{
    const char* const featureAt = std::strstr(line, "#name: feature ");
    const char* const targetAt = std::strstr(line, "#name: target ");
    if (featureAt == nullptr && targetAt == nullptr) {
        errorCode = MISSING_NAME;
        return false;
    }

    // As before, the "feature" prefix wins if both happen to be present.
    feature = (featureAt != nullptr);
    // Start right after the prefix that was actually matched, wherever it
    // sits in the line, instead of assuming it begins at offset 0.
    const char* cursor = feature
        ? featureAt + std::strlen("#name: feature ")
        : targetAt + std::strlen("#name: target ");
    index = static_cast<int>(cursor - line);

    const auto isLetter = [](char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    };
    const auto isDigit = [](char c) {
        return c >= '0' && c <= '9';
    };

    // Also rejects the empty name: *cursor is then the terminating NUL.
    if (!isLetter(*cursor)) {
        errorCode = INVALID_NAME;
        return false;
    }

    std::string parsed;
    for (; *cursor != '\0'; ++cursor) {
        const char c = *cursor;
        if (!(isLetter(c) || isDigit(c) || c == ' ')) {
            index = static_cast<int>(cursor - line);
            errorCode = INVALID_NAME;
            return false;
        }
        parsed += c;
    }

    name = parsed;
    errorCode = 0;
    index = length;
    return true;
}
// Parses a "#description: <text>" metadata line.
//
// Parameters:
//   line        - NUL-terminated metadata line.
//   length      - length of `line` as computed by the caller.
//   errorCode   - out: 0 on success, MISSING_DESCRIPTION when the prefix does
//                 not occur in the line, INVALID_DESCRIPTION when nothing
//                 follows the prefix.
//   index       - out: end of the copied text on success; untouched on
//                 failure.
//   description - in/out: the text after the prefix is APPENDED to it (the
//                 caller is expected to clear it first).
//
// Returns true on success, false otherwise.
//
// The text is taken from just after the matched prefix, wherever the prefix
// sits in the line, and copying never runs past the terminating NUL even if
// `length` is larger than the actual string.
static bool parseDescription(
    char line[],
    int length,
    int& errorCode,
    int& index,
    std::string& description) {
    const char* const prefixAt = std::strstr(line, "#description: ");
    if (prefixAt == nullptr) {
        errorCode = MISSING_DESCRIPTION;
        return false;
    }

    const int textStart = static_cast<int>(prefixAt - line) +
        static_cast<int>(std::strlen("#description: "));
    const int actualLength = static_cast<int>(std::strlen(line));
    const int textEnd = (length < actualLength) ? length : actualLength;
    if (textStart >= textEnd) {
        errorCode = INVALID_DESCRIPTION;
        return false;
    }

    description.append(line + textStart, line + textEnd);
    index = textEnd;
    errorCode = 0;
    return true;
}
// Parses the brace-delimited category list of a categorical type line, e.g.
// "#type: categorical {M,F,I}".
//
// Every character between '{' and '}' other than a comma, space or tab is
// stored as one category label.
//
// Parameters:
//   line      - NUL-terminated type line.
//   length    - length of `line`; scanning never goes beyond it or beyond the
//               terminating NUL.
//   errorCode - out: 0 on success, INVALID_CATEGORICAL when there is no '{',
//               no closing '}', or no label between them.
//   index     - out: offset at which scanning stopped (the '}' on success).
//               Untouched when no '{' is found.
//   category  - out: cleared on entry, then filled with the labels, so the
//               labels of a previous attribute are not carried over.
//
// Returns true on success, false otherwise.
//
// The previous version started from a hard-coded offset, bounded the scan by
// the number of stored labels rather than by the cursor position (so it could
// walk past the terminator), and appended to whatever `category` held.
static bool parseCategorical(
    char line[],
    int length,
    int& errorCode,
    int& index,
    std::vector<char>& category) {
    category.clear();

    const char* const open = std::strchr(line, '{');
    if (open == nullptr) {
        errorCode = INVALID_CATEGORICAL;
        return false;
    }

    const char* const end = line + length;
    const char* cursor = open + 1;
    while (cursor < end && *cursor != '\0' && *cursor != '}') {
        const char ch = *cursor++;
        // Commas and blanks only separate labels.
        if (ch != ',' && ch != ' ' && ch != '\t')
            category.push_back(ch);
    }
    index = static_cast<int>(cursor - line);

    const bool closed = (cursor < end && *cursor == '}');
    if (closed && !category.empty()) {
        errorCode = 0;
        return true;
    }
    errorCode = INVALID_CATEGORICAL;
    return false;
}
// Parses the "[lo, hi]" range of a doubleReal type line, e.g.
// "#type: doubleReal [0.075, 0.815]".
//
// Parameters:
//   line      - NUL-terminated type line containing the bracketed range.
//   length    - unused; the line is scanned up to its terminating NUL.
//   errorCode - out: 0 on success; INVALID_DOUBLE when '[', ',' or ']' is
//               missing or either bound is not a number; INVALID_RANGE when
//               lo > hi.
//   index     - out: offset just past the closing ']' once the delimiters
//               were found; untouched otherwise.
//   hiDouble  - out: upper bound (written on success only).
//   loDouble  - out: lower bound (written on success only).
//
// Returns true on success, false otherwise.
//
// Unlike the previous version, the three delimiters must actually be present
// (a missing ']' used to be accepted with a truncated upper bound), the range
// is located by its '[' rather than a hard-coded column, and the formerly
// unreachable INVALID_RANGE code is now reported for an inverted range.
static bool parseDoubleRange(
    char line[],
    int length,
    int& errorCode,
    int& index,
    double& hiDouble,
    double& loDouble)
{
    (void)length;
    const char* const open = std::strchr(line, '[');
    const char* const comma = (open != nullptr) ? std::strchr(open, ',') : nullptr;
    const char* const close = (comma != nullptr) ? std::strchr(comma, ']') : nullptr;
    if (close == nullptr) {
        errorCode = INVALID_DOUBLE;
        return false;
    }
    index = static_cast<int>(close - line) + 1;

    const std::string loText(open + 1, comma);
    const std::string hiText(comma + 1, close);
    try {
        // std::stod skips leading blanks and throws on empty/non-numeric text.
        const double lo = std::stod(loText);
        const double hi = std::stod(hiText);
        if (lo > hi) {
            errorCode = INVALID_RANGE;
            return false;
        }
        loDouble = lo;
        hiDouble = hi;
        errorCode = 0;
        return true;
    }
    catch (const std::exception&) {
        errorCode = INVALID_DOUBLE;
        return false;
    }
}
// Parses the "[lo, hi]" range of a floatReal type line, e.g.
// "#type: floatReal [0.0, 1.5]".
//
// Parameters:
//   line      - NUL-terminated type line containing the bracketed range.
//   length    - unused; the line is scanned up to its terminating NUL.
//   errorCode - out: 0 on success; INVALID_FLOAT when '[', ',' or ']' is
//               missing or either bound is not a number representable as a
//               float; INVALID_RANGE when lo > hi.
//   index     - out: offset just past the closing ']' once the delimiters
//               were found; untouched otherwise. The incoming value is
//               ignored.
//   hiFloat   - out: upper bound (written on success only).
//   loFloat   - out: lower bound (written on success only).
//
// Returns true on success, false otherwise.
//
// The previous version started reading at the caller's `index`, which
// parseType leaves pointing at the type keyword, so the lower bound text
// began with "floatReal [" and conversion always failed. It also re-read the
// first character of the upper bound ("1.5" became "11.5") and reported an
// empty lower bound as INVALID_INTEGER. The range is now located by its own
// '[' so the result does not depend on the incoming index.
static bool parseFloatRange(
    char line[],
    int length,
    int& errorCode,
    int& index,
    float& hiFloat,
    float& loFloat)
{
    (void)length;
    const char* const open = std::strchr(line, '[');
    const char* const comma = (open != nullptr) ? std::strchr(open, ',') : nullptr;
    const char* const close = (comma != nullptr) ? std::strchr(comma, ']') : nullptr;
    if (close == nullptr) {
        errorCode = INVALID_FLOAT;
        return false;
    }
    index = static_cast<int>(close - line) + 1;

    const std::string loText(open + 1, comma);
    const std::string hiText(comma + 1, close);
    try {
        // std::stof skips leading blanks and throws on empty, non-numeric or
        // out-of-range text.
        const float lo = std::stof(loText);
        const float hi = std::stof(hiText);
        if (lo > hi) {
            errorCode = INVALID_RANGE;
            return false;
        }
        loFloat = lo;
        hiFloat = hi;
        errorCode = 0;
        return true;
    }
    catch (const std::exception&) {
        errorCode = INVALID_FLOAT;
        return false;
    }
}
// Parses an integer range of the form "lo, hi]".
//
// parseType calls this with `line` pointing just past "integer [", i.e. at
// the first character of the lower bound. A pointer to text that still
// contains the opening '[' before the comma is accepted as well.
//
// Parameters:
//   line       - NUL-terminated text holding the range.
//   length     - unused; the text is scanned up to its terminating NUL.
//   errorCode  - out: 0 on success; INVALID_INTEGER when ',' or ']' is
//                missing or either bound is not a whole number;
//                INVALID_RANGE when lo > hi.
//   index      - in/out: advanced by the number of characters of `line` up to
//                and including the closing ']' once the delimiters were
//                found; untouched otherwise.
//   hiInteger  - out: upper bound (written on success only).
//   loInteger  - out: lower bound (written on success only).
//
// Returns true on success, false otherwise.
//
// The previous version re-read the upper bound from fixed offset 2 of `line`,
// so it only worked when the lower bound was a single digit immediately
// followed by ",": "10,29]" and "1, 29]" were both rejected. Bounds may now
// have any number of digits, an optional sign and surrounding spaces.
static bool parseIntegerRange(
    char line[],
    int length,
    int& errorCode,
    int& index,
    int& hiInteger,
    int& loInteger) {
    (void)length;
    const char* const comma = std::strchr(line, ',');
    const char* const close = (comma != nullptr) ? std::strchr(comma, ']') : nullptr;
    if (close == nullptr) {
        errorCode = INVALID_INTEGER;
        return false;
    }
    index += static_cast<int>(close - line) + 1;

    // Skip an opening bracket if the caller did not already step over it.
    const char* loStart = line;
    const char* const open = std::strchr(line, '[');
    if (open != nullptr && open < comma)
        loStart = open + 1;

    const std::string loText(loStart, comma);
    const std::string hiText(comma + 1, close);

    // True when nothing but spaces follows the converted number, so that
    // text such as "1.5" or "12abc" is not silently truncated.
    const auto onlySpacesFrom = [](const std::string& text, std::size_t from) {
        while (from < text.size() && text[from] == ' ')
            ++from;
        return from == text.size();
    };

    try {
        std::size_t used = 0;
        const int lo = std::stoi(loText, &used);
        if (!onlySpacesFrom(loText, used)) {
            errorCode = INVALID_INTEGER;
            return false;
        }
        const int hi = std::stoi(hiText, &used);
        if (!onlySpacesFrom(hiText, used)) {
            errorCode = INVALID_INTEGER;
            return false;
        }
        if (lo > hi) {
            errorCode = INVALID_RANGE;
            return false;
        }
        loInteger = lo;
        hiInteger = hi;
        errorCode = 0;
        return true;
    }
    catch (const std::exception&) {
        errorCode = INVALID_INTEGER;
        return false;
    }
}
// Parses a "#type: ..." metadata line and dispatches to the parser for the
// declared kind:
//   "categorical {"  -> parseCategorical  (fills `alphabet`)
//   "integer ["      -> parseIntegerRange (fills loInteger / hiInteger)
//   "doubleReal ["   -> parseDoubleRange  (fills loDouble / hiDouble)
//   "floatReal ["    -> parseFloatRange   (fills loFloat / hiFloat)
//
// Parameters:
//   line      - NUL-terminated metadata line.
//   length    - length of `line`.
//   errorCode - out: 0 on success; MISSING_TYPE when "#type: " is absent;
//               INVALID_TYPE when nothing follows it or no known kind is
//               found; INVALID_CATEGORICAL when the category list is bad;
//               otherwise whatever the range parser reported.
//   index     - out: set to the offset just past "#type: " and then handed to
//               the selected sub-parser, which may update it further.
//   type      - out: "categorical", "integer", "doubleReal" or "floatReal" on
//               success; untouched on failure.
//   the remaining out-parameters are written by the selected sub-parser only.
//
// Returns true on success, false otherwise.
//
// Changes from the previous version: a failed float range now returns the
// sub-parser's error code instead of falling through and overwriting it with
// INVALID_TYPE, and the text passed to parseIntegerRange is derived from
// where "integer [" was actually matched rather than from a fixed offset.
static bool parseType(
    char line[],
    int length,
    double& hiDouble,
    double& loDouble,
    float& hiFloat,
    float& loFloat,
    int& errorCode,
    int& index,
    int& hiInteger,
    int& loInteger,
    std::string& type,
    std::vector<char>& alphabet) {
    if (std::strstr(line, "#type: ") == nullptr) {
        errorCode = MISSING_TYPE;
        return false;
    }
    const int lengthType = static_cast<int>(std::strlen("#type: "));
    if (lengthType >= length) {
        errorCode = INVALID_TYPE;
        return false;
    }
    index = lengthType;
    char* const kindText = line + index;

    if (std::strstr(kindText, "categorical {") != nullptr) {
        if (!parseCategorical(line, length, errorCode, index, alphabet)) {
            errorCode = INVALID_CATEGORICAL;
            return false;
        }
        errorCode = 0;
        type = "categorical";
        return true;
    }

    if (char* const integerAt = std::strstr(kindText, "integer [")) {
        // parseIntegerRange expects to start at the first bound.
        if (!parseIntegerRange(
            integerAt + std::strlen("integer ["),
            length,
            errorCode,
            index,
            hiInteger,
            loInteger))
            return false;
        type = "integer";
        return true;
    }

    if (std::strstr(kindText, "doubleReal [") != nullptr) {
        if (!parseDoubleRange(
            line,
            length,
            errorCode,
            index,
            hiDouble,
            loDouble))
            return false;
        type = "doubleReal";
        return true;
    }

    if (std::strstr(kindText, "floatReal [") != nullptr) {
        if (!parseFloatRange(
            line,
            length,
            errorCode,
            index,
            hiFloat,
            loFloat))
            return false;  // keep the sub-parser's error code
        type = "floatReal";
        return true;
    }

    errorCode = INVALID_TYPE;
    return false;
}
// Reads the next metadata line from `file1` into `line`.
//
// Parameters:
//   file1     - metadata input stream.
//   line      - destination buffer; must hold at least 256 characters.
//   errorCode - out: EMPTY_FILE when an empty line is read while `index` is
//               -1; 0 when end of file is reached; untouched otherwise.
//   index     - in: -1 marks the read at the start of a record, for which an
//               empty line is reported as EMPTY_FILE.
//               out: set to FILE_EOF when end of file is reached.
//
// Returns true when a non-empty line that does not contain "#endheader" was
// read; false on an empty line, on "#endheader" and at end of file.
//
// A final line that is not terminated by a newline sets eofbit while still
// delivering its text; the previous version discarded such a line as EOF.
// End of file is now reported only when no text was read.
static bool readMetaDataLine(
    std::ifstream& file1,
    char line[],
    int& errorCode,
    int& index) {
    line[0] = '\0';  // defined content even if the stream is already failed
    file1.getline(line, 256);
    const bool blank = (line[0] == '\0');

    if (blank && index == -1) {
        errorCode = EMPTY_FILE;
        return false;
    }
    if (blank && file1.eof()) {
        errorCode = 0;
        index = FILE_EOF;
        return false;
    }
    if (blank)
        return false;
    return std::strstr(line, "#endheader") == nullptr;
}
// Per-column extrema gathered by readDatasetFile(). Slot k of the double
// arrays belongs to comma-separated column k + 1 of the data file; the int
// pair belongs to column 9.
double dbl_max[8] = { 0 };
double dbl_min[8] = { 0 };
int int_max = 0;
int int_min = 0;

// Scans a comma-separated data file and records the minimum and maximum of
// its numeric columns in dbl_min/dbl_max and int_min/int_max.
//
//   columns 1..8 - parsed as double when the field is longer than one
//                  character (this is what skips a one-letter first column
//                  such as "M"); result goes to slot column - 1.
//   column 9     - parsed as int unless the field is empty or contains 'F',
//                  'I' or 'M'.
//   other columns are ignored.
//
// The stream is closed before returning. std::stod / std::stoi exceptions on
// malformed numeric fields are not caught here.
//
// Fixes relative to the previous version:
//   * maxima are seeded with -DBL_MAX; DBL_MIN is the smallest POSITIVE
//     double, so a column of negative values never updated its maximum;
//   * a field is delimited by the next comma or the end of the line, instead
//     of copying on past the terminating NUL;
//   * the loop is driven by getline() itself, so it ends when a read fails
//     (e.g. an over-long line) rather than waiting for eofbit forever, and a
//     last line without a trailing newline is still processed.
static void readDatasetFile(
    std::ifstream& file2) {
    for (int i = 0; i < 8; i++) {
        dbl_min[i] = DBL_MAX;
        dbl_max[i] = -DBL_MAX;
    }
    int_min = INT_MAX;
    int_max = INT_MIN;

    char line[256] = "";
    while (file2.getline(line, sizeof(line))) {
        const int lineLength = static_cast<int>(std::strlen(line));
        int fieldStart = 0;
        for (int column = 1; column <= 9 && fieldStart < lineLength; ++column) {
            int fieldEnd = fieldStart;
            while (fieldEnd < lineLength && line[fieldEnd] != ',')
                ++fieldEnd;
            const std::string field(line + fieldStart, line + fieldEnd);
            fieldStart = fieldEnd + 1;

            if (column <= 8) {
                if (field.size() > 1) {
                    const double x = std::stod(field);
                    if (x > dbl_max[column - 1])
                        dbl_max[column - 1] = x;
                    if (x < dbl_min[column - 1])
                        dbl_min[column - 1] = x;
                }
            }
            else if (!field.empty() &&
                field.find_first_of("FIM") == std::string::npos) {
                const int x = std::stoi(field);
                if (x > int_max)
                    int_max = x;
                if (x < int_min)
                    int_min = x;
            }
        }
    }
    file2.close();
}
int main()
{
bool feature = false;
char filename1[256] = "C:\\Users\\James\\OneDrive\\Desktop\\ID3MetadataParser\\x64\\Debug\\ID3MetadataParserDataFile.txt";
char filename2[256] = "C:\\Users\\James\\OneDrive\\Desktop\\ID3MetadataParser\\x64\\Debug\\abalone.data.txt";
char line[256] = "";
int errorCode = -1, index = -1, role = -1;
std::ifstream file1(filename1);
std::ifstream file2(filename2);
// file1 format
std::string name, description, type;
std::vector<char> category;
// file2 format
std::string cat, length, diameter, height, whole;
std::string shucked, viscera, shell, rings;
std::vector<CategoricalAttribute> categoricalAttributes;
std::vector<IntegerAttribute> integerAttributes;
std::vector<DoubleAttribute> doubleAttributes;
std::vector<FloatAttribute> floatAttributes;
std::vector<std::string> names;
std::vector<std::string> descriptions;
std::vector<std::string> types;
while (!file1.eof()) {
index = -1;
bool result = readMetaDataLine(
file1,
line,
errorCode,
index);
if (!result)
break;
index = 0;
int length = static_cast<int>(strlen(line));
if (length == 0)
break;
name = "";
bool pn = parseName(
line,
length,
errorCode,
index,
feature,
name);
if (pn) {
bool result = readMetaDataLine(
file1,
line,
errorCode,
index);
if (!result)
break;
length = static_cast<int>(strlen(line));
index = 0;
description = "";
bool pd = parseDescription(
line,
length,
errorCode,
index,
description);
if (pd) {
bool result = readMetaDataLine(
file1,
line,
errorCode,
index);
if (!result)
break;
length = static_cast<int>(strlen(line));
index = 0;
type = "";
double hiDouble = DBL_MIN;
double loDouble = DBL_MAX;
float hiFloat = FLT_MIN;
float loFloat = FLT_MAX;
int hiInteger = INT_MIN;
int loInteger = INT_MAX;
bool pt = parseType(
line,
length,
hiDouble,
loDouble,
hiFloat,
loFloat,
errorCode,
index,
hiInteger,
loInteger,
type,
category);
length = static_cast<int>(strlen(line));
if (pt) {
if (type == "categorical") {
CategoricalAttribute ca;
ca.category = category;
ca.description = description;
ca.name = name;
categoricalAttributes.push_back(ca);
}
else if (type == "integer") {
IntegerAttribute ia;
ia.loValue = loInteger;
ia.hiValue = hiInteger;
ia.description = description;
ia.name = name;
integerAttributes.push_back(ia);
}
else if (type == "doubleReal") {
DoubleAttribute da;
da.loValue = loDouble;
da.hiValue = hiDouble;
da.description = description;
da.name = name;
doubleAttributes.push_back(da);
}
else if (type == "floatReal") {
FloatAttribute fa;
fa.loValue = loFloat;
fa.hiValue = hiFloat;
fa.description = description;
fa.name = name;
floatAttributes.push_back(fa);
}
else {
errorCode = INVALID_TYPE;
break;
}
}
else {
errorCode = MISSING_TYPE;
break;
}
}
else {
errorCode = INVALID_DESCRIPTION;
break;
}
}
else {
errorCode = INVALID_NAME;
return false;
}
}
readDatasetFile(file2);
for (int i = 1; i <= 7; i++) {
std::cout << i << '\t' << dbl_min[i];
std::cout << '\t' << dbl_max[i];
std::cout << std::endl;
}
std::cout << "8\t" << int_min << '\t' << int_max;
std::cout << std::endl;
std::cout << std::endl;
for (int i = 0; i < static_cast<int>(categoricalAttributes.size()); i++) {
std::cout << categoricalAttributes[i].name << ' ';
std::cout << categoricalAttributes[i].description << ' ';
std::cout << std::endl;
}
for (int i = 0; i < static_cast<int>(doubleAttributes.size()); i++) {
std::cout << doubleAttributes[i].name << ' ';
std::cout << doubleAttributes[i].description << ' ';
std::cout << doubleAttributes[i].loValue << ' ';
std::cout << doubleAttributes[i].hiValue;
std::cout << std::endl;
}
for (int i = 0; i < static_cast<int>(floatAttributes.size()); i++) {
std::cout << floatAttributes[i].name << ' ';
std::cout << floatAttributes[i].description << ' ';
std::cout << floatAttributes[i].loValue << ' ';
std::cout << floatAttributes[i].hiValue;
std::cout << std::endl;
}
for (int i = 0; i < static_cast<int>(integerAttributes.size()); i++) {
std::cout << integerAttributes[i].name << ' ';
std::cout << integerAttributes[i].description << ' ';
std::cout << integerAttributes[i].loValue << ' ';
std::cout << integerAttributes[i].hiValue;
std::cout << std::endl;
}
file1.close();
return 0;
}
Tag: technology
Blog Entry © Thursday, January 23, 2025, by James Pate Williams, Jr. Ackermann’s Super-Exponential Recursive Function in Vanilla C Programming Language
i = 2
j = 1
a(2, 1) =
4
# decimal digits = 1
enter another set (n to quit)? y
i = 2
j = 2
a(2, 2) =
16
# decimal digits = 2
enter another set (n to quit)? y
i = 2
j = 3
a(2, 3) =
65536
# decimal digits = 5
enter another set (n to quit)? y
i = 2
j = 4
a(2, 4) =
200352993040684646497907235156025575044782547556975141926501697371089\
405955631145308950613088093334810103823434290726318182294938211881266886\
950636476154702916504187191635158796634721944293092798208430910485599057\
015931895963952486337236720300291696959215610876494888925409080591145703\
767520850020667156370236612635974714480711177481588091413574272096719015\
183628256061809145885269982614142503012339110827360384376787644904320596\
037912449090570756031403507616256247603186379312648470374378295497561377\
098160461441330869211810248595915238019533103029216280016056867010565164\
...
506264233788565146467060429856478196846159366328895429978072254226479040\
061601975197500746054515006029180663827149701611098795133663377137843441\
619405312144529185518013657555866761501937302969193207612000925506508158\
327550849934076879725236998702356793102680413674571895664143185267905471\
716996299036301554564509004480278905570196832831363071899769915316667920\
895876857229060091547291963638167359667395997571032601557192023734858052\
112811745861006515259888384311451189488055212914577569914657753004138471\
712457796504817585639507289533753975582208777750607233944558789590571915\
6736
# decimal digits = 19729
enter another set (n to quit)?
/*
** Computation of Ackermann's super
** exponential function by James
** Pate Williams, Jr. (c) Tuesday,
** August 27, 2024 lip version
*/
#include <stdio.h>
#include "lip.h"
/*
** Super-exponential Ackermann-style function on LIP big integers:
**
**   a(1, j) = 2^j
**   a(i, 1) = a(i - 1, 2)                 for i != 1
**   a(i, j) = a(i - 1, a(i, j - 1))       for i >= 2 and j >= 2
**
** Returns a newly allocated verylong that the caller must release with
** zfree(), or 0 when (zi, zj) matches none of the cases above. The
** arguments are not modified or freed.
**
** Every intermediate verylong created here is released before returning;
** the previous version leaked all of them on every call.
*/
verylong Ackermann(verylong zi, verylong zj) {
	verylong result = 0;
	if (zscompare(zi, 1) == 0) {
		verylong ztwo = 0;
		zintoz(2, &ztwo);
		zexp(ztwo, zj, &result);
		zfree(&ztwo);
		return result;
	}
	if (zscompare(zj, 1) == 0) {
		verylong ztwo = 0, ziminus1 = 0;
		zintoz(2, &ztwo);
		zsadd(zi, -1, &ziminus1);
		result = Ackermann(ziminus1, ztwo);
		zfree(&ztwo);
		zfree(&ziminus1);
		return result;
	}
	if (zscompare(zi, 2) >= 0 &&
		zscompare(zj, 2) >= 0) {
		verylong ziminus1 = 0;
		verylong zjminus1 = 0;
		verylong inner = 0;
		zsadd(zi, -1, &ziminus1);
		zsadd(zj, -1, &zjminus1);
		/* i, j >= 2 here, so i - 1 and j - 1 are both >= 1. */
		inner = Ackermann(zi, zjminus1);
		result = Ackermann(ziminus1, inner);
		zfree(&inner);
		zfree(&zjminus1);
		zfree(&ziminus1);
		return result;
	}
	return 0;
}
/*
** Returns the number of decimal digits of a positive verylong, obtained by
** repeated division by 10. Returns 0 when za is not greater than zero.
**
** The division is carried out on a private copy. The previous version
** divided through the caller's own pointer, which overwrote the caller's
** number while counting.
*/
int DigitCount(verylong za) {
	int count = 0;
	verylong work = 0;
	zcopy(za, &work);
	while (zscompare(work, 0) > 0) {
		zsdiv(work, 10, &work);
		count++;
	}
	zfree(&work);
	return count;
}
/*
** Interactive driver: repeatedly reads i and j, prints a(i, j) and its
** number of decimal digits, and asks whether to continue. The loop ends
** when the answer starts with 'n' or when any input cannot be read.
**
** Changes from the previous version: the scanf_s results are checked (bad
** or exhausted input used to make the loop spin forever), zi and zj are
** released every iteration instead of leaking, and the buffer size passed
** for "%s" is converted to the unsigned type scanf_s expects.
*/
int main(void) {
	for (;;) {
		char buffer[256] = { '\0' };
		int i = 0, j = 0, number = 0;
		verylong za = 0, zi = 0, zj = 0;
		printf_s("i = ");
		if (scanf_s("%d", &i) != 1)
			break;
		printf_s("j = ");
		if (scanf_s("%d", &j) != 1)
			break;
		zintoz(i, &zi);
		zintoz(j, &zj);
		printf_s("a(%d, %d) = \n", i, j);
		za = Ackermann(zi, zj);
		zwriteln(za);
		number = DigitCount(za);
		printf_s("# decimal digits = %d\n",
			number);
		zfree(&za);
		zfree(&zi);
		zfree(&zj);
		printf_s("enter another set (n to quit)? ");
		if (scanf_s("%s", buffer, (unsigned)sizeof(buffer)) != 1)
			break;
		if (buffer[0] == 'n')
			break;
	}
	return 0;
}
Blog Entry (c) Friday, October 18, 2024, by James Pate Williams, Jr. Ab Initio Quantum Chemical Calculation
On Wednesday, October 16, 2024, I bought an Amazon Kindle book named “Modern Quantum Chemistry: Introduction to Advanced Electronic Structure Theory” by Attila Szabo and Neil S. Ostlund. It cost me $10.69, which is a real bargain. In Appendix B there is a listing for a FORTRAN program to perform an ab initio Hartree-Fock Self-Consistent Field calculation for a two-electron heteronuclear molecule, namely the helium-hydrogen cation. I successfully translated the program from FORTRAN to C++. I had to remember that FORTRAN stores matrices in column-major order and C/C++ stores matrices in row-major order. I took the transposes of two FORTRAN COMMON matrices to get the correct C++ storage. The authors of the book gave an extensive treatment of the test calculation. The application is only 823 lines of monolithic C++ source code. I used FORTRAN-like array indexing starting at 1 instead of the C convention of starting at 0. I think I will try to get in touch with the authors to get permission to post the source code and results on my blog.
P.S. I got permission from Dover Books to publish my source code and results. I think I will reconsider posting the C++ source code. The actual ground-state energy of the cation is -2.97867. Both my calculation and the book’s computation have percentage errors of about 4%. The book’s value is a little closer to the exact value than my result. The book calculation was done in FORTRAN double precision on a Digital Equipment Corporation PDP-10 minicomputer. My recreation of the book’s endeavor was executed on an Intel Itanium Core 7 and Windows 10 Professional machine using Win32 C++. The C++ compiler was from Microsoft Visual Studio 2019 Community Version.
Note: I also added a calculation for a homonuclear molecule, namely the diatomic hydrogen molecule.
Blog Entry Wednesday, July 10, 2024, © James Pate Williams, Jr. My Dual Interests in Cryptography and Number Theory
I became fascinated with secret key cryptography as a child. Later, as an adult, in around 1979, I started creating crude symmetric cryptographic algorithms. I became further enthralled with cryptography and number theory in 1996 upon reading Applied Cryptography, Second Edition: Protocols, Algorithms, and Source Code in C by Bruce Schneier and later the Handbook of Applied Cryptography by Alfred J. Menezes, Paul C. van Oorschot, and Scott A. Vanstone. After implementing many of the algorithms in both tomes, I communicated my results to two of the authors namely Bruce Schneier and Professor Alfred J. Menezes. In 1997 I developed a website devoted to constraint satisfaction problems and their solutions, cryptography, and number theory. I posted legal C and C++ source code. Professor Menezes advertised my website along with his treatise. See the following blurb:

In the spirit of my twin scientific infatuations, I offer yet another C integer factoring implementation utilizing the Free Large Integer Package (known more widely as lip) which was created by Arjen K. Lenstra (now a Professor Emeritus). This implementation includes Henri Cohen’s Trial Division algorithm, the Brent-Cohen-Pollard rho method, the Cohen-Pollard p – 1 stage 1 method, and the Lenstra lip Elliptic Curve Method. If I can get the proper authorization, I will later post the source code.
total time required for initialization: 0.056000 seconds
enter number below:
2^111+2
== Menu ==
1 Trial Division
2 Pollard-Brent-Cohen rho
3 p - 1 Pollard-Cohen
4 Lenstra's Elliptic Curve Method
5 Pollard-Lenstra rho
1
2596148429267413814265248164610050
number is composite
factors:
total time required factoring: 0.014000 seconds:
2
5 ^ 2
41
397
2113
enter number below:
0
total time required for initialization: 0.056000 seconds
enter number below:
2^111+2
== Menu ==
1 Trial Division
2 Pollard-Brent-Cohen rho
3 p - 1 Pollard-Cohen
4 Lenstra's Elliptic Curve Method
5 Pollard-Lenstra rho
2
2596148429267413814265248164610050
number is composite
factors:
total time required factoring: 1.531000 seconds:
2
5 ^ 2
41
397
2113
415878438361
3630105520141
enter number below:
0
total time required for initialization: 0.055000 seconds
enter number below:
2^111+2
== Menu ==
1 Trial Division
2 Pollard-Brent-Cohen rho
3 p - 1 Pollard-Cohen
4 Lenstra's Elliptic Curve Method
5 Pollard-Lenstra rho
3
2596148429267413814265248164610050
number is composite
factors:
total time required factoring: 0.066000 seconds:
2
5 ^ 2
41
838861
415878438361
3630105520141
enter number below:
0
total time required for initialization: 0.056000 seconds
enter number below:
2^111+2
== Menu ==
1 Trial Division
2 Pollard-Brent-Cohen rho
3 p - 1 Pollard-Cohen
4 Lenstra's Elliptic Curve Method
5 Pollard-Lenstra rho
4
2596148429267413814265248164610050
number is composite
factors:
total time required factoring: 0.013000 seconds:
2
5
205
838861
415878438361
3630105520141
enter number below:
0