Blog Entry © Wednesday, February 4, 2026, by James Pate Williams, Jr. and the Microsoft Copilot: Two Multiplication Algorithms from Wikipedia

[BEGIN COPILOT TEXT]

## Introduction

This small C++ console program implements two classical multiplication algorithms: 

**long multiplication** (the grade‑school method) and **Karatsuba multiplication**, the first sub‑quadratic multiplication algorithm discovered (Anatoly Karatsuba, 1960).

My goal here is not to optimize or modernize the algorithms, but to preserve their structure in a clear, readable form. The implementations follow the versions presented in the Wikipedia “Multiplication” article as faithfully as possible.

The code is intentionally simple and procedural. It uses 1‑based indexing for digit arrays because that mirrors the mathematical notation more closely than 0‑based indexing.

## Long Multiplication

This function implements the classical long multiplication algorithm in an arbitrary base. 

Digits are stored in reverse order (least significant digit first), which simplifies carry propagation.

The implementation below is intentionally direct and mirrors the textbook algorithm step‑by‑step.

## Karatsuba Multiplication (Base 10)

This is a minimal, non‑recursive implementation of the Karatsuba method. 

The inputs are split into high and low parts:

x = x_1 * B ^ m + x_0

y = y_1 * B ^ m + y_0

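Karatsuba reduces the number of multiplications from 4 to 3 by computing:

z = x * y = z_2 * B ^ (2m) + z_1 * B ^ m + z_0

where z_2 = x_1 * y_1, z_0 = x_0 * y_0, and z_1 = (x_1 + x_0) * (y_1 + y_0) - z_2 - z_0, which equals x_1 * y_0 + x_0 * y_1 but needs only one additional multiplication.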

This implementation follows the Wikipedia pseudocode closely.

[END COPILOT TEXT]

#pragma once

// Static helpers implementing the two multiplication routines of this demo.
class MyAlgorithms
{

public:

	// Schoolbook multiplication in an arbitrary base.
	//   base    - radix of the digits
	//   p, q    - number of digits in a and b
	//   pSize   - receives p + q
	//   a, b    - digits, 1-based (slot 0 unused), least significant first
	//   product - receives the digits of a * b in the same layout
	static void LongMultiplication(
		int base, int p, int q, int& pSize,
		std::vector<int> a,
		std::vector<int> b,
		std::vector<int>& product
	);

	// One level of the Karatsuba split: computes
	//   z = (x1 * B^m + x0) * (y1 * B^m + y0).
	// The parameter names below follow the order used by the definition and
	// by the caller (base and exponent first, then the operand halves); the
	// types are unchanged, so existing calls are unaffected.
	static void KaratsubaBase10(
		int B, int m, int x0, int x1,
		int y0, int y1, long long& z);
};
#include "pch.h"
#include "MyAlgorithms.h"

// Schoolbook (long) multiplication of two digit vectors.
//
//   base    - radix; must be at least 2
//   p, q    - digit counts of a and b
//   pSize   - set to p + q (or 0 when the arguments are rejected)
//   a, b    - digits stored 1-based, least significant digit first
//   product - overwritten with p + q + 1 slots holding the digits of a * b
//             in the same 1-based, least-significant-first layout
//
// When the arguments cannot be processed (base < 2, negative counts, or a
// vector shorter than its count + 1) the routine stores a single zero in
// product and returns instead of dividing by zero or indexing out of range.
void MyAlgorithms::LongMultiplication(
	int base, int p, int q, int& pSize,
	std::vector<int> a, std::vector<int> b,
	std::vector<int>& product)
{
	if (base < 2 || p < 0 || q < 0 ||
		a.size() < static_cast<size_t>(p) + 1 ||
		b.size() < static_cast<size_t>(q) + 1)
	{
		pSize = 0;
		product.assign(1, 0);
		return;
	}

	pSize = p + q;

	// assign() rather than resize(): the inner loop accumulates with +=, so
	// any digits left in product by the caller would corrupt the result.
	product.assign(static_cast<size_t>(pSize) + 1, 0);

	for (int b_i = 1; b_i <= q; b_i++)
	{
		int carry = 0;

		for (int a_i = 1; a_i <= p; a_i++)
		{
			const size_t slot = static_cast<size_t>(a_i) + b_i - 1;
			const int sum = product[slot] + carry + a[a_i] * b[b_i];

			carry = sum / base;
			product[slot] = sum % base;
		}

		// The final carry of this row lands one position above the row.
		product[static_cast<size_t>(b_i) + p] = carry;
	}
}

// Single-level Karatsuba product.
//
// Computes z = (x1 * B^m + x0) * (y1 * B^m + y0) from three multiplications:
//   z2 = x1 * y1
//   z0 = x0 * y0
//   z1 = (x1 + x0) * (y1 + y0) - z2 - z0
//   z  = z2 * B^(2m) + z1 * B^m + z0
//
// All arithmetic is carried out in long long so that the intermediate
// products are not truncated to int before being stored in z, and B^m is
// built by integer multiplication instead of a truncated floating-point pow.
// A negative m is treated as 0.
void MyAlgorithms::KaratsubaBase10(
	int B, int m, int x0, int x1,
	int y0, int y1, long long& z)
{
	long long pb = 1;

	for (int i = 0; i < m; i++)
		pb *= B;

	const long long hiX = x1, loX = x0;
	const long long hiY = y1, loY = y0;

	const long long z2 = hiX * hiY;
	const long long z0 = loX * loY;
	const long long z1 = (hiX + loX) * (hiY + loY) - z2 - z0;

	z = z2 * pb * pb + z1 * pb + z0;
}

#include "MyAlgorithms.h"

// Console driver for MyAlgorithms::LongMultiplication.
//
// Prompts for a base and two non-negative integers written with the digit
// characters '0'..'9', multiplies them and prints the digits of the product,
// most significant first. Each digit is printed as a decimal number, so the
// output is only a conventional numeral for bases up to 10.
static void DoLongMultiplication()
{
	char line[128] = "";
	char inputaStr[128] = "";
	char inputbStr[128] = "";

	std::cout << "Enter base = ";
	std::cin.getline(line, 128);
	const int base = atoi(line);
	std::cout << "a = ";
	std::cin.getline(inputaStr, 128);
	std::cout << "b = ";
	std::cin.getline(inputbStr, 128);

	const int p = static_cast<int>(strlen(inputaStr));
	const int q = static_cast<int>(strlen(inputbStr));

	// A base below 2 would divide by zero (or never carry) and an empty
	// operand has no digits to multiply.
	if (base < 2 || p == 0 || q == 0)
	{
		std::cout << "invalid input: base must be >= 2 and a, b must not be empty" << std::endl;
		return;
	}

	// Digit vectors are 1-based and least significant digit first.
	std::vector<int> a(static_cast<size_t>(p) + 1);
	std::vector<int> b(static_cast<size_t>(q) + 1);

	// Reading each string from its end gives the reversed digit order
	// directly, so the non-portable _strrev call is not needed.
	for (int i = 1; i <= p; i++)
		a[i] = inputaStr[p - i] - '0';

	for (int i = 1; i <= q; i++)
		b[i] = inputbStr[q - i] - '0';

	// Every character must be a decimal digit that is valid in this base.
	auto digitsAreValid = [base](const std::vector<int>& digits, int count)
	{
		for (int i = 1; i <= count; i++)
		{
			if (digits[i] < 0 || digits[i] > 9 || digits[i] >= base)
				return false;
		}

		return true;
	};

	if (!digitsAreValid(a, p) || !digitsAreValid(b, q))
	{
		std::cout << "invalid input: a and b must contain only digits below the base" << std::endl;
		return;
	}

	int pSize = p + q;
	std::vector<int> ab(static_cast<size_t>(p) + q + 1);

	MyAlgorithms::LongMultiplication(
		base, p, q, pSize, a, b, ab);

	// Skip leading zeros but never go below slot 1: the index is unsigned,
	// and a product of zero must still print a single "0".
	size_t top = ab.size() - 1;

	while (top > 1 && ab[top] == 0)
		top--;

	std::cout << "product = ";

	for (size_t k = top; k >= 1; k--)
		std::cout << ab[k];

	std::cout << std::endl;
}

// Console driver for MyAlgorithms::KaratsubaBase10: asks for the base, the
// split exponent and the four operand halves (high half before low half),
// then prints the resulting product.
static void DoKaratsuba()
{
	char buffer[128] = "";

	// Shows a prompt, reads one line and converts it with atoi.
	auto ask = [&buffer](const char* prompt) -> int
	{
		std::cout << prompt;
		std::cin.getline(buffer, 128);
		return atoi(buffer);
	};

	const int B = ask("Enter base = ");
	const int m = ask("Enter m = ");
	const int x1 = ask("x1 = ");
	const int x0 = ask("x0 = ");
	const int y1 = ask("y1 = ");
	const int y0 = ask("y0 = ");

	long long z = 0;

	MyAlgorithms::KaratsubaBase10(B, m, x0, x1, y0, y1, z);
	std::cout << "z = " << z << std::endl;
}

// Menu loop: option 1 runs the long multiplication demo, option 2 the
// Karatsuba demo; any other first character (including an empty line)
// ends the program with exit code 0.
int main()
{
	for (;;)
	{
		char choice[128] = "";

		std::cout << "== Menu ==" << std::endl;
		std::cout << "1 Long Multiplication" << std::endl;
		std::cout << "2 Karatsuba Multiplication" << std::endl;
		std::cout << "3 Exit" << std::endl;
		std::cout << "Option (1 or 2 or 3) = ";
		std::cin.getline(choice, 128);

		switch (choice[0])
		{
		case '1':
			DoLongMultiplication();
			break;

		case '2':
			DoKaratsuba();
			break;

		default:
			return 0;
		}
	}
}

== Menu ==
1 Long Multiplication
2 Karatsuba Multiplication
3 Exit
Option (1 or 2 or 3) = 1
Enter base = 10
a = 506
b = 208
product = 105248
== Menu ==
1 Long Multiplication
2 Karatsuba Multiplication
3 Exit
Option (1 or 2 or 3) = 2
Enter base = 10
Enter m = 2
x1 = 5
x0 = 6
y1 = 2
y0 = 8
z = 105248
== Menu ==
1 Long Multiplication
2 Karatsuba Multiplication
3 Exit
Option (1 or 2 or 3) = 3

D:\Multiplication\x64\Debug\Multiplication.exe (process 30912) exited with code 0 (0x0).
Press any key to close this window . . .

Blog Entry (c) Tuesday February 3, 2026, by James Pate Williams, Jr. Derivation of the Classical Kinetic Energy Formula

// KineticEnergy.cpp
// Author: James Pate Williams, Jr. and NIST
// Copyright Monday February 2, 2026

#include <iomanip>
#include <iostream>
#include <vector>

// Speed of light in vacuum, in metres per second.
const double c = 2.99792458E8;
// Rest mass used by the series below, in kilograms.
// NOTE(review): the value looks like the neutron rest mass - confirm against
// the NIST table credited in the file header.
const double mass0 = 1.67492750056E-27;

// Evaluates the leading terms of the energy series
//   m c^2 + (1/2) m v^2 + 3 m v^4 / (8 c^2) + 225 m v^6 / (720 c^4)
// for the mass mass0 moving at speed v.
//
//   v           - speed (same length/time units as c)
//   number      - index of the last term wanted; only indices 0..3 are
//                 implemented, so larger values are treated as 3
//   kilotonsTNT - receives the returned sum multiplied by 2.3900573613767E-13
//   terms       - receives the individual terms in slots 0..min(number, 3);
//                 it is grown when it is too short to hold them
//
// Returns the sum of the evaluated terms (the rest-energy term included).
static double KineticEnergy(
    double v, size_t number,
    double& kilotonsTNT,
    std::vector<double>& terms)
{
    const size_t last = number < 3 ? number : 3;

    // The slots written below must exist even if the caller passed a short vector.
    if (terms.size() < last + 1)
        terms.resize(last + 1);

    const double c2 = c * c;
    const double v2 = v * v;

    // Plain products replace pow(): no <cmath> dependency for small powers.
    terms[0] = mass0 * c2;
    if (last >= 1)
        terms[1] = 0.5 * mass0 * v * v;
    if (last >= 2)
        terms[2] = 3.0 * mass0 * (v2 * v2) / (8.0 * c2);
    if (last >= 3)
        terms[3] = 225.0 * mass0 * (v2 * v2 * v2) / (720.0 * c2 * c2);

    double ke = 0.0;

    for (size_t i = 0; i <= last; i++)
        ke += terms[i];

    kilotonsTNT = 2.3900573613767E-13 * ke;
    return ke;
}

// Interactive loop: reads a speed and a term count, evaluates the series with
// KineticEnergy and prints the total and every individual term in joules and
// kilotons of TNT. Entering 0 (or anything atof converts to 0) for v ends the
// program.
int main()
{
    char line[128] = "";

    while (true)
    {
        double ke = 0.0, kilotonsTNT = 0.0, v = 0.0;

        std::cout << "Enter v = ";
        std::cin.getline(line, 128);
        v = atof(line);

        if (v == 0)
            break;

        std::cout << "# Terms = ";
        std::cin.getline(line, 128);

        // atoi may return a negative number; casting that to size_t would
        // request an enormous (or zero-length) vector. Only term indices
        // 0..3 exist in the series, so clamp the request to that range.
        int requested = atoi(line);

        if (requested < 0)
            requested = 0;
        if (requested > 3)
            requested = 3;

        const size_t number = static_cast<size_t>(requested);
        std::vector<double> terms(number + 1);

        ke = KineticEnergy(v, number, kilotonsTNT, terms);
        std::cout << "KE = " << ke << " joules" << std::endl;
        std::cout << "KE = " << kilotonsTNT << " kilotons TNT";
        std::cout << std::endl;
        std::cout << std::scientific;

        for (size_t i = 0; i <= number; i++)
        {
            std::cout << "KE[" << i << "] = " << terms[i];
            std::cout << " joules" << std::endl;
            kilotonsTNT = 2.3900573613767E-13 * terms[i];
            std::cout << "KE[" << i << "] = " << kilotonsTNT;
            std::cout << " kilotons TNT" << std::endl;
        }
    }

    return 0;
}

Blog Entry (c) February 2, 2026, by James Pate Williams, Jr. and Especially the Microsoft Copilot Three Iterative Dichotomiser 3 (ID3) Examples

Blog Entry © Sunday, January 25, 2026, by James Pate Williams, Jr., Schwarzschild Solution of Einstein’s General Relativity Gravitational Field Equation

Blog Entry © Friday, January 16, 2026, by James Pate Williams, Jr., Another Update of My Iowa Class Battleship Artillery Exterior Ballistics Application

Blog Entry © Wednesday, January 14, 2026, by James Pate Williams, Jr. Curvature of the Earth Table

// CurvatureOfTheEarth.cpp : Defines the entry point for the application.
//

#include "pch.h"
#include "framework.h"
#include "CurvatureOfTheEarth.h"
#include "GreatCircleDistance.h"
#include "Vincenty.h"

// Capacity, in WCHARs, of the title and window-class string buffers below.
#define MAX_LOADSTRING 100

// Global Variables:
HINSTANCE hInst;                                // current instance
WCHAR szTitle[MAX_LOADSTRING];                  // The title bar text
WCHAR szWindowClass[MAX_LOADSTRING];            // the main window class name
// Scratch buffer that TableDialog formats each table cell into.
WCHAR line[128];                                // general purpose buffer
// Text that TableDialog builds up and places in its multiline edit control.
std::wstring outputText;                        // output wide character text

// Forward declarations of functions included in this code module:
ATOM                MyRegisterClass(HINSTANCE hInstance);
BOOL                InitInstance(HINSTANCE, int);
LRESULT CALLBACK    WndProc(HWND, UINT, WPARAM, LPARAM);
INT_PTR CALLBACK    About(HWND, UINT, WPARAM, LPARAM);
INT_PTR CALLBACK    TableDialog(HWND, UINT, WPARAM, LPARAM);

// Application entry point: loads the title and class-name strings, registers
// the window class, creates the main window and runs the message loop until
// WM_QUIT arrives. Returns the wParam of the last message retrieved.
int APIENTRY wWinMain(_In_ HINSTANCE hInstance,
                     _In_opt_ HINSTANCE hPrevInstance,
                     _In_ LPWSTR    lpCmdLine,
                     _In_ int       nCmdShow)
{
    UNREFERENCED_PARAMETER(hPrevInstance);
    UNREFERENCED_PARAMETER(lpCmdLine);

    // Initialize global strings
    LoadStringW(hInstance, IDS_APP_TITLE, szTitle, MAX_LOADSTRING);
    LoadStringW(hInstance, IDC_CURVATUREOFTHEEARTH, szWindowClass, MAX_LOADSTRING);
    MyRegisterClass(hInstance);

    // Perform application initialization:
    if (!InitInstance (hInstance, nCmdShow))
    {
        return FALSE;
    }

    HACCEL hAccelTable = LoadAccelerators(hInstance, MAKEINTRESOURCE(IDC_CURVATUREOFTHEEARTH));

    // Zero-initialised so the return value is defined even if no message
    // is ever retrieved.
    MSG msg = {};

    // Main message loop. GetMessage returns 0 for WM_QUIT and -1 on error;
    // testing "> 0" leaves the loop in both cases instead of spinning on -1.
    while (GetMessage(&msg, nullptr, 0, 0) > 0)
    {
        if (!TranslateAccelerator(msg.hwnd, hAccelTable, &msg))
        {
            TranslateMessage(&msg);
            DispatchMessage(&msg);
        }
    }

    return (int) msg.wParam;
}

//
//  FUNCTION: MyRegisterClass()
//
//  PURPOSE: Describes the main window class (procedure, icons, cursor,
//           background brush, menu) and registers it; returns the ATOM
//           produced by RegisterClassExW.
//
ATOM MyRegisterClass(HINSTANCE hInstance)
{
    WNDCLASSEXW windowClass = { 0 };

    windowClass.cbSize = sizeof(WNDCLASSEX);

    // Behaviour
    windowClass.style         = CS_HREDRAW | CS_VREDRAW;
    windowClass.lpfnWndProc   = WndProc;
    windowClass.cbClsExtra    = 0;
    windowClass.cbWndExtra    = 0;
    windowClass.hInstance     = hInstance;

    // Appearance
    windowClass.hIcon         = LoadIcon(hInstance, MAKEINTRESOURCE(IDI_CURVATUREOFTHEEARTH));
    windowClass.hCursor       = LoadCursor(nullptr, IDC_ARROW);
    windowClass.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);

    // Resources and identity
    windowClass.lpszMenuName  = MAKEINTRESOURCEW(IDC_CURVATUREOFTHEEARTH);
    windowClass.lpszClassName = szWindowClass;
    windowClass.hIconSm       = LoadIcon(windowClass.hInstance, MAKEINTRESOURCE(IDI_SMALL));

    const ATOM registered = RegisterClassExW(&windowClass);
    return registered;
}

//
//   FUNCTION: InitInstance(HINSTANCE, int)
//
//   PURPOSE: Remembers the instance handle in hInst and creates, shows and
//            updates the main window.
//
//   RETURNS: TRUE when the window was created, FALSE otherwise.
//
BOOL InitInstance(HINSTANCE hInstance, int nCmdShow)
{
   // Later dialog creation reads the handle from this global.
   hInst = hInstance;

   HWND mainWindow = CreateWindowW(szWindowClass, szTitle, WS_OVERLAPPEDWINDOW,
      CW_USEDEFAULT, 0, CW_USEDEFAULT, 0, nullptr, nullptr, hInstance, nullptr);

   if (mainWindow)
   {
      ShowWindow(mainWindow, nCmdShow);
      UpdateWindow(mainWindow);
      return TRUE;
   }

   return FALSE;
}

//
//  FUNCTION: WndProc(HWND, UINT, WPARAM, LPARAM)
//
//  PURPOSE: Window procedure of the main window.
//
//  WM_CREATE   - run the table dialog
//  WM_COMMAND  - handle the About and Exit menu commands
//  WM_PAINT    - begin and end painting without drawing anything
//  WM_DESTROY  - post a quit message
//  Anything else, and unrecognised commands, go to DefWindowProc.
//
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    if (message == WM_CREATE)
    {
        DialogBox(hInst, MAKEINTRESOURCE(IDD_TABLE_DIALOG), hWnd, TableDialog);
        return 0;
    }

    if (message == WM_COMMAND)
    {
        const int commandId = LOWORD(wParam);

        if (commandId == IDM_ABOUT)
        {
            DialogBox(hInst, MAKEINTRESOURCE(IDD_ABOUTBOX), hWnd, About);
            return 0;
        }

        if (commandId == IDM_EXIT)
        {
            DestroyWindow(hWnd);
            return 0;
        }

        return DefWindowProc(hWnd, message, wParam, lParam);
    }

    if (message == WM_PAINT)
    {
        PAINTSTRUCT paint;
        HDC hdc = BeginPaint(hWnd, &paint);
        // No drawing is done; hdc is obtained only to validate the window.
        EndPaint(hWnd, &paint);
        return 0;
    }

    if (message == WM_DESTROY)
    {
        PostQuitMessage(0);
        return 0;
    }

    return DefWindowProc(hWnd, message, wParam, lParam);
}

// Dialog procedure of the About box: accepts initialisation and closes the
// dialog when OK or Cancel is chosen; every other message is left unhandled.
INT_PTR CALLBACK About(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam)
{
    UNREFERENCED_PARAMETER(lParam);

    if (message == WM_INITDIALOG)
        return (INT_PTR)TRUE;

    if (message == WM_COMMAND)
    {
        const bool isOk = LOWORD(wParam) == IDOK;
        const bool isCancel = LOWORD(wParam) == IDCANCEL;

        if (isOk || isCancel)
        {
            // The button id doubles as the dialog result.
            EndDialog(hDlg, LOWORD(wParam));
            return (INT_PTR)TRUE;
        }
    }

    return (INT_PTR)FALSE;
}

// Dialog procedure of the curvature table dialog.
//
//   WM_INITDIALOG - creates a bold fixed-pitch font and a read-only multiline
//                   edit control that uses it
//   WM_COMMAND    - IDCANCEL closes the dialog; IDC_BUTTON_COMPUTE rebuilds
//                   the table (rows 1000..40000 step 1000, columns 0..900
//                   step 100, each cell 3.0 * gcd.Interpolate(row + col))
//                   and shows it in the edit control
//   WM_DESTROY    - releases the font created during initialisation
INT_PTR CALLBACK TableDialog(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam)
{
    UNREFERENCED_PARAMETER(lParam);
    static int height = 20, width = 80;
    static HFONT hFont = NULL;
    static HWND hEditMultiline = NULL;
    static GreatCircleDistance gcd;

    switch (message)
    {
    case WM_INITDIALOG:
    {
        // The DC is needed only to read the vertical resolution; release it
        // right away so it is not leaked.
        HDC hdc = GetDC(hDlg);
        const int pixelsPerInchY = GetDeviceCaps(hdc, LOGPIXELSY);

        if (hdc != NULL)
            ReleaseDC(hDlg, hdc);

        hFont = CreateFont(
            -MulDiv(7, pixelsPerInchY, 72),
            0, 0, 0, FW_BOLD, FALSE, FALSE, FALSE,
            DEFAULT_CHARSET, OUT_DEFAULT_PRECIS,
            CLIP_DEFAULT_PRECIS, DEFAULT_QUALITY,
            FIXED_PITCH | FF_MODERN,
            TEXT("Courier New")
        );

        hEditMultiline = CreateWindowEx(
            WS_EX_CLIENTEDGE,                       // Extended style for sunken border
            TEXT("EDIT"),                           // Class name
            TEXT(""),                               // Initial text (can be blank)
            WS_CHILD | WS_VISIBLE | WS_VSCROLL | ES_LEFT | ES_MULTILINE | ES_READONLY,
            10, 0, 10 * width, 30 * height,         // Position and size
            hDlg,                                   // Parent window handle
            (HMENU)IDC_EDIT_MULTILINE,              // Unique control ID
            hInst,                                  // Application instance
            NULL                                    // Extra parameter
        );

        SendMessage(hEditMultiline, WM_SETFONT, (WPARAM)hFont, TRUE);
        return (INT_PTR)TRUE;
    }

    case WM_DESTROY:
        // The font is owned by this dialog: detach it from the edit control
        // and delete it so it does not outlive the dialog.
        if (hFont != NULL)
        {
            if (hEditMultiline != NULL)
                SendMessage(hEditMultiline, WM_SETFONT, (WPARAM)NULL, FALSE);

            DeleteObject(hFont);
            hFont = NULL;
        }

        hEditMultiline = NULL;
        break;

    case WM_COMMAND:
        if (LOWORD(wParam) == IDCANCEL)
        {
            EndDialog(hDlg, LOWORD(wParam));
            return (INT_PTR)TRUE;
        }

        if (LOWORD(wParam) == IDC_BUTTON_COMPUTE)
        {
            // Start from an empty buffer; otherwise every click would append
            // another copy of the table to the previous output.
            outputText.clear();
            outputText += L"Range in Yards Versus Curvature of the Earth in Feet\r\n\r\n";
            outputText += L"Yards\t 0\t 100\t 200\t 300\t 400\t 500\t 600\t 700\t 800\t 900\r\n";

            for (double row = 1000.0; row <= 40000.0; row += 1000.0)
            {
                swprintf_s(line, L"%6.1lf\t", row);
                outputText += line;

                for (double col = 0.0; col <= 900.0; col += 100.0)
                {
                    double r = row + col;
                    double distance = 3.0 * gcd.Interpolate(r);

                    swprintf_s(line, L"%4.1lf\t", distance);
                    outputText += line;
                }

                outputText += L"\r\n";
            }

            // The column header is repeated below the last row.
            outputText += L"Yards\t 0\t 100\t 200\t 300\t 400\t 500\t 600\t 700\t 800\t 900\r\n";
            SetWindowText(hEditMultiline, outputText.c_str());
            return (INT_PTR)TRUE;
        }
    }

    return (INT_PTR)FALSE;
}

#pragma once
#include "Vincenty.h"

// One table entry pairing a curvature correction with the range it belongs to.
struct PointRaR
{
    double ra;   // curvature of the Earth correction in yards
    double r;    // flat Earth distance (chord) in yards

    // Stores both values unchanged.
    PointRaR(double ra, double r)
        : ra(ra), r(r)
    {
    }
};

// Table of (correction, range) points with a lookup that interpolates the
// correction for an arbitrary range.
class GreatCircleDistance
{

private:

    Vincenty vincenty;            // supplies z(phi) while the table is built
    std::vector<PointRaR> pts;    // table entries, searched on their r member

    // Finds the table entry or pair of neighbouring entries for x; reports
    // their indices through lt and rt and returns whether x was located.
    bool binarySearch(double x, int& lt, int& rt);

public:

    // Three-way comparison of two entries by their r member:
    // -1 when lt.r is smaller, +1 when it is larger, 0 otherwise.
    friend int compare(
        const PointRaR& lt,
        const PointRaR& rt)
    {
        return (lt.r < rt.r) ? -1
             : (lt.r > rt.r) ? +1
             : 0;
    }

    // R is in yards, returns in yards
    // Curvature of the Earth correction, or -1.0 when R is not covered
    // by the table.

    double Interpolate(double R);

    // construction of the curvature of the Earth table

    GreatCircleDistance();
};

#pragma once

// Geometry helpers on a sphere of radius Re.
class Vincenty
{

public:

    // Radius of Earth in meters
    static double Re;

    // Angular separation between (phi1, lambda1) and (phi2, lambda2).
    double deltaSigma(double phi1, double lambda1,
                      double phi2, double lambda2);

    // Separation between the same two points expressed as a length.
    double distance(double phi1, double lambda1,
                    double phi2, double lambda2);

    // Variants of distance() with both lambdas fixed at 0.
    double distanceConstantLambda(double phi1, double phi2);
    double distanceConstantXY(double z);

    // Cartesian coordinates for the given angles.
    double x(double phi, double lambda);
    double y(double phi, double lambda);
    double z(double phi);

    // Angles recovered from Cartesian coordinates.
    double phi(double z);
    double lambda(double x, double y);
};

#include "pch.h"
#include "GreatCircleDistance.h"

// Locates x among the r values of pts (expected in ascending order).
//
// On success returns true with either
//   lt == rt  - index of an entry whose r equals x, or
//   rt == lt + 1 - the two neighbouring entries that bracket x.
// Returns false with lt == rt == -1 when the table is empty or x lies
// outside [pts.front().r, pts.back().r] (this includes a NaN x).
bool GreatCircleDistance::binarySearch(
    double x, int& lt, int& rt)
{
    lt = rt = -1;

    if (pts.empty())
        return false;

    int L = 0, R = static_cast<int>(pts.size()) - 1;

    // Written as a negated "inside" test so that NaN is rejected too.
    if (!(x >= pts[L].r && x <= pts[R].r))
        return false;

    while (R - L > 1)
    {
        const int M = L + (R - L) / 2;

        // An exact hit on the midpoint must be reported; without this test
        // the search would fall through and claim "not found".
        if (x == pts[M].r)
        {
            lt = rt = M;
            return true;
        }

        if (x > pts[M].r)
            L = M;
        else
            R = M;
    }

    if (x == pts[L].r)
    {
        lt = rt = L;
        return true;
    }

    if (x == pts[R].r)
    {
        lt = rt = R;
        return true;
    }

    lt = L;
    rt = R;
    return true;
}

double GreatCircleDistance::Interpolate(double R)
{
    int lt, rt;

    if (binarySearch(R, lt, rt))
    {
        double x0 = pts[lt].ra, x1 = pts[rt].ra;
        double y0 = pts[lt].r, y1 = pts[rt].r;
        double deltaX = x1 - x0, deltaY = y1 - y0;
        double ra = deltaX * (R - y0) / deltaY + x0;

        return ra;
    }

    return -1.0;
}

// Builds the lookup table: an initial (0, 0) entry followed by 10000 entries,
// one per angle step of 0.000001 starting at 0.000001.
//
// For each angle the code takes zRef = vincenty.z(0) and zNow = vincenty.z(angle)
// and stores
//   ra member = 1.0936 * |zRef - zNow|
//   r  member = 1.0936 * | sqrt(|zRef^2 - zNow^2|) - |zRef - zNow| |
// NOTE(review): 1.0936 is presumably the metres-to-yards factor - confirm.
GreatCircleDistance::GreatCircleDistance()
{
    const double angleStep = 0.000001;
    const double zRef = vincenty.z(0.0);
    double angle = angleStep;

    pts.push_back(PointRaR(0.0, 0.0));

    for (int produced = 0; produced < 10000; ++produced)
    {
        const double zNow = vincenty.z(angle);
        double gap = 0.0;
        double squareGap = 0.0;

        // Both differences are taken larger-minus-smaller so they stay non-negative.
        if (zRef >= zNow)
        {
            gap = zRef - zNow;
            squareGap = zRef * zRef - zNow * zNow;
        }
        else
        {
            gap = zNow - zRef;
            squareGap = zNow * zNow - zRef * zRef;
        }

        const double root = sqrt(squareGap);
        const double offset = gap >= root ? gap - root : root - gap;

        pts.push_back(PointRaR(1.0936 * gap, 1.0936 * offset));
        angle += angleStep;
    }
}

#include "pch.h"
#include "Vincenty.h"

// Definition of the static radius shared by all Vincenty objects; the member
// functions in this file scale angles and trigonometric values by it.
double Vincenty::Re = 6378137.0;	// radius of Earth in meters

// Angular separation of two points given as (phi, lambda) pairs, evaluated as
//   atan2( sqrt(a^2 + b^2), d )
// with
//   a = cos(phi2) sin(dLambda)
//   b = cos(phi1) sin(phi2) - sin(phi1) cos(phi2) cos(dLambda)
//   d = sin(phi1) sin(phi2) + cos(phi1) cos(phi2) cos(dLambda)
// and dLambda = lambda1 - lambda2.
double Vincenty::deltaSigma(
    double phi1, double lambda1,
    double phi2, double lambda2)
{
    const double dLambda = lambda1 - lambda2;

    const double c1 = cos(phi1);
    const double c2 = cos(phi2);
    const double s1 = sin(phi1);
    const double s2 = sin(phi2);
    const double cosD = cos(dLambda);
    const double sinD = sin(dLambda);

    const double a = c2 * sinD;
    const double b = c1 * s2 - s1 * c2 * cosD;
    const double d = s1 * s2 + c1 * c2 * cosD;

    return atan2(sqrt(a * a + b * b), d);
}

// Length of the arc between the two points: the angular separation from
// deltaSigma scaled by the radius Re.
double Vincenty::distance(
    double phi1, double lambda1,
    double phi2, double lambda2)
{
    const double centralAngle = deltaSigma(phi1, lambda1, phi2, lambda2);

    return Re * centralAngle;
}

// Arc length between phi1 and phi2 with both lambdas fixed at 0.
// distance() already multiplies the central angle by Re, so its result is
// returned as is; scaling by Re a second time would yield Re^2 * angle.
double Vincenty::distanceConstantLambda(
    double phi1, double phi2)
{
    return distance(phi1, 0.0, phi2, 0.0);
}

// Arc length, at lambda = 0, between the angle belonging to height 0 and the
// angle belonging to height z (both obtained from phi()).
// distance() already includes the factor Re, so it is not applied again.
double Vincenty::distanceConstantXY(double z)
{
    return distance(phi(0.0), 0.0, phi(z), 0.0);
}

// Cartesian x: Re * sin(phi) * cos(lambda).
double Vincenty::x(double phi, double lambda)
{
    const double sinPhi = sin(phi);
    const double cosLambda = cos(lambda);

    return Re * sinPhi * cosLambda;
}

// Cartesian y: Re * sin(phi) * sin(lambda).
double Vincenty::y(double phi, double lambda)
{
    const double sinPhi = sin(phi);
    const double sinLambda = sin(lambda);

    return Re * sinPhi * sinLambda;
}

// Cartesian z: Re * cos(phi).
double Vincenty::z(double phi)
{
    const double cosPhi = cos(phi);

    return Re * cosPhi;
}

// Inverse of z(): returns the angle phi with Re * cos(phi) == z.
// The ratio is divided by Re (not sqrt(Re)) so the two functions round-trip,
// and it is clamped to [-1, 1] so that rounding just outside that interval
// does not make acos return NaN.
double Vincenty::phi(double z)
{
    double ratio = z / Re;

    if (ratio > 1.0)
        ratio = 1.0;
    else if (ratio < -1.0)
        ratio = -1.0;

    return acos(ratio);
}

// Recovers lambda from the Cartesian pair produced by x() and y().
// atan2 keeps the quadrant (asin of y / hypot folds x < 0 onto x > 0) and
// returns 0 for x == y == 0 instead of evaluating 0 / 0.
double Vincenty::lambda(double x, double y)
{
    return atan2(y, x);
}

Blog Entry © Thursday, January 8, 2026, by James Pate Williams, Jr., Revised United States Navy Fast Battleship Iowa Class Artillery Ballistics Tables

Blog Entry © Sunday, January 4, 2026, by James Pate Williams, Jr. Iterative Deepening A* Search to Solve the Fifteen Tile Puzzle (Win32 C/C++ Release x64 Configuration)

Blog Entry © Thursday, January 1, 2026, by James Pate Williams, Jr., Win32 C/C++ Fast Battleship Class Iowa Ballistics Calculator (BB-61 Iowa, BB-62 New Jersey, BB-63 Missouri, BB-64 Wisconsin)

Blog Entry © Wednesday, December 24, 2025, by James Pate Williams, Jr. ID3 Decision Tree Metadata Parser

// ID3MetadataParser.cpp (c) December 2025
// by James Pate Williams, Jr.

#include "pch.h"

// Status codes reported through the errorCode / index out-parameters of the
// parsing helpers in this file.
// NOTE(review): the helpers in this file store 0 in errorCode on success,
// which is the value of FILE_EOF rather than NO_ERROR - confirm which one
// callers are meant to compare against.
// NOTE(review): NO_ERROR is also the name of a Windows SDK macro; if pch.h
// pulls in the Windows headers this definition may clash - verify.
#define FILE_EOF			0
#define NO_ERROR			1
#define EMPTY_FILE			2
#define INVALID_LINE		3
#define MISSING_NAME		4
#define INVALID_NAME		5
#define INVALID_DESCRIPTION 6
#define MISSING_DESCRIPTION 7
#define INVALID_TYPE		8
#define MISSING_TYPE		9
#define INVALID_RANGE		10
#define INVALID_CATEGORICAL	11
#define INVALID_DOUBLE		12
#define INVALID_FLOAT		13
#define INVALID_INTEGER		14
#define INVALID_ROLE		15
#define MISSING_ROLE		16

// Kinds of attribute a metadata record can describe.
enum AttributeType {
	categorical = 0,	// a set of single-character categories
	integer = 1,		// an integer range
	doubleReal = 2,		// a double-precision range
	FloatReal = 3		// a single-precision range
};

// Attribute whose values come from a list of single characters.
struct tagCategoricalAttribute {
	std::string name{};				// attribute name, empty by default
	std::string description{};		// free-text description, empty by default
	std::vector<char> category{};	// the category characters
};
using CategoricalAttribute = tagCategoricalAttribute;
using PCategoricalAttribute = tagCategoricalAttribute*;

// Attribute whose values lie in an integer range; both bounds start at -1.
struct tagIntegerAttribute {
	std::string name{};
	std::string description{};
	int loValue = -1;	// lower bound
	int hiValue = -1;	// upper bound
};
using IntegerAttribute = tagIntegerAttribute;
using PIntegerAttribute = tagIntegerAttribute*;

// Attribute whose values lie in a double-precision range; both bounds start at -1.0.
struct tagDoubleAttribute {
	std::string name{};
	std::string description{};
	double loValue = -1.0;	// lower bound
	double hiValue = -1.0;	// upper bound
};
using DoubleAttribute = tagDoubleAttribute;
using PDoubleAttribute = tagDoubleAttribute*;

// Attribute whose values lie in a single-precision range; both bounds start at -1.0f.
struct tagFloatAttribute {
	std::string name{};
	std::string description{};
	float loValue = -1.0f;	// lower bound
	float hiValue = -1.0f;	// upper bound
};
using FloatAttribute = tagFloatAttribute;
using PFloatAttribute = tagFloatAttribute*;

// Parses a "#name: feature <name>" or "#name: target <name>" line.
//
//   line      - NUL-terminated text of the line
//   length    - length of line; stored in index on success
//   errorCode - 0 on success, MISSING_NAME when neither tag is present,
//               INVALID_NAME when the name is empty, does not start with a
//               letter, or contains a character other than a letter, a digit
//               or a space
//   index     - on success set to length
//   feature   - true for a feature line, false for a target line
//   name      - receives the text following the tag
//
// Returns true on success. Every character is consumed or rejected, so the
// scan always terminates (the earlier loop never advanced past a character
// that was neither a letter nor a space).
static bool parseName(
	char line[],
	int length,
	int& errorCode,
	int& index,
	bool& feature,
	std::string& name)
{
	const char* featureHit = std::strstr(line, "#name: feature ");
	const char* targetHit = std::strstr(line, "#name: target ");

	if (featureHit == nullptr && targetHit == nullptr) {
		errorCode = MISSING_NAME;
		return false;
	}

	// Start right after the tag that was actually found, wherever it sits
	// in the line. A feature tag takes precedence over a target tag.
	const char* cursor = nullptr;

	if (featureHit != nullptr) {
		feature = true;
		cursor = featureHit + strlen("#name: feature ");
	}

	else {
		feature = false;
		cursor = targetHit + strlen("#name: target ");
	}

	index = static_cast<int>(cursor - line);

	auto isLetter = [](char ch) {
		return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
	};

	auto isDigit = [](char ch) {
		return ch >= '0' && ch <= '9';
	};

	// Covers the empty name as well: *cursor is then the terminator.
	if (!isLetter(*cursor)) {
		errorCode = INVALID_NAME;
		return false;
	}

	name = "";

	for (; *cursor != '\0'; ++cursor) {
		const char ch = *cursor;

		if (isLetter(ch) || isDigit(ch) || ch == ' ')
			name += ch;

		else {
			errorCode = INVALID_NAME;
			return false;
		}
	}

	errorCode = 0;
	index = length;
	return true;
}

// Parses a "#description: <text>" line.
//
// Fails with MISSING_DESCRIPTION when the tag does not occur in line and with
// INVALID_DESCRIPTION when length equals the tag length (no text follows).
// Otherwise the characters from the tag length up to length are appended to
// description, index ends at the position after the last one copied, and
// errorCode is set to 0.
static bool parseDescription(
	char line[],
	int length,
	int& errorCode,
	int& index,
	std::string& description) {

	if (std::strstr(line, "#description: ") == nullptr) {
		errorCode = MISSING_DESCRIPTION;
		return false;
	}

	const int tagLength = static_cast<int>(
		strlen("#description: "));

	if (tagLength == length) {
		errorCode = INVALID_DESCRIPTION;
		return false;
	}

	// Copy everything after the tag, one character at a time.
	for (index = tagLength; index < length; ++index)
		description += line[index];

	errorCode = 0;
	return true;
}

// Parses the brace list of a categorical type line, e.g.
// "#type: categorical {M,F,I}".
//
//   line      - NUL-terminated text of the line
//   length    - number of characters of line that may be examined
//   errorCode - 0 on success, INVALID_CATEGORICAL when the opening or closing
//               brace is missing or the list holds no category
//   index     - on success, the offset just past the closing brace
//   category  - receives every character between the braces except commas,
//               blanks and tabs
//
// The scan is bounded by both the terminator and length, so it never reads
// beyond the text of the line.
static bool parseCategorical(
	char line[],
	int length,
	int& errorCode,
	int& index,
	std::vector<char>& category) {
	const char* open = std::strchr(line, '{');

	if (open == nullptr) {
		errorCode = INVALID_CATEGORICAL;
		return false;
	}

	bool closed = false;
	int position = static_cast<int>(open - line) + 1;

	for (; position < length && line[position] != '\0'; ++position) {
		const char ch = line[position];

		if (ch == '}') {
			closed = true;
			++position;
			break;
		}

		// Separators and padding are not categories.
		if (ch == ',' || ch == ' ' || ch == '\t')
			continue;

		category.push_back(ch);
	}

	if (closed && category.size() != 0) {
		index = position;
		errorCode = 0;
		return true;
	}

	errorCode = INVALID_CATEGORICAL;
	return false;
}

// Parses "lo,hi]" from a line that begins with "#type: doubleReal [".
//
//   line      - NUL-terminated text of the whole line
//   length    - not used; the terminator bounds the scan
//   errorCode - 0 on success; INVALID_DOUBLE when a bound is empty or is not
//               a number; INVALID_RANGE when the ',' or ']' delimiter is
//               missing or lo is greater than hi
//   index     - set to the offset reached by the scan
//   hiDouble, loDouble - receive the bounds; written only on success
//
// Returns true on success.
static bool parseDoubleRange(
	char line[],
	int length,
	int& errorCode,
	int& index,
	double& hiDouble,
	double& loDouble)
{
	const int lineLength = static_cast<int>(strlen(line));
	std::string loText, hiText;

	index = static_cast<int>(strlen("#type: doubleReal ["));

	// Lower bound: everything up to the comma.
	while (index < lineLength && line[index] != ',')
		loText.push_back(line[index++]);

	if (loText.size() == 0) {
		errorCode = INVALID_DOUBLE;
		return false;
	}

	if (index >= lineLength) {
		errorCode = INVALID_RANGE;	// no ',' separator
		return false;
	}

	index++;	// step over ','

	// Upper bound: everything up to the closing bracket.
	while (index < lineLength && line[index] != ']')
		hiText.push_back(line[index++]);

	if (index >= lineLength) {
		errorCode = INVALID_RANGE;	// no closing ']'
		return false;
	}

	if (hiText.size() == 0) {
		errorCode = INVALID_DOUBLE;
		return false;
	}

	index++;	// step over ']'

	try {
		const double lo = std::stod(loText);
		const double hi = std::stod(hiText);

		if (lo > hi) {
			errorCode = INVALID_RANGE;
			return false;
		}

		loDouble = lo;
		hiDouble = hi;
	}
	catch (const std::exception&) {
		// stod throws for text that is not a number or does not fit a double
		errorCode = INVALID_DOUBLE;
		return false;
	}

	errorCode = 0;
	return true;
}

// Parses "lo,hi]" from a line that begins with "#type: floatReal [".
//
//   line      - NUL-terminated text of the whole line
//   length    - not used; the terminator bounds the scan
//   errorCode - 0 on success; INVALID_FLOAT when a bound is empty or is not
//               a number; INVALID_RANGE when the ',' or ']' delimiter is
//               missing or lo is greater than hi
//   index     - set to the offset reached by the scan
//   hiFloat, loFloat - receive the bounds; written only on success
//
// The scan starts right after the "#type: floatReal [" prefix, as
// parseDoubleRange does for its own prefix. Starting from the incoming index
// made the keyword part of the first number, so every line was rejected.
static bool parseFloatRange(
	char line[],
	int length,
	int& errorCode,
	int& index,
	float& hiFloat,
	float& loFloat)
{
	const int lineLength = static_cast<int>(strlen(line));
	std::string loText, hiText;

	index = static_cast<int>(strlen("#type: floatReal ["));

	// Lower bound: everything up to the comma.
	while (index < lineLength && line[index] != ',')
		loText.push_back(line[index++]);

	if (loText.size() == 0) {
		errorCode = INVALID_FLOAT;
		return false;
	}

	if (index >= lineLength) {
		errorCode = INVALID_RANGE;	// no ',' separator
		return false;
	}

	index++;	// step over ',' only; the next character belongs to hi

	// Upper bound: everything up to the closing bracket.
	while (index < lineLength && line[index] != ']')
		hiText.push_back(line[index++]);

	if (index >= lineLength) {
		errorCode = INVALID_RANGE;	// no closing ']'
		return false;
	}

	if (hiText.size() == 0) {
		errorCode = INVALID_FLOAT;
		return false;
	}

	index++;	// step over ']'

	try {
		const float lo = std::stof(loText);
		const float hi = std::stof(hiText);

		if (lo > hi) {
			errorCode = INVALID_RANGE;
			return false;
		}

		loFloat = lo;
		hiFloat = hi;
	}
	catch (const std::exception&) {
		// stof throws for text that is not a number or does not fit a float
		errorCode = INVALID_FLOAT;
		return false;
	}

	errorCode = 0;
	return true;
}

// Parses "lo,hi]" where both bounds are unsigned decimal integers.
//
//   line      - points at the first character after "integer [" (the caller
//               passes the line advanced to that position)
//   length    - not used; the terminator bounds the scan
//   errorCode - 0 on success; INVALID_INTEGER when a bound is missing, is
//               followed by something other than its delimiter, or does not
//               fit an int; INVALID_RANGE when ']' is missing or lo > hi
//   index     - advanced by the number of characters consumed
//   hiInteger, loInteger - receive the bounds; written only on success
//
// Both bounds are read wherever they occur, so a lower bound of any number
// of digits works (the upper bound is no longer read from a fixed offset).
static bool parseIntegerRange(
	char line[],
	int length,
	int& errorCode,
	int& index,
	int& hiInteger,
	int& loInteger) {
	int i = 0;
	std::string loText, hiText;

	while (line[i] >= '0' && line[i] <= '9')
		loText.push_back(line[i++]);

	if (loText.size() == 0 || line[i] != ',') {
		errorCode = INVALID_INTEGER;
		return false;
	}

	i++;	// step over ','

	// Allow blanks between the comma and the upper bound.
	while (line[i] == ' ')
		i++;

	while (line[i] >= '0' && line[i] <= '9')
		hiText.push_back(line[i++]);

	if (hiText.size() == 0) {
		errorCode = INVALID_INTEGER;
		return false;
	}

	if (line[i] != ']') {
		errorCode = INVALID_RANGE;
		return false;
	}

	i++;	// step over ']'

	try {
		const int lo = std::stoi(loText);
		const int hi = std::stoi(hiText);

		if (lo > hi) {
			errorCode = INVALID_RANGE;
			return false;
		}

		loInteger = lo;
		hiInteger = hi;
	}
	catch (const std::exception&) {
		// only digits reach stoi, so this is the out-of-range case
		errorCode = INVALID_INTEGER;
		return false;
	}

	index += i;
	errorCode = 0;
	return true;
}

// Parses a "#type: ..." line and dispatches on the keyword that follows.
//
// Recognised forms and the helper that handles each:
//   categorical {  -> parseCategorical (fills alphabet)
//   integer [      -> parseIntegerRange (fills loInteger / hiInteger)
//   doubleReal [   -> parseDoubleRange  (fills loDouble / hiDouble)
//   floatReal [    -> parseFloatRange   (fills loFloat / hiFloat)
//
// On success type receives the keyword and the function returns true.
// errorCode is MISSING_TYPE when the tag is absent, INVALID_TYPE when nothing
// follows the tag or no keyword matches, INVALID_CATEGORICAL when the
// category list is rejected, and otherwise whatever the range helper
// reported. A floatReal failure keeps the helper's code, as the integer and
// doubleReal branches do, instead of being overwritten with INVALID_TYPE.
static bool parseType(
	char line[],
	int length,
	double& hiDouble,
	double& loDouble,
	float& hiFloat,
	float& loFloat,
	int& errorCode,
	int& index,
	int& hiInteger,
	int& loInteger,
	std::string& type,
	std::vector<char>& alphabet) {

	char* cptr = std::strstr(line, "#type: ");

	if (cptr == nullptr) {
		errorCode = MISSING_TYPE;
		return false;
	}

	int lengthType = static_cast<int>(strlen("#type: "));

	if (lengthType >= length) {
		errorCode = INVALID_TYPE;
		return false;
	}

	index = lengthType;
	cptr = line + index;

	if (std::strstr(cptr, "categorical {") != nullptr) {
		if (!parseCategorical(line, length, errorCode,
			index, alphabet)) {
			errorCode = INVALID_CATEGORICAL;
			return false;
		}

		errorCode = 0;
		type = "categorical";
		return true;
	}

	if (std::strstr(cptr, "integer [") != nullptr) {
		// The integer helper expects to start at the first digit.
		if (!parseIntegerRange(
			line + index + strlen("integer ["),
			length,
			errorCode,
			index,
			hiInteger,
			loInteger))
			return false;

		type = "integer";
		return true;
	}

	if (std::strstr(cptr, "doubleReal [") != nullptr) {
		if (!parseDoubleRange(
			line,
			length,
			errorCode,
			index,
			hiDouble,
			loDouble))
			return false;

		type = "doubleReal";
		return true;
	}

	if (std::strstr(cptr, "floatReal [") != nullptr) {
		if (!parseFloatRange(
			line,
			length,
			errorCode,
			index,
			hiFloat,
			loFloat))
			return false;

		type = "floatReal";
		return true;
	}

	errorCode = INVALID_TYPE;
	return false;
}

// Reads the next metadata line into line (a buffer of at least 256 chars).
//
// Returns true when a non-empty line other than "#endheader" was read.
// Returns false otherwise, with:
//   errorCode = EMPTY_FILE            - nothing was read and index was -1
//   errorCode = 0, index = FILE_EOF   - end of file reached with nothing read
//   (both unchanged)                  - "#endheader" line or an empty line
//
// End of file is reported only when no characters were obtained: a last line
// that lacks a trailing newline sets eofbit although its text is valid, and
// that text is processed rather than discarded.
static bool readMetaDataLine(
	std::ifstream& file1,
	char line[],
	int& errorCode,
	int& index) {
	// Guarantees a terminated buffer even if the stream is already unusable.
	line[0] = '\0';
	file1.getline(line, 256);

	const bool gotText = strlen(line) > 0;

	if (!gotText && index == -1) {
		errorCode = EMPTY_FILE;
		return false;
	}

	if (!gotText && file1.eof()) {
		errorCode = 0;
		index = FILE_EOF;
		return false;
	}

	if (gotText &&
		std::strstr(line, "#endheader") != nullptr)
		return false;

	return gotText;
}

// Per-column extremes collected by readDatasetFile. Slot k of the double
// arrays belongs to the (k + 1)-th comma-separated field of each record;
// int_max / int_min belong to the ninth field.
double dbl_max[8] = { 0 };
double dbl_min[8] = { 0 };
int int_max = 0;
int int_min = 0;

// Scans a comma-separated dataset and records the smallest and largest value
// seen in each numeric column, then closes the stream.
//
// Fields 1..8 are read as doubles and field 9 as an int; a field that does
// not start like a number (for example a category letter) or that fails to
// convert is skipped. Before the scan the minima are seeded with the largest
// representable value and the maxima with the lowest one (-DBL_MAX, not
// DBL_MIN, which is the smallest positive double), so columns containing
// zero or negative values are tracked correctly.
static void readDatasetFile(
	std::ifstream& file2) {
	for (int i = 0; i < 8; i++) {
		dbl_min[i] = DBL_MAX;
		dbl_max[i] = -DBL_MAX;
	}

	int_min = INT_MAX;
	int_max = INT_MIN;

	// Cheap pre-check that keeps category letters away from stod / stoi.
	auto startsLikeNumber = [](const std::string& text) {
		if (text.empty())
			return false;

		const char first = text[0];

		return (first >= '0' && first <= '9') ||
			first == '+' || first == '-' || first == '.';
	};

	std::string record;

	// Looping on the read itself avoids processing a stale buffer at EOF.
	while (std::getline(file2, record)) {
		size_t begin = 0;

		for (int column = 1;
			column <= 9 && begin <= record.size();
			column++) {
			const size_t comma = record.find(',', begin);
			const size_t end =
				comma == std::string::npos ? record.size() : comma;
			const std::string field = record.substr(begin, end - begin);

			begin = end + 1;

			if (!startsLikeNumber(field))
				continue;

			try {
				if (column <= 8) {
					const double x = std::stod(field);

					if (x > dbl_max[column - 1])
						dbl_max[column - 1] = x;
					if (x < dbl_min[column - 1])
						dbl_min[column - 1] = x;
				}

				else {
					const int x = std::stoi(field);

					if (x > int_max)
						int_max = x;
					if (x < int_min)
						int_min = x;
				}
			}
			catch (const std::exception&) {
				// not a usable number: leave the extremes untouched
			}
		}
	}

	file2.close();
}

int main()
{
	bool feature = false;
	char filename1[256] = "C:\\Users\\James\\OneDrive\\Desktop\\ID3MetadataParser\\x64\\Debug\\ID3MetadataParserDataFile.txt";
	char filename2[256] = "C:\\Users\\James\\OneDrive\\Desktop\\ID3MetadataParser\\x64\\Debug\\abalone.data.txt";
	char line[256] = "";
	int errorCode = -1, index = -1, role = -1;
	std::ifstream file1(filename1);
	std::ifstream file2(filename2);

	// file1 format
	std::string name, description, type;
	std::vector<char> category;
	
	// file2 format
	std::string cat, length, diameter, height, whole;
	std::string shucked, viscera, shell, rings;
	
	std::vector<CategoricalAttribute> categoricalAttributes;
	std::vector<IntegerAttribute> integerAttributes;
	std::vector<DoubleAttribute> doubleAttributes;
	std::vector<FloatAttribute> floatAttributes;

	std::vector<std::string> names;
	std::vector<std::string> descriptions;
	std::vector<std::string> types;

	while (!file1.eof()) {
		index = -1;

		bool result = readMetaDataLine(
			file1,
			line,
			errorCode,
			index);

		if (!result)
			break;

		index = 0;
		int length = static_cast<int>(strlen(line));

		if (length == 0)
			break;
		
		name = "";

		bool pn = parseName(
			line,
			length,
			errorCode,
			index,
			feature,
			name);

		if (pn) {
			bool result = readMetaDataLine(
				file1,
				line,
				errorCode,
				index);

			if (!result)
				break;
		
			length = static_cast<int>(strlen(line));
			index = 0;
			description = "";

			bool pd = parseDescription(
				line,
				length,
				errorCode,
				index,
				description);

			if (pd) {
				bool result = readMetaDataLine(
					file1,
					line,
					errorCode,
					index);

				if (!result)
					break;

				length = static_cast<int>(strlen(line));
				index = 0;
				type = "";

				double hiDouble = DBL_MIN;
				double loDouble = DBL_MAX;
				float hiFloat = FLT_MIN;
				float loFloat = FLT_MAX;
				int hiInteger = INT_MIN;
				int loInteger = INT_MAX;

				bool pt = parseType(
					line,
					length,
					hiDouble,
					loDouble,
					hiFloat,
					loFloat,
					errorCode,
					index,
					hiInteger,
					loInteger,
					type,
					category);
					length = static_cast<int>(strlen(line));

				if (pt) {
					if (type == "categorical") {
						CategoricalAttribute ca;
						ca.category = category;
						ca.description = description;
						ca.name = name;
						categoricalAttributes.push_back(ca);
					}

					else if (type == "integer") {
						IntegerAttribute ia;
						ia.loValue = loInteger;
						ia.hiValue = hiInteger;
						ia.description = description;
						ia.name = name;
						integerAttributes.push_back(ia);
					}

					else if (type == "doubleReal") {
						DoubleAttribute da;
						da.loValue = loDouble;
						da.hiValue = hiDouble;
						da.description = description;
						da.name = name;
						doubleAttributes.push_back(da);
					}

					else if (type == "floatReal") {
						FloatAttribute fa;
						fa.loValue = loFloat;
						fa.hiValue = hiFloat;
						fa.description = description;
						fa.name = name;
						floatAttributes.push_back(fa);
					}

					else {
						errorCode = INVALID_TYPE;
						break;
					}
				}

				else {
					errorCode = MISSING_TYPE;
					break;
				}
			}

			else {
				errorCode = INVALID_DESCRIPTION;
				break;
			}
		}

		else {
			errorCode = INVALID_NAME;
			return false;
		}
	}

	readDatasetFile(file2);

	for (int i = 1; i <= 7; i++) {
		std::cout << i << '\t' << dbl_min[i];
		std::cout << '\t' << dbl_max[i];
		std::cout << std::endl;
	}

	std::cout << "8\t" << int_min << '\t' << int_max;
	std::cout << std::endl;
	std::cout << std::endl;

	for (int i = 0; i < static_cast<int>(categoricalAttributes.size()); i++) {
		std::cout << categoricalAttributes[i].name << ' ';
		std::cout << categoricalAttributes[i].description << ' ';
		std::cout << std::endl;
	}

	for (int i = 0; i < static_cast<int>(doubleAttributes.size()); i++) {
		std::cout << doubleAttributes[i].name << ' ';
		std::cout << doubleAttributes[i].description << ' ';
		std::cout << doubleAttributes[i].loValue << ' ';
		std::cout << doubleAttributes[i].hiValue;
		std::cout << std::endl;
	}

	for (int i = 0; i < static_cast<int>(floatAttributes.size()); i++) {
		std::cout << floatAttributes[i].name << ' ';
		std::cout << floatAttributes[i].description << ' ';
		std::cout << floatAttributes[i].loValue << ' ';
		std::cout << floatAttributes[i].hiValue;
		std::cout << std::endl;
	}

	for (int i = 0; i < static_cast<int>(integerAttributes.size()); i++) {
		std::cout << integerAttributes[i].name << ' ';
		std::cout << integerAttributes[i].description << ' ';
		std::cout << integerAttributes[i].loValue << ' ';
		std::cout << integerAttributes[i].hiValue;
		std::cout << std::endl;
	}

	file1.close();
	return 0;
}