https://www.sciencedirect.com/science/article/pii/0022247X73902771
https://en.wikipedia.org/wiki/Laplace%27s_equation
https://math.libretexts.org/Bookshelves/Differential_Equations/A_First_Course_in_Differential_Equations_for_Scientists_and_Engineers_(Herman)/04%3A_Series_Solutions/4.05%3A_Legendre_Polynomials#:~:text=Solutions%20to%20this%20equation%2C%20Pm%20n%28x%29%20and%20Qm,Legendre%20functions%20of%20the%20first%20and%20second%20kind
Fast and Slow Matrix Multiplication by James Pate Williams, Jr. © January 13, 2024, All Applicable Rights Reserved
Note: For comparison with a paper on matrix operations, we use column major order also known as column major echelon order. Column major order is used in Fortran and row major order is utilized in C computations.
Here is the supposedly slow C source code:
void ColMajorSlowMatMul(double** a, double** b, double** c, int n)
{
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
c[j][k] = 0.0;
for (int j = 1; j <= n; j++)
{
for (int i = 1; i <= n; i++)
{
double t = 0;
for (int k = 1; k <= n; k++)
t += a[i][k] * b[k][j];
c[i][j] = t;
}
}
}
And now we present the allegedly fast multiplication code:
void ColMajorFastMatMul(double** a, double** b, double** c, int n)
{
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
c[j][k] = 0.0;
for (int j = 1; j <= n; j++)
{
for (int k = 1; k <= n; k++)
{
double t = b[k][j];
for (int i = 1; i <= n; i++)
c[i][j] += a[i][k] * t;
}
}
}
Finally, we have another algorithm for matrix multiplication:
void ColMajorSemiMatMul(double** a, double** b, double** c, int n)
{
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
c[j][k] = 0.0;
for (int j = 1; j <= n; j++)
{
for (int i = 1; i <= n; i++)
{
for (int k = 1; k <= n; k++)
c[i][j] += a[i][k] * b[k][j];
}
}
}
We performed the following experiments.
slow runtime in microseconds: 2736
semi runtime in microseconds: 4001
fast runtime in microseconds: 5293
n * n = 10000
equal = 10000
n * n = 10000
equal = 10000
slow runtime in microseconds: 21962
semi runtime in microseconds: 29585
fast runtime in microseconds: 28701
n * n = 40000
equal = 40000
n * n = 40000
equal = 40000
slow runtime in microseconds: 67256
semi runtime in microseconds: 101554
fast runtime in microseconds: 107969
n * n = 90000
equal = 90000
n * n = 90000
equal = 90000
slow runtime in microseconds: 183839
semi runtime in microseconds: 268616
fast runtime in microseconds: 244832
n * n = 160000
equal = 160000
n * n = 160000
equal = 160000
slow runtime in microseconds: 428263
semi runtime in microseconds: 638721
fast runtime in microseconds: 650535
n * n = 250000
equal = 250000
n * n = 250000
equal = 250000
C:\Users\james\source\repos\FastMatrixMultiplication\Debug\FastMatrixMultiplication.exe (process 29552) exited with code 0.
Press any key to close this window . . .
// FastMatrixMultiplication.cpp : This file contains the 'main' function. Program execution begins and ends there.
// https://www-users.york.ac.uk/~mijp1/teaching/grad_HPC_for_MatSci/Lecture4.pdf
// https://stackoverflow.com/questions/25483620/how-to-measure-running-time-of-specific-function-in-c-very-accurate
#include <stdlib.h>
#include <chrono>
#include <iostream>
using namespace std;
typedef chrono::high_resolution_clock Clock;
void ColMajorFastMatMul(double** a, double** b, double** c, int n)
{
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
c[j][k] = 0.0;
for (int j = 1; j <= n; j++)
{
for (int k = 1; k <= n; k++)
{
double t = b[k][j];
for (int i = 1; i <= n; i++)
c[i][j] += a[i][k] * t;
}
}
}
void ColMajorSemiMatMul(double** a, double** b, double** c, int n)
{
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
c[j][k] = 0.0;
for (int j = 1; j <= n; j++)
{
for (int i = 1; i <= n; i++)
{
for (int k = 1; k <= n; k++)
c[i][j] += a[i][k] * b[k][j];
}
}
}
void ColMajorSlowMatMul(double** a, double** b, double** c, int n)
{
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
c[j][k] = 0.0;
for (int j = 1; j <= n; j++)
{
for (int i = 1; i <= n; i++)
{
double t = 0;
for (int k = 1; k <= n; k++)
t += a[i][k] * b[k][j];
c[i][j] = t;
}
}
}
void GenerateMatrix(double** a, int n, int seed)
{
srand(seed);
for (int j = 1; j <= n; j++)
{
for (int k = 1; k <= n; k++)
{
a[j][k] = (double)rand() / RAND_MAX;
}
}
}
int main()
{
for (int n = 100; n <= 500; n += 100)
{
double** a = new double* [n + 1];
double** b = new double* [n + 1];
double** c = new double* [n + 1];
double** d = new double* [n + 1];
double** e = new double* [n + 1];
for (int j = 0; j < n + 1; j++)
{
a[j] = new double[n + 1];
b[j] = new double[n + 1];
c[j] = new double[n + 1];
d[j] = new double[n + 1];
e[j] = new double[n + 1];
}
GenerateMatrix(a, n, 1);
GenerateMatrix(b, n, 2);
auto clock1 = Clock::now();
ColMajorSlowMatMul(a, b, c, n);
auto clock2 = Clock::now();
long long microseconds1 = chrono::duration_cast<chrono::microseconds>
(clock2 - clock1).count();
auto clock3 = Clock::now();
ColMajorSemiMatMul(a, b, d, n);
auto clock4 = Clock::now();
long long microseconds2 = chrono::duration_cast<chrono::microseconds>
(clock4 - clock3).count();
auto clock5 = Clock::now();
ColMajorFastMatMul(a, b, e, n);
auto clock6 = Clock::now();
long long microseconds3 = chrono::duration_cast<chrono::microseconds>
(clock6 - clock5).count();
cout << "slow runtime in microseconds: " << microseconds1 << endl;
cout << "semi runtime in microseconds: " << microseconds2 << endl;
cout << "fast runtime in microseconds: " << microseconds3 << endl;
long long equal = 0;
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
if (c[j][k] == d[j][k])
equal++;
cout << "n * n = " << n * n << endl;
cout << "equal = " << equal << endl;
equal = 0;
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
if (c[j][k] == e[j][k])
equal++;
cout << "n * n = " << n * n << endl;
cout << "equal = " << equal << endl;
for (int j = 0; j < n + 1; j++)
{
delete[] a[j];
delete[] b[j];
delete[] c[j];
delete[] d[j];
delete[] e[j];
}
delete[] a;
delete[] b;
delete[] c;
delete[] d;
delete[] e;
}
}
Sixth Order Galerkin Approximate Solution of a Second Order Linear Ordinary Differential Equation by James Pate Williams, Jr. January 12, 2024
Second Order Linear Boundary Value Problem Solution


Complex Contour (Conformal) Mapping by James Pate Williams, Jr. on January 9, 2024



New Jacobi Polynomials Application January 8, 2024, by James Pate Williams, Jr.
The two primary references used to create my application were: “A Numerical Library in C for Scientists and Engineers” by H. T. Lau and the following website: https://en.wikipedia.org/wiki/Jacobi_polynomials.
Using the Jacobi parameters alpha = 0 and beta = 0, we have the Legendre polynomials for degrees 4 and 6 and their associated roots:




Using alpha = 0.5 and beta= 0.5 we obtain for degrees 4 and 6:




Electron Probability Distribution Function Etc. (c) James Pate Williams, Jr. December 2023
More than Four Dimensions Why Worry a Blog Entry by James Pate Williams, Jr. December 27, 2023
Some modern physical models of our universe require more than Einstein’s four dimensions: three spatial dimensions and one time dimension. Why do people worry about introducing more dimensions into our understanding of chemistry and physics? When Erwin Schrödinger introduced his famous quantum mechanical two-body solution of the time independent hydrogen-like atom wave equation he went four dimensions to three spatial dimensions. Later, Wolfgang Pauli espoused his famous Pauli Exclusion Principle that simply stated no two electrons (fermions) in an atomic orbital can have the same quantum spin number. Atoms live in a four-dimensional quantum number space augmented by three spatial dimensions and one time dimension.