A Benchmark of matrix multiplication between C and Python
#Motivation
After a Python convention in my city (Python Brasil), I (an unqualified newbie) and a friend of mine from comp. sci. academia discussed with a few colleagues the potential advantages of Python, including its application in the scientific field for numerical applications.
One of their arguments was that runtime optimization provided by pypy offered a significant advantage over C.
Well without further ado, here are the source codes for each language.
#Source Codes
**python w/ numpy**
#! /usr/bin/python
import sys
import numpy as np
import time
def timed_matmul(m1, m2):
    """Multiply ``m1`` by ``m2`` with ``np.matmul`` and time only that call.

    Args:
        m1: 2-D array, left operand.
        m2: 2-D array, right operand.

    Returns:
        A ``(product, elapsed_ms)`` tuple: the result array and the
        wall-clock duration of the ``np.matmul`` call in milliseconds.
    """
    # Size the output from both operands; using m1.shape alone is only
    # correct when the matrices are square.
    m3 = np.zeros((m1.shape[0], m2.shape[1]))
    start = time.time()
    np.matmul(m1, m2, out=m3)
    end = time.time()
    return m3, (end - start) * 1000.0


def main(argv):
    """Run the benchmark: ``script <n> <matrix1-file> <matrix2-file>``."""
    # n is still parsed so a non-integer first argument is rejected, but the
    # matrix dimensions themselves come from the files.
    int(argv[1])
    # ndmin=2 keeps single-row / single-value files two-dimensional.
    m1 = np.loadtxt(argv[2], ndmin=2)
    m2 = np.loadtxt(argv[3], ndmin=2)
    m3, elapsed_ms = timed_matmul(m1, m2)
    # Single-argument print(...) is valid under both Python 2 and Python 3.
    print(m3)
    print('Time: %s' % elapsed_ms)


if __name__ == '__main__':
    main(sys.argv)
**python**
#! /usr/bin/python2
import sys
import time
def parse_matrix(text, size=None):
    """Parse whitespace-separated rows of numbers into a list of float lists.

    Args:
        text: Matrix text, one row per line.
        size: Keep only the first ``size`` rows and columns; ``None`` keeps
            everything.

    Returns:
        A list of rows, each a list of floats. Blank lines are ignored.

    Raises:
        ValueError: If a token cannot be converted to ``float``.
    """
    # split() with no argument tolerates tabs, repeated spaces and the
    # trailing space at the end of each row.
    rows = [line.split() for line in text.splitlines() if line.strip()]
    return [[float(tok) for tok in row[:size]] for row in rows[:size]]


def readm(filename, size=None):
    """Read a matrix from ``filename``; see ``parse_matrix`` for ``size``."""
    # The context manager closes the file even if parsing raises.
    with open(filename, 'r') as f:
        return parse_matrix(f.read(), size)


def multiply(m1, m2, n):
    """Multiply the leading ``n`` x ``n`` parts of ``m1`` and ``m2`` naively.

    Returns:
        A ``(product, elapsed_ms)`` tuple; only the triple loop is timed,
        not the allocation of the result.
    """
    m3 = [[0 for _ in range(n)] for _ in range(n)]
    start = time.time()
    for i in range(n):
        for j in range(n):
            for k in range(n):
                m3[i][j] += m1[i][k] * m2[k][j]
    end = time.time()
    return m3, (end - start) * 1000.0


def main(argv):
    """Run the benchmark: ``script <n> <matrix1-file> <matrix2-file>``."""
    n = int(argv[1])
    m1 = readm(argv[2], n)
    m2 = readm(argv[3], n)
    m3, elapsed_ms = multiply(m1, m2, n)
    for row in m3:
        # Keeps the trailing space the original `print x,` loop emitted.
        # Single-argument print(...) is valid under Python 2 and Python 3.
        print(' '.join(str(value) for value in row) + ' ')
    print('Time: %s' % elapsed_ms)


if __name__ == '__main__':
    main(sys.argv)
**C**
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/*
 * Read an n x n matrix of doubles from the text stream `f` into `m`.
 *
 * Built with -DARRAY, `m` is one flat buffer of n * n doubles filled in file
 * order; built with -DMATRIX, `m` is an array of n row pointers, each to n
 * doubles. The caller must have allocated the storage.
 *
 * If a value cannot be read (short or malformed input) the function reports
 * the problem on stderr and terminates the program, instead of leaving the
 * remaining elements uninitialised.
 */
#ifdef ARRAY
void readm(FILE* f, int n, double* m) {
    int i;
    for (i = 0; i < n * n; ++i) {
        if (fscanf(f, "%lf", &m[i]) != 1) {
            fprintf(stderr, "readm: could not read value %d of %d\n", i + 1, n * n);
            exit(EXIT_FAILURE);
        }
    }
}
#endif
#ifdef MATRIX
void readm(FILE* f, int n, double** m) {
    int i, j;
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) {
            if (fscanf(f, "%lf", &m[i][j]) != 1) {
                fprintf(stderr, "readm: could not read element (%d, %d)\n", i, j);
                exit(EXIT_FAILURE);
            }
        }
    }
}
#endif
int main(int argc, char** argv) {
int i, j, k;
double start, end;
struct timeval tv_start, tv_end;
int n = atoi(argv[1]);
FILE* f1 = fopen(argv[2], "r");
FILE* f2 = fopen(argv[3], "r");
#ifdef ARRAY
double* m1 = (double*) malloc(sizeof(double) * n * n);
double* m2 = (double*) malloc(sizeof(double) * n * n);
double* m3 = (double*) malloc(sizeof(double) * n * n);
for (i = 0; i < n * n; ++i) m3[i] = 0;
readm(f1, n, m1);
readm(f2, n, m2);
gettimeofday(&tv_start, NULL);
for (i = 0; i < n; ++i)
for (j = 0; j < n; ++j)
for (k = 0; k < n; ++k)
m3[(i * n) + j] += (m1[(i * n) + k] * m2[(k * n) + j]);
gettimeofday(&tv_end, NULL);
for (i = 0; i < n; ++i) {
for (j = 0; j < n; ++j)
fprintf(stderr, "%lf ", m3[(i * n) + j]);
fprintf(stderr, "\n");
}
#endif
#ifdef MATRIX
double** m1 = (double**) malloc(sizeof(double*) * n);
double** m2 = (double**) malloc(sizeof(double*) * n);
double** m3 = (double**) malloc(sizeof(double*) * n);
for (i = 0; i < n; ++i) {
m1[i] = (double*) malloc(sizeof(double) * n);
m2[i] = (double*) malloc(sizeof(double) * n);
m3[i] = (double*) malloc(sizeof(double) * n);
}
for (i = 0; i < n; ++i)
for (j = 0; j < n; ++j)
m3[i][j] = 0;
readm(f1, n, m1);
readm(f2, n, m2);
gettimeofday(&tv_start, NULL);
for (i = 0; i < n; ++i)
for (j = 0; j < n; ++j)
for (k = 0; k < n; ++k)
m3[i][j] += (m1[i][k] * m2[k][j]);
gettimeofday(&tv_end, NUL
#Motivation
After a Python convention in my city (Python Brasil), I (an unqualified newbie) and a friend of mine from comp. sci. academia discussed with a few colleagues the potential advantages of Python, including its application in the scientific field for numerical applications.
One of their arguments was that runtime optimization provided by pypy offered a significant advantage over C.
Well without further ado, here are the source codes for each language.
#Source Codes
**python w/ numpy**
#! /usr/bin/python
import sys
import numpy as np
import time
def timed_matmul(m1, m2):
    """Multiply ``m1`` by ``m2`` with ``np.matmul`` and time only that call.

    Args:
        m1: 2-D array, left operand.
        m2: 2-D array, right operand.

    Returns:
        A ``(product, elapsed_ms)`` tuple: the result array and the
        wall-clock duration of the ``np.matmul`` call in milliseconds.
    """
    # Size the output from both operands; using m1.shape alone is only
    # correct when the matrices are square.
    m3 = np.zeros((m1.shape[0], m2.shape[1]))
    start = time.time()
    np.matmul(m1, m2, out=m3)
    end = time.time()
    return m3, (end - start) * 1000.0


def main(argv):
    """Run the benchmark: ``script <n> <matrix1-file> <matrix2-file>``."""
    # n is still parsed so a non-integer first argument is rejected, but the
    # matrix dimensions themselves come from the files.
    int(argv[1])
    # ndmin=2 keeps single-row / single-value files two-dimensional.
    m1 = np.loadtxt(argv[2], ndmin=2)
    m2 = np.loadtxt(argv[3], ndmin=2)
    m3, elapsed_ms = timed_matmul(m1, m2)
    # Single-argument print(...) is valid under both Python 2 and Python 3.
    print(m3)
    print('Time: %s' % elapsed_ms)


if __name__ == '__main__':
    main(sys.argv)
**python**
#! /usr/bin/python2
import sys
import time
def parse_matrix(text, size=None):
    """Parse whitespace-separated rows of numbers into a list of float lists.

    Args:
        text: Matrix text, one row per line.
        size: Keep only the first ``size`` rows and columns; ``None`` keeps
            everything.

    Returns:
        A list of rows, each a list of floats. Blank lines are ignored.

    Raises:
        ValueError: If a token cannot be converted to ``float``.
    """
    # split() with no argument tolerates tabs, repeated spaces and the
    # trailing space at the end of each row.
    rows = [line.split() for line in text.splitlines() if line.strip()]
    return [[float(tok) for tok in row[:size]] for row in rows[:size]]


def readm(filename, size=None):
    """Read a matrix from ``filename``; see ``parse_matrix`` for ``size``."""
    # The context manager closes the file even if parsing raises.
    with open(filename, 'r') as f:
        return parse_matrix(f.read(), size)


def multiply(m1, m2, n):
    """Multiply the leading ``n`` x ``n`` parts of ``m1`` and ``m2`` naively.

    Returns:
        A ``(product, elapsed_ms)`` tuple; only the triple loop is timed,
        not the allocation of the result.
    """
    m3 = [[0 for _ in range(n)] for _ in range(n)]
    start = time.time()
    for i in range(n):
        for j in range(n):
            for k in range(n):
                m3[i][j] += m1[i][k] * m2[k][j]
    end = time.time()
    return m3, (end - start) * 1000.0


def main(argv):
    """Run the benchmark: ``script <n> <matrix1-file> <matrix2-file>``."""
    n = int(argv[1])
    m1 = readm(argv[2], n)
    m2 = readm(argv[3], n)
    m3, elapsed_ms = multiply(m1, m2, n)
    for row in m3:
        # Keeps the trailing space the original `print x,` loop emitted.
        # Single-argument print(...) is valid under Python 2 and Python 3.
        print(' '.join(str(value) for value in row) + ' ')
    print('Time: %s' % elapsed_ms)


if __name__ == '__main__':
    main(sys.argv)
**C**
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/*
 * Read an n x n matrix of doubles from the text stream `f` into `m`.
 *
 * Built with -DARRAY, `m` is one flat buffer of n * n doubles filled in file
 * order; built with -DMATRIX, `m` is an array of n row pointers, each to n
 * doubles. The caller must have allocated the storage.
 *
 * If a value cannot be read (short or malformed input) the function reports
 * the problem on stderr and terminates the program, instead of leaving the
 * remaining elements uninitialised.
 */
#ifdef ARRAY
void readm(FILE* f, int n, double* m) {
    int i;
    for (i = 0; i < n * n; ++i) {
        if (fscanf(f, "%lf", &m[i]) != 1) {
            fprintf(stderr, "readm: could not read value %d of %d\n", i + 1, n * n);
            exit(EXIT_FAILURE);
        }
    }
}
#endif
#ifdef MATRIX
void readm(FILE* f, int n, double** m) {
    int i, j;
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) {
            if (fscanf(f, "%lf", &m[i][j]) != 1) {
                fprintf(stderr, "readm: could not read element (%d, %d)\n", i, j);
                exit(EXIT_FAILURE);
            }
        }
    }
}
#endif
/*
 * Benchmark driver: multiplies two n x n matrices read from text files with
 * the naive triple loop and reports how long the multiplication took.
 *
 * Usage: <program> <n> <matrix1-file> <matrix2-file>
 *
 * Build with -DARRAY (flat buffers indexed as i * n + j) or -DMATRIX (array
 * of row pointers). The product is written to stderr and the elapsed time in
 * milliseconds to stdout as "Time:<ms>". Only the multiplication loop sits
 * between the two gettimeofday() calls.
 *
 * Returns 0 on success and 1 on bad usage, an unopenable input file or an
 * allocation failure.
 */
int main(int argc, char** argv) {
    int i, j, k;
    int n;
    double start, end;
    struct timeval tv_start = {0, 0};
    struct timeval tv_end = {0, 0};
    FILE* f1;
    FILE* f2;

    if (argc < 4) {
        fprintf(stderr, "usage: %s <n> <matrix1-file> <matrix2-file>\n",
                argc > 0 ? argv[0] : "matmul");
        return 1;
    }
    n = atoi(argv[1]);
    if (n <= 0) {
        fprintf(stderr, "n must be a positive integer, got '%s'\n", argv[1]);
        return 1;
    }
    f1 = fopen(argv[2], "r");
    if (f1 == NULL) {
        perror(argv[2]);
        return 1;
    }
    f2 = fopen(argv[3], "r");
    if (f2 == NULL) {
        perror(argv[3]);
        fclose(f1);
        return 1;
    }
#ifdef ARRAY
    double* m1 = (double*) malloc(sizeof(double) * n * n);
    double* m2 = (double*) malloc(sizeof(double) * n * n);
    double* m3 = (double*) malloc(sizeof(double) * n * n);
    if (m1 == NULL || m2 == NULL || m3 == NULL) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }
    for (i = 0; i < n * n; ++i) m3[i] = 0;
    readm(f1, n, m1);
    readm(f2, n, m2);
    gettimeofday(&tv_start, NULL);
    for (i = 0; i < n; ++i)
        for (j = 0; j < n; ++j)
            for (k = 0; k < n; ++k)
                m3[(i * n) + j] += (m1[(i * n) + k] * m2[(k * n) + j]);
    gettimeofday(&tv_end, NULL);
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j)
            fprintf(stderr, "%lf ", m3[(i * n) + j]);
        fprintf(stderr, "\n");
    }
    free(m1);
    free(m2);
    free(m3);
#endif
#ifdef MATRIX
    double** m1 = (double**) malloc(sizeof(double*) * n);
    double** m2 = (double**) malloc(sizeof(double*) * n);
    double** m3 = (double**) malloc(sizeof(double*) * n);
    if (m1 == NULL || m2 == NULL || m3 == NULL) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }
    for (i = 0; i < n; ++i) {
        m1[i] = (double*) malloc(sizeof(double) * n);
        m2[i] = (double*) malloc(sizeof(double) * n);
        m3[i] = (double*) malloc(sizeof(double) * n);
        if (m1[i] == NULL || m2[i] == NULL || m3[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            return 1;
        }
    }
    for (i = 0; i < n; ++i)
        for (j = 0; j < n; ++j)
            m3[i][j] = 0;
    readm(f1, n, m1);
    readm(f2, n, m2);
    gettimeofday(&tv_start, NULL);
    for (i = 0; i < n; ++i)
        for (j = 0; j < n; ++j)
            for (k = 0; k < n; ++k)
                m3[i][j] += (m1[i][k] * m2[k][j]);
    gettimeofday(&tv_end, NULL);
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j)
            fprintf(stderr, "%lf ", m3[i][j]);
        fprintf(stderr, "\n");
    }
    for (i = 0; i < n; ++i) {
        free(m1[i]);
        free(m2[i]);
        free(m3[i]);
    }
    free(m1);
    free(m2);
    free(m3);
#endif
    fclose(f1);
    fclose(f2);
    /* Convert both timestamps to milliseconds before subtracting. */
    start = ((double) tv_start.tv_sec * 1000.0) + ((double) tv_start.tv_usec / 1000.0);
    end = ((double) tv_end.tv_sec * 1000.0) + ((double) tv_end.tv_usec / 1000.0);
    printf("Time:%lf\n", end - start);
    return 0;
}
**matrix generation code**
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <string.h>
/*
 * Matrix generator: writes an n x n matrix of pseudo-random non-negative
 * doubles to the file "<argv[1]>.matrix", one row per line with a space
 * after every value.
 *
 * Usage: <program> <n>
 *
 * Each value is rand() / RAND_MAX scaled by 10^(rand() % 4), i.e. by 1, 10,
 * 100 or 1000. Returns 0 on success and 1 on bad usage or an I/O error.
 */
int main(int argc, char** argv) {
    char buf[128];
    FILE* f;
    int n, i, j, len;
    double num;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <n>\n", argc > 0 ? argv[0] : "genmatrix");
        return 1;
    }
    n = atoi(argv[1]);
    if (n <= 0) {
        fprintf(stderr, "n must be a positive integer, got '%s'\n", argv[1]);
        return 1;
    }
    /* snprintf bounds the write; strcpy/strcat could overflow buf when the
       argument is long. */
    len = snprintf(buf, sizeof buf, "%s.matrix", argv[1]);
    if (len < 0 || (size_t) len >= sizeof buf) {
        fprintf(stderr, "argument too long for output file name\n");
        return 1;
    }
    f = fopen(buf, "w");
    if (f == NULL) {
        perror(buf);
        return 1;
    }
    srand((unsigned) time(0));
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) {
            num = (double) rand() / (double) RAND_MAX;
            num = num * pow(10, rand() % 4);
            fprintf(f, "%lf ", num);
        }
        fprintf(f, "\n");
    }
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(f) != 0) {
        perror(buf);
        return 1;
    }
    return 0;
}
The machine used had an i7 (this is as much as the half-assed bastard managed to tell me), he used Debian Stretch
for the python code, -O3 optimization for C.
[Full results](https://i.imgur.com/S5YC4kM.png)
[Results without standard naïve Python](https://i.imgur.com/13ZjEuK.png)
[Logarithm scale](https://i.imgur.com/TNXjyyG.png)
From my understanding, numpy simply calls a pre-compiled function for matrix multiplication that is, in and of itself, implemented in C++; it's also possibly implemented using the Strassen algorithm and/or some form of parallelization(?), which would be much faster than the naïve implementation of matrix multiplication.
Then again, I don't know much, I couldn't even figure out how to use github to do this quick post.
Sorry for the atrocious English, hope this benchmark proves itself informative :D
/r/Python
https://redd.it/79wmcw
for (i = 0; i < n; ++i) {
for (j = 0; j < n; ++j)
fprintf(stderr, "%lf ", m3[i][j]);
fprintf(stderr, "\n");
}
#endif
start = ((double) tv_start.tv_sec * 1000.0) + ((double) tv_start.tv_usec / 1000.0);
end = ((double) tv_end.tv_sec * 1000.0) + ((double) tv_end.tv_usec / 1000.0);
printf("Time:%lf\n", end - start);
return 0;
}
**matrix generation code**
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <string.h>
/*
 * Matrix generator: writes an n x n matrix of pseudo-random non-negative
 * doubles to the file "<argv[1]>.matrix", one row per line with a space
 * after every value.
 *
 * Usage: <program> <n>
 *
 * Each value is rand() / RAND_MAX scaled by 10^(rand() % 4), i.e. by 1, 10,
 * 100 or 1000. Returns 0 on success and 1 on bad usage or an I/O error.
 */
int main(int argc, char** argv) {
    char buf[128];
    FILE* f;
    int n, i, j, len;
    double num;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <n>\n", argc > 0 ? argv[0] : "genmatrix");
        return 1;
    }
    n = atoi(argv[1]);
    if (n <= 0) {
        fprintf(stderr, "n must be a positive integer, got '%s'\n", argv[1]);
        return 1;
    }
    /* snprintf bounds the write; strcpy/strcat could overflow buf when the
       argument is long. */
    len = snprintf(buf, sizeof buf, "%s.matrix", argv[1]);
    if (len < 0 || (size_t) len >= sizeof buf) {
        fprintf(stderr, "argument too long for output file name\n");
        return 1;
    }
    f = fopen(buf, "w");
    if (f == NULL) {
        perror(buf);
        return 1;
    }
    srand((unsigned) time(0));
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) {
            num = (double) rand() / (double) RAND_MAX;
            num = num * pow(10, rand() % 4);
            fprintf(f, "%lf ", num);
        }
        fprintf(f, "\n");
    }
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(f) != 0) {
        perror(buf);
        return 1;
    }
    return 0;
}
The machine used had an i7 (this is as much as the half-assed bastard managed to tell me), he used Debian Stretch
for the python code, -O3 optimization for C.
[Full results](https://i.imgur.com/S5YC4kM.png)
[Results without standard naïve Python](https://i.imgur.com/13ZjEuK.png)
[Logarithm scale](https://i.imgur.com/TNXjyyG.png)
From my understanding, numpy simply calls a pre-compiled function for matrix multiplication that is, in and of itself, implemented in C++; it's also possibly implemented using the Strassen algorithm and/or some form of parallelization(?), which would be much faster than the naïve implementation of matrix multiplication.
Then again, I don't know much, I couldn't even figure out how to use github to do this quick post.
Sorry for the atrocious English, hope this benchmark proves itself informative :D
/r/Python
https://redd.it/79wmcw
How to interop between C and python
First of all, I want to make it clear that wrapping is a whole area, and what you're about to read is just the basics to get you started if you're interested in the subject.
basically every kind of wrapping code consists of you describing the bytes of each function (input and output), or the schema of each structure
For our example we will use a basic module of the 4 operations in C, (yes it's useless, it's just to demonstrate how it works)
#### Generate the linker object
Save it as **cmodule.c**
~~~c
/*
 * cmodule.c - four basic arithmetic operations, built as a shared library.
 *
 * CMODULE_API expands to __declspec(dllexport) on Windows so the functions
 * are exported from the DLL, and to nothing elsewhere. It is applied to the
 * definitions themselves rather than to separate redeclarations.
 */
#ifdef _WIN32
#define CMODULE_API __declspec(dllexport)
#else
#define CMODULE_API
#endif

/* Returns x + y. */
CMODULE_API int add(int x, int y) {
    return x + y;
}

/* Returns x - y. */
CMODULE_API int sub(int x, int y) {
    return x - y;
}

/* Returns x * y. */
CMODULE_API int mul(int x, int y) {
    return x * y;
}

/*
 * Returns the real quotient x / y.
 * The cast makes the division happen in double; `x / y` on two ints would
 * truncate before the result is widened to the double return type.
 * NOTE(review): this name matches the standard library's div() declared in
 * <stdlib.h>; do not include that header in this translation unit.
 */
CMODULE_API double div(int x, int y) {
    return (double) x / y;
}
~~~
If you are on Linux, generate the linker object with
~~~shell
gcc -c -o cmodule.o -fPIC cmodule.c && gcc -shared -o cmodule.so cmodule.o
~~~
If you are on Windows Generate the linker with
~~~cmd
gcc -c -o
/r/Python
https://redd.it/13zw527
First of all, I want to make it clear that wrapping is a whole area, and what you're about to read is just the basics to get you started if you're interested in the subject.
basically every kind of wrapping code consists of you describing the bytes of each function (input and output), or the schema of each structure
For our example we will use a basic module of the 4 operations in C, (yes it's useless, it's just to demonstrate how it works)
#### Generate the linker object
Save it as **cmodule.c**
~~~c
/*
 * cmodule.c - four basic arithmetic operations, built as a shared library.
 *
 * CMODULE_API expands to __declspec(dllexport) on Windows so the functions
 * are exported from the DLL, and to nothing elsewhere. It is applied to the
 * definitions themselves rather than to separate redeclarations.
 */
#ifdef _WIN32
#define CMODULE_API __declspec(dllexport)
#else
#define CMODULE_API
#endif

/* Returns x + y. */
CMODULE_API int add(int x, int y) {
    return x + y;
}

/* Returns x - y. */
CMODULE_API int sub(int x, int y) {
    return x - y;
}

/* Returns x * y. */
CMODULE_API int mul(int x, int y) {
    return x * y;
}

/*
 * Returns the real quotient x / y.
 * The cast makes the division happen in double; `x / y` on two ints would
 * truncate before the result is widened to the double return type.
 * NOTE(review): this name matches the standard library's div() declared in
 * <stdlib.h>; do not include that header in this translation unit.
 */
CMODULE_API double div(int x, int y) {
    return (double) x / y;
}
~~~
If you are on Linux, generate the linker object with
~~~shell
gcc -c -o cmodule.o -fPIC cmodule.c && gcc -shared -o cmodule.so cmodule.o
~~~
If you are on Windows Generate the linker with
~~~cmd
gcc -c -o
/r/Python
https://redd.it/13zw527
Reddit
From the Python community on Reddit
Explore this post and more from the Python community
A Visual Basic for Applications precompiler written in python.
I’ve just created an official release of a VBA precompiler written in python.
# What my Project Does
For those who don’t know, in VBA you can have precompiler blocks to change things for different OSs or VBA versions.
For example, to change the function signature for a new windows version:
#if Win64 Then
Function foo(Bar, Baz)
#Else
Function Foo(Bar)
#Endif
The problem with this is, if you want to scan raw source code, you can’t just ignore the precompiler lines, because now the code looks like you’ve tried to define the same function twice, and one was never ended.
This tool takes environment variables that the user provides, and will comment out any lines that need to be skipped, creating 100% valid precompiled code. It even performs the comparisons, and arithmetic operations specified in the VBA specification.
# Target Audience
People interested in malware prevention, static analysis, and Linting may find it helpful. Also, useful if you are interested in learning about compilers, ANTLR, and code parsing.
# Limitations
It is currently missing the standard library functions, like Cbool(), Abs(), etc. I’m guessing these are never called by users in the precompiler phase.
/r/Python
https://redd.it/1ay6lt1
I’ve just created an official release of a VBA precompiler written in python.
# What my Project Does
For those who don’t know, in VBA you can have precompiler blocks to change things for different OSs or VBA versions.
For example, to change the function signature for a new windows version:
#if Win64 Then
Function foo(Bar, Baz)
#Else
Function Foo(Bar)
#Endif
The problem with this is, if you want to scan raw source code, you can’t just ignore the precompiler lines, because now the code looks like you’ve tried to define the same function twice, and one was never ended.
This tool takes environment variables that the user provides, and will comment out any lines that need to be skipped, creating 100% valid precompiled code. It even performs the comparisons, and arithmetic operations specified in the VBA specification.
# Target Audience
People interested in malware prevention, static analysis, and Linting may find it helpful. Also, useful if you are interested in learning about compilers, ANTLR, and code parsing.
# Limitations
It is currently missing the standard library functions, like Cbool(), Abs(), etc. I’m guessing these are never called by users in the precompiler phase.
/r/Python
https://redd.it/1ay6lt1
GitHub
GitHub - Beakerboy/VBA-Precompiler: Precompile VBA source files with specified environment values.
Precompile VBA source files with specified environment values. - Beakerboy/VBA-Precompiler