Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic math and bitwise operators #154

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 61 additions & 8 deletions src/math/p_add.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,64 @@
*
*/

/*
 * Element wise addition of two float vectors: c[i] = a[i] + b[i] for the
 * first n elements, processed front to back.
 *
 * a, b  input vectors
 * c     output vector
 * n     number of elements; nothing is written when n <= 0
 */
void p_add_f32(const float *a, const float *b, float *c, int n)
{
    const float *lhs = a;
    const float *rhs = b;
    float *out = c;
    int remaining = n;

    while (remaining > 0) {
        *out++ = *lhs++ + *rhs++;
        remaining--;
    }
}
/*
 * GEN_FUNC_ADD(NAME, TYPE) defines
 *
 *     void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * which stores a[i] + b[i] into c[i] for every i in [0, n).
 *
 * a, b  input vectors of at least n elements
 * c     output vector of at least n elements
 * n     element count; n <= 0 is a no-op (nothing is read or written)
 *
 * The pointers are restrict-qualified, so the caller must not pass
 * overlapping buffers. The sum is computed after the usual arithmetic
 * conversions and converted back to TYPE; overflow is not checked.
 *
 * The macro expands to a complete function definition, so it is invoked
 * without a trailing semicolon (a lone ';' at file scope is not ISO C).
 */
#define GEN_FUNC_ADD(NAME, TYPE)                                        \
    void NAME(const TYPE *restrict a, const TYPE *restrict b,           \
              TYPE *restrict c, int n)                                  \
    {                                                                   \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = (TYPE)(a[i] + b[i]);                                 \
        }                                                               \
    }

GEN_FUNC_ADD(p_add_f32, float)

GEN_FUNC_ADD(p_add_int8, int8_t)
GEN_FUNC_ADD(p_add_uint8, uint8_t)

GEN_FUNC_ADD(p_add_int16, int16_t)
GEN_FUNC_ADD(p_add_uint16, uint16_t)

GEN_FUNC_ADD(p_add_int32, int32_t)
GEN_FUNC_ADD(p_add_uint32, uint32_t)

GEN_FUNC_ADD(p_add_int64, int64_t)
GEN_FUNC_ADD(p_add_uint64, uint64_t)

/**
 *
 * Element wise vector addition between input vector 'a' and scalar 'b'
 *
 * @param a     Pointer to input vector
 *
 * @param b     Pointer to input scalar
 *
 * @param c     Pointer to output vector
 *
 * @param n     Size of 'a' and 'c' vector. Nothing is read or written
 *              when n <= 0.
 *
 * @return      None
 *
 */

/*
 * GEN_FUNC_ADDS(NAME, TYPE) defines NAME(), which stores a[i] + *b into
 * c[i] for every i in [0, n). The scalar is loaded once, before the loop.
 *
 * The pointers are restrict-qualified, so the caller must not pass
 * overlapping buffers. Overflow is not checked. The macro expands to a
 * complete function definition and is invoked without a trailing semicolon.
 */
#define GEN_FUNC_ADDS(NAME, TYPE)                                       \
    void NAME(const TYPE *restrict a, const TYPE *restrict b,           \
              TYPE *restrict c, int n)                                  \
    {                                                                   \
        if (n <= 0) {                                                   \
            return;                                                     \
        }                                                               \
        const TYPE addend = *b;                                         \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = (TYPE)(a[i] + addend);                               \
        }                                                               \
    }

GEN_FUNC_ADDS(p_adds_f32, float)

GEN_FUNC_ADDS(p_adds_int8, int8_t)
GEN_FUNC_ADDS(p_adds_uint8, uint8_t)

GEN_FUNC_ADDS(p_adds_int16, int16_t)
GEN_FUNC_ADDS(p_adds_uint16, uint16_t)

GEN_FUNC_ADDS(p_adds_int32, int32_t)
GEN_FUNC_ADDS(p_adds_uint32, uint32_t)

GEN_FUNC_ADDS(p_adds_int64, int64_t)
GEN_FUNC_ADDS(p_adds_uint64, uint64_t)
83 changes: 83 additions & 0 deletions src/math/p_and.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#include <pal.h>

/**
 *
 * Element wise vector 'bitwise and' between input vectors 'a' and 'b'
 *
 * @param a     Pointer to first input vector
 *
 * @param b     Pointer to second input vector
 *
 * @param c     Pointer to output vector
 *
 * @param n     Size of 'a', 'b' and 'c' vector. Nothing is read or
 *              written when n <= 0.
 *
 * @return      None
 *
 */

/*
 * GEN_FUNC_AND(NAME, TYPE) defines NAME(), which stores a[i] & b[i] into
 * c[i] for every i in [0, n).
 *
 * The pointers are restrict-qualified, so the caller must not pass
 * overlapping buffers. The macro expands to a complete function definition
 * and is invoked without a trailing semicolon.
 */
#define GEN_FUNC_AND(NAME, TYPE)                                        \
    void NAME(const TYPE *restrict a, const TYPE *restrict b,           \
              TYPE *restrict c, int n)                                  \
    {                                                                   \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = (TYPE)(a[i] & b[i]);                                 \
        }                                                               \
    }

GEN_FUNC_AND(p_and_int8, int8_t)
GEN_FUNC_AND(p_and_uint8, uint8_t)

GEN_FUNC_AND(p_and_int16, int16_t)
GEN_FUNC_AND(p_and_uint16, uint16_t)

GEN_FUNC_AND(p_and_int32, int32_t)
GEN_FUNC_AND(p_and_uint32, uint32_t)

GEN_FUNC_AND(p_and_int64, int64_t)
GEN_FUNC_AND(p_and_uint64, uint64_t)

/**
 *
 * Element wise vector 'bitwise and' between input vector 'a' and scalar 'b'
 *
 * @param a     Pointer to input vector
 *
 * @param b     Pointer to input scalar
 *
 * @param c     Pointer to output vector
 *
 * @param n     Size of 'a' and 'c' vector. Nothing is read or written
 *              when n <= 0.
 *
 * @return      None
 *
 */

/*
 * GEN_FUNC_ANDS(NAME, TYPE) defines NAME(), which stores a[i] & *b into
 * c[i] for every i in [0, n). The scalar is loaded once, before the loop.
 *
 * The pointers are restrict-qualified, so the caller must not pass
 * overlapping buffers. The macro expands to a complete function definition
 * and is invoked without a trailing semicolon.
 */
#define GEN_FUNC_ANDS(NAME, TYPE)                                       \
    void NAME(const TYPE *restrict a, const TYPE *restrict b,           \
              TYPE *restrict c, int n)                                  \
    {                                                                   \
        if (n <= 0) {                                                   \
            return;                                                     \
        }                                                               \
        const TYPE mask = *b;                                           \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = (TYPE)(a[i] & mask);                                 \
        }                                                               \
    }

GEN_FUNC_ANDS(p_ands_int8, int8_t)
GEN_FUNC_ANDS(p_ands_uint8, uint8_t)

GEN_FUNC_ANDS(p_ands_int16, int16_t)
GEN_FUNC_ANDS(p_ands_uint16, uint16_t)

GEN_FUNC_ANDS(p_ands_int32, int32_t)
GEN_FUNC_ANDS(p_ands_uint32, uint32_t)

GEN_FUNC_ANDS(p_ands_int64, int64_t)
GEN_FUNC_ANDS(p_ands_uint64, uint64_t)
67 changes: 61 additions & 6 deletions src/math/p_div.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,66 @@
*
*/

/*
 * GEN_FUNC_DIV(NAME, TYPE) defines
 *
 *     void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * which stores a[i] / b[i] into c[i] for every i in [0, n).
 *
 * a     input vector of dividends
 * b     input vector of divisors
 * c     output vector of quotients
 * n     element count; n <= 0 is a no-op (nothing is read or written)
 *
 * p_div_f32 is generated by this macro together with the integer variants;
 * the hand-written float loop it supersedes must not be kept alongside it,
 * or the symbol would be defined twice.
 *
 * The pointers are restrict-qualified, so the caller must not pass
 * overlapping buffers. Integer variants use C integer division (truncation
 * toward zero); divisors are not validated, so the caller must not pass a
 * zero divisor to an integer variant.
 *
 * The macro expands to a complete function definition and is invoked
 * without a trailing semicolon.
 */
#define GEN_FUNC_DIV(NAME, TYPE)                                        \
    void NAME(const TYPE *restrict a, const TYPE *restrict b,           \
              TYPE *restrict c, int n)                                  \
    {                                                                   \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = (TYPE)(a[i] / b[i]);                                 \
        }                                                               \
    }

GEN_FUNC_DIV(p_div_f32, float)

GEN_FUNC_DIV(p_div_int8, int8_t)
GEN_FUNC_DIV(p_div_uint8, uint8_t)

GEN_FUNC_DIV(p_div_int16, int16_t)
GEN_FUNC_DIV(p_div_uint16, uint16_t)

GEN_FUNC_DIV(p_div_int32, int32_t)
GEN_FUNC_DIV(p_div_uint32, uint32_t)

GEN_FUNC_DIV(p_div_int64, int64_t)
GEN_FUNC_DIV(p_div_uint64, uint64_t)

/**
 *
 * Element wise vector division between input vector 'a' and scalar 'b'
 *
 * @param a     Pointer to input vector
 *
 * @param b     Pointer to input scalar
 *
 * @param c     Pointer to output vector
 *
 * @param n     Size of 'a' and 'c' vector. Nothing is read or written
 *              when n <= 0.
 *
 * @return      None
 *
 */

/*
 * GEN_FUNC_DIVS(NAME, TYPE) defines NAME(), which stores a[i] / *b into
 * c[i] for every i in [0, n) using a true division in TYPE. It is used for
 * the integer variants: routing them through a float reciprocal cannot
 * represent every integer above 2^24 and so returns wrong quotients for
 * 32- and 64-bit inputs. The divisor is not validated; the caller must not
 * pass zero.
 *
 * GEN_FUNC_DIVS_RECIP(NAME, TYPE) is the floating-point form: it computes
 * 1 / *b once and multiplies every element by that reciprocal.
 *
 * In both forms the pointers are restrict-qualified, so the caller must not
 * pass overlapping buffers. The macros expand to complete function
 * definitions and are invoked without a trailing semicolon.
 */
#define GEN_FUNC_DIVS(NAME, TYPE)                                       \
    void NAME(const TYPE *restrict a, const TYPE *restrict b,           \
              TYPE *restrict c, int n)                                  \
    {                                                                   \
        if (n <= 0) {                                                   \
            return;                                                     \
        }                                                               \
        const TYPE divisor = *b;                                        \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = (TYPE)(a[i] / divisor);                              \
        }                                                               \
    }

#define GEN_FUNC_DIVS_RECIP(NAME, TYPE)                                 \
    void NAME(const TYPE *restrict a, const TYPE *restrict b,           \
              TYPE *restrict c, int n)                                  \
    {                                                                   \
        if (n <= 0) {                                                   \
            return;                                                     \
        }                                                               \
        const TYPE reciprocal = (TYPE)1 / *b;                           \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = a[i] * reciprocal;                                   \
        }                                                               \
    }

GEN_FUNC_DIVS_RECIP(p_divs_f32, float)

GEN_FUNC_DIVS(p_divs_int8, int8_t)
GEN_FUNC_DIVS(p_divs_uint8, uint8_t)

GEN_FUNC_DIVS(p_divs_int16, int16_t)
GEN_FUNC_DIVS(p_divs_uint16, uint16_t)

GEN_FUNC_DIVS(p_divs_int32, int32_t)
GEN_FUNC_DIVS(p_divs_uint32, uint32_t)

GEN_FUNC_DIVS(p_divs_int64, int64_t)
GEN_FUNC_DIVS(p_divs_uint64, uint64_t)
67 changes: 60 additions & 7 deletions src/math/p_mul.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,65 @@
*
*/

/*
 * GEN_FUNC_MUL(NAME, TYPE) defines
 *
 *     void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * which stores a[i] * b[i] into c[i] for every i in [0, n).
 *
 * a, b  input vectors of at least n elements
 * c     output vector of at least n elements
 * n     element count; n <= 0 is a no-op (nothing is read or written)
 *
 * p_mul_f32 is generated by this macro together with the integer variants;
 * the hand-written float loop it supersedes must not be kept alongside it,
 * or the symbol would be defined twice.
 *
 * The pointers are restrict-qualified, so the caller must not pass
 * overlapping buffers. The product is computed after the usual arithmetic
 * conversions and converted back to TYPE; overflow is not checked.
 *
 * The macro expands to a complete function definition and is invoked
 * without a trailing semicolon.
 */
#define GEN_FUNC_MUL(NAME, TYPE)                                        \
    void NAME(const TYPE *restrict a, const TYPE *restrict b,           \
              TYPE *restrict c, int n)                                  \
    {                                                                   \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = (TYPE)(a[i] * b[i]);                                 \
        }                                                               \
    }

GEN_FUNC_MUL(p_mul_f32, float)

GEN_FUNC_MUL(p_mul_int8, int8_t)
GEN_FUNC_MUL(p_mul_uint8, uint8_t)

GEN_FUNC_MUL(p_mul_int16, int16_t)
GEN_FUNC_MUL(p_mul_uint16, uint16_t)

GEN_FUNC_MUL(p_mul_int32, int32_t)
GEN_FUNC_MUL(p_mul_uint32, uint32_t)

GEN_FUNC_MUL(p_mul_int64, int64_t)
GEN_FUNC_MUL(p_mul_uint64, uint64_t)

/**
 *
 * Element wise vector multiplication between input vector 'a' and scalar 'b'
 *
 * @param a     Pointer to input vector
 *
 * @param b     Pointer to input scalar
 *
 * @param c     Pointer to output vector
 *
 * @param n     Size of 'a' and 'c' vector. Nothing is read or written
 *              when n <= 0.
 *
 * @return      None
 *
 */

/*
 * GEN_FUNC_MULS(NAME, TYPE) defines NAME(), which stores a[i] * *b into
 * c[i] for every i in [0, n). The scalar is loaded once, before the loop.
 *
 * The pointers are restrict-qualified, so the caller must not pass
 * overlapping buffers. Overflow is not checked. The macro expands to a
 * complete function definition and is invoked without a trailing semicolon.
 */
#define GEN_FUNC_MULS(NAME, TYPE)                                       \
    void NAME(const TYPE *restrict a, const TYPE *restrict b,           \
              TYPE *restrict c, int n)                                  \
    {                                                                   \
        if (n <= 0) {                                                   \
            return;                                                     \
        }                                                               \
        const TYPE factor = *b;                                         \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = (TYPE)(a[i] * factor);                               \
        }                                                               \
    }

GEN_FUNC_MULS(p_muls_f32, float)

GEN_FUNC_MULS(p_muls_int8, int8_t)
GEN_FUNC_MULS(p_muls_uint8, uint8_t)

GEN_FUNC_MULS(p_muls_int16, int16_t)
GEN_FUNC_MULS(p_muls_uint16, uint16_t)

GEN_FUNC_MULS(p_muls_int32, int32_t)
GEN_FUNC_MULS(p_muls_uint32, uint32_t)

GEN_FUNC_MULS(p_muls_int64, int64_t)
GEN_FUNC_MULS(p_muls_uint64, uint64_t)
41 changes: 41 additions & 0 deletions src/math/p_not.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#include <pal.h>

/**
 *
 * Element wise vector 'bitwise not' of input vector 'a'
 *
 * @param a     Pointer to input vector
 *
 * @param c     Pointer to output vector
 *
 * @param n     Size of 'a' and 'c' vector. Nothing is read or written
 *              when n <= 0.
 *
 * @return      None
 *
 */

/*
 * GEN_FUNC(NAME, TYPE) defines NAME(), which stores ~a[i] into c[i] for
 * every i in [0, n). Each result goes to its own output slot; storing
 * through '*c' inside the loop would overwrite element 0 on every pass and
 * leave the rest of 'c' untouched.
 *
 * The pointers are restrict-qualified, so the caller must not pass
 * overlapping buffers. The macro expands to a complete function definition
 * and is invoked without a trailing semicolon.
 */
#define GEN_FUNC(NAME, TYPE)                                            \
    void NAME(const TYPE *restrict a, TYPE *restrict c, int n)          \
    {                                                                   \
        for (int i = 0; i < n; i++) {                                   \
            c[i] = (TYPE)~a[i];                                         \
        }                                                               \
    }

/*
 * Review discussion recorded against the loop body in the pull request:
 *
 * Contributor: the store should read `*(c + n) = ~*(a + n);`
 *
 * Author: @mateunho Thanks, fixed.
 *
 * @aolofsson Yes, loops are very shallow (will look into the possibility
 * of using a Duff's device to do some unrolling, or whether
 * -funroll-all-loops can be used without pigging out on size). Also, I
 * noticed it can be counterproductive to have a decreasing index.
 *
 * What I think we should do now is to define a number of platforms we want
 * to target and the compiler flags we want to use (O2 or O3, and more
 * importantly on x86 the SSE flags. Or should we split x86 in 2?).
 *
 * What is the preferred method of implementing platform specific code?
 * #ifdef tree?
 */

GEN_FUNC(p_not_int8, int8_t)
GEN_FUNC(p_not_uint8, uint8_t)

GEN_FUNC(p_not_int16, int16_t)
GEN_FUNC(p_not_uint16, uint16_t)

GEN_FUNC(p_not_int32, int32_t)
GEN_FUNC(p_not_uint32, uint32_t)

GEN_FUNC(p_not_int64, int64_t)
GEN_FUNC(p_not_uint64, uint64_t)
Loading