diff --git a/neuralredis.c b/neuralredis.c index bfabd09..18dbf13 100644 --- a/neuralredis.c +++ b/neuralredis.c @@ -64,7 +64,7 @@ uint64_t NRNextId = 1; /* Next neural network unique ID. */ #define NR_FLAG_BACKTRACK (1<<6) /* Auto stop with backtracking. */ /* Flags to persist when saving the NN. */ -#define NR_FLAG_TO_PRESIST (NR_FLAG_REGRESSOR| \ +#define NR_FLAG_TO_PERSIST (NR_FLAG_REGRESSOR| \ NR_FLAG_CLASSIFIER| \ NR_FLAG_NORMALIZE| \ NR_FLAG_OF_DETECTED) @@ -91,7 +91,7 @@ typedef struct { uint64_t training_max_ms; /* Max time of a single training. */ uint32_t flags; /* NR_FLAG_... */ uint32_t epochs; /* Number of training epochs so far. */ - struct Ann *nn; /* Neural network structure. */ + AnnRprop *nn; /* Neural network structure. */ NRDataset dataset; /* Training dataset. */ NRDataset test; /* Testing dataset. */ float dataset_error; /* Average error in the training dataset. */ @@ -104,7 +104,7 @@ typedef struct { float *onorm; /* Outputs normalization factors. */ } NRTypeObject; -struct { +typedef struct { RedisModuleString *key; /* Key name of the NN we are training. Set to NULL for unused slots. */ int db_id; /* DB ID where the key is. */ @@ -115,7 +115,7 @@ struct { float test_error; /* Test error in the last cycle. */ float class_error; /* Percentage of wrong classifications. */ int curcycle; /* Current cycle. */ -} typedef NRPendingTraining; +} NRPendingTraining; /* We take an array with NNs currently training in other threads. * Every time an NN command is called, we try to see if there are @@ -152,8 +152,8 @@ NRTypeObject *createNRTypeObject(int flags, int *layers, int numlayers, int dset o->nn = AnnCreateNet(numlayers,layers); o->dataset.maxlen = dset_len; o->test.maxlen = test_len; - int ilen = INPUT_UNITS(o->nn); - int olen = OUTPUT_UNITS(o->nn); + int ilen = ANN_INPUT_UNITS(o->nn); + int olen = ANN_OUTPUT_UNITS(o->nn); o->inorm = RedisModule_Calloc(1,sizeof(float)*ilen); o->onorm = RedisModule_Calloc(1,sizeof(float)*olen); for (int j = 0; j < ilen; j++) o->inorm[j] = 1; @@ -216,8 +216,8 @@ void NRTypeInsertData(NRTypeObject *o, float *inputs, float *outputs, /* Append if there is room or substitute with a random entry. */ size_t idx; - int j, numin = INPUT_UNITS(o->nn), - numout = OUTPUT_UNITS(o->nn); + int j, numin = ANN_INPUT_UNITS(o->nn), + numout = ANN_OUTPUT_UNITS(o->nn); if (target->maxlen == target->len) { idx = rand() % target->maxlen; @@ -275,8 +275,8 @@ NRTypeObject *NRClone(NRTypeObject *o, int newid) { copy->dataset = o->dataset; copy->test = o->test; - int ilen = INPUT_UNITS(o->nn); - int olen = OUTPUT_UNITS(o->nn); + int ilen = ANN_INPUT_UNITS(o->nn); + int olen = ANN_OUTPUT_UNITS(o->nn); copy->dataset.inputs = RedisModule_Alloc(sizeof(float)*ilen*o->dataset.len); copy->dataset.outputs = RedisModule_Alloc(sizeof(float)*olen*o->dataset.len); copy->test.inputs = RedisModule_Alloc(sizeof(float)*ilen*o->test.len); @@ -318,8 +318,8 @@ void NRTransferWeights(RedisModuleCtx *ctx, NRTypeObject *dst, NRTypeObject *src dst->test_class_error = src->test_class_error; dst->flags |= src->flags & NR_FLAG_TO_TRANSFER; - int ilen = INPUT_UNITS(src->nn); - int olen = OUTPUT_UNITS(src->nn); + int ilen = ANN_INPUT_UNITS(src->nn); + int olen = ANN_OUTPUT_UNITS(src->nn); memcpy(dst->inorm,src->inorm,sizeof(float)*ilen); memcpy(dst->onorm,src->onorm,sizeof(float)*olen); } @@ -361,8 +361,8 @@ void *NRTrainingThreadMain(void *arg) { * (NR_FLAG_CLASSIFIER), no output normalization will be done since * the data is already in 0/1 format. 
*/ if ((nr->flags & NR_FLAG_NORMALIZE) && nr->dataset.len) { - int ilen = INPUT_UNITS(nr->nn); - int olen = OUTPUT_UNITS(nr->nn); + int ilen = ANN_INPUT_UNITS(nr->nn); + int olen = ANN_OUTPUT_UNITS(nr->nn); float *imax = nr->inorm; float *omax = nr->onorm; float *inputs = nr->dataset.inputs; @@ -410,7 +410,7 @@ void *NRTrainingThreadMain(void *arg) { } } - struct Ann *saved = NULL; /* Saved to recover on overfitting. */ + AnnRprop *saved = NULL; /* Saved to recover on overfitting. */ float saved_error; /* The test error of the saved NN. */ float saved_train_error; /* The training dataset error of the saved NN */ float saved_class_error; /* The % of classification errors of saved NN */ @@ -424,7 +424,7 @@ void *NRTrainingThreadMain(void *arg) { 0, training_iterations, nr->dataset.len, - NN_ALGO_BPROP); + ANN_ALGO_BPROP); cycle_time = NRMilliseconds() - cycle_start; nr->training_total_steps += nr->dataset.len*training_iterations; @@ -747,7 +747,7 @@ int NRGenericRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int "Use this command with a classifier network"); - int ilen = INPUT_UNITS(nr->nn); + int ilen = ANN_INPUT_UNITS(nr->nn); if (argc != ilen+2) return RedisModule_ReplyWithError(ctx, "ERR number of arguments does not " @@ -760,19 +760,19 @@ int NRGenericRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int "ERR invalid neural network input: must be a valid float " "precision floating point number"); if (nr->flags & NR_FLAG_NORMALIZE) input /= nr->inorm[j]; - INPUT_NODE(nr->nn,j) = input; + ANN_INPUT_NODE(nr->nn,j) = input; } AnnSimulate(nr->nn); /* Output the raw net output or the class ID if the network * is a classifier and the command invoked was NR.CLASS. */ - int olen = OUTPUT_UNITS(nr->nn); + int olen = ANN_OUTPUT_UNITS(nr->nn); if (output_class) { - float max = OUTPUT_NODE(nr->nn,0); + float max = ANN_OUTPUT_NODE(nr->nn,0); int max_class = 0; for(int j = 1; j < olen; j++) { - float output = OUTPUT_NODE(nr->nn,j); + float output = ANN_OUTPUT_NODE(nr->nn,j); if (output > max) { max = output; max_class = j; @@ -782,7 +782,7 @@ int NRGenericRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int } else { RedisModule_ReplyWithArray(ctx,olen); for(int j = 0; j < olen; j++) { - float output = OUTPUT_NODE(nr->nn,j); + float output = ANN_OUTPUT_NODE(nr->nn,j); if (!(nr->flags & NR_FLAG_CLASSIFIER) && (nr->flags & NR_FLAG_NORMALIZE)) { @@ -816,8 +816,8 @@ int NRObserve_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE); NRTypeObject *nr = RedisModule_ModuleTypeGetValue(key); - int ilen = INPUT_UNITS(nr->nn); - int olen = OUTPUT_UNITS(nr->nn); + int ilen = ANN_INPUT_UNITS(nr->nn); + int olen = ANN_OUTPUT_UNITS(nr->nn); int oargs = (nr->flags & NR_FLAG_CLASSIFIER) ? 1 : olen; int target = NR_INSERT_NO_TARGET; @@ -1017,9 +1017,9 @@ int NRInfo_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) RedisModule_ReplyWithLongLong(ctx,!!(nr->flags & NR_FLAG_TRAINING)); RedisModule_ReplyWithSimpleString(ctx,"layout"); - RedisModule_ReplyWithArray(ctx,LAYERS(nr->nn)); - for (int i = LAYERS(nr->nn)-1; i >= 0; i--) { - int units = UNITS(nr->nn,i); + RedisModule_ReplyWithArray(ctx,ANN_LAYERS(nr->nn)); + for (int i = ANN_LAYERS(nr->nn)-1; i >= 0; i--) { + int units = ANN_UNITS(nr->nn,i); if (i != 0) units--; /* Don't count the bias unit. 
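 * Hidden and input layers carry one extra internal bias unit, so a network
 * whose user-visible layout is, say, 3 inputs, 5 hidden units and 2 outputs
 * has ANN_UNITS() of 4, 6 and 2; subtracting one here makes the layout
 * reply show the original 3/5/2 figures (the sizes in this example are
 * illustrative only).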
*/ RedisModule_ReplyWithLongLong(ctx,units); } @@ -1111,8 +1111,8 @@ int NRGetdata_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar NRTypeObject *nr = RedisModule_ModuleTypeGetValue(key); - int ilen = INPUT_UNITS(nr->nn); - int olen = OUTPUT_UNITS(nr->nn); + int ilen = ANN_INPUT_UNITS(nr->nn); + int olen = ANN_OUTPUT_UNITS(nr->nn); NRDataset *target = NULL; long long idx; @@ -1173,15 +1173,15 @@ void NRTypeRdbSave(RedisModuleIO *rdb, void *value) { NRTypeObject *nr = value; /* Save the neural network layout. */ - RedisModule_SaveUnsigned(rdb,LAYERS(nr->nn)); - for (int j = 0; j < LAYERS(nr->nn); j++) { - int units = UNITS(nr->nn,j); + RedisModule_SaveUnsigned(rdb,ANN_LAYERS(nr->nn)); + for (int j = 0; j < ANN_LAYERS(nr->nn); j++) { + int units = ANN_UNITS(nr->nn,j); if (j != 0) units--; /* Don't count the bias unit. */ RedisModule_SaveUnsigned(rdb,units); } /* Save the object metadata. */ - RedisModule_SaveUnsigned(rdb,nr->flags & NR_FLAG_TO_PRESIST); + RedisModule_SaveUnsigned(rdb,nr->flags & NR_FLAG_TO_PERSIST); RedisModule_SaveUnsigned(rdb,nr->id); RedisModule_SaveUnsigned(rdb,nr->training_total_steps); RedisModule_SaveUnsigned(rdb,nr->training_total_ms); @@ -1193,8 +1193,8 @@ void NRTypeRdbSave(RedisModuleIO *rdb, void *value) { /* Save the neural network weights and biases. We start * at layer 1 since the first layer are just outputs. */ - for (int j = 1; j < LAYERS(nr->nn); j++) { - int weights = WEIGHTS(nr->nn,j); + for (int j = 1; j < ANN_LAYERS(nr->nn); j++) { + int weights = ANN_WEIGHTS(nr->nn,j); for (int i = 0; i < weights; i++) RedisModule_SaveFloat(rdb,nr->nn->layer[j].weight[i]); for (int i = 0; i < weights; i++) @@ -1204,8 +1204,8 @@ void NRTypeRdbSave(RedisModuleIO *rdb, void *value) { } /* Save the normalization vectors. */ - uint32_t ilen = INPUT_UNITS(nr->nn); - uint32_t olen = OUTPUT_UNITS(nr->nn); + uint32_t ilen = ANN_INPUT_UNITS(nr->nn); + uint32_t olen = ANN_OUTPUT_UNITS(nr->nn); for (uint32_t j = 0; j < ilen; j++) RedisModule_SaveFloat(rdb,nr->inorm[j]); for (uint32_t j = 0; j < olen; j++) RedisModule_SaveFloat(rdb,nr->onorm[j]); @@ -1261,8 +1261,8 @@ void *NRTypeRdbLoad(RedisModuleIO *rdb, int encver) { nr->test_class_error = RedisModule_LoadFloat(rdb); /* Load the neural network weights. */ - for (int j = 1; j < LAYERS(nr->nn); j++) { - int weights = WEIGHTS(nr->nn,j); + for (int j = 1; j < ANN_LAYERS(nr->nn); j++) { + int weights = ANN_WEIGHTS(nr->nn,j); for (int i = 0; i < weights; i++) nr->nn->layer[j].weight[i] = RedisModule_LoadFloat(rdb); for (int i = 0; i < weights; i++) @@ -1272,8 +1272,8 @@ void *NRTypeRdbLoad(RedisModuleIO *rdb, int encver) { } /* Load the normalization vector. 
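 * One factor per input unit followed by one per output unit, in the same
 * order NRTypeRdbSave() wrote them; when NR_FLAG_NORMALIZE is set these are
 * the values raw inputs are divided by before AnnSimulate() runs.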
*/ - uint32_t ilen = INPUT_UNITS(nr->nn); - uint32_t olen = OUTPUT_UNITS(nr->nn); + uint32_t ilen = ANN_INPUT_UNITS(nr->nn); + uint32_t olen = ANN_OUTPUT_UNITS(nr->nn); for (uint32_t j = 0; j < ilen; j++) nr->inorm[j] = RedisModule_LoadFloat(rdb); for (uint32_t j = 0; j < olen; j++) diff --git a/nn.c b/nn.c index 2476c06..1f4b542 100644 --- a/nn.c +++ b/nn.c @@ -35,6 +35,12 @@ #include #include +#include "nn.h" + +/* +There is a known problem with memory alignment when using AVX and AVX-512: +aligned loads/stores work on some machines but fault on others, so the +unaligned intrinsics are still used for those targets. +*/ #if defined(USE_AVX512) #define USING_SIMD #include @@ -45,17 +51,19 @@ typedef __m512 simdf_t; #define simdf_zero() _mm512_setzero_ps() #define simdf_set1f(x) _mm512_set1_ps(x) #define simdf_loadu(x) _mm512_loadu_ps(x) +#define simdf_load(x) _mm512_loadu_ps(x) //we are still using unaligned here #define simdf_mul(a,b) _mm512_mul_ps(a,b) #define simdf_add(a,b) _mm512_add_ps(a,b) #define simdf_storeu(a,b) _mm512_storeu_ps(a,b) +#define simdf_store(a,b) _mm512_storeu_ps(a,b) //we are still using unaligned here //let the compiler optimize this #define simdf_sum(x) (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + \ - x[8] + x[9] + x[10] + x[11] + x[12] + x[13] + x[14] + x[15]) + x[8] + x[9] + x[10] + x[11] + x[12] + x[13] + x[14] + x[15]) #define simdf_show(x) printf("%d : %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f\n", \ - __LINE__, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], \ - x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]); + __LINE__, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], \ + x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]); #endif #if defined(USE_AVX) @@ -68,15 +76,17 @@ typedef __m256 simdf_t; #define simdf_zero() _mm256_setzero_ps() #define simdf_set1f(x) _mm256_set1_ps(x) #define simdf_loadu(x) _mm256_loadu_ps(x) +#define simdf_load(x) _mm256_loadu_ps(x) //we are still using unaligned here #define simdf_mul(a,b) _mm256_mul_ps(a,b) #define simdf_add(a,b) _mm256_add_ps(a,b) #define simdf_storeu(a,b) _mm256_storeu_ps(a,b) +#define simdf_store(a,b) _mm256_storeu_ps(a,b) //we are still using unaligned here //let the compiler optimize this #define simdf_sum(x) (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7]) #define simdf_show(x) printf("%d : %f, %f, %f, %f, %f, %f, %f, %f\n", \ - __LINE__, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7]); + __LINE__, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7]); #endif #if defined(USE_SSE) @@ -89,9 +99,11 @@ typedef __m128 simdf_t; #define simdf_zero() _mm_setzero_ps() #define simdf_set1f(x) _mm_set1_ps(x) #define simdf_loadu(x) _mm_loadu_ps(x) +#define simdf_load(x) _mm_load_ps(x) #define simdf_mul(a,b) _mm_mul_ps(a,b) #define simdf_add(a,b) _mm_add_ps(a,b) #define simdf_storeu(a,b) _mm_storeu_ps(a,b) +#define simdf_store(a,b) _mm_store_ps(a,b) //let the compiler optimize this #define simdf_sum(x) (x[0] + x[1] + x[2] + x[3]) @@ -103,15 +115,17 @@ typedef __m128 simdf_t; #define USING_SIMD #include typedef float32x4_t simdf_t; #define SIMDF_SIZE 4 #define simdf_zero() vdupq_n_f32(0.0f) #define simdf_set1f(x) vdupq_n_f32(x); #define simdf_loadu(x) vld1q_f32(x) +#define simdf_load(x) vld1q_f32(x) #define simdf_mul(a,b) vmulq_f32(a,b) #define simdf_add(a,b) vaddq_f32(a,b) #define simdf_storeu(a,b) vst1q_f32((float32_t*)a,b) +#define simdf_store(a,b) vst1q_f32((float32_t*)a,b) //let the compiler optimize this #define simdf_sum(x) (x[0] + x[1] + x[2] + x[3]) @@
-119,20 +133,64 @@ typedef float32x4_t simdf_t; #define simdf_show(x) printf("%d : %f, %f, %f, %f\n", __LINE__, x[0], x[1], x[2], x[3]); #endif -#include "nn.h" +#ifndef SIMDF_SIZE +#define SIMDF_SIZE 1 +#endif // SIMDF_SIZE + +#define ANN_SIZEOF_ann_float_t sizeof(ann_float_t) +#define ANN_ALIGN_BASE (SIMDF_SIZE * ANN_SIZEOF_ann_float_t) +#define ANN_ALIGN_ROUND(x) ((x%ANN_ALIGN_BASE) ? (((x/ANN_ALIGN_BASE)+1)*ANN_ALIGN_BASE) : (size_t)x) + +#ifndef HAS_ANN_MALLOC +#define ann_malloc(x) malloc(x) +#define ann_free(x) free(x) +#else +extern void *ann_malloc(size_t sz); +extern void ann_free(void *ptr); +#endif +/* +void *nnpmalloc(int line, size_t sz) { + printf("%d : %zu : %zu\n", line, sz, ANN_ALIGN_ROUND(sz)); + return malloc(sz); +} +#define ann_malloc(x) nnpmalloc(__LINE__, x) +*/ /* Node Transfer Function */ -float sigmoid(float x) { - return (float)1/(1+exp(-x)); +ann_float_t AnnTransferFunctionSigmoid(ann_float_t x) { + return ((ann_float_t)1)/(1+exp(-x)); } -float relu(float x) { +ann_float_t AnnTransferFunctionRelu(ann_float_t x) { return (x > 0) ? x : 0; } +ann_float_t AnnTransferFunctionTanh(ann_float_t x) { + return tanh(x); +} + +/* +ann_float_t AnnDerivativeIdentity(ann_float_t x) { + return 1; +} +*/ + +ann_float_t AnnDerivativeSigmoid(ann_float_t x) { + return x*(1-x); +} + +ann_float_t AnnDerivativeTanh(ann_float_t x) { + return (1-x)*(1+x); +} + +ann_float_t AnnDerivativeRelu(ann_float_t x) { + return (x > 0) ? 1 : 0; +} + /* Reset layer data to zero-units */ -void AnnResetLayer(struct AnnLayer *layer) { +void AnnResetLayer(AnnLayer *layer) { layer->units = 0; + layer->units_aligned = 0; layer->output = NULL; layer->error = NULL; layer->weight = NULL; @@ -143,24 +201,26 @@ void AnnResetLayer(struct AnnLayer *layer) { } /* Allocate and return an initialized N-layers network */ -struct Ann *AnnAlloc(int layers) { - struct Ann *net; +AnnRprop *AnnAlloc(int layers) { + AnnRprop *net; int i; /* Alloc the net structure */ - if ((net = malloc(sizeof(*net))) == NULL) + if ((net = ann_malloc(sizeof(*net))) == NULL) return NULL; /* Alloc layers */ - if ((net->layer = malloc(sizeof(struct AnnLayer)*layers)) == NULL) { - free(net); + if ((net->layer = ann_malloc(sizeof(AnnLayer)*layers)) == NULL) { + ann_free(net); return NULL; } net->layers = layers; net->flags = 0; - net->rprop_nminus = DEFAULT_RPROP_NMINUS; - net->rprop_nplus = DEFAULT_RPROP_NPLUS; - net->rprop_maxupdate = DEFAULT_RPROP_MAXUPDATE; - net->rprop_minupdate = DEFAULT_RPROP_MINUPDATE; + net->rprop_nminus = ANN_DEFAULT_RPROP_NMINUS; + net->rprop_nplus = ANN_DEFAULT_RPROP_NPLUS; + net->rprop_maxupdate = ANN_DEFAULT_RPROP_MAXUPDATE; + net->rprop_minupdate = ANN_DEFAULT_RPROP_MINUPDATE; + net->node_transf_func = AnnTransferFunctionSigmoid; + net->derivative_func = AnnDerivativeSigmoid; /* Init layers */ for (i = 0; i < layers; i++) AnnResetLayer(&net->layer[i]); @@ -168,115 +228,114 @@ struct Ann *AnnAlloc(int layers) { } /* Free a single layer */ -void AnnFreeLayer(struct AnnLayer *layer) +void AnnFreeLayer(AnnLayer *layer) { - free(layer->output); - free(layer->error); - free(layer->weight); - free(layer->gradient); - free(layer->pgradient); - free(layer->delta); - free(layer->sgradient); + ann_free(layer->output); + ann_free(layer->error); + ann_free(layer->weight); + ann_free(layer->gradient); + ann_free(layer->pgradient); + ann_free(layer->delta); + ann_free(layer->sgradient); AnnResetLayer(layer); } /* Free the target net */ -void AnnFree(struct Ann *net) +void AnnFree(AnnRprop *net) { int i; /* Free layer data */ 
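/* A worked example of the alignment rounding used by AnnInitLayer() below
 * (figures assume USE_SSE, i.e. SIMDF_SIZE == 4, and 4-byte floats):
 *
 *   units = 5           -> 5 * sizeof(ann_float_t) = 20 bytes
 *   ANN_ALIGN_ROUND(20) -> 32 (next multiple of ANN_ALIGN_BASE = 16)
 *   units_aligned       -> 32 / sizeof(ann_float_t) = 8
 *
 * so each row of the per-layer weight/gradient arrays is padded to 8 floats
 * and the ANN_* accessor macros use units_aligned as the row stride. */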
for (i = 0; i < net->layers; i++) AnnFreeLayer(&net->layer[i]); /* Free allocated layers structures */ - free(net->layer); + ann_free(net->layer); /* And the main structure itself */ - free(net); + ann_free(net); } /* Init a layer of the net with the specified number of units. * Return non-zero on out of memory. */ -int AnnInitLayer(struct Ann *net, int i, int units, int bias) { +int AnnInitLayer(AnnRprop *net, int i, int units, int bias) { if (bias) units++; /* Take count of the bias unit */ - net->layer[i].output = malloc(sizeof(float)*units); - net->layer[i].error = malloc(sizeof(float)*units); + int ann_float_t_units = ANN_ALIGN_ROUND(units*ANN_SIZEOF_ann_float_t); + int units_aligned = ann_float_t_units/ANN_SIZEOF_ann_float_t; + int ann_float_t_units_units = 0; + AnnLayer *layer = &ANN_LAYER(net, i); + layer->units = units; + layer->units_aligned = units_aligned; + layer->output = ann_malloc(ann_float_t_units); + layer->error = ann_malloc(ann_float_t_units); if (i) { /* not for output layer */ - net->layer[i].weight = - malloc(sizeof(float)*units*net->layer[i-1].units); - net->layer[i].gradient = - malloc(sizeof(float)*units*net->layer[i-1].units); - net->layer[i].pgradient = - malloc(sizeof(float)*units*net->layer[i-1].units); - net->layer[i].delta = - malloc(sizeof(float)*units*net->layer[i-1].units); - net->layer[i].sgradient = - malloc(sizeof(float)*units*net->layer[i-1].units); - } - net->layer[i].units = units; + ann_float_t_units_units = ann_float_t_units*ANN_LAYER(net, i-1).units; + layer->weight = ann_malloc(ann_float_t_units_units); + layer->gradient = ann_malloc(ann_float_t_units_units); + layer->pgradient = ann_malloc(ann_float_t_units_units); + layer->delta = ann_malloc(ann_float_t_units_units); + layer->sgradient = ann_malloc(ann_float_t_units_units); + } /* Check for out of memory conditions */ - if (net->layer[i].output == NULL || - net->layer[i].error == NULL || - (i && net->layer[i].weight == NULL) || - (i && net->layer[i].gradient == NULL) || - (i && net->layer[i].pgradient == NULL) || - (i && net->layer[i].sgradient == NULL) || - (i && net->layer[i].delta == NULL)) + if (layer->output == NULL || + layer->error == NULL || + (i && layer->weight == NULL) || + (i && layer->gradient == NULL) || + (i && layer->pgradient == NULL) || + (i && layer->sgradient == NULL) || + (i && layer->delta == NULL)) { - AnnFreeLayer(&net->layer[i]); - AnnResetLayer(&net->layer[i]); + AnnFreeLayer(layer); + AnnResetLayer(layer); return 1; } /* Set all the values to zero */ - memset(net->layer[i].output, 0, sizeof(float)*units); - memset(net->layer[i].error, 0, sizeof(float)*units); + memset(layer->output, 0, ann_float_t_units); + memset(layer->error, 0, ann_float_t_units); if (i) { - memset(net->layer[i].weight, 0, - sizeof(float)*units*net->layer[i-1].units); - memset(net->layer[i].gradient, 0, - sizeof(float)*units*net->layer[i-1].units); - memset(net->layer[i].pgradient, 0, - sizeof(float)*units*net->layer[i-1].units); - memset(net->layer[i].delta, 0, - sizeof(float)*units*net->layer[i-1].units); - memset(net->layer[i].sgradient, 0, - sizeof(float)*units*net->layer[i-1].units); + memset(layer->weight, 0, ann_float_t_units_units); + memset(layer->gradient, 0, ann_float_t_units_units); + memset(layer->pgradient, 0, ann_float_t_units_units); + memset(layer->delta, 0, ann_float_t_units_units); + memset(layer->sgradient, 0, ann_float_t_units_units); } /* Set the bias unit output to 1 */ - if (bias) net->layer[i].output[units-1] = 1; + if (bias) layer->output[units-1] = 1; return 0; } /* 
Clone a network. On out of memory NULL is returned. */ -struct Ann *AnnClone(struct Ann* net) { - struct Ann* copy; +AnnRprop *AnnClone(const AnnRprop* net) { + AnnRprop* copy; int j; - if ((copy = AnnAlloc(LAYERS(net))) == NULL) return NULL; - for (j = 0; j < LAYERS(net); j++) { - struct AnnLayer *ldst, *lsrc; - int units = UNITS(net,j); + if ((copy = AnnAlloc(ANN_LAYERS(net))) == NULL) return NULL; + for (j = 0; j < ANN_LAYERS(net); j++) { + AnnLayer *ldst; + const AnnLayer *lsrc; + int units = ANN_UNITS(net,j); int bias = j > 0; if (AnnInitLayer(copy, j, units-bias, bias)) { AnnFree(copy); return NULL; } + int ann_float_t_units = units*ANN_SIZEOF_ann_float_t; lsrc = &net->layer[j]; ldst = ©->layer[j]; if (lsrc->output) - memcpy(ldst->output, lsrc->output, sizeof(float)*units); + memcpy(ldst->output, lsrc->output, ann_float_t_units); if (lsrc->error) - memcpy(ldst->error, lsrc->error, sizeof(float)*units); + memcpy(ldst->error, lsrc->error, ann_float_t_units); if (j) { - int weights = WEIGHTS(net,j); + int weights = ANN_WEIGHTS(net,j); + ann_float_t_units = weights*ANN_SIZEOF_ann_float_t; if (lsrc->weight) - memcpy(ldst->weight, lsrc->weight, sizeof(float)*weights); + memcpy(ldst->weight, lsrc->weight, ann_float_t_units); if (lsrc->gradient) - memcpy(ldst->gradient, lsrc->gradient, sizeof(float)*weights); + memcpy(ldst->gradient, lsrc->gradient, ann_float_t_units); if (lsrc->pgradient) - memcpy(ldst->pgradient, lsrc->pgradient, sizeof(float)*weights); + memcpy(ldst->pgradient, lsrc->pgradient, ann_float_t_units); if (lsrc->delta) - memcpy(ldst->delta, lsrc->delta, sizeof(float)*weights); + memcpy(ldst->delta, lsrc->delta, ann_float_t_units); if (lsrc->sgradient) - memcpy(ldst->sgradient, lsrc->sgradient, sizeof(float)*weights); + memcpy(ldst->sgradient, lsrc->sgradient, ann_float_t_units); } } copy->rprop_nminus = net->rprop_nminus; @@ -284,14 +343,16 @@ struct Ann *AnnClone(struct Ann* net) { copy->rprop_maxupdate = net->rprop_maxupdate; copy->rprop_minupdate = net->rprop_minupdate; copy->flags = net->flags; + copy->node_transf_func = net->node_transf_func; + copy->derivative_func = net->derivative_func; return copy; } /* Create a N-layer input/hidden/output net. * The units array should specify the number of * units in every layer from the output to the input layer. */ -struct Ann *AnnCreateNet(int layers, int *units) { - struct Ann *net; +AnnRprop *AnnCreateNet(int layers, int *units) { + AnnRprop *net; int i; if ((net = AnnAlloc(layers)) == NULL) return NULL; @@ -302,17 +363,17 @@ struct Ann *AnnCreateNet(int layers, int *units) { } } AnnSetRandomWeights(net); - AnnSetDeltas(net, RPROP_INITIAL_DELTA); - LEARN_RATE(net) = DEFAULT_LEARN_RATE; + AnnSetDeltas(net, ANN_RPROP_INITIAL_DELTA); + ANN_LEARN_RATE(net) = ANN_DEFAULT_LEARN_RATE; return net; } /* Return the total number of weights this NN has. 
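 * For example AnnCreateNet3(2,2,2) reports 12: each of the two hidden units
 * has two input weights plus a bias weight, and each of the two output
 * units has two hidden weights plus a bias weight, i.e. 2*3 + 2*3 = 12.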
*/ -size_t AnnCountWeights(struct Ann *net) { +size_t AnnCountWeights(AnnRprop *net) { size_t weights = 0; - for (int i = net->layers-1; i > 0; i--) { - int nextunits = net->layer[i-1].units; - int units = net->layer[i].units; + for (int i = ANN_LAYERS(net)-1; i > 0; i--) { + int nextunits = ANN_UNITS(net, i-1); + int units = ANN_UNITS(net, i); if (i > 1) nextunits--; /* we don't output on bias units */ weights += units*nextunits; } @@ -320,7 +381,7 @@ size_t AnnCountWeights(struct Ann *net) { } /* Create a 4-layer input/hidden/output net */ -struct Ann *AnnCreateNet4(int iunits, int hunits, int hunits2, int ounits) { +AnnRprop *AnnCreateNet4(int iunits, int hunits, int hunits2, int ounits) { int units[4]; units[0] = ounits; @@ -331,7 +392,7 @@ struct Ann *AnnCreateNet4(int iunits, int hunits, int hunits2, int ounits) { } /* Create a 3-layer input/hidden/output net */ -struct Ann *AnnCreateNet3(int iunits, int hunits, int ounits) { +AnnRprop *AnnCreateNet3(int iunits, int hunits, int ounits) { int units[3]; units[0] = ounits; @@ -342,7 +403,7 @@ struct Ann *AnnCreateNet3(int iunits, int hunits, int ounits) { /* Create a 2-layer "linear" network. */ -struct Ann *AnnCreateNet2(int iunits, int ounits) { +AnnRprop *AnnCreateNet2(int iunits, int ounits) { int units[2]; units[0] = ounits; @@ -351,107 +412,159 @@ struct Ann *AnnCreateNet2(int iunits, int ounits) { } -void AnnSimulate(struct Ann *net) { +void AnnSimulate(AnnRprop *net) { int i, j, k; - for (i = net->layers-1; i > 0; i--) { - int nextunits = net->layer[i-1].units; - int units = net->layer[i].units; + for (i = ANN_LAYERS(net)-1; i > 0; i--) { + AnnLayer *layer = &ANN_LAYER(net, i); + int nextunits = ANN_UNITS(net, i-1); + int units_aligned = layer->units_aligned; + int units = layer->units; if (i > 1) nextunits--; /* dont output on bias units */ +#ifdef USING_SIMD + int xps, psteps = units/SIMDF_SIZE; +#endif // USING_SIMD for (j = 0; j < nextunits; j++) { - float A = 0; /* Activation final value. */ - float *w = net->layer[i].weight + j*units; - float *o = net->layer[i].output; + ann_float_t A = 0; /* Activation final value. */ + ann_float_t *w = layer->weight + j*units_aligned; + ann_float_t *o = layer->output; k = 0; #ifdef USING_SIMD - int psteps = units/SIMDF_SIZE; - simdf_t sumA = simdf_zero(); - for (int x = 0; x < psteps; x++) { - simdf_t weights = simdf_loadu(w); - simdf_t outputs = simdf_loadu(o); - simdf_t prod = simdf_mul(weights,outputs); - sumA = simdf_add(sumA, prod); - w += SIMDF_SIZE; - o += SIMDF_SIZE; + if(psteps) + { + simdf_t sumA = simdf_zero(); + for (xps = 0; xps < psteps; xps++) { + simdf_t weights = simdf_load(w); + simdf_t outputs = simdf_load(o); + simdf_t prod = simdf_mul(weights,outputs); + sumA = simdf_add(sumA, prod); + w += SIMDF_SIZE; + o += SIMDF_SIZE; + } + A += simdf_sum(sumA); + k += psteps*SIMDF_SIZE; } - A += simdf_sum(sumA); - k += SIMDF_SIZE*psteps; #endif /* Handle final piece shorter than SIMDF_SIZE . 
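 * Taken together, the SIMD body above and this scalar tail compute the
 * plain dot product A = sum_k w[k]*o[k] over the incoming connections; the
 * unit output is then set to the sigmoid of A.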
*/ for (; k < units; k++) { A += (*w++) * (*o++); } - OUTPUT(net, i-1, j) = sigmoid(A); + //ANN_OUTPUT(net, i-1, j) = (*net->node_transf_func)(A); //sigmoid(A); + ANN_OUTPUT(net, i-1, j) = 1.0/(1.0+exp(-A)); } } } /* Create a Tcl procedure that simulates the neural network */ -void Ann2Tcl(struct Ann *net) { +void Ann2Tcl(const AnnRprop *net) { int i, j, k; printf("proc ann input {\n"); printf(" set output {"); - for (i = 0; i < OUTPUT_UNITS(net); i++) { + for (i = 0; i < ANN_OUTPUT_UNITS(net); i++) { printf("0 "); } printf("}\n"); - for (i = net->layers-1; i > 0; i--) { - int nextunits = net->layer[i-1].units; - int units = net->layer[i].units; - if (i > 1) nextunits--; /* dont output on bias units */ + printf(" proc sigmoid x {return [expr {1/(1+exp(-$x))}]}\n"); + for(i=0, k=ANN_INPUT_UNITS(net); i < k; ++i) { + printf(" set input_%d [lindex $input %d]\n", i, i); + } + for (i = ANN_LAYERS(net)-1; i > 0; i--) { + int nextunits = ANN_UNITS(net, i-1); + int units = ANN_UNITS(net, i); + //if (i > 1) nextunits--; /* dont output on bias units */ for (j = 0; j < nextunits; j++) { - float W; + ann_float_t W; if (i == 1) { printf(" lset output %d ", j); } else { printf(" set O_%d_%d", i-1, j); } - printf(" [expr { \\\n"); + printf(" [sigmoid [expr { \\\n"); for (k = 0; k < units; k++) { - W = WEIGHT(net, i, k, j); + W = ANN_WEIGHT(net, i, k, j); if (i > 1 && k == units-1) { printf(" (%.9f)", W); - } else if (i == net->layers-1) { - printf(" (%.9f*[lindex $input %d])", W, k); + } else if (i == ANN_LAYERS(net)-1) { + printf(" (%.9f*$input_%d)", W, k); } else { printf(" (%.9f*$O_%d_%d)", W, i, k); } if ((k+1) < units) printf("+ \\\n"); } - printf("}]\n"); + printf("}]]\n"); + } + } + printf(" return $output\n"); + printf("}\n"); +} + +/* Create a Javascript procedure that simulates the neural network */ +void Ann2Js(const AnnRprop *net) { + int i, j, k; + + printf("function ann( input ) {\n"); + printf(" var output = ["); + for (i = 0; i < ANN_OUTPUT_UNITS(net); i++) { + if(i) printf(", "); + printf("0"); + } + printf("];\n"); + printf(" var sigmoid = function(x) {return 1.0/(1.0+Math.exp(-x));};\n"); + for(i=0, k=ANN_INPUT_UNITS(net); i < k; ++i) { + printf(" var input_%d = input[%d];\n", i, i); + } + for (i = ANN_LAYERS(net)-1; i > 0; i--) { + int nextunits = ANN_UNITS(net, i-1); + int units = ANN_UNITS(net, i); + //if (i > 1) nextunits--; /* dont output on bias units */ + for (j = 0; j < nextunits; j++) { + ann_float_t W; if (i == 1) { - printf(" lset output %d [expr {1/(1+exp(-[lindex $output %d]))}]\n", j, j); + printf(" output[%d]", j); } else { - printf(" lset O_%d_%d [expr {1/(1+exp(-$O_%d_%d))}]\n", i-1, j, i-1, j); + printf(" var O_%d_%d", i-1, j); + } + printf(" = sigmoid(\n"); + for (k = 0; k < units; k++) { + W = ANN_WEIGHT(net, i, k, j); + if (i > 1 && k == units-1) { + printf(" (%.9f)", W); + } else if (i == ANN_LAYERS(net)-1) { + printf(" (%.9f*input_%d)", W, k); + } else { + printf(" (%.9f*O_%d_%d)", W, i, k); + } + if ((k+1) < units) printf("+\n"); } + printf(");\n"); } } - printf(" return $output\n"); + printf(" return output;\n"); printf("}\n"); } /* Print a network representation */ -void AnnPrint(struct Ann *net) { +void AnnPrint(const AnnRprop *net) { int i, j, k; - for (i = 0; i < LAYERS(net); i++) { + for (i = 0; i < ANN_LAYERS(net); i++) { char *layertype = "Hidden"; if (i == 0) layertype = "Output"; - if (i == LAYERS(net)-1) layertype = "Input"; - printf("%s layer %d, units %d\n", layertype, i, UNITS(net,i)); + if (i == ANN_LAYERS(net)-1) layertype = "Input"; + printf("%s 
layer %d, units %d\n", layertype, i, ANN_UNITS(net,i)); if (i) { /* Don't compute the bias unit as a target. */ - int targets = UNITS(net,i-1) - (i-1>0); + int targets = ANN_UNITS(net,i-1) - (i-1>0); /* Weights */ printf("\tW"); - for (j = 0; j < UNITS(net, i); j++) { + for (j = 0; j < ANN_UNITS(net, i); j++) { printf("("); for (k = 0; k < targets; k++) { - printf("%f", WEIGHT(net,i,j,k)); + printf("%f", ANN_WEIGHT(net,i,j,k)); if (k != targets-1) printf(" "); } printf(") "); @@ -459,10 +572,10 @@ void AnnPrint(struct Ann *net) { printf("\n"); /* Gradients */ printf("\tg"); - for (j = 0; j < UNITS(net, i); j++) { + for (j = 0; j < ANN_UNITS(net, i); j++) { printf("["); for (k = 0; k < targets; k++) { - printf("%f", GRADIENT(net,i,j,k)); + printf("%f", ANN_GRADIENT(net,i,j,k)); if (k != targets-1) printf(" "); } printf("] "); @@ -470,10 +583,10 @@ void AnnPrint(struct Ann *net) { printf("\n"); /* SGradients */ printf("\tG"); - for (j = 0; j < UNITS(net, i); j++) { + for (j = 0; j < ANN_UNITS(net, i); j++) { printf("["); for (k = 0; k < targets; k++) { - printf("%f", SGRADIENT(net,i,j,k)); + printf("%f", ANN_SGRADIENT(net,i,j,k)); if (k != targets-1) printf(" "); } printf("] "); @@ -481,10 +594,10 @@ void AnnPrint(struct Ann *net) { printf("\n"); /* Gradients at t-1 */ printf("\tP"); - for (j = 0; j < UNITS(net, i); j++) { + for (j = 0; j < ANN_UNITS(net, i); j++) { printf("["); for (k = 0; k < targets; k++) { - printf("%f", PGRADIENT(net,i,j,k)); + printf("%f", ANN_PGRADIENT(net,i,j,k)); if (k != targets-1) printf(" "); } printf("] "); @@ -492,23 +605,23 @@ void AnnPrint(struct Ann *net) { printf("\n"); /* Delta */ printf("\tD"); - for (j = 0; j < UNITS(net, i); j++) { + for (j = 0; j < ANN_UNITS(net, i); j++) { printf("|"); for (k = 0; k < targets; k++) { - printf("%f", DELTA(net,i,j,k)); + printf("%f", ANN_DELTA(net,i,j,k)); if (k != targets-1) printf(" "); } printf("| "); } printf("\n"); } - for (j = 0; j < UNITS(net,i); j++) { - printf("\tO: %f ", OUTPUT(net,i,j)); + for (j = 0; j < ANN_UNITS(net,i); j++) { + printf("\tO: %f ", ANN_OUTPUT(net,i,j)); } printf("\n"); printf("\tE /"); - for (j = 0; j < UNITS(net,i); j++) { - printf("%f ", ERROR(net,i,j)); + for (j = 0; j < ANN_UNITS(net,i); j++) { + printf("%f ", ANN_ERROR(net,i,j)); } printf("/\n"); } @@ -517,28 +630,28 @@ void AnnPrint(struct Ann *net) { /* Calcuate the global error of the net. This is just the * Root Mean Square (RMS) error, which is half the sum of the squared * errors. */ -float AnnGlobalError(struct Ann *net, float *desired) { - float e, t; - int i, outputs = OUTPUT_UNITS(net); +ann_float_t AnnGlobalError(AnnRprop *net, ann_float_t *desired) { + ann_float_t e, t; + int i, outputs = ANN_OUTPUT_UNITS(net); e = 0; for (i = 0; i < outputs; i++) { - t = desired[i] - OUTPUT_NODE(net,i); + t = desired[i] - ANN_OUTPUT_NODE(net,i); e += t*t; /* No need for fabs(t), t*t will always be positive. 
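 * The value returned below is therefore 0.5 * sum_i (desired[i] - output[i])^2,
 * i.e. half of the summed squared error over the output units for a single
 * example.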
*/ } return .5*e; } /* Set the network input */ -void AnnSetInput(struct Ann *net, float *input) +void AnnSetInput(AnnRprop *net, ann_float_t *input) { - int i, inputs = INPUT_UNITS(net); + int i, inputs = ANN_INPUT_UNITS(net); - for (i = 0; i < inputs; i++) INPUT_NODE(net,i) = input[i]; + for (i = 0; i < inputs; i++) ANN_INPUT_NODE(net,i) = input[i]; } /* Simulate the net, and return the global error */ -float AnnSimulateError(struct Ann *net, float *input, float *desired) { +ann_float_t AnnSimulateError(AnnRprop *net, ann_float_t *input, ann_float_t *desired) { AnnSetInput(net, input); AnnSimulate(net); return AnnGlobalError(net, desired); @@ -546,12 +659,12 @@ float AnnSimulateError(struct Ann *net, float *input, float *desired) { /* Compute the error vector y-t in the output unit. This error depends * on the loss function we use. */ -void AnnCalculateOutputError(struct Ann *net, float *desired) { - int units = OUTPUT_UNITS(net); - float factor = (float)2/units; +void AnnCalculateOutputError(AnnRprop *net, ann_float_t *desired) { + int units = ANN_OUTPUT_UNITS(net); + ann_float_t factor = (ann_float_t)2/units; + AnnLayer *layer = &ANN_LAYER(net, 0); for (int j = 0; j < units; j++) { - net->layer[0].error[j] = - factor * (net->layer[0].output[j] - desired[j]); + layer->error[j] = factor * (layer->output[j] - desired[j]); } } @@ -563,35 +676,35 @@ void AnnCalculateOutputError(struct Ann *net, float *desired) { * points (E1, with the real weight, and E2 with the weight W = W + 0.1), * than the approximation of the gradient is G = (E2-E1)/0.1. */ #define GTRIVIAL_DELTA 0.001 -void AnnCalculateGradientsTrivial(struct Ann *net, float *desired) { - int j, i, layers = LAYERS(net); +void AnnCalculateGradientsTrivial(AnnRprop *net, ann_float_t *desired) { + int j, i, layers = ANN_LAYERS(net); for (j = 1; j < layers; j++) { - int units = UNITS(net, j); - int weights = units * UNITS(net,j-1); + int weights = ANN_WEIGHTS(net,j); for (i = 0; i < weights; i++) { - float t, e1, e2; + ann_float_t t, e1, e2; + AnnLayer *layer = &ANN_LAYER(net,j); /* Calculate the value of the error function * in this point. */ AnnSimulate(net); e1 = AnnGlobalError(net, desired); - t = net->layer[j].weight[i]; + t = layer->weight[i]; /* Calculate the error a bit on the right */ - net->layer[j].weight[i] += GTRIVIAL_DELTA; + layer->weight[i] += GTRIVIAL_DELTA; AnnSimulate(net); e2 = AnnGlobalError(net, desired); /* Restore the original weight */ - net->layer[j].weight[i] = t; + layer->weight[i] = t; /* Calculate the gradient */ - net->layer[j].gradient[i] = (e2-e1)/GTRIVIAL_DELTA; + layer->gradient[i] = (e2-e1)/GTRIVIAL_DELTA; } } } /* Calculate gradients using the back propagation algorithm */ -void AnnCalculateGradients(struct Ann *net, float *desired) { - int j, layers = LAYERS(net)-1; +void AnnCalculateGradients(AnnRprop *net, ann_float_t *desired) { + int j, layers = ANN_LAYERS(net)-1; /* Populate the error vector net->layer[0]->error according * to the loss function. */ @@ -600,19 +713,25 @@ void AnnCalculateGradients(struct Ann *net, float *desired) { /* Back-propagate the error and compute the gradient * for every weight in the net. 
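 * For every unit i of the current layer the loop below computes
 * error_signal = error[i] * derivative(output[i]), stores
 * gradient[k] = error_signal * output_prev[k] for each incoming weight, and
 * accumulates error_prev[k] += error_signal * weight[k] so that the previous
 * layer can repeat the same step.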
*/ for (j = 0; j < layers; j++) { - struct AnnLayer *layer = &net->layer[j]; - struct AnnLayer *prev_layer = &net->layer[j+1]; + AnnLayer *layer = &ANN_LAYER(net, j); + AnnLayer *prev_layer = &ANN_LAYER(net, j+1); int i, units = layer->units; int prevunits = prev_layer->units; + int prevunits_aligned = prev_layer->units_aligned; +#ifdef USING_SIMD + int xps, psteps = prevunits/SIMDF_SIZE; + simdf_t es; +#endif // USING_SIMD /* Skip bias units, they have no connections with the previous * layers. */ if (j > 1) units--; /* Reset the next layer errors array */ - for (i = 0; i < prevunits; i++) prev_layer->error[i] = 0; + //for (i = 0; i < prevunits; i++) prev_layer->error[i] = 0; + memset(prev_layer->error, 0, ANN_SIZEOF_ann_float_t*prevunits); /* For every node in this layer ... */ for (i = 0; i < units; i++) { - float error_signal, ei, oi, derivative; + ann_float_t error_signal, ei, oi, derivative; int k; /* Compute gradient. */ @@ -627,31 +746,34 @@ void AnnCalculateGradients(struct Ann *net, float *desired) { * tanh: (1-oi)*(1+oi), that's 1-(oi*oi) * relu: (oi > 0) ? 1 : 0 */ - derivative = oi*(1-oi); + //derivative = oi*(1-oi); + derivative = (*net->derivative_func)(oi); error_signal = ei*derivative; /* For every weight between this node and * the previous layer's nodes: */ - float *g = prev_layer->gradient + i*prevunits; - float *w = prev_layer->weight + i*prevunits; - float *o = prev_layer->output; - float *e = prev_layer->error; + ann_float_t *g = prev_layer->gradient + i*prevunits_aligned; + ann_float_t *w = prev_layer->weight + i*prevunits_aligned; + ann_float_t *o = prev_layer->output; + ann_float_t *e = prev_layer->error; /* 1. Calculate the gradient */ k = 0; #ifdef USING_SIMD - simdf_t es = simdf_set1f(error_signal); - - int psteps = prevunits/SIMDF_SIZE; - for (int x = 0; x < psteps; x++) { - simdf_t outputs = simdf_loadu(o); - //simdf_t gradients = simdf_mul(es,outputs); - simdf_storeu(g,simdf_mul(es,outputs)); - o += SIMDF_SIZE; - g += SIMDF_SIZE; + if(psteps) + { + es = simdf_set1f(error_signal); +//printf("%d : %ld\n", __LINE__, ((long)o & 15)); + for (xps = 0; xps < psteps; xps++) { + simdf_t outputs = simdf_load(o); + simdf_t gradients = simdf_mul(es,outputs); + simdf_store(g, gradients); + o += SIMDF_SIZE; + g += SIMDF_SIZE; + } + k += psteps*SIMDF_SIZE; } - k += SIMDF_SIZE*psteps; #endif /* Handle final piece shorter than SIMDF_SIZE . */ for (; k < prevunits; k++) *g++ = error_signal*(*o++); @@ -659,15 +781,18 @@ void AnnCalculateGradients(struct Ann *net, float *desired) { /* 2. And back-propagate the error to the previous layer */ k = 0; #ifdef USING_SIMD - for (int x = 0; x < psteps; x++) { - simdf_t weights = simdf_loadu(w); - simdf_t errors = simdf_loadu(e); - //simdf_t prod = simdf_mul(es, weights); - simdf_storeu(e, simdf_add( simdf_mul(es, weights), errors)); - e += SIMDF_SIZE; - w += SIMDF_SIZE; + if(psteps) + { + for (xps = 0; xps < psteps; xps++) { + simdf_t weights = simdf_load(w); + simdf_t errors = simdf_load(e); + simdf_t prod = simdf_mul(es, weights); + simdf_store(e, simdf_add(prod , errors)); + e += SIMDF_SIZE; + w += SIMDF_SIZE; + } + k += psteps*SIMDF_SIZE; } - k += SIMDF_SIZE*psteps; #endif /* Handle final piece shorter than SIMDF_SIZE . 
*/ for (; k < prevunits; k++) { @@ -678,77 +803,92 @@ void AnnCalculateGradients(struct Ann *net, float *desired) { } /* Set the delta values of the net to a given value */ -void AnnSetDeltas(struct Ann *net, float val) { - int j, layers = LAYERS(net); +void AnnSetDeltas(AnnRprop *net, ann_float_t val) { + int j, layers = ANN_LAYERS(net); for (j = 1; j < layers; j++) { - int units = UNITS(net, j); - int weights = units * UNITS(net,j-1); + int weights = ANN_WEIGHTS(net,j); int i; - for (i = 0; i < weights; i++) net->layer[j].delta[i] = val; + AnnLayer *layer = &ANN_LAYER(net, j); + for (i = 0; i < weights; i++) layer->delta[i] = val; } } /* Set the sgradient values to zero */ -void AnnResetSgradient(struct Ann *net) { - int j, layers = LAYERS(net); +void AnnResetSgradient(AnnRprop *net) { + int j, layers = ANN_LAYERS(net); for (j = 1; j < layers; j++) { - int units = UNITS(net, j); - int weights = units * UNITS(net,j-1); - memset(net->layer[j].sgradient, 0, sizeof(float)*weights); + int weights = ANN_WEIGHTS(net, j); + memset(ANN_LAYER(net, j).sgradient, 0, ANN_SIZEOF_ann_float_t*weights); } } /* Set random weights in the range -0.05,+0.05 */ -void AnnSetRandomWeights(struct Ann *net) { +void AnnSetRandomWeights(AnnRprop *net) { int i, j, k; - for (i = 1; i < LAYERS(net); i++) { - for (k = 0; k < UNITS(net, i-1); k++) { - for (j = 0; j < UNITS(net, i); j++) { - WEIGHT(net,i,j,k) = -0.05+.1*(rand()/(RAND_MAX+1.0)); + for (i = 1; i < ANN_LAYERS(net); i++) { + for (k = 0; k < ANN_UNITS(net, i-1); k++) { + for (j = 0; j < ANN_UNITS(net, i); j++) { + ANN_WEIGHT(net,i,j,k) = -0.05+.1*(rand()/(RAND_MAX+1.0)); } } } } /* Scale the net weights of the given factor */ -void AnnScaleWeights(struct Ann *net, float factor) { - int j, layers = LAYERS(net); +void AnnScaleWeights(AnnRprop *net, ann_float_t factor) { + int j, layers = ANN_LAYERS(net); for (j = 1; j < layers; j++) { - int units = UNITS(net, j); - int weights = units * UNITS(net,j-1); + int weights = ANN_WEIGHTS(net,j); int i; + AnnLayer *layer = &ANN_LAYER(net, j); for (i = 0; i < weights; i++) - net->layer[j].weight[i] *= factor; + layer->weight[i] *= factor; } } /* Update the sgradient, that's the sum of the weight's gradient for every * element of the training set. This is used for the RPROP algorithm * that works with the sign of the derivative for the whole set. */ -void AnnUpdateSgradient(struct Ann *net) { - int j, i, layers = LAYERS(net); +void AnnUpdateSgradient(AnnRprop *net) { + int j, i, layers = ANN_LAYERS(net); for (j = 1; j < layers; j++) { - int units = UNITS(net, j); - int weights = units * UNITS(net,j-1); - /* In theory this is a good target for SSE "ADDPS" instructions, - * however modern compilers figure out this automatically. */ - for (i = 0; i < weights; i++) - net->layer[j].sgradient[i] += net->layer[j].gradient[i]; + int weights = ANN_WEIGHTS(net,j); + ann_float_t *sg = net->layer[j].sgradient; + ann_float_t *g = net->layer[j].gradient; + i = 0; +#ifdef USING_SIMD + int psteps = weights/SIMDF_SIZE; + if(psteps) + { + int xps; + for (xps = 0; xps < psteps; xps++) { + simdf_t sgradient = simdf_load(sg); + simdf_t gradient = simdf_load(g); + simdf_store(sg, simdf_add( sgradient, gradient)); + sg += SIMDF_SIZE; + g += SIMDF_SIZE; + } + i += psteps*SIMDF_SIZE; + } +#endif + /* Handle final piece shorter than SIMDF_SIZE . 
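 * In scalar form this whole function is simply sgradient[i] += gradient[i]
 * for every weight: the set-wise gradient sum whose sign drives the RPROP
 * update in AnnAdjustWeightsResilientBP() below.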
*/ + for (; i < weights; i++) + (*sg++) += (*g++); } } /* Helper function for RPROP, returns -1 if n < 0, +1 if n > 0, 0 if n == 0 */ -float sign(float n) { - if (n > 0) return +1; - if (n < 0) return -1; - return 0; +static inline ann_float_t sign(ann_float_t n) { + if (n > 0) return +1.0; + if (n < 0) return -1.0; + return 0.0; } /* The core of the RPROP algorithm. @@ -756,42 +896,42 @@ float sign(float n) { * Note that: * sgradient is the set-wise gradient. * delta is the per-weight update value. */ -void AnnAdjustWeightsResilientBP(struct Ann *net) { - int j, i, layers = LAYERS(net); +void AnnAdjustWeightsResilientBP(AnnRprop *net) { + int j, i, layers = ANN_LAYERS(net); for (j = 1; j < layers; j++) { - int units = UNITS(net, j); - int weights = units * UNITS(net,j-1) - (j-1>0); + int weights = ANN_WEIGHTS(net,j) - (j-1>0); + AnnLayer *layer = &ANN_LAYER(net, j); for (i = 0; i < weights; i++) { - float t = net->layer[j].pgradient[i] * - net->layer[j].sgradient[i]; - float delta = net->layer[j].delta[i]; + ann_float_t sgradient = layer->sgradient[i]; + ann_float_t t = layer->pgradient[i] * sgradient; + ann_float_t delta = layer->delta[i]; if (t > 0) { - delta = MIN(delta*RPROP_NPLUS(net),RPROP_MAXUPDATE(net)); - float wdelta = -sign(net->layer[j].sgradient[i]) * delta; - net->layer[j].weight[i] += wdelta; - net->layer[j].delta[i] = delta; - net->layer[j].pgradient[i] = net->layer[j].sgradient[i]; + delta = ANN_MIN(delta*ANN_RPROP_NPLUS(net),ANN_RPROP_MAXUPDATE(net)); + ann_float_t wdelta = -sign(sgradient) * delta; + layer->weight[i] += wdelta; + layer->delta[i] = delta; + layer->pgradient[i] = sgradient; } else if (t < 0) { - float past_wdelta = -sign(net->layer[j].pgradient[i]) * delta; - delta = MAX(delta*RPROP_NMINUS(net),RPROP_MINUPDATE(net)); - net->layer[j].weight[i] -= past_wdelta; - net->layer[j].delta[i] = delta; - net->layer[j].pgradient[i] = 0; + ann_float_t past_wdelta = -sign(layer->pgradient[i]) * delta; + delta = ANN_MAX(delta*ANN_RPROP_NMINUS(net),ANN_RPROP_MINUPDATE(net)); + layer->weight[i] -= past_wdelta; + layer->delta[i] = delta; + layer->pgradient[i] = 0; } else { /* t == 0 */ - float wdelta = -sign(net->layer[j].sgradient[i]) * delta; - net->layer[j].weight[i] += wdelta; - net->layer[j].pgradient[i] = net->layer[j].sgradient[i]; + ann_float_t wdelta = -sign(sgradient) * delta; + layer->weight[i] += wdelta; + layer->pgradient[i] = sgradient; } } } } /* Resilient Backpropagation Epoch */ -float AnnResilientBPEpoch(struct Ann *net, float *input, float *desired, int setlen) { - float error = 0; - int j, inputs = INPUT_UNITS(net), outputs = OUTPUT_UNITS(net); +ann_float_t AnnResilientBPEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen) { + ann_float_t error = 0; + int j, inputs = ANN_INPUT_UNITS(net), outputs = ANN_OUTPUT_UNITS(net); AnnResetSgradient(net); for (j = 0; j < setlen; j++) { @@ -807,34 +947,34 @@ float AnnResilientBPEpoch(struct Ann *net, float *input, float *desired, int set /* Update the deltas using the gradient descend algorithm. * Gradients should be already computed with AnnCalculateGraidents(). 
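 * Each delta simply accumulates the per-example gradient; AnnAdjustWeights()
 * then applies weight -= learn_rate/setlen * delta, i.e. plain batch
 * gradient descent.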
*/ -void AnnUpdateDeltasGD(struct Ann *net) { - int j, i, layers = LAYERS(net); +void AnnUpdateDeltasGD(AnnRprop *net) { + int j, i, layers = ANN_LAYERS(net); for (j = 1; j < layers; j++) { - int units = UNITS(net, j); - int weights = units * UNITS(net,j-1); + int weights = ANN_WEIGHTS(net,j); + AnnLayer *layer = &ANN_LAYER(net, j); for (i = 0; i < weights; i++) - net->layer[j].delta[i] += net->layer[j].gradient[i]; + layer->delta[i] += layer->gradient[i]; } } /* Adjust net weights using the (already) calculated deltas. */ -void AnnAdjustWeights(struct Ann *net, int setlen) { - int j, i, layers = LAYERS(net); +void AnnAdjustWeights(AnnRprop *net, int setlen) { + int j, i, layers = ANN_LAYERS(net); for (j = 1; j < layers; j++) { - int units = UNITS(net, j); - int weights = units * UNITS(net,j-1); + int weights = ANN_WEIGHTS(net,j); + AnnLayer *layer = &ANN_LAYER(net, j); for (i = 0; i < weights; i++) { - net->layer[j].weight[i] -= LEARN_RATE(net)/setlen*net->layer[j].delta[i]; + layer->weight[i] -= ANN_LEARN_RATE(net)/setlen*layer->delta[i]; } } } /* Gradient Descend training */ -float AnnGDEpoch(struct Ann *net, float *input, float *desidered, int setlen) { - float error = 0; - int j, inputs = INPUT_UNITS(net), outputs = OUTPUT_UNITS(net); +ann_float_t AnnGDEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desidered, int setlen) { + ann_float_t error = 0; + int j, inputs = ANN_INPUT_UNITS(net), outputs = ANN_OUTPUT_UNITS(net); for (j = 0; j < setlen; j++) { AnnSetDeltas(net, 0); @@ -851,10 +991,10 @@ float AnnGDEpoch(struct Ann *net, float *input, float *desidered, int setlen) { /* This function, called after AnnSimulate(), will return 1 if there is * an error in the detected class (compared to the desired output), * othewise 0 is returned. */ -int AnnTestClassError(struct Ann *net, float *desired) { - int i, outputs = OUTPUT_UNITS(net); +int AnnTestClassError(AnnRprop *net, ann_float_t *desired) { + int i, outputs = ANN_OUTPUT_UNITS(net); int classid, outid; - float max = 0; + ann_float_t max = 0; /* Get the class ID from the test dataset output. */ classid = 0; @@ -863,10 +1003,10 @@ int AnnTestClassError(struct Ann *net, float *desired) { classid = i; /* Get the network classification. */ - max = OUTPUT_NODE(net,0); + max = ANN_OUTPUT_NODE(net,0); outid = 0; for (i = 1; i < outputs; i++) { - float o = OUTPUT_NODE(net,i); + ann_float_t o = ANN_OUTPUT_NODE(net,i); if (o > max) { outid = i; max = o; @@ -877,9 +1017,9 @@ int AnnTestClassError(struct Ann *net, float *desired) { /* Simulate the entire test dataset with the neural network and returns the * average error of all the entries tested. 
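 * If classerr is not NULL it also receives the percentage of test entries
 * whose arg-max output class differs from the arg-max of the desired
 * output, as computed by AnnTestClassError() above.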
*/ -void AnnTestError(struct Ann *net, float *input, float *desired, int setlen, float *avgerr, float *classerr) { - float error = 0; - int j, inputs = INPUT_UNITS(net), outputs = OUTPUT_UNITS(net); +void AnnTestError(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen, ann_float_t *avgerr, ann_float_t *classerr) { + ann_float_t error = 0; + int j, inputs = ANN_INPUT_UNITS(net), outputs = ANN_OUTPUT_UNITS(net); int class_errors = 0; for (j = 0; j < setlen; j++) { @@ -890,20 +1030,28 @@ void AnnTestError(struct Ann *net, float *input, float *desired, int setlen, flo desired += outputs; } if (avgerr) *avgerr = error/setlen; - if (classerr) *classerr = (float)class_errors*100/setlen; + if (classerr) *classerr = (ann_float_t)class_errors*100/setlen; } /* Train the net */ -float AnnTrain(struct Ann *net, float *input, float *desired, float maxerr, int maxepochs, int setlen, int algo) { +ann_float_t AnnTrainWithAlgoFunc(AnnRprop *net, ann_float_t *input, ann_float_t *desired, ann_float_t maxerr, + int maxepochs, int setlen, AnnTrainAlgoFunc algo_func) { int i = 0; - float e = maxerr+1; + ann_float_t e = maxerr+1; while (i++ < maxepochs && e >= maxerr) { - if (algo == NN_ALGO_BPROP) { - e = AnnResilientBPEpoch(net, input, desired, setlen); - } else if (algo == NN_ALGO_GD) { - e = AnnGDEpoch(net, input, desired, setlen); - } + e = (*algo_func)(net, input, desired, setlen); } return e; } + + +ann_float_t AnnTrain(AnnRprop *net, ann_float_t *input, ann_float_t *desired, ann_float_t maxerr, int maxepochs, + int setlen, int algo) { + AnnTrainAlgoFunc algo_func; + if(algo == ANN_ALGO_BPROP) algo_func = AnnResilientBPEpoch; + else if(algo == ANN_ALGO_GD) algo_func = AnnGDEpoch; + else return -1; + + return AnnTrainWithAlgoFunc(net, input, desired, maxerr, maxepochs, setlen, algo_func); +} diff --git a/nn.h b/nn.h index 8106daf..23348c9 100644 --- a/nn.h +++ b/nn.h @@ -32,106 +32,128 @@ #ifndef __NN_H #define __NN_H +//#include + +typedef float ann_float_t; +typedef ann_float_t (*AnnDerivativeFunc)(ann_float_t v); /* Data structures. * Nets are not so 'dynamic', but enough to support * an arbitrary number of layers, with arbitrary units for layer. * Only fully connected feed-forward networks are supported. 
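 * Layers are stored output-first: layer[0] is the output layer and
 * layer[layers-1] is the input layer, which is why the units array passed
 * to AnnCreateNet() lists the output size first.
 *
 * A minimal usage sketch (illustrative only, error handling omitted):
 *
 *   AnnRprop *net = AnnCreateNet3(2, 4, 1);  // 2 inputs, 4 hidden, 1 output
 *   ann_float_t in[2] = {0.3, 0.7};
 *   AnnSetInput(net, in);
 *   AnnSimulate(net);
 *   ann_float_t y = ANN_OUTPUT_NODE(net, 0); // forward-pass result
 *   AnnFree(net);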
*/ -struct AnnLayer { - int units; - float *output; /* output[i], output of i-th unit */ - float *error; /* error[i], output error of i-th unit*/ - float *weight; /* weight[(i*units)+j] */ +typedef struct { + ann_float_t *output; /* output[i], output of i-th unit */ + ann_float_t *error; /* error[i], output error of i-th unit*/ + ann_float_t *weight; /* weight[(i*units)+j] */ /* weight between unit i-th and next j-th */ - float *gradient; /* gradient[(i*units)+j] gradient */ - float *sgradient; /* gradient for the full training set */ + ann_float_t *gradient; /* gradient[(i*units)+j] gradient */ + ann_float_t *sgradient; /* gradient for the full training set */ /* only used for RPROP */ - float *pgradient; /* pastgradient[(i*units)+j] t-1 gradient */ + ann_float_t *pgradient; /* pastgradient[(i*units)+j] t-1 gradient */ /* (t-1 sgradient for resilient BP) */ - float *delta; /* delta[(i*units)+j] cumulative update */ + ann_float_t *delta; /* delta[(i*units)+j] cumulative update */ /* (per-weight delta for RPROP) */ -}; + int units; /*moved to last position for alignment purposes*/ + int units_aligned; /*units rounded up for alignment*/ +} AnnLayer; /* Feed forward network structure */ -struct Ann { +typedef struct { + AnnLayer *layer; int flags; int layers; - float rprop_nminus; - float rprop_nplus; - float rprop_maxupdate; - float rprop_minupdate; - float learn_rate; /* Used for GD training. */ - struct AnnLayer *layer; -}; + AnnDerivativeFunc node_transf_func; + AnnDerivativeFunc derivative_func; + ann_float_t rprop_nminus; + ann_float_t rprop_nplus; + ann_float_t rprop_maxupdate; + ann_float_t rprop_minupdate; + ann_float_t learn_rate; /* Used for GD training. */ +} AnnRprop; + +typedef ann_float_t (*AnnTrainAlgoFunc)(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen); /* Raw interface to data structures */ -#define OUTPUT(net,l,i) (net)->layer[l].output[i] -#define ERROR(net,l,i) (net)->layer[l].error[i] -#define WEIGHT(net,l,i,j) (net)->layer[l].weight[((j)*(net)->layer[l].units)+(i)] -#define GRADIENT(net,l,i,j) (net)->layer[l].gradient[((j)*(net)->layer[l].units)+(i)] -#define SGRADIENT(net,l,i,j) (net)->layer[l].sgradient[((j)*(net)->layer[l].units)+(i)] -#define PGRADIENT(net,l,i,j) (net)->layer[l].pgradient[((j)*(net)->layer[l].units)+(i)] -#define DELTA(net,l,i,j) (net)->layer[l].delta[((j)*(net)->layer[l].units)+(i)] -#define LAYERS(net) (net)->layers -#define UNITS(net,l) (net)->layer[l].units -#define WEIGHTS(net,l) (UNITS(net,l)*UNITS(net,l-1)) -#define OUTPUT_NODE(net,i) OUTPUT(net,0,i) -#define INPUT_NODE(net,i) OUTPUT(net,((net)->layers)-1,i) -#define OUTPUT_UNITS(net) UNITS(net,0) -#define INPUT_UNITS(net) (UNITS(net,((net)->layers)-1)-1) -#define RPROP_NMINUS(net) (net)->rprop_nminus -#define RPROP_NPLUS(net) (net)->rprop_nplus -#define RPROP_MAXUPDATE(net) (net)->rprop_maxupdate -#define RPROP_MINUPDATE(net) (net)->rprop_minupdate -#define LEARN_RATE(net) (net)->learn_rate +#define ANN_LAYERS(net) (net)->layers +#define ANN_LAYER(net, l) (net)->layer[/*assert(l >= 0),*/l] +#define ANN_OUTPUT(net,l,i) ANN_LAYER(net, l).output[i] +#define ANN_ERROR(net,l,i) ANN_LAYER(net, l).error[i] +#define ANN_LAYER_IDX(net,l,i,j) (((j)*ANN_LAYER(net, l).units_aligned)+(i)) +#define ANN_WEIGHT(net,l,i,j) ANN_LAYER(net, l).weight[ANN_LAYER_IDX(net,l,i,j)] +#define ANN_GRADIENT(net,l,i,j) ANN_LAYER(net, l).gradient[ANN_LAYER_IDX(net,l,i,j)] +#define ANN_SGRADIENT(net,l,i,j) ANN_LAYER(net, l).sgradient[ANN_LAYER_IDX(net,l,i,j)] +#define ANN_PGRADIENT(net,l,i,j) 
ANN_LAYER(net, l).pgradient[ANN_LAYER_IDX(net,l,i,j)] +#define ANN_DELTA(net,l,i,j) ANN_LAYER(net, l).delta[ANN_LAYER_IDX(net,l,i,j)] +#define ANN_UNITS(net,l) ANN_LAYER(net, l).units +#define ANN_UNITS_ALLOCATED(net,l) ANN_LAYER(net, l).units_aligned +#define ANN_WEIGHTS(net,l) (ANN_UNITS(net,l)*ANN_UNITS(net,l-1)) +#define ANN_OUTPUT_NODE(net,i) ANN_OUTPUT(net,0,i) +#define ANN_INPUT_NODE(net,i) ANN_OUTPUT(net,(ANN_LAYERS(net))-1,i) +#define ANN_OUTPUT_UNITS(net) ANN_UNITS(net,0) +#define ANN_INPUT_UNITS(net) (ANN_UNITS(net,(ANN_LAYERS(net))-1)-1) +#define ANN_RPROP_NMINUS(net) (net)->rprop_nminus +#define ANN_RPROP_NPLUS(net) (net)->rprop_nplus +#define ANN_RPROP_MAXUPDATE(net) (net)->rprop_maxupdate +#define ANN_RPROP_MINUPDATE(net) (net)->rprop_minupdate +#define ANN_LEARN_RATE(net) (net)->learn_rate /* Constants */ -#define DEFAULT_RPROP_NMINUS 0.5 -#define DEFAULT_RPROP_NPLUS 1.2 -#define DEFAULT_RPROP_MAXUPDATE 50 -#define DEFAULT_RPROP_MINUPDATE 0.000001 -#define RPROP_INITIAL_DELTA 0.1 -#define DEFAULT_LEARN_RATE 0.1 -#define NN_ALGO_BPROP 0 -#define NN_ALGO_GD 1 +#define ANN_DEFAULT_RPROP_NMINUS 0.5 +#define ANN_DEFAULT_RPROP_NPLUS 1.2 +#define ANN_DEFAULT_RPROP_MAXUPDATE 50 +#define ANN_DEFAULT_RPROP_MINUPDATE 0.000001 +#define ANN_RPROP_INITIAL_DELTA 0.1 +#define ANN_DEFAULT_LEARN_RATE 0.1 +#define ANN_ALGO_BPROP 0 +#define ANN_ALGO_GD 1 /* Misc */ -#define MAX(a,b) (((a)>(b))?(a):(b)) -#define MIN(a,b) (((a)<(b))?(a):(b)) +#define ANN_MAX(a,b) (((a)>(b))?(a):(b)) +#define ANN_MIN(a,b) (((a)<(b))?(a):(b)) /* Prototypes */ -void AnnResetLayer(struct AnnLayer *layer); -struct Ann *AnnAlloc(int layers); -void AnnFreeLayer(struct AnnLayer *layer); -void AnnFree(struct Ann *net); -int AnnInitLayer(struct Ann *net, int i, int units, int bias); -struct Ann *AnnCreateNet(int layers, int *units); -struct Ann *AnnCreateNet2(int iunits, int ounits); -struct Ann *AnnCreateNet3(int iunits, int hunits, int ounits); -struct Ann *AnnCreateNet4(int iunits, int hunits, int hunits2, int ounits); -struct Ann *AnnClone(struct Ann* net); -size_t AnnCountWeights(struct Ann *net); -void AnnSimulate(struct Ann *net); -void Ann2Tcl(struct Ann *net); -void AnnPrint(struct Ann *net); -float AnnGlobalError(struct Ann *net, float *desidered); -void AnnSetInput(struct Ann *net, float *input); -float AnnSimulateError(struct Ann *net, float *input, float *desidered); -void AnnCalculateGradientsTrivial(struct Ann *net, float *desidered); -void AnnCalculateGradients(struct Ann *net, float *desidered); -void AnnSetDeltas(struct Ann *net, float val); -void AnnResetDeltas(struct Ann *net); -void AnnResetSgradient(struct Ann *net); -void AnnSetRandomWeights(struct Ann *net); -void AnnScaleWeights(struct Ann *net, float factor); -void AnnUpdateDeltasGD(struct Ann *net); -void AnnUpdateDeltasGDM(struct Ann *net); -void AnnUpdateSgradient(struct Ann *net); -void AnnAdjustWeights(struct Ann *net, int setlen); -float AnnBatchGDEpoch(struct Ann *net, float *input, float *desidered, int setlen); -float AnnBatchGDMEpoch(struct Ann *net, float *input, float *desidered, int setlen); -void AnnAdjustWeightsResilientBP(struct Ann *net); -float AnnResilientBPEpoch(struct Ann *net, float *input, float *desidered, int setlen); -float AnnTrain(struct Ann *net, float *input, float *desidered, float maxerr, int maxepochs, int setlen, int algo); -void AnnTestError(struct Ann *net, float *input, float *desired, int setlen, float *avgerr, float *classerr); +ann_float_t AnnTransferFunctionSigmoid(ann_float_t x); +ann_float_t 
AnnTransferFunctionRelu(ann_float_t x); +ann_float_t AnnTransferFunctionTanh(ann_float_t x); +//ann_float_t AnnDerivativeIdentity(ann_float_t x); +ann_float_t AnnDerivativeSigmoid(ann_float_t x); +ann_float_t AnnDerivativeTanh(ann_float_t x); +ann_float_t AnnDerivativeRelu(ann_float_t x); + +void AnnResetLayer(AnnLayer *layer); +AnnRprop *AnnAlloc(int layers); +void AnnFreeLayer(AnnLayer *layer); +void AnnFree(AnnRprop *net); +int AnnInitLayer(AnnRprop *net, int i, int units, int bias); +AnnRprop *AnnCreateNet(int layers, int *units); +AnnRprop *AnnCreateNet2(int iunits, int ounits); +AnnRprop *AnnCreateNet3(int iunits, int hunits, int ounits); +AnnRprop *AnnCreateNet4(int iunits, int hunits, int hunits2, int ounits); +AnnRprop *AnnClone(const AnnRprop* net); +size_t AnnCountWeights(AnnRprop *net); +void AnnSimulate(AnnRprop *net); +void Ann2Tcl(const AnnRprop *net); +void Ann2Js(const AnnRprop *net); +void AnnPrint(const AnnRprop *net); +ann_float_t AnnGlobalError(AnnRprop *net, ann_float_t *desidered); +void AnnSetInput(AnnRprop *net, ann_float_t *input); +ann_float_t AnnSimulateError(AnnRprop *net, ann_float_t *input, ann_float_t *desidered); +void AnnCalculateGradientsTrivial(AnnRprop *net, ann_float_t *desidered); +void AnnCalculateGradients(AnnRprop *net, ann_float_t *desidered); +void AnnSetDeltas(AnnRprop *net, ann_float_t val); +void AnnResetDeltas(AnnRprop *net); +void AnnResetSgradient(AnnRprop *net); +void AnnSetRandomWeights(AnnRprop *net); +void AnnScaleWeights(AnnRprop *net, ann_float_t factor); +void AnnUpdateDeltasGD(AnnRprop *net); +void AnnUpdateDeltasGDM(AnnRprop *net); +void AnnUpdateSgradient(AnnRprop *net); +void AnnAdjustWeights(AnnRprop *net, int setlen); +ann_float_t AnnBatchGDEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desidered, int setlen); +ann_float_t AnnBatchGDMEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desidered, int setlen); +void AnnAdjustWeightsResilientBP(AnnRprop *net); +ann_float_t AnnResilientBPEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desidered, int setlen); +ann_float_t AnnTrainWithAlgoFunc(AnnRprop *net, ann_float_t *input, ann_float_t *desidered, ann_float_t maxerr, int maxepochs, int setlen, AnnTrainAlgoFunc algo_func); +ann_float_t AnnTrain(AnnRprop *net, ann_float_t *input, ann_float_t *desidered, ann_float_t maxerr, int maxepochs, int setlen, int algo); +void AnnTestError(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen, ann_float_t *avgerr, ann_float_t *classerr); #endif /* __NN_H */ diff --git a/tests/Makefile b/tests/Makefile index 574c80f..0584996 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,13 +1,13 @@ all: nn-test-1 nn-test-2 nn-benchmark nn-test-1: nn-test-1.c ../nn.c ../nn.h - $(CC) nn-test-1.c ../nn.c -Wall -W -O2 -o nn-test-1 + $(CC) nn-test-1.c ../nn.c -Wall -W -O2 -o nn-test-1 -lm nn-test-2: nn-test-2.c ../nn.c ../nn.h - $(CC) nn-test-2.c ../nn.c -Wall -W -O2 -o nn-test-2 + $(CC) nn-test-2.c ../nn.c -Wall -W -O2 -o nn-test-2 -lm nn-benchmark: nn-benchmark.c ../nn.c ../nn.h - $(CC) -DUSE_SSE nn-benchmark.c ../nn.c -Wall -W -O3 -o nn-benchmark + $(CC) -DUSE_SSE nn-benchmark.c ../nn.c -Wall -W -O3 -o nn-benchmark -lm clean: rm -f nn-test-1 nn-test-2 nn-benchmark diff --git a/tests/nn-benchmark.c b/tests/nn-benchmark.c index 80868ff..ed2c1c6 100644 --- a/tests/nn-benchmark.c +++ b/tests/nn-benchmark.c @@ -21,35 +21,45 @@ long long mstime(void) { return ust/1000; } -void gen_dataset(struct Ann *nn, float **inputs, float **outputs, int setsize) { - *inputs = 
malloc(sizeof(float)*setsize*NUM_INPUTS); - *outputs = malloc(sizeof(float)*setsize*NUM_INPUTS); - int ilen = INPUT_UNITS(nn); - int olen = OUTPUT_UNITS(nn); +int math_random(int low, int up) { + ann_float_t r = rand() * (1.0 / (RAND_MAX + 1.0)); + r *= (up - low) + 1.0; + return (int)r+low; +} + +void gen_dataset(AnnRprop *nn, ann_float_t **inputs, ann_float_t **outputs, int setsize) { + *inputs = calloc(1, sizeof(ann_float_t)*setsize*NUM_INPUTS); + *outputs = calloc(1, sizeof(ann_float_t)*setsize*NUM_OUTPUTS); + int ilen = ANN_INPUT_UNITS(nn); + int olen = ANN_OUTPUT_UNITS(nn); + int olen_1 = olen - 1; - float *in = *inputs; - float *out = *outputs; + ann_float_t *in = *inputs; + ann_float_t *out = *outputs; for (int j = 0; j < setsize; j++) { for (int k = 0; k < ilen; k++) in[k] = rand() & 1; - int r = rand() & olen; - for (int k = 0; k < olen; k++) { - out[k] = (k == r) ? 1 : 0; - } + //int r = rand() & olen_1; + int r = math_random(0, olen_1); + out[r] = 1; + //printf("%d : %d\n", j, r); + //for (int k = 0; k < olen; k++) { + // out[k] = (k == r) ? 1 : 0; + //} in+= ilen; out+= olen; } } int main(void) { - struct Ann *nn = AnnCreateNet3(NUM_INPUTS, NUM_INPUTS*2, NUM_OUTPUTS); - float *inputs, *outputs; + AnnRprop *nn = AnnCreateNet3(NUM_INPUTS, NUM_INPUTS*2, NUM_OUTPUTS); + ann_float_t *inputs, *outputs; int setsize = 1000; nn->learn_rate = 0.5; gen_dataset(nn, &inputs, &outputs, setsize); int j; - float classerr = 100; + ann_float_t classerr = 100; long long totaltime = 0; int benchmark_milestone = 0; for (j = 0; j < 1000000; j++) { @@ -60,11 +70,12 @@ int main(void) { benchmark_milestone = 1; } long long start = mstime(); - AnnTrain(nn,inputs,outputs,0,1,setsize,NN_ALGO_BPROP); + AnnTrain(nn,inputs,outputs,0,1,setsize,ANN_ALGO_BPROP); long long elapsed = mstime() - start; totaltime += elapsed; AnnTestError(nn,inputs,outputs,setsize,NULL,&classerr); } + AnnFree(nn); return 0; } diff --git a/tests/nn-test-1.c b/tests/nn-test-1.c index 1da9d29..58e5eae 100644 --- a/tests/nn-test-1.c +++ b/tests/nn-test-1.c @@ -10,34 +10,36 @@ #include "../nn.h" int main(void) { - struct Ann *nn = AnnCreateNet3(2, 2, 2); - float inputs[2] = {.05,.10}; - float desired[2] = {.01,.99}; + AnnRprop *nn = AnnCreateNet3(2, 2, 2); + ann_float_t inputs[2] = {.05,.10}; + ann_float_t desired[2] = {.01,.99}; nn->learn_rate = 0.5; /* Input layer. */ - WEIGHT(nn,2,0,0) = .15; - WEIGHT(nn,2,0,1) = .25; + ANN_WEIGHT(nn,2,0,0) = .15; + ANN_WEIGHT(nn,2,0,1) = .25; - WEIGHT(nn,2,1,0) = .20; - WEIGHT(nn,2,1,1) = .30; + ANN_WEIGHT(nn,2,1,0) = .20; + ANN_WEIGHT(nn,2,1,1) = .30; - WEIGHT(nn,2,2,0) = .35; - WEIGHT(nn,2,2,1) = .35; + ANN_WEIGHT(nn,2,2,0) = .35; + ANN_WEIGHT(nn,2,2,1) = .36; - INPUT_NODE(nn,0) = inputs[0]; - INPUT_NODE(nn,1) = inputs[1]; + ANN_INPUT_NODE(nn,0) = inputs[0]; + ANN_INPUT_NODE(nn,1) = inputs[1]; /* Hidden layer. 
*/ - WEIGHT(nn,1,0,0) = .40; - WEIGHT(nn,1,0,1) = .50; + ANN_WEIGHT(nn,1,0,0) = .40; + ANN_WEIGHT(nn,1,0,1) = .50; - WEIGHT(nn,1,1,0) = .45; - WEIGHT(nn,1,1,1) = .55; + ANN_WEIGHT(nn,1,1,0) = .45; + ANN_WEIGHT(nn,1,1,1) = .55; - WEIGHT(nn,1,2,0) = .60; - WEIGHT(nn,1,2,1) = .60; + ANN_WEIGHT(nn,1,2,0) = .60; + ANN_WEIGHT(nn,1,2,1) = .61; + + AnnPrint(nn); int j; for (j = 0; j < 10000; j++) { @@ -56,5 +58,10 @@ int main(void) { } printf("\nAfter training:\n\n"); AnnPrint(nn); + printf("\nTCL simulation:\n\n"); + Ann2Tcl(nn); + Ann2Js(nn); + + AnnFree(nn); return 0; } diff --git a/tests/nn-test-2.c b/tests/nn-test-2.c index 888f6b7..74e70f8 100644 --- a/tests/nn-test-2.c +++ b/tests/nn-test-2.c @@ -10,22 +10,26 @@ #include "../nn.h" int main(void) { - struct Ann *nn = AnnCreateNet3(2, 3, 1); - float inputs[8] = {0,0, 1,0, 0,1, 1,1}; - float desired[4] = {0, 1, 1, 0}; + AnnRprop *nn = AnnCreateNet3(2, 3, 1); + ann_float_t inputs[8] = {0,0, 1,0, 0,1, 1,1}; + ann_float_t desired[4] = {0, 1, 1, 0}; nn->learn_rate = 0.5; int j; for (j = 0; j < 100000; j++) { - float error = AnnTrain(nn, inputs, desired, 0, 1, 4, NN_ALGO_GD); + ann_float_t error = AnnTrain(nn, inputs, desired, 0, 1, 4, ANN_ALGO_GD); printf("Error: %f\n", error); } printf("\nAfter training:\n\n"); for (j = 0; j < 4; j++) { AnnSetInput(nn,inputs+j*2); AnnSimulate(nn); - printf("%f\n", OUTPUT_NODE(nn,0)); + printf("%f\n", ANN_OUTPUT_NODE(nn,0)); } + printf("\nTCL simulation:\n\n"); + Ann2Tcl(nn); + Ann2Js(nn); + AnnFree(nn); return 0; }
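
Note (not part of the patch): a minimal usage sketch of the renamed API, assuming only the identifiers visible in this diff (AnnRprop, ann_float_t, AnnCreateNet3, AnnTrain, ANN_ALGO_BPROP, AnnSetInput, AnnSimulate, ANN_OUTPUT_NODE, AnnFree). It mirrors the style of the test programs above and trains an AND gate with the resilient-backprop path.

/* Hedged usage sketch: train a tiny network on AND-gate data using the
 * renamed types, macros and constants introduced by this patch. */
#include <stdio.h>
#include "nn.h"

int main(void) {
    AnnRprop *nn = AnnCreateNet3(2, 3, 1);            /* 2 inputs, 3 hidden, 1 output. */
    ann_float_t inputs[8]  = {0,0, 1,0, 0,1, 1,1};
    ann_float_t desired[4] = {0, 0, 0, 1};            /* AND truth table. */

    /* ANN_ALGO_BPROP is the renamed NN_ALGO_BPROP (resilient backprop). */
    for (int j = 0; j < 1000; j++)
        AnnTrain(nn, inputs, desired, 0, 1, 4, ANN_ALGO_BPROP);

    for (int j = 0; j < 4; j++) {
        AnnSetInput(nn, inputs + j*2);
        AnnSimulate(nn);
        /* Layer 0 is the output layer, so ANN_OUTPUT_NODE(nn,0) reads it. */
        printf("%f\n", (double)ANN_OUTPUT_NODE(nn,0));
    }
    AnnFree(nn);
    return 0;
}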
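
The patch also exports AnnTrainWithAlgoFunc together with the AnnTrainAlgoFunc typedef, whose signature matches the per-epoch trainers already declared (AnnResilientBPEpoch, AnnBatchGDEpoch, AnnBatchGDMEpoch). Presumably AnnTrain now maps ANN_ALGO_* onto one of these and delegates to AnnTrainWithAlgoFunc, but that is not visible in this hunk, so the sketch below is an assumption about intended use rather than documented behaviour.

/* Hedged sketch: drive training through the new function-pointer entry point.
 * Assumption: algo_func is invoked once per epoch over the whole set, exactly
 * like AnnResilientBPEpoch / AnnBatchGDEpoch, whose signatures it shares. */
#include "nn.h"

static ann_float_t train_rprop(AnnRprop *nn, ann_float_t *in, ann_float_t *out,
                               int setlen, int epochs) {
    /* Pass the exported per-epoch trainer directly as the AnnTrainAlgoFunc. */
    return AnnTrainWithAlgoFunc(nn, in, out, 0 /* maxerr */, epochs, setlen,
                                AnnResilientBPEpoch);
}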
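
Finally, AnnRprop now carries node_transf_func and derivative_func fields (both typed AnnDerivativeFunc), and sigmoid/relu/tanh transfer and derivative functions are exported. Whether AnnCreateNet installs a default pair is not visible in this hunk; the sketch below simply shows the fields being set to the relu pair after creation, as an assumption about how they are meant to be used.

/* Hedged sketch: switch a freshly created network to the relu activation.
 * Assumption: the transfer function and its derivative must be swapped as a
 * pair, and may be changed after AnnCreateNet*(); the patch only shows the
 * struct fields and the function prototypes, not the intended protocol. */
#include "nn.h"

AnnRprop *make_relu_net(void) {
    AnnRprop *nn = AnnCreateNet3(2, 3, 1);
    nn->node_transf_func = AnnTransferFunctionRelu;   /* forward activation */
    nn->derivative_func  = AnnDerivativeRelu;         /* matching derivative */
    nn->learn_rate = ANN_DEFAULT_LEARN_RATE;          /* used only by GD training */
    return nn;
}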