From fff35116eed83c9368e8bd07d02c9e95c447e018 Mon Sep 17 00:00:00 2001
From: jvech
Date: Tue, 8 Aug 2023 08:55:34 -0500
Subject: add: activation and cost functions

The training function was fixed, and new activation and cost functions
were implemented.
---
 Makefile     |   4 +-
 doc/main.pdf | Bin 530497 -> 530552 bytes
 doc/main.tex |   3 +-
 src/nn.c     | 134 ++++++++++++++++++++++++++++++++++++++++++-----------------
 src/nn.h     |   2 +-
 5 files changed, 102 insertions(+), 41 deletions(-)

diff --git a/Makefile b/Makefile
index 26f5ec1..cd3e4ea 100644
--- a/Makefile
+++ b/Makefile
@@ -24,8 +24,8 @@ build: $(OBJS)
 run: build
 	./${BIN}
 
-debug: $(BIN)
-	gdb $< --tui
+debug: build
+	gdb ${BIN} -x breaks.txt --tui
 
 clean:
 	@rm $(OBJS) $(OBJDIR) -rv
diff --git a/doc/main.pdf b/doc/main.pdf
index 1a2404e..ce0c426 100644
Binary files a/doc/main.pdf and b/doc/main.pdf differ
diff --git a/doc/main.tex b/doc/main.tex
index 00028f9..d8f90c3 100644
--- a/doc/main.tex
+++ b/doc/main.tex
@@ -43,9 +43,10 @@
     \pdv{\xi}{\omega^l_{ij}} = & \delta_j^l \pdv{z_j^l}{\omega_{ij}} \\
     \delta_j^l = & \pdv{\xi}{z_j^l} \\
     \pdv{z_j^l}{\omega_{ij}} = & a_i^{l-1} \\
+    \pdv{\xi}{\omega_{ij}} = & \delta^l_{j} a_i^{l-1} \\
 \end{eqnarray}
 
-Output Layer
+Output Layer
 
 \begin{eqnarray}
     \delta_j^L =& \pdv{\xi}{z_j^L} = \pdv{\xi}{a_j^L} \pdv{a_j^L}{z_j^L}\\
diff --git a/src/nn.c b/src/nn.c
index 1d08c6b..0bcc28b 100644
--- a/src/nn.c
+++ b/src/nn.c
@@ -1,13 +1,37 @@
 #include "nn.h"
 
 static void fill_random_weights(double *weights, double *bias, size_t rows, size_t cols);
-static double get_avg_loss(double labels[], double outs[], size_t shape[2], double (*loss)(double, double));
+static double get_avg_loss(
+        double labels[], double outs[], size_t shape[2],
+        double (*loss)(double *, double *, size_t));
+double square_loss(double labels[], double net_outs[], size_t shape);
+double square_dloss_out(double labels, double net_out);
+
+double leaky_relu(double x);
+double dleaky_relu(double x);
 double relu(double x);
 double drelu(double x);
 double sigmoid(double x);
 double dsigmoid(double x);
+double softplus(double x);
+double dsoftplus(double x);
+
+struct Cost NN_SQUARE = {
+    .func = square_loss,
+    .dfunc_out = square_dloss_out
+};
+
+struct Activation NN_SOFTPLUS = {
+    .func = softplus,
+    .dfunc = dsoftplus,
+};
+
+struct Activation NN_LEAKY_RELU = {
+    .func = leaky_relu,
+    .dfunc = dleaky_relu
+};
 
 struct Activation NN_RELU = {
     .func = relu,
@@ -45,7 +69,7 @@ void nn_network_train(
         biases[l] = calloc(network[l].neurons, sizeof(double));
     }
 
-    for (size_t epoch = 0; epoch < epochs; epochs++) {
+    for (size_t epoch = 0; epoch < epochs; epoch++) {
         nn_forward(outs, zouts, input, input_shape, network, network_size);
         nn_backward(
                 weights, biases,
@@ -55,7 +79,7 @@ void nn_network_train(
                 network, network_size,
                 cost.dfunc_out, alpha);
         double *net_out = outs[network_size - 1];
-        fprintf(stderr, "epoch: %zu \tavg loss: %6.2lf\n",
+        fprintf(stdout, "epoch: %zu \t loss: %6.2lf\n",
                 epoch, get_avg_loss(labels, net_out, labels_shape, cost.func));
     }
 
@@ -71,6 +95,7 @@ void nn_network_train(
     free(weights);
     free(biases);
 
+    return;
 nn_network_train_error:
     perror("nn_network_train() Error");
     exit(1);
@@ -89,54 +114,55 @@ void nn_backward(
     for (size_t l = 0; l < network_size; l++) {
         max_neurons = (max_neurons > network[l].neurons) ? max_neurons : network[l].neurons;
     }
 
-    double *dcost_out = calloc(labels_shape[0] * labels_shape[1], sizeof(double));
+    double *dcost_outs = calloc(labels_shape[0] * labels_shape[1], sizeof(double));
     double *delta = calloc(max_neurons, sizeof(double));
     double *delta_next = calloc(max_neurons, sizeof(double));
-    if (!dcost_out || !delta || !delta_next) goto nn_backward_error;
+    if (!dcost_outs || !delta || !delta_next) goto nn_backward_error;
 
     for (size_t i = 0; i < labels_shape[0]; i++) {
-        for (size_t j = 0; j < labels_shape[0]; j++) {
+        for (size_t j = 0; j < labels_shape[1]; j++) {
             size_t index = i * labels_shape[1] + j;
-            dcost_out[index] = dcost_out_func(Labels[index], Outs[network_size - 1][index]);
+            dcost_outs[index] = dcost_out_func(Labels[index], Outs[network_size - 1][index]);
         }
     }
 
     for (size_t sample = 0; sample < input_shape[0]; sample++) {
-        for (size_t l = network_size - 1; l >= 0; l--) {
-            size_t weigths_shape[2] = {network[l].input_nodes, network[l].neurons};
+        for (size_t l = network_size - 1; l >= 0 && l < network_size; l--) {
+            size_t weights_shape[2] = {network[l].input_nodes, network[l].neurons};
             if (l == network_size - 1) {
                 double *zout = Zout[l] + sample * network[l].neurons;
                 double *out_prev = Outs[l - 1] + sample * network[l-1].neurons;
+                double *dcost_out = dcost_outs + sample * network[l].neurons;
                 nn_layer_out_delta(delta, dcost_out, zout, network[l].neurons, network[l].activation.dfunc);
-                nn_layer_backward(weights[l], bias[l], weigths_shape, delta, out_prev, network[l], alpha);
+                nn_layer_backward(weights[l], bias[l], weights_shape, delta, out_prev, network[l], alpha);
             } else if (l == 0) {
-                size_t weigths_next_shape[2] = {network[l+1].input_nodes, network[l+1].neurons};
+                size_t weights_next_shape[2] = {network[l+1].input_nodes, network[l+1].neurons};
                 double *zout = Zout[l] + sample * network[l].neurons;
                 double *input = Input + sample * input_shape[1];
-                nn_layer_hidden_delta(delta, delta_next, zout, weights[l+1], weigths_next_shape, network[l].activation.dfunc);
-                nn_layer_backward(weights[l], bias[l], weigths_shape, delta, input, network[l], alpha);
-                break;
+                nn_layer_hidden_delta(delta, delta_next, zout, weights[l+1], weights_next_shape, network[l].activation.dfunc);
+                nn_layer_backward(weights[l], bias[l], weights_shape, delta, input, network[l], alpha);
             } else {
-                size_t weigths_next_shape[2] = {network[l+1].input_nodes, network[l+1].neurons};
+                size_t weights_next_shape[2] = {network[l+1].input_nodes, network[l+1].neurons};
                 double *zout = Zout[l] + sample * network[l].neurons;
                 double *out_prev = Outs[l - 1] + sample * network[l-1].neurons;
-                nn_layer_hidden_delta(delta, delta_next, zout, weights[l+1], weigths_next_shape, network[l].activation.dfunc);
-                nn_layer_backward(weights[l], bias[l], weigths_shape, delta, out_prev, network[l], alpha);
+                nn_layer_hidden_delta(delta, delta_next, zout, weights[l+1], weights_next_shape, network[l].activation.dfunc);
+                nn_layer_backward(weights[l], bias[l], weights_shape, delta, out_prev, network[l], alpha);
             }
-            memmove(delta_next, delta, weigths_shape[1] * sizeof(double));
+            memmove(delta_next, delta, weights_shape[1] * sizeof(double));
         }
-        for (size_t l = network_size - 1; l >= 0; l--) {
-            size_t weigths_shape[2] = {network[l].input_nodes, network[l].neurons};
-            memmove(network[l].weights, weights[l], weigths_shape[0] * weigths_shape[1] * sizeof(double));
-            memmove(network[l].bias, bias[l], weigths_shape[1] * sizeof(double));
+
+        for (size_t l = 0; l < network_size; l++) {
+            size_t weights_shape[2] = {network[l].input_nodes, network[l].neurons};
+            memmove(network[l].weights, weights[l], weights_shape[0] * weights_shape[1] * sizeof(double));
+            memmove(network[l].bias, bias[l], weights_shape[1] * sizeof(double));
         }
     }
 
-    free(dcost_out);
+    free(dcost_outs);
     free(delta);
     free(delta_next);
-
+    return;
 nn_backward_error:
     perror("nn_backward() Error");
     exit(1);
@@ -161,14 +187,14 @@ void nn_layer_backward(
 
 void nn_layer_hidden_delta(
         double *delta, double *delta_next, double *zout,
-        double *weigths_next, size_t weigths_shape[2],
+        double *weights_next, size_t weights_shape[2],
         double (*activation_derivative)(double))
 {
-    for (size_t j = 0; j < weigths_shape[0]; j++) {
+    for (size_t j = 0; j < weights_shape[0]; j++) {
         double sum = 0;
-        for (size_t k = 0; k < weigths_shape[1]; k++) {
-            size_t index = j * weigths_shape[1] + k;
-            sum += delta_next[k] * weigths_next[index];
+        for (size_t k = 0; k < weights_shape[1]; k++) {
+            size_t index = j * weights_shape[1] + k;
+            sum += delta_next[k] * weights_next[index];
         }
         delta[j] = sum * activation_derivative(zout[j]);
     }
@@ -327,18 +353,52 @@ double relu(double x)
     return (x > 0) ? x : 0;
 }
 
-double drelu(double x) {
+double drelu(double x)
+{
     return (x > 0) ? 1 : 0;
 }
 
-double get_avg_loss(double labels[], double outs[], size_t shape[2], double (*loss)(double, double))
+double leaky_relu(double x)
+{
+    return (x > 0) ? x : 0.01 * x;
+}
+
+double dleaky_relu(double x)
+{
+    return (x > 0) ? 1 : 0.01;
+}
+
+double softplus(double x)
+{
+    return log1p(exp(x));
+}
+
+double dsoftplus(double x)
+{
+    return sigmoid(x);
+}
+
+double square_loss(double labels[], double net_out[], size_t shape)
 {
     double sum = 0;
-    for (size_t i = 0; i < shape[0]; i++) {
-        for (size_t j = 0; j < shape[1]; j++) {
-            size_t index = i * shape[1] + j;
-            sum += loss(labels[index], outs[index]);
-        }
+    for (size_t i = 0; i < shape; i++) {
+        sum += pow(labels[i] - net_out[i], 2);
+    }
+    return 0.5 * sum;
+}
+
+double square_dloss_out(double label, double net_out)
+{
+    return net_out - label;
+}
+
+double get_avg_loss(
+        double labels[], double outs[], size_t shape[2],
+        double (*loss)(double *, double *, size_t shape))
+{
+    double sum = 0;
+    for (size_t i = 0; i < shape[0]; i += shape[1]) {
+        sum += loss(labels + i, outs + i, shape[1]);
     }
-    return sum / shape[1];
+    return sum / shape[0];
 }
diff --git a/src/nn.h b/src/nn.h
index 40066e3..2fcf9be 100644
--- a/src/nn.h
+++ b/src/nn.h
@@ -11,7 +11,7 @@
 #include 
 
 struct Cost {
-    double (*func)(double labels, double net_out);
+    double (*func)(double labels[], double net_out[], size_t shape);
     double (*dfunc_out)(double labels, double net_out);
 };
 
-- 
cgit v1.2.3-70-g09d2
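
Usage sketch (not part of the patch): the snippet below only exercises the activation and cost symbols this commit adds to src/nn.c. It assumes NN_LEAKY_RELU, NN_SOFTPLUS and NN_SQUARE are visible through nn.h; if the header does not export them, declare them extern before use. The data values are made up for illustration.

/* usage_sketch.c -- illustrative only, not part of the commit.
 * Assumes NN_LEAKY_RELU, NN_SOFTPLUS and NN_SQUARE are reachable via nn.h;
 * otherwise forward-declare them here as extern. */
#include <stdio.h>
#include "nn.h"

int main(void)
{
    double labels[]  = {1.0, 0.0, 1.0};   /* made-up targets         */
    double net_out[] = {0.8, 0.2, 0.6};   /* made-up network outputs */

    /* New activations: per-neuron function / derivative pairs. */
    printf("leaky_relu(-2.0) = %g\n", NN_LEAKY_RELU.func(-2.0));
    printf("softplus'(0.0)   = %g\n", NN_SOFTPLUS.dfunc(0.0));

    /* New square cost: whole-row loss plus per-output derivative. */
    printf("square loss      = %g\n", NN_SQUARE.func(labels, net_out, 3));
    printf("dloss/dout[0]    = %g\n", NN_SQUARE.dfunc_out(labels[0], net_out[0]));
    return 0;
}

Something like `cc usage_sketch.c src/nn.c -lm` should build it (the -lm is for the log1p/exp/pow calls the new functions rely on), assuming the file sits next to the project sources.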