/** * ml - a neural network processor written with C * Copyright (C) 2023 jvech * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include "util.h" #include "nn.h" static void dataset_shuffle_rows( double *inputs, size_t in_shape[2], double *labels, size_t lbl_shape[2]); static void fill_random_weights(double *weights, double *bias, size_t rows, size_t cols); static double get_avg_loss( double labels[], double outs[], size_t shape[2], double (*loss)(double *, double *, size_t)); double square_loss(double labels[], double net_outs[], size_t shape); double square_dloss_out(double labels, double net_out); struct Cost NN_SQUARE = { .func = square_loss, .dfunc_out = square_dloss_out }; void nn_network_predict( double *output, size_t output_shape[2], double *input, size_t input_shape[2], Layer network[], size_t network_size) { double **outs = calloc(network_size, sizeof(double *)); double **zouts = calloc(network_size, sizeof(double *)); size_t samples = input_shape[0]; for (size_t l = 0; l < network_size; l++) { outs[l] = calloc(samples * network[l].neurons, sizeof(double)); zouts[l] = calloc(samples * network[l].neurons, sizeof(double)); } nn_forward(outs, zouts, input, input_shape, network, network_size); memmove(output, outs[network_size - 1], samples * output_shape[1] * sizeof(double)); for (size_t l = 0; l < network_size; l++) { free(outs[l]); free(zouts[l]); } free(outs); free(zouts); } void nn_network_train( Layer network[], size_t network_size, double *input, size_t input_shape[2], double *labels, size_t labels_shape[2], struct Cost cost, size_t epochs, size_t batch_size, double alpha, bool shuffle) { assert(input_shape[0] == labels_shape[0] && "label samples don't correspond with input samples\n"); double **outs = calloc(network_size, sizeof(double *)); double **zouts = calloc(network_size, sizeof(double *)); double **weights = calloc(network_size, sizeof(double *)); double **biases = calloc(network_size, sizeof(double *)); if (!outs || !zouts || !weights || !biases) goto nn_network_train_error; double *input_random = calloc(input_shape[0] * input_shape[1], sizeof(double)); double *labels_random = calloc(labels_shape[0] * labels_shape[1], sizeof(double)); if (!input_random || !labels_random) goto nn_network_train_error; memcpy(input_random, input, sizeof(double) * input_shape[0] * input_shape[1]); memcpy(labels_random, labels, sizeof(double) * labels_shape[0] * labels_shape[1]); size_t samples = input_shape[0]; for (size_t l = 0; l < network_size; l++) { outs[l] = calloc(batch_size * network[l].neurons, sizeof(double)); zouts[l] = calloc(batch_size * network[l].neurons, sizeof(double)); weights[l] = malloc(network[l].input_nodes * network[l].neurons * sizeof(double)); biases[l] = malloc(network[l].neurons * sizeof(double)); if (!outs[l] || !zouts || !weights[l] || !biases) goto nn_network_train_error; memcpy(weights[l], network[l].weights, sizeof(double) * network[l].input_nodes * network[l].neurons); memcpy(biases[l], network[l].bias, sizeof(double) * network[l].neurons); } size_t batch_input_shape[2] = {batch_size, input_shape[1]}; size_t batch_labels_shape[2] = {batch_size, labels_shape[1]}; size_t n_batches = input_shape[0] / batch_size; if (samples % batch_size) { n_batches++; } for (size_t epoch = 0; epoch < epochs; epoch++) { if (shuffle) { dataset_shuffle_rows(input_random, input_shape, labels_random, labels_shape); } for (size_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { size_t index = batch_size * batch_idx; double *input_batch = input_random + index * input_shape[1]; double *labels_batch = labels_random + index * labels_shape[1]; if (batch_idx == n_batches - 1 && samples % batch_size) { batch_input_shape[0] = samples % batch_size; batch_labels_shape[0] = samples % batch_size; } nn_forward(outs, zouts, input_batch, batch_input_shape, network, network_size); nn_backward( weights, biases, zouts, outs, input_batch, batch_input_shape, labels_batch, batch_labels_shape, network, network_size, cost.dfunc_out, alpha); double *net_out = outs[network_size - 1]; fprintf(stdout, "epoch: %g \t loss: %6.6lf\n", epoch + (float)batch_idx / n_batches, get_avg_loss(labels, net_out, batch_labels_shape, cost.func)); } } for (size_t l = 0; l < network_size; l++) { free(outs[l]); free(zouts[l]); free(weights[l]); free(biases[l]); } free(zouts); free(outs); free(weights); free(biases); return; nn_network_train_error: perror("nn_network_train() Error"); exit(1); } void nn_backward( double **weights, double **bias, double **Zout, double **Outs, double *Input, size_t input_shape[2], double *Labels, size_t labels_shape[2], Layer network[], size_t network_size, double (dcost_out_func)(double, double), double alpha) { size_t max_neurons = 0; for (size_t l = 0; l < network_size; l++) { max_neurons = (max_neurons > network[l].neurons) ? max_neurons : network[l].neurons; } double *dcost_outs = calloc(labels_shape[0] * labels_shape[1], sizeof(double)); double *delta = calloc(max_neurons, sizeof(double)); double *delta_next = calloc(max_neurons, sizeof(double)); if (!dcost_outs || !delta || !delta_next) goto nn_backward_error; for (size_t i = 0; i < labels_shape[0]; i++) { for (size_t j = 0; j < labels_shape[1]; j++) { size_t index = i * labels_shape[1] + j; dcost_outs[index] = dcost_out_func(Labels[index], Outs[network_size - 1][index]); } } for (size_t sample = 0; sample < input_shape[0]; sample++) { for (size_t l = network_size - 1; l < network_size; l--) { size_t weights_shape[2] = {network[l].input_nodes, network[l].neurons}; if (l == network_size - 1) { double *zout = Zout[l] + sample * network[l].neurons; double *out_prev = Outs[l - 1] + sample * network[l-1].neurons; double *dcost_out = dcost_outs + sample * network[l].neurons; nn_layer_out_delta(delta, dcost_out, zout, network[l].neurons, network[l].activation.dfunc); nn_layer_backward(weights[l], bias[l], weights_shape, delta, out_prev, network[l], alpha); } else if (l == 0) { size_t weights_next_shape[2] = {network[l+1].input_nodes, network[l+1].neurons}; double *zout = Zout[l] + sample * network[l].neurons; double *input = Input + sample * input_shape[1]; nn_layer_hidden_delta(delta, delta_next, zout, weights[l+1], weights_next_shape, network[l].activation.dfunc); nn_layer_backward(weights[l], bias[l], weights_shape, delta, input, network[l], alpha); } else { size_t weights_next_shape[2] = {network[l+1].input_nodes, network[l+1].neurons}; double *zout = Zout[l] + sample * network[l].neurons; double *out_prev = Outs[l - 1] + sample * network[l-1].neurons; nn_layer_hidden_delta(delta, delta_next, zout, weights[l+1], weights_next_shape, network[l].activation.dfunc); nn_layer_backward(weights[l], bias[l], weights_shape, delta, out_prev, network[l], alpha); } memmove(delta_next, delta, weights_shape[1] * sizeof(double)); } } for (size_t l = 0; l < network_size; l++) { size_t weights_shape[2] = {network[l].input_nodes, network[l].neurons}; memcpy(network[l].weights, weights[l], weights_shape[0] * weights_shape[1] * sizeof(double)); memcpy(network[l].bias, bias[l], weights_shape[1] * sizeof(double)); } free(dcost_outs); free(delta); free(delta_next); return; nn_backward_error: perror("nn_backward() Error"); exit(1); } void nn_layer_backward( double *weights, double *bias, size_t weights_shape[2], double *delta, double *out_prev, Layer layer, double alpha) { // W_next = W - alpha * out_prev @ delta.T cblas_dger(CblasRowMajor, weights_shape[0], weights_shape[1], -alpha, out_prev, 1, delta, 1, weights, weights_shape[1]); for (size_t j = 0; j < weights_shape[1]; j++) bias[j] = bias[j] - alpha * delta[j]; } void nn_layer_hidden_delta( double *delta, double *delta_next, double *zout, double *weights_next, size_t weights_shape[2], double (*activation_derivative)(double)) { for (size_t j = 0; j < weights_shape[0]; j++) { double sum = 0; for (size_t k = 0; k < weights_shape[1]; k++) { size_t index = j * weights_shape[1] + k; sum += delta_next[k] * weights_next[index]; } delta[j] = sum * activation_derivative(zout[j]); } } void nn_layer_out_delta( double *delta, double *error, double *zout, size_t cols, double (*activation_derivative)(double)) { for (size_t i = 0; i < cols; i++) { delta[i] = error[i] * activation_derivative(zout[i]); } } void nn_forward( double **out, double **zout, double *X, size_t X_shape[2], Layer network[], size_t network_size) { size_t in_shape[2] = {X_shape[0], X_shape[1]}; size_t out_shape[2]; out_shape[0] = X_shape[0]; double *input = X; for (size_t l = 0; l < network_size; l++) { out_shape[1] = network[l].neurons; nn_layer_forward(network[l], zout[l], out_shape, input, in_shape); nn_layer_map_activation(network[l].activation.func, out[l], out_shape, zout[l], out_shape); in_shape[1] = out_shape[1]; input = out[l]; } } void nn_layer_map_activation( double (*activation)(double), double *aout, size_t aout_shape[2], double *zout, size_t zout_shape[2]) { if (zout_shape[0] != aout_shape[0] || zout_shape[1] != aout_shape[1]) { fprintf(stderr, "nn_layer_map_activation() Error: zout must have (%zu x %zu) dimensions not (%zu x %zu)\n", aout_shape[0], aout_shape[1], zout_shape[0], zout_shape[1]); exit(1); } for (size_t i = 0; i < aout_shape[0]; i++) { for (size_t j = 0; j < aout_shape[1]; j ++) { size_t index = aout_shape[1] * i + j; aout[index] = activation(zout[index]); } } } void nn_layer_forward(Layer layer, double *zout, size_t zout_shape[2], double *input, size_t input_shape[2]) { if (zout_shape[0] != input_shape[0] || zout_shape[1] != layer.neurons) { fprintf(stderr, "nn_layer_forward() Error: zout must have (%zu x %zu) dimensions not (%zu x %zu)\n", input_shape[0], layer.neurons, zout_shape[0], zout_shape[1]); exit(1); } for (size_t i = 0; i < input_shape[0]; i++) { for (size_t j = 0; j < layer.neurons; j++) { size_t index = layer.neurons * i + j; zout[index] = layer.bias[j]; } } cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, input_shape[0], layer.neurons, layer.input_nodes, // m, n, k 1.0, input, input_shape[1], //alpha X layer.weights, layer.neurons, // W 1.0, zout, layer.neurons); // beta B } void nn_network_read_weights(char *filepath, Layer *network, size_t network_size) { FILE *fp = fopen(filepath, "rb"); if (fp == NULL) die("nn_network_read_weights Error():"); size_t net_size, shape[2], ret; ret = fread(&net_size, sizeof(size_t), 1, fp); if (net_size != network_size) goto nn_network_read_weights_error; for (size_t i = 0; i < network_size; i++) { fread(shape, sizeof(size_t), 2, fp); if (shape[0] != network[i].input_nodes || shape[1] != network[i].neurons) { goto nn_network_read_weights_error; } if (!network[i].weights || !network[i].bias) { die("nn_network_read_weights() Error: " "the weights on layer %zu haven't been initialized", i); } ret = fread(network[i].weights, sizeof(double), shape[0] * shape[1], fp); if (ret != shape[0] * shape[1]) goto nn_network_read_weights_error; ret = fread(network[i].bias, sizeof(double), shape[1], fp); if (ret != shape[1]) goto nn_network_read_weights_error; } fclose(fp); return; nn_network_read_weights_error: fclose(fp); die("nn_network_read_weights() Error: " "number of read objects does not match with expected ones"); } void nn_network_write_weights(char *filepath, Layer *network, size_t network_size) { FILE *fp = fopen(filepath, "wb"); if (fp == NULL) die("nn_network_write_weights() Error:"); fwrite(&network_size, sizeof(size_t), 1, fp); size_t ret; for (size_t i = 0; i < network_size; i++) { size_t shape[2] = {network[i].input_nodes, network[i].neurons}; size_t size = shape[0] * shape[1]; ret = fwrite(shape, sizeof(size_t), 2, fp); if (ret != 2) goto nn_network_write_weights_error; ret = fwrite(network[i].weights, sizeof(double), size, fp); if (ret != size) goto nn_network_write_weights_error; ret = fwrite(network[i].bias, sizeof(double), network[i].neurons, fp); if (ret != network[i].neurons) goto nn_network_write_weights_error; } fclose(fp); return; nn_network_write_weights_error: fclose(fp); die("nn_network_write_weights() Error: " "number of written objects does not match with number of objects"); } void nn_network_init_weights(Layer layers[], size_t nmemb, size_t n_inputs, bool fill_random) { size_t i, prev_size = n_inputs; for (i = 0; i < nmemb; i++) { layers[i].weights = calloc(prev_size * layers[i].neurons, sizeof(double)); layers[i].bias = calloc(layers[i].neurons, sizeof(double)); if (layers[i].weights == NULL || layers[i].bias == NULL) { goto nn_layers_calloc_weights_error; } if (fill_random) fill_random_weights(layers[i].weights, layers[i].bias, prev_size, layers[i].neurons); layers[i].input_nodes = prev_size; prev_size = layers[i].neurons; } return; nn_layers_calloc_weights_error: perror("nn_layers_calloc_weights() Error"); exit(1); } void nn_network_free_weights(Layer layers[], size_t nmemb) { size_t i; for (i = 0; i < nmemb; i++) { free(layers[i].weights); free(layers[i].bias); } } void fill_random_weights(double *weights, double *bias, size_t rows, size_t cols) { FILE *fp = fopen("/dev/random", "rb"); if (fp == NULL) goto nn_fill_random_weights_error; size_t weights_size = rows * cols; int64_t *random_weights = calloc(weights_size, sizeof(int64_t)); int64_t *random_bias = calloc(cols, sizeof(int64_t)); fread(random_weights, sizeof(int64_t), weights_size, fp); fread(random_bias, sizeof(int64_t), cols, fp); if (!random_weights || !random_bias) goto nn_fill_random_weights_error; for (size_t i = 0; i < weights_size; i++) { weights[i] = (double)random_weights[i] / (double)INT64_MAX * 2; } for (size_t i = 0; i < cols; i++) { bias[i] = (double)random_bias[i] / (double)INT64_MAX * 2; } free(random_weights); free(random_bias); fclose(fp); return; nn_fill_random_weights_error: perror("nn_fill_random_weights Error()"); exit(1); } void dataset_shuffle_rows( double *inputs, size_t in_shape[2], double *labels, size_t lbl_shape[2]) { size_t random_row; size_t in_index, lbl_index; size_t shuffle_in_index, column_in_bytes; size_t shuffle_lbl_index, column_lbl_bytes; double *in_buffer, *lbl_buffer; in_buffer = malloc(sizeof(double) * in_shape[1]); lbl_buffer = malloc(sizeof(double) * lbl_shape[1]); if (in_buffer == NULL || lbl_buffer == NULL) goto dataset_shuffle_rows_error; column_in_bytes = sizeof(double) * in_shape[1]; column_lbl_bytes = sizeof(double) * lbl_shape[1]; for (size_t row = 0; row < in_shape[0]; row++) { /* Swap actual row with a random row*/ random_row = random() % in_shape[0]; /* Input Swap */ in_index = row * in_shape[1]; shuffle_in_index = random_row * in_shape[1]; memcpy(in_buffer, inputs + in_index, column_in_bytes); memcpy(inputs + in_index, inputs + shuffle_in_index, column_in_bytes); memcpy(inputs + shuffle_in_index, in_buffer, column_in_bytes); /* Label Swap */ lbl_index = row * lbl_shape[1]; shuffle_lbl_index = random_row * lbl_shape[1]; memcpy(lbl_buffer, labels + lbl_index, column_lbl_bytes); memcpy(labels + lbl_index, labels + shuffle_lbl_index, column_lbl_bytes); memcpy(labels + shuffle_lbl_index, lbl_buffer, column_lbl_bytes); } free(in_buffer); free(lbl_buffer); return; dataset_shuffle_rows_error: die("dataset_shuffle_rows() malloc Error:"); } double square_loss(double labels[], double net_out[], size_t shape) { double sum = 0; for (size_t i = 0; i < shape; i++) { sum += pow(labels[i] - net_out[i], 2); } return 0.5 * sum; } double square_dloss_out(double label, double net_out) { return net_out - label; } double get_avg_loss( double labels[], double outs[], size_t shape[2], double (*loss)(double *, double *, size_t shape)) { double sum = 0; for (size_t i = 0; i < shape[0]; i += shape[1]) { sum += loss(labels + i, outs + i, shape[1]); } return sum / shape[0]; } #ifdef NN_TEST /* * compile: clang -Wall -Wextra -g -DNN_TEST -o objs/test_nn src/util.c src/nn.c $(pkg-config --libs-only-l blas) -lm */ int main(void) { /* * array_shuffle_rows() test */ srandom(42); double input_array[12] = { 11, 12, 13, 21, 22, 23, 31, 32, 33, 41, 42, 43, }; double shuffled_input_array[12] = { 21, 22, 23, 41, 42, 43, 31, 32, 33, 11, 12, 13, }; size_t in_shape[2] = {4,3}; double label_array[4] = {1, 2, 3, 4}; double shuffled_label_array[4] = {2, 4, 3, 1}; size_t lbl_shape[2] = {4,1}; dataset_shuffle_rows(input_array, in_shape, label_array, lbl_shape); size_t i, j, index; for (i = 0; i < in_shape[0]; i++) { for (j = 0; j < in_shape[1]; j++) { index = i * in_shape[1] + j; if (input_array[index] != shuffled_input_array[index]) { printf("- array_shuffle_rows() failure: input_array mismatch on (%zu,%zu)\n", i, j); return 1; } } for (j = 0; j < lbl_shape[1]; j++) { index = i * lbl_shape[1] + j; if (label_array[index] != shuffled_label_array[index]) { printf("- array_shuffle_rows() failure: label_array mismatch on (%zu,%zu)\n", i, j); return 1; } } } printf("- array_shuffle_rows() success\n"); return 0; } #endif //NN_TEST