/**
* ml - a neural network processor written with C
* Copyright (C) 2023 jvech
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include "util.h"
#include "nn.h"
/* Size limit, in bytes, that json_read() enforces on its input: 2^29 = 536870912 = 0.5 GiB. */
#define MAX_FILE_SIZE (1 << 29)
/*
 * Dense matrix of doubles as used by json_read() and json_write():
 * element (i, j) lives at data[shape[1] * i + j].
 */
struct Array {
    double *data;       /* shape[0] * shape[1] values */
    size_t shape[2];    /* shape[0]: number of rows, shape[1]: number of columns */
};
typedef struct Array Array;
/*
 * Number of elements of type `type` held by the array `x`.
 * Only meaningful for real arrays, not for pointers. The expansion is fully
 * parenthesized so it can be used safely inside larger expressions.
 */
#define ARRAY_SIZE(x, type) (sizeof(x) / sizeof(type))
/* Forward declarations of the file-local JSON helpers. */

/* Load the JSON array stored at `filepath` into the `input` and `out` matrices. */
static void json_read(const char *filepath,
                      Array *input, Array *out,
                      char *out_keys[], size_t out_keys_size,
                      char *in_keys[], size_t in_keys_size,
                      bool read_output);

/* Dump the rows of `input` and `out` as a JSON array of objects to `filepath`. */
static void json_write(const char *filepath,
                       Array input, Array out,
                       char *out_keys[], size_t out_keys_size,
                       char *in_keys[], size_t in_keys_size);
/**
 * Read a JSON array of objects and turn it into two row-major matrices.
 *
 * @param filepath      File to read; the string "-" selects /dev/stdin.
 * @param input         Receives a (rows x n_input_keys) matrix; row i holds the
 *                      values of in_keys[0..n_input_keys-1] of the i-th object.
 * @param out           Receives a (rows x n_out_keys) matrix. It is always
 *                      allocated zero-filled and only populated from out_keys
 *                      when read_output is true.
 * @param out_keys      Object keys copied into `out`.
 * @param n_out_keys    Number of entries in out_keys.
 * @param in_keys       Object keys copied into `input`.
 * @param n_input_keys  Number of entries in in_keys.
 * @param read_output   Whether the out_keys values are read from the file.
 *
 * input->data and out->data are allocated with calloc(); the caller owns them.
 * Any failure terminates the process: I/O and allocation failures through
 * perror() + exit(1), oversized or malformed input through die().
 *
 * NOTE(review): a key missing from an object is not reported; the stored value
 * is whatever json_object_get_double() yields for the failed lookup.
 */
void json_read(
        const char *filepath,
        Array *input, Array *out,
        char *out_keys[], size_t n_out_keys,
        char *in_keys[], size_t n_input_keys,
        bool read_output)
{
    enum { INITIAL_CAPACITY = 4096 };
    FILE *fp = NULL;
    char *text = NULL;
    json_object *json_obj = NULL;
    size_t capacity = INITIAL_CAPACITY;
    size_t length = 0;
    size_t n_read;

    fp = (!strcmp(filepath, "-")) ? fopen("/dev/stdin", "r") : fopen(filepath, "r");
    if (fp == NULL) goto json_read_error;

    text = malloc(capacity);
    if (text == NULL) goto json_read_error;

    /* Slurp the whole stream, always keeping one byte free for the terminator. */
    while ((n_read = fread(text + length, 1, capacity - 1 - length, fp)) > 0) {
        length += n_read;
        if (length >= (size_t)MAX_FILE_SIZE) {
            die("json_read() Error: file size is bigger than '%zu'", (size_t)MAX_FILE_SIZE);
        }
        if (length == capacity - 1) {
            /* Buffer is full: double it, keeping the old block valid on failure. */
            char *grown = realloc(text, capacity * 2);
            if (grown == NULL) goto json_read_error;
            text = grown;
            capacity *= 2;
        }
    }
    if (ferror(fp)) goto json_read_error;
    text[length] = '\0';
    fclose(fp);
    fp = NULL;

    json_obj = json_tokener_parse(text);
    free(text);
    text = NULL;
    if (json_obj == NULL || !json_object_is_type(json_obj, json_type_array)) {
        die("json_read() Error: '%s' does not contain a JSON array", filepath);
    }

    size_t n_rows = json_object_array_length(json_obj);

    input->shape[0] = n_rows;
    input->shape[1] = n_input_keys;
    input->data = calloc(input->shape[0] * input->shape[1], sizeof(input->data[0]));

    out->shape[0] = n_rows;
    out->shape[1] = n_out_keys;
    out->data = calloc(out->shape[0] * out->shape[1], sizeof(out->data[0]));

    if (!input->data || !out->data) goto json_read_error;

    for (size_t row = 0; row < n_rows; row++) {
        json_object *item = json_object_array_get_idx(json_obj, row);

        for (size_t j = 0; j < n_input_keys; j++) {
            size_t index = n_input_keys * row + j;
            input->data[index] = json_object_get_double(json_object_object_get(item, in_keys[j]));
        }

        if (!read_output) continue;

        for (size_t j = 0; j < n_out_keys; j++) {
            size_t index = n_out_keys * row + j;
            out->data[index] = json_object_get_double(json_object_object_get(item, out_keys[j]));
        }
    }

    json_object_put(json_obj);
    return;

json_read_error:
    perror("json_read() Error");
    exit(1);
}
/**
 * Write the rows of `input` and `out` as a JSON array of objects.
 *
 * Each row i becomes one object whose fields are first the in_keys[j] /
 * input(i, j) pairs and then the out_keys[j] / out(i, j) pairs. The number of
 * rows is taken from input.shape[0]; `out` is indexed with the same row index.
 *
 * @param filepath       Destination file; NULL selects /dev/stdout.
 * @param input          Input matrix (row-major).
 * @param out            Output matrix (row-major).
 * @param out_keys       Field names for the columns of `out`.
 * @param out_keys_size  Unused; the column count comes from out.shape[1].
 * @param in_keys        Field names for the columns of `input`.
 * @param in_keys_size   Unused; the column count comes from input.shape[1].
 *
 * Calls die() if the file cannot be opened or closed.
 */
void json_write(
        const char *filepath,
        Array input, Array out,
        char *out_keys[], size_t out_keys_size,
        char *in_keys[], size_t in_keys_size)
{
    (void)out_keys_size;
    (void)in_keys_size;

    FILE *fp = (!filepath) ? fopen("/dev/stdout", "w") : fopen(filepath, "w");
    if (!fp) die("json_write() Error:");

    /* Fields per object; the comma is a separator, so the last field gets none. */
    const size_t n_fields = input.shape[1] + out.shape[1];

    fprintf(fp, "[\n");
    for (size_t i = 0; i < input.shape[0]; i++) {
        size_t written = 0;

        fprintf(fp, " {\n");
        for (size_t j = 0; j < input.shape[1]; j++) {
            size_t index = input.shape[1] * i + j;
            written++;
            fprintf(fp, " \"%s\": %lf%s\n", in_keys[j], input.data[index],
                    (written < n_fields) ? "," : "");
        }
        for (size_t j = 0; j < out.shape[1]; j++) {
            size_t index = out.shape[1] * i + j;
            written++;
            fprintf(fp, " \"%s\": %lf%s\n", out_keys[j], out.data[index],
                    (written < n_fields) ? "," : "");
        }
        fprintf(fp, " }%s\n", (i + 1 == input.shape[0]) ? "" : ",");
    }
    fprintf(fp, "]\n");

    /* fclose() flushes; a failure here means the output is incomplete. */
    if (fclose(fp) != 0) die("json_write() Error:");
}
/**
 * Load the configuration from the first usable file in a list of candidates.
 *
 * @param cfg     Configuration filled by util_load_config().
 * @param n_args  Number of variadic arguments that follow.
 * @param ...     n_args file paths (char *), tried in order.
 *
 * A candidate is considered loaded when util_load_config() leaves errno at 0.
 * A candidate that fails with ENOENT is skipped as long as another one
 * remains; any other failure, or running out of candidates, ends in die().
 */
void load_config(struct Configs *cfg, int n_args, ...)
{
    char *filepath;
    va_list ap;
    va_start(ap, n_args);

    for (int i = 0; i < n_args; i++) {
        filepath = va_arg(ap, char *);

        /* Clear errno first: a stale value left by an earlier library call
         * must not be mistaken for a failure of this load. */
        errno = 0;
        util_load_config(cfg, filepath);
        if (errno == 0) {
            va_end(ap);
            return;
        }

        /* Only a missing file is tolerated, and only if a fallback remains. */
        if (errno != ENOENT || i >= n_args - 1) break;
    }

    va_end(ap);
    die("load_config() Error:");
}
Layer * load_network(struct Configs cfg)
{
extern struct Activation NN_RELU;
extern struct Activation NN_SOFTPLUS;
extern struct Activation NN_SIGMOID;
extern struct Activation NN_LEAKY_RELU;
extern struct Activation NN_LINEAR;
Layer *network = ecalloc(cfg.network_size, sizeof(Layer));
for (size_t i = 0; i < cfg.network_size; i++) {
if (!strcmp("relu", cfg.activations[i])) network[i].activation = NN_RELU;
else if (!strcmp("sigmoid", cfg.activations[i])) network[i].activation = NN_SIGMOID;
else if (!strcmp("softplus", cfg.activations[i])) network[i].activation = NN_SOFTPLUS;
else if (!strcmp("leaky_relu", cfg.activations[i])) network[i].activation = NN_LEAKY_RELU;
else if (!strcmp("linear", cfg.activations[i])) network[i].activation = NN_LINEAR;
else die("load_network() Error: Unknown '%s' activation", cfg.activations[i]);
network[i].neurons = cfg.neurons[i];
}
return network;
}
/**
 * Select the cost function named by cfg.loss.
 *
 * Only "square" is recognized, yielding NN_SQUARE; any other name is
 * reported through die() and the process exits with status 1.
 */
struct Cost load_loss(struct Configs cfg)
{
    extern struct Cost NN_SQUARE;

    if (strcmp("square", cfg.loss) != 0) {
        die("load_loss() Error: Unknown '%s' loss function", cfg.loss);
        exit(1);
    }

    return NN_SQUARE;
}
int main(int argc, char *argv[]) {
char default_config_path[512];
struct Configs ml_configs = {
.epochs = 100,
.alpha = 1e-5,
.config_filepath = "utils/settings.cfg",
.network_size = 0,
.out_filepath = NULL,
};
// First past to check if --config option was put
util_load_cli(&ml_configs, argc, argv);
optind = 1;
// Load configs with different possible paths
sprintf(default_config_path, "%s/%s", getenv("HOME"), ".config/ml/ml.cfg");
load_config(&ml_configs, 2, ml_configs.config_filepath, default_config_path);
// re-read cli options again, to overwrite file configuration options
util_load_cli(&ml_configs, argc, argv);
argc -= optind;
argv += optind;
Layer *network = load_network(ml_configs);
Array X, y;
if (!strcmp("train", argv[0])) {
json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, true);
nn_network_init_weights(network, ml_configs.network_size, X.shape[1], true);
nn_network_train(
network, ml_configs.network_size,
X.data, X.shape,
y.data, y.shape,
load_loss(ml_configs),
ml_configs.epochs,
ml_configs.alpha);
nn_network_write_weights(ml_configs.weights_filepath, network, ml_configs.network_size);
fprintf(stderr, "weights saved on '%s'\n", ml_configs.weights_filepath);
} else if (!strcmp("predict", argv[0])) {
json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, false);
nn_network_init_weights(network, ml_configs.network_size, X.shape[1], false);
nn_network_read_weights(ml_configs.weights_filepath, network, ml_configs.network_size);
nn_network_predict(y.data, y.shape, X.data, X.shape, network, ml_configs.network_size);
json_write(ml_configs.out_filepath, X, y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys);
} else usage(1);
nn_network_free_weights(network, ml_configs.network_size);
free(network);
util_free_config(&ml_configs);
return 0;
}