/**
* ml - a neural network processor written with C
* Copyright (C) 2023 jvech
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include "util.h"
#include "nn.h"
// Size threshold, in bytes, for input JSON files: json_read() compares the
// size reported by ftell() against this value.
const size_t MAX_FILE_SIZE = 1<<29; // 0.5 GiB
/*
 * Two-dimensional matrix of doubles stored in one flat buffer.
 * json_read() and json_write() address element (row, col) as
 * data[shape[1] * row + col].
 */
typedef struct Array {
/* Flat element buffer holding shape[0] * shape[1] values. */
double *data;
/* shape[0] is the number of rows, shape[1] the number of columns. */
size_t shape[2];
} Array;
/*
 * Number of elements of type `type` in the array object `x`.
 * Only meaningful for real arrays, not for pointers or array parameters.
 * The expansion is fully parenthesized so that it can be used as an operand
 * of a larger expression (e.g. `n / ARRAY_SIZE(a, int)`).
 */
#define ARRAY_SIZE(x, type) (sizeof(x) / sizeof(type))
/*
 * Forward declarations of the file-local JSON helpers.
 * Both are declared static here, so the definitions further down have
 * internal linkage.
 */

/*
 * Reads the JSON file at `filepath` into `input` (and into `out` when
 * `read_output` is true), picking the fields named by `in_keys` / `out_keys`.
 */
static void json_read(
const char *filepath,
Array *input, Array *out,
char *out_keys[], size_t out_keys_size,
char *in_keys[], size_t in_keys_size,
bool read_output);
/*
 * Writes `input` and `out` as a JSON array of objects to `filepath`,
 * labelling the columns with `in_keys` / `out_keys`.
 */
static void json_write(
const char *filepath,
Array input, Array out,
char *out_keys[], size_t out_keys_size,
char *in_keys[], size_t in_keys_size);
/**
 * Read a JSON array of objects from `filepath` into two flat matrices.
 *
 * Each element of the top-level array becomes one row. For every row the
 * fields named by `in_keys` are stored in `input` and, when `read_output` is
 * true, the fields named by `out_keys` are stored in `out`. A missing field
 * is stored as whatever json_object_get_double() yields for a NULL object.
 *
 * @param filepath     path of the JSON file to read
 * @param input        receives a (rows x n_input_keys) matrix
 * @param out          receives a (rows x n_out_keys) matrix; it is always
 *                     allocated and zero-filled, and only populated from the
 *                     file when `read_output` is true
 * @param out_keys     names of the output fields
 * @param n_out_keys   number of entries in `out_keys`
 * @param in_keys      names of the input fields
 * @param n_input_keys number of entries in `in_keys`
 * @param read_output  whether the output fields are read from the file
 *
 * `input->data` and `out->data` are allocated with calloc() and are not
 * released here; the caller is responsible for free()ing them.
 *
 * Any failure (I/O, allocation, oversized file, malformed JSON) prints a
 * message to stderr and terminates the process with exit(1).
 */
void json_read(
        const char *filepath,
        Array *input, Array *out,
        char *out_keys[], size_t n_out_keys,
        char *in_keys[], size_t n_input_keys,
        bool read_output)
{
    FILE *fp = NULL;
    char *fp_buffer = NULL;
    json_object *json_obj = NULL;
    int64_t fp_size;

    fp = fopen(filepath, "r");
    if (fp == NULL) goto json_read_error;

    /* Determine the file size by seeking to the end. */
    if (fseek(fp, 0L, SEEK_END) == -1) goto json_read_error;
    fp_size = ftell(fp);
    if (fp_size == -1) goto json_read_error;

    /* The size limit is enforced: the original only printed this message
     * and then went on to load the oversized file anyway. */
    if ((size_t)fp_size >= MAX_FILE_SIZE) {
        fprintf(stderr, "ftell Error(): '%s' size greater than '%zu'\n", filepath, MAX_FILE_SIZE);
        exit(1);
    }
    rewind(fp);

    /* One extra zeroed byte: json_tokener_parse() expects a NUL-terminated
     * string, so the buffer must be larger than the file contents. */
    fp_buffer = calloc((size_t)fp_size + 1, sizeof(char));
    if (fp_buffer == NULL) goto json_read_error;

    size_t n_read = fread(fp_buffer, sizeof(char), (size_t)fp_size, fp);
    if (n_read != (size_t)fp_size) {
        fprintf(stderr, "json_read() Error: fread bytes '%zu' does not match with buffer size '%zu'\n", n_read, (size_t)fp_size);
        exit(1);
    }
    fclose(fp);
    fp = NULL;

    json_obj = json_tokener_parse(fp_buffer);
    /* The parsed tree keeps its own copies, so the raw text can go now. */
    free(fp_buffer);
    fp_buffer = NULL;

    if (json_obj == NULL || !json_object_is_type(json_obj, json_type_array)) {
        fprintf(stderr, "json_read() Error: '%s' does not contain a valid JSON array\n", filepath);
        exit(1);
    }

    size_t n_rows = json_object_array_length(json_obj);

    input->shape[0] = n_rows;
    input->shape[1] = n_input_keys;
    input->data = calloc(input->shape[0] * input->shape[1], sizeof(input->data[0]));

    out->shape[0] = n_rows;
    out->shape[1] = n_out_keys;
    out->data = calloc(out->shape[0] * out->shape[1], sizeof(out->data[0]));

    if (!input->data || !out->data) goto json_read_error;

    for (size_t i = 0; i < n_rows; i++) {
        json_object *item = json_object_array_get_idx(json_obj, i);

        for (size_t j = 0; j < n_input_keys; j++) {
            size_t index = n_input_keys * i + j;
            input->data[index] = json_object_get_double(json_object_object_get(item, in_keys[j]));
        }

        if (!read_output) continue;

        for (size_t j = 0; j < n_out_keys; j++) {
            size_t index = n_out_keys * i + j;
            out->data[index] = json_object_get_double(json_object_object_get(item, out_keys[j]));
        }
    }

    json_object_put(json_obj);
    return;

json_read_error:
    perror("json_read() Error");
    exit(1);
}
/**
 * Write `input` and `out` as a JSON array of objects.
 *
 * One object is emitted per row of `input`; it contains the input columns
 * (named by `in_keys`) followed by the output columns (named by `out_keys`).
 * Fields are separated by commas with none after the last field, so the
 * result stays valid JSON even when `out` has no columns.
 *
 * @param filepath      destination path; when NULL, "/dev/stdout" is opened
 * @param input         input matrix (rows x input.shape[1])
 * @param out           output matrix, indexed with the same row index as
 *                      `input`
 * @param out_keys      names for the columns of `out`
 * @param out_keys_size unused; the column count is taken from out.shape[1]
 * @param in_keys       names for the columns of `input`
 * @param in_keys_size  unused; the column count is taken from input.shape[1]
 *
 * Calls die() if the destination cannot be opened or closed.
 */
void json_write(
        const char *filepath,
        Array input, Array out,
        char *out_keys[], size_t out_keys_size,
        char *in_keys[], size_t in_keys_size)
{
    (void)out_keys_size;
    (void)in_keys_size;

    FILE *fp = (!filepath) ? fopen("/dev/stdout", "w") : fopen(filepath, "w");
    if (!fp) die("json_write() Error:");

    const size_t n_rows = input.shape[0];
    const size_t n_fields = input.shape[1] + out.shape[1];

    fprintf(fp, "[\n");
    for (size_t i = 0; i < n_rows; i++) {
        size_t written = 0; /* fields emitted so far in this object */

        fprintf(fp, " {\n");

        for (size_t j = 0; j < input.shape[1]; j++) {
            size_t index = input.shape[1] * i + j;
            written++;
            fprintf(fp, " \"%s\": %lf%s\n", in_keys[j], input.data[index],
                    (written < n_fields) ? "," : "");
        }

        for (size_t j = 0; j < out.shape[1]; j++) {
            size_t index = out.shape[1] * i + j;
            written++;
            fprintf(fp, " \"%s\": %lf%s\n", out_keys[j], out.data[index],
                    (written < n_fields) ? "," : "");
        }

        /* No comma after the last object of the array. */
        fputs((i + 1 < n_rows) ? " },\n" : " }\n", fp);
    }
    fprintf(fp, "]\n");

    /* fclose() flushes buffered data, so a failure here means lost output. */
    if (fclose(fp) != 0) die("json_write() Error:");
}
/**
 * Load the configuration from the first usable path in a list of candidates.
 *
 * @param cfg    configuration structure handed to util_load_config()
 * @param n_args number of variadic arguments that follow
 * @param ...    `n_args` candidate file paths (char *), tried in order
 *
 * util_load_config() is considered successful when errno is 0 after the
 * call. A candidate that fails with ENOENT is skipped as long as another
 * candidate remains; any other failure, or ENOENT on the last candidate,
 * ends the program through die().
 */
void load_config(struct Configs *cfg, int n_args, ...)
{
    char *filepath;
    va_list ap;

    va_start(ap, n_args);
    for (int i = 0; i < n_args; i++) {
        filepath = va_arg(ap, char *);

        /* Clear errno before every attempt so that a stale value left by an
         * earlier, unrelated call cannot be mistaken for a load failure. */
        errno = 0;
        util_load_config(cfg, filepath);

        if (errno == 0) {
            va_end(ap);
            return;
        }

        /* Only a missing file is tolerated, and only while there is still
         * another candidate to try. */
        if (errno != ENOENT || i == n_args - 1) break;
    }
    va_end(ap);
    die("load_config() Error:");
}
Layer * load_network(struct Configs cfg)
{
extern struct Activation NN_RELU;
extern struct Activation NN_SOFTPLUS;
extern struct Activation NN_SIGMOID;
extern struct Activation NN_LEAKY_RELU;
Layer *network = ecalloc(cfg.network_size, sizeof(Layer));
for (size_t i = 0; i < cfg.network_size; i++) {
if (!strcmp("relu", cfg.activations[i])) network[i].activation = NN_RELU;
else if (!strcmp("sigmoid", cfg.activations[i])) network[i].activation = NN_SIGMOID;
else if (!strcmp("softplus", cfg.activations[i])) network[i].activation = NN_SOFTPLUS;
else if (!strcmp("leaky_relu", cfg.activations[i])) network[i].activation = NN_LEAKY_RELU;
else die("load_network() Error: Unknown '%s' activation", cfg.activations[i]);
network[i].neurons = cfg.neurons[i];
}
return network;
}
/**
 * Select the cost function named by cfg.loss.
 *
 * "square" is the only recognised name and yields NN_SQUARE; any other name
 * is reported through die().
 *
 * @param cfg configuration holding the loss name
 * @return the selected cost function
 */
struct Cost load_loss(struct Configs cfg)
{
    extern struct Cost NN_SQUARE;

    if (strcmp("square", cfg.loss) != 0) {
        die("load_loss() Error: Unknown '%s' loss function", cfg.loss);
        /* Explicit exit so that no path falls off the end of a non-void
         * function. */
        exit(1);
    }

    return NN_SQUARE;
}
/**
 * Program entry point.
 *
 * Loads the configuration (config files first, then command-line options),
 * builds the network and runs one of two commands taken from the first
 * positional argument:
 *   - "train":   read inputs and labels from the JSON file given as the
 *                second positional argument, train the network and write
 *                the weights to ml_configs.weights_filepath;
 *   - "predict": read inputs from the JSON file, load the weights, run the
 *                network and write inputs plus predictions as JSON to
 *                ml_configs.out_filepath.
 * Any other command, or a missing command or file argument, calls usage(1).
 */
int main(int argc, char *argv[]) {
    struct Configs ml_configs = {
        .epochs = 100,
        .alpha = 1e-5,
        .config_filepath = "utils/settings.cfg",
        .network_size = 0,
        .out_filepath = NULL,
    };

    // Try different config paths
    // NOTE(review): '~' is normally expanded by the shell, not by fopen();
    // unless util_load_config() expands it, the first two candidates can
    // never be found -- confirm.
    load_config(&ml_configs, 3, "~/.config/ml/ml.cfg", "~/.ml/ml.cfg", ml_configs.config_filepath);
    util_load_cli(&ml_configs, argc, argv);
    argc -= optind;
    argv += optind;

    /* Both the command and the data file are required; without this check
     * argv[0] / argv[1] may be NULL and get passed to strcmp() / fopen(). */
    if (argc < 2) {
        usage(1);
        return 1;
    }

    Layer *network = load_network(ml_configs);

    /* Zero-initialised so the free() calls below are valid on every path. */
    Array X = {0}, y = {0};

    if (!strcmp("train", argv[0])) {
        json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, true);
        nn_network_init_weights(network, ml_configs.network_size, X.shape[1], true);
        nn_network_train(
                network, ml_configs.network_size,
                X.data, X.shape,
                y.data, y.shape,
                load_loss(ml_configs),
                ml_configs.epochs,
                ml_configs.alpha);
        nn_network_write_weights(ml_configs.weights_filepath, network, ml_configs.network_size);
        fprintf(stderr, "weights saved on '%s'\n", ml_configs.weights_filepath);
    } else if (!strcmp("predict", argv[0])) {
        json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, false);
        nn_network_init_weights(network, ml_configs.network_size, X.shape[1], false);
        nn_network_read_weights(ml_configs.weights_filepath, network, ml_configs.network_size);
        nn_network_predict(y.data, y.shape, X.data, X.shape, network, ml_configs.network_size);
        json_write(ml_configs.out_filepath, X, y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys);
    } else usage(1);

    nn_network_free_weights(network, ml_configs.network_size);
    free(network);
    /* The matrices allocated by json_read() belong to main(). */
    free(X.data);
    free(y.data);
    util_free_config(&ml_configs);
    return 0;
}