From ebd66e65bf18574fa8905d7b0ae3fbb85bfc9e06 Mon Sep 17 00:00:00 2001 From: jvech Date: Tue, 6 Aug 2024 14:29:42 -0500 Subject: add: file parsing improved Things implemented: * json_read() must die if the key does not exist or the value type is wrong. * on predict command input should be shown exactly the same * float precision CLI option should be added. --- Makefile | 8 ++++- doc/ml.1 | 10 ++++-- src/main.c | 4 ++- src/nn.c | 10 +++--- src/parse.c | 72 ++++++++++++++++++++++++++++++----------- src/parse.h | 3 +- src/util.c | 12 +++++-- src/util.h | 1 + tests/architectures/gauss2d.cfg | 19 +++++++++++ tests/plots.gpi | 27 +++++++++------- 10 files changed, 123 insertions(+), 43 deletions(-) create mode 100644 tests/architectures/gauss2d.cfg diff --git a/Makefile b/Makefile index 7ca4091..7223e7b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,13 @@ include config.mk +# you can `export DEV_MODE=true` to compile the binaries with more warnings and debugging support +ifdef DEV_MODE +CFLAGS = -std=gnu11 -Wall -Wextra -g +else +CFLAGS = -std=gnu11 -Wall -O2 +endif + CC = clang -CFLAGS = -std=gnu11 -Wall -g BIN = ml OBJDIR = objs SRC = $(wildcard src/*.c) diff --git a/doc/ml.1 b/doc/ml.1 index ea5439f..0bc536d 100644 --- a/doc/ml.1 +++ b/doc/ml.1 @@ -1,13 +1,13 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. -.TH ML "1" "July 2024" "ml 0.1" "User Commands" +.TH ML "1" "August 2024" "ml 0.2.0" "User Commands" .SH NAME -ml \- manual page for ml 0.1 +ml \- manual page for ml 0.2.0 .SH SYNOPSIS .B ml \fI\,train \/\fR[\fI\,Options\/\fR] \fI\,FILE\/\fR .br .B ml -\fI\,predict \/\fR[\fI\,-Ohv\/\fR] [\fI\,-f FORMAT\/\fR] [\fI\,-o FILE\/\fR] \fI\,FILE\/\fR +\fI\,predict \/\fR[\fI\,-Ohv\/\fR] [\fI\,-f FORMAT\/\fR] [\fI\,-o FILE\/\fR] [\fI\,-p INT\/\fR] \fI\,FILE\/\fR .SH DESCRIPTION ml is a simple neural network maker made to train and predict JSON data, it is suitable to work on classification problems. @@ -33,6 +33,10 @@ Don't show input fields (only works with predict) .TP \fB\-c\fR, \fB\-\-config\fR=\fI\,FILE\/\fR Configuration filepath [default=~/.config/ml/ml.cfg] +.TP +\fB\-p\fR, \fB\-\-precision\fR=\fI\,INT\/\fR +Decimals output precision (only works with predict) +[default=auto] .SH FILES ~/.config/ml/ml.cfg File path for network configuration, here you can setup the network diff --git a/src/main.c b/src/main.c index 216d8d4..38f26ad 100644 --- a/src/main.c +++ b/src/main.c @@ -95,6 +95,7 @@ int main(int argc, char *argv[]) { .config_filepath = "utils/settings.cfg", .network_size = 0, .only_out = false, + .decimal_precision = -1, .file_format = NULL, .out_filepath = NULL, }; @@ -146,7 +147,8 @@ int main(int argc, char *argv[]) { file_write(ml_configs.out_filepath, X, y, ml_configs.input_keys, ml_configs.n_input_keys, ml_configs.label_keys, ml_configs.n_label_keys, - !ml_configs.only_out, ml_configs.file_format); + !ml_configs.only_out, ml_configs.file_format, + ml_configs.decimal_precision); } else usage(1); nn_network_free_weights(network, ml_configs.network_size); diff --git a/src/nn.c b/src/nn.c index 4927dc6..916803e 100644 --- a/src/nn.c +++ b/src/nn.c @@ -154,7 +154,7 @@ void nn_backward( } for (size_t sample = 0; sample < input_shape[0]; sample++) { - for (size_t l = network_size - 1; l >= 0 && l < network_size; l--) { + for (size_t l = network_size - 1; l < network_size; l--) { size_t weights_shape[2] = {network[l].input_nodes, network[l].neurons}; if (l == network_size - 1) { double *zout = Zout[l] + sample * network[l].neurons; @@ -328,6 +328,7 @@ void nn_network_read_weights(char *filepath, Layer *network, size_t network_size return; nn_network_read_weights_error: + fclose(fp); die("nn_network_read_weights() Error: " "number of read objects does not match with expected ones"); } @@ -357,14 +358,14 @@ void nn_network_write_weights(char *filepath, Layer *network, size_t network_siz return; nn_network_write_weights_error: + fclose(fp); die("nn_network_write_weights() Error: " "number of written objects does not match with number of objects"); } void nn_network_init_weights(Layer layers[], size_t nmemb, size_t n_inputs, bool fill_random) { - int i; - size_t prev_size = n_inputs; + size_t i, prev_size = n_inputs; for (i = 0; i < nmemb; i++) { @@ -390,7 +391,8 @@ nn_layers_calloc_weights_error: void nn_network_free_weights(Layer layers[], size_t nmemb) { - for (int i = 0; i < nmemb; i++) { + size_t i; + for (i = 0; i < nmemb; i++) { free(layers[i].weights); free(layers[i].bias); } diff --git a/src/parse.c b/src/parse.c index c9b17ca..cea595b 100644 --- a/src/parse.c +++ b/src/parse.c @@ -49,14 +49,16 @@ static void json_write( Array input, Array out, char *in_keys[], size_t in_keys_size, char *out_keys[], size_t out_keys_size, - bool write_input + bool write_input, + int decimal_precision ); static void csv_write( FILE *fp, Array input, Array out, bool write_input, - char separator + char separator, + int decimal_precision ); static void csv_columns_select( @@ -112,7 +114,8 @@ void file_write( char *in_keys[], size_t n_in_keys, char *out_keys[], size_t n_out_keys, bool write_input, - char *file_format) + char *file_format, + int decimal_precision) { FILE *fp; @@ -129,9 +132,9 @@ void file_write( if (fp == NULL) die("file_write() Error:"); - if (!strcmp(file_format, "json")) json_write(fp, input, out, in_keys, n_in_keys, out_keys, n_out_keys, write_input); - else if (!strcmp(file_format, "csv")) csv_write(fp, input, out, write_input, ','); - else if (!strcmp(file_format, "tsv")) csv_write(fp, input, out, write_input, '\t'); + if (!strcmp(file_format, "json")) json_write(fp, input, out, in_keys, n_in_keys, out_keys, n_out_keys, write_input, decimal_precision); + else if (!strcmp(file_format, "csv")) csv_write(fp, input, out, write_input, ',', decimal_precision); + else if (!strcmp(file_format, "tsv")) csv_write(fp, input, out, write_input, '\t', decimal_precision); else { die("file_write() Error: unable to write %s files", file_format); } @@ -147,6 +150,8 @@ void json_read( { static char fp_buffer[MAX_FILE_SIZE]; size_t i, j, json_obj_length, index; + json_object *json_obj, *item, *value; + json_type obj_type; if (fp == NULL) goto json_read_error; @@ -157,10 +162,14 @@ void json_read( fp_buffer[i] = fgetc(fp); } while (fp_buffer[i++] != EOF); - json_object *json_obj; json_obj = json_tokener_parse(fp_buffer); + if (!json_object_is_type(json_obj, json_type_array)) { + die("json_read() Error: unexpected JSON data received, expecting an array"); + } json_obj_length = json_object_array_length(json_obj); + + input->shape[0] = (size_t)json_obj_length; input->shape[1] = n_input_keys; input->data = calloc(input->shape[0] * input->shape[1], sizeof(input->data[0])); @@ -172,18 +181,42 @@ void json_read( if (!input->data || !out->data) goto json_read_error; for (i = 0; i < json_object_array_length(json_obj); i++) { - json_object *item = json_object_array_get_idx(json_obj, i); + item = json_object_array_get_idx(json_obj, i); + + if (!json_object_is_type(item, json_type_object)) { + die("json_read() Error: unexpected JSON data received, expecting an object"); + } for (j = 0; j < n_input_keys; j++) { - index = n_input_keys * i + j; - input->data[index] = json_object_get_double(json_object_object_get(item, in_keys[j])); + value = json_object_object_get(item, in_keys[j]); + obj_type = json_object_get_type(value); + switch (obj_type) { + case json_type_double: + case json_type_int: + index = n_input_keys * i + j; + input->data[index] = json_object_get_double(value); + break; + default: + die("json_read() Error: unexpected JSON data received, expecting a number"); + break; + } } if (!read_output) continue; for (j = 0; j < n_out_keys; j++) { - index = n_out_keys * i + j; - out->data[index] = json_object_get_double(json_object_object_get(item, out_keys[j])); + value = json_object_object_get(item, out_keys[j]); + obj_type = json_object_get_type(value); + switch (obj_type) { + case json_type_double: + case json_type_int: + index = n_out_keys * i + j; + out->data[index] = json_object_get_double(value); + break; + default: + die("json_read() Error: unexpected JSON data received, expecting a number"); + break; + } } } @@ -266,7 +299,8 @@ void json_write( Array input, Array out, char *in_keys[], size_t in_keys_size, char *out_keys[], size_t out_keys_size, - bool write_input) + bool write_input, + int decimal_precision) { fprintf(fp, "[\n"); @@ -286,13 +320,13 @@ void json_write( if (write_input) { for (size_t j = 0; j < input.shape[1]; j++) { size_t index = input.shape[1] * i + j; - fprintf(fp, " \"%s\": %lf,\n", in_keys[j], input.data[index]); + fprintf(fp, " \"%s\": %g,\n", in_keys[j], input.data[index]); } } for (size_t j = 0; j < out.shape[1]; j++) { size_t index = out.shape[1] * i + j; - fprintf(fp, " \"%s\": %lf", out_keys[j], out.data[index]); + fprintf(fp, " \"%s\": %.*g", out_keys[j], decimal_precision, out.data[index]); if (j == out.shape[1] - 1) fprintf(fp, "\n"); else fprintf(fp, ",\n"); @@ -308,20 +342,20 @@ void csv_write( FILE *fp, Array input, Array out, bool write_input, - char separator - ) + char separator, + int decimal_precision) { size_t line, col, index; for (line = 0; line < input.shape[0]; line++) { if (write_input) { for (col = 0; col < input.shape[1]; col++) { index = input.shape[1] * line + col; - fprintf(fp, "%lf%c", input.data[index], separator); + fprintf(fp, "%g%c", input.data[index], separator); } } for (col = 0; col < out.shape[1]; col++) { index = out.shape[1] * line + col; - fprintf(fp, "%lf", out.data[index]); + fprintf(fp, "%.*g", decimal_precision, out.data[index]); if (col == out.shape[1] - 1) continue; fprintf(fp, "%c", separator); } diff --git a/src/parse.h b/src/parse.h index d8aeada..18130c7 100644 --- a/src/parse.h +++ b/src/parse.h @@ -25,7 +25,8 @@ void file_write( char *in_keys[], size_t n_in_keys, char *out_keys[], size_t n_out_keys, bool write_input, - char *file_format); + char *file_format, + int decimal_precision); char * file_format_infer(char *filename); #endif diff --git a/src/util.c b/src/util.c index 8fa8a87..4621836 100644 --- a/src/util.c +++ b/src/util.c @@ -76,7 +76,7 @@ char *e_strdup(const char *s) void version() { - printf("ml 0.1\n"); + printf("ml 0.2.0\n"); printf( "Copyright (C) 2023 jvech\n\n" "This program is free software: you can redistribute it and/or modify\n" "it under the terms of the GNU General Public License as published by\n" @@ -92,7 +92,7 @@ void usage(int exit_code) FILE *fp = (!exit_code) ? stdout : stderr; fprintf(fp, "Usage: ml train [Options] FILE\n" - " or: ml predict [-Ohv] [-f FORMAT] [-o FILE] FILE\n" + " or: ml predict [-Ohv] [-f FORMAT] [-o FILE] [-p INT] FILE\n" "\n" "Options:\n" " -h, --help Show this message\n" @@ -102,6 +102,8 @@ void usage(int exit_code) " -o, --output=FILE Output file (only works with predict)\n" " -O, --only-out Don't show input fields (only works with predict)\n" " -c, --config=FILE Configuration filepath [default=~/.config/ml/ml.cfg]\n" + " -p, --precision=INT Decimals output precision (only works with predict)\n" + " [default=auto]\n" "\n" ); exit(exit_code); @@ -119,12 +121,13 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[]) {"output", required_argument, 0, 'o'}, {"config", required_argument, 0, 'c'}, {"only-out", no_argument, 0, 'O'}, + {"precision", required_argument, 0, 'p'}, {0, 0, 0, 0 }, }; int c; while (1) { - c = getopt_long(argc, argv, "hvOc:e:a:o:i:f:", long_opts, NULL); + c = getopt_long(argc, argv, "hvOc:e:a:o:i:f:p:", long_opts, NULL); if (c == -1) { break; @@ -148,6 +151,9 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[]) case 'O': ml->only_out = true; break; + case 'p': + ml->decimal_precision = (!strcmp("auto", optarg))? -1: (int)atoi(optarg); + break; case 'h': usage(0); break; diff --git a/src/util.h b/src/util.h index dbaae15..6ae9bab 100644 --- a/src/util.h +++ b/src/util.h @@ -17,6 +17,7 @@ struct Configs { char *file_format; char *in_filepath; char *out_filepath; + int decimal_precision; bool only_out; /* layer cfgs */ size_t network_size; diff --git a/tests/architectures/gauss2d.cfg b/tests/architectures/gauss2d.cfg new file mode 100644 index 0000000..d9236ad --- /dev/null +++ b/tests/architectures/gauss2d.cfg @@ -0,0 +1,19 @@ +[net] +loss = square ; options (square) +epochs = 1000 ; comment +alpha = 2e-4 +weights_path = data/gauss2d.bin +inputs = x,y +labels = z + +; activation options (relu, sigmoid, softplus, leaky_relu) + +[layer] +neurons=20 +activation=sigmoid +[layer] +neurons=10 +activation=relu + +[outlayer] +activation = sigmoid diff --git a/tests/plots.gpi b/tests/plots.gpi index 4fd11b0..2101520 100644 --- a/tests/plots.gpi +++ b/tests/plots.gpi @@ -1,7 +1,7 @@ #!/usr/bin/gnuplot -set term pngcairo size 1080,720 +set term pngcairo size 1080,360*3 set output 'tests/network_accuracy.png' -set multiplot layout 2, 2 +set multiplot layout 3, 2 set grid json2tsv = "jq -r '.[] | [.[]] | @tsv' %s" @@ -12,11 +12,13 @@ predict_cmd = "<./ml predict %s -c %s | ".sprintf(json2tsv, "-") data_gauss1d = "data/gauss1d.json" data_xor = "data/xor.json" data_sine = "data/sine.json" +data_gauss2d = "data/gauss2d.json" # -- arch_gauss1d = "tests/architectures/gauss1d.cfg" arch_xor = "tests/architectures/xor.cfg" arch_sine = "tests/architectures/sine.cfg" +arch_gauss2d = "tests/architectures/gauss2d.cfg" set ylabel arch_gauss1d @@ -28,15 +30,6 @@ unset ylabel plot sprintf(predict_cmd, data_gauss1d, arch_gauss1d) with lines title 'network',\ "<".sprintf(json2tsv, data_gauss1d) with lines title 'original' -#set ylabel arch_xor -#set logscale x -#plot sprintf(train_cmd, data_xor, arch_xor) u 2:4 with lines title 'loss' -#unset logscale -#unset ylabel -# -#set table "/dev/stdout" -#plot "<".sprintf(json2tsv, data_xor) using 1:2:3 with table,\ -# sprintf(predict_cmd, data_xor, arch_xor) using 3 with table set ylabel arch_sine set logscale x @@ -46,3 +39,15 @@ unset ylabel plot sprintf(predict_cmd, data_sine, arch_sine) with lines title 'network',\ "<".sprintf(json2tsv, data_sine) with lines title 'original' + + +set ylabel arch_gauss2d +set logscale x +plot sprintf(train_cmd, data_gauss2d, arch_gauss2d) u 2:4 with lines title 'loss' +unset logscale +unset ylabel + +set view 45,30 +splot "<".sprintf(json2tsv, data_gauss2d) using 1:2:3 with lines title 'network',\ + sprintf(predict_cmd, data_gauss2d, arch_gauss2d) with lines title 'original' +unset multiplot -- cgit v1.2.3-70-g09d2