From d45581c0b067b9526ce88ba9d3a1bd861f4ff7cc Mon Sep 17 00:00:00 2001 From: jvech Date: Wed, 24 Jul 2024 15:31:02 -0500 Subject: add: file_read() and format integrated on main program things implemented: - the bug that occurred when read_output is false was solved. - Make generic rule added to build test executables - format option added to the CLI --- Makefile | 5 +++- doc/ml.1 | 7 ++++-- src/main.c | 78 ++++--------------------------------------------------------- src/parse.c | 22 ++++++++--------- src/util.c | 9 +++++-- src/util.h | 1 + 6 files changed, 31 insertions(+), 91 deletions(-) diff --git a/Makefile b/Makefile index 4521115..7ca4091 100644 --- a/Makefile +++ b/Makefile @@ -48,9 +48,12 @@ run: build @jq -r '.[] | [values[] as $$val | $$val] | @tsv' data/sample_data.json > data/sample_data.tsv @gnuplot utils/plot.gpi +test_%: src/%.c $(OBJDIR) + $(shell sed -n 's/.*compile: clang/clang/;/clang/p' $<) + debug: build gdb -x utils/commands.gdb --tui --args ${BIN} train data/xor.json -e 100 @#gdb -x utils/commands.gdb --tui --args ${BIN} predict data/sample_data.json clean: - @rm $(OBJS) $(OBJDIR) -rv + @rm $(OBJDIR) -rv diff --git a/doc/ml.1 b/doc/ml.1 index babac62..6450709 100644 --- a/doc/ml.1 +++ b/doc/ml.1 @@ -1,10 +1,10 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. -.TH ML "1" "October 2023" "ml 0.1" "User Commands" +.TH ML "1" "July 2024" "ml 0.1" "User Commands" .SH NAME ml \- manual page for ml 0.1 .SH SYNOPSIS .B ml -\fI\,train \/\fR[\fI\,Options\/\fR] \fI\,JSON_FILE\/\fR +\fI\,train \/\fR[\fI\,Options\/\fR] \fI\,FILE\/\fR .br .B ml \fI\,predict \/\fR[\fI\,-o FILE\/\fR] \fI\,FILE\/\fR @@ -16,6 +16,9 @@ it is suitable to work on classification problems. 
\fB\-h\fR, \fB\-\-help\fR Show this message .TP +\fB\-f\fR, \fB\-\-format\fR=\fI\,FORMAT\/\fR +File input and/or output format +.TP \fB\-a\fR, \fB\-\-alpha\fR=\fI\,ALPHA\/\fR Learning rate (only works with train) .TP diff --git a/src/main.c b/src/main.c index dab8bd9..e692756 100644 --- a/src/main.c +++ b/src/main.c @@ -26,91 +26,20 @@ #include #include "util.h" +#include "parse.h" #include "nn.h" #define MAX_FILE_SIZE 536870912 //1<<29; 0.5 GiB -typedef struct Array { - double *data; - size_t shape[2]; -} Array; - #define ARRAY_SIZE(x, type) sizeof(x) / sizeof(type) -static void json_read( - const char *filepath, - Array *input, Array *out, - char *out_keys[], size_t out_keys_size, - char *in_keys[], size_t in_keys_size, - bool read_output); - static void json_write( const char *filepath, Array input, Array out, char *out_keys[], size_t out_keys_size, char *in_keys[], size_t in_keys_size); -void json_read( - const char *filepath, - Array *input, Array *out, - char *out_keys[], size_t n_out_keys, - char *in_keys[], size_t n_input_keys, - bool read_output) -{ - FILE *fp = NULL; - static char fp_buffer[MAX_FILE_SIZE]; - - fp = (!strcmp(filepath, "-")) ? 
fopen("/dev/stdin", "r") : fopen(filepath, "r"); - - if (fp == NULL) goto json_read_error; - - size_t i = 0; - do { - if (i >= MAX_FILE_SIZE) die("json_read() Error: file size is bigger than '%zu'", i, MAX_FILE_SIZE); - fp_buffer[i] = fgetc(fp); - } while (fp_buffer[i++] != EOF); - - json_object *json_obj; - json_obj = json_tokener_parse(fp_buffer); - size_t json_obj_length = json_object_array_length(json_obj); - - input->shape[0] = (size_t)json_obj_length; - input->shape[1] = n_input_keys; - input->data = calloc(input->shape[0] * input->shape[1], sizeof(input->data[0])); - - out->shape[0] = (size_t)json_obj_length; - out->shape[1] = n_out_keys; - out->data = calloc(out->shape[0] * out->shape[1], sizeof(out->data[0])); - - if (!input->data || !out->data) goto json_read_error; - - for (int i = 0; i < json_object_array_length(json_obj); i++) { - json_object *item = json_object_array_get_idx(json_obj, i); - - for (int j = 0; j < n_input_keys; j++) { - size_t index = n_input_keys * i + j; - input->data[index] = json_object_get_double(json_object_object_get(item, in_keys[j])); - } - - if (!read_output) continue; - - for (int j = 0; j < n_out_keys; j++) { - size_t index = n_out_keys * i + j; - out->data[index] = json_object_get_double(json_object_object_get(item, out_keys[j])); - } - } - - json_object_put(json_obj); - fclose(fp); - - return; - -json_read_error: - perror("json_read() Error"); - exit(1); -} - void json_write( const char *filepath, Array input, Array out, @@ -204,6 +133,7 @@ int main(int argc, char *argv[]) { .alpha = 1e-5, .config_filepath = "utils/settings.cfg", .network_size = 0, + .file_format = NULL, .out_filepath = NULL, }; @@ -223,7 +153,7 @@ int main(int argc, char *argv[]) { Array X, y; if (!strcmp("train", argv[0])) { - json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, true); + file_read(argv[1], &X, &y, ml_configs.input_keys, ml_configs.n_input_keys, ml_configs.label_keys, 
ml_configs.n_label_keys, true, ml_configs.file_format); nn_network_init_weights(network, ml_configs.network_size, X.shape[1], true); nn_network_train( network, ml_configs.network_size, @@ -235,7 +165,7 @@ int main(int argc, char *argv[]) { nn_network_write_weights(ml_configs.weights_filepath, network, ml_configs.network_size); fprintf(stderr, "weights saved on '%s'\n", ml_configs.weights_filepath); } else if (!strcmp("predict", argv[0])) { - json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, false); + file_read(argv[1], &X, &y, ml_configs.input_keys, ml_configs.n_input_keys, ml_configs.label_keys, ml_configs.n_label_keys, false, ml_configs.file_format); nn_network_init_weights(network, ml_configs.network_size, X.shape[1], false); nn_network_read_weights(ml_configs.weights_filepath, network, ml_configs.network_size); nn_network_predict(y.data, y.shape, X.data, X.shape, network, ml_configs.network_size); diff --git a/src/parse.c b/src/parse.c index 8d31da5..18668ec 100644 --- a/src/parse.c +++ b/src/parse.c @@ -152,20 +152,16 @@ void csv_read( in_cols = ecalloc(n_in_cols, sizeof(size_t)); csv_keys2cols(in_cols, in_keys, n_in_cols); - if (read_output) { - out_cols = ecalloc(n_out_cols, sizeof(size_t)); - csv_keys2cols(out_cols, out_keys, n_out_cols); - } + out_cols = ecalloc(n_out_cols, sizeof(size_t)); + csv_keys2cols(out_cols, out_keys, n_out_cols); input->shape[0] = 1; input->shape[1] = n_in_cols; input->data = ecalloc(input->shape[1], sizeof(double)); - if (read_output) { - out->shape[0] = 1; - out->shape[1] = n_out_cols; - out->data = ecalloc(input->shape[1], sizeof(double)); - } + out->shape[0] = 1; + out->shape[1] = n_out_cols; + out->data = ecalloc(out->shape[1], sizeof(double)); fgets(line_buffer, 1024, fp); for (line_ptr = line_buffer; *line_ptr != '\0'; line_ptr++) { @@ -178,7 +174,9 @@ void csv_read( csv_readline_values(num_buffer, num_buffer_length, line_buffer, 1, separator); 
csv_columns_select(input->data + line * input->shape[1], num_buffer, in_cols, n_in_cols, num_buffer_length); - if (read_output) csv_columns_select(out->data + line * out->shape[1], num_buffer, out_cols, n_out_cols, num_buffer_length); + if (read_output) { + csv_columns_select(out->data + line * out->shape[1], num_buffer, out_cols, n_out_cols, num_buffer_length); + } for (line = 1; fgets(line_buffer, 1024, fp) != NULL; line++) { csv_readline_values(num_buffer, num_buffer_length, line_buffer, line+1, separator); @@ -187,9 +185,9 @@ void csv_read( input->data = erealloc(input->data, input->shape[0] * input->shape[1] * sizeof(double)); csv_columns_select(input->data + line * input->shape[1], num_buffer, in_cols, n_in_cols, num_buffer_length); + out->shape[0]++; + out->data = erealloc(out->data, out->shape[0] * out->shape[1] * sizeof(double)); if (read_output) { - out->shape[0]++; - out->data = erealloc(out->data, out->shape[0] * out->shape[1] * sizeof(double)); csv_columns_select(out->data + line * out->shape[1], num_buffer, out_cols, n_out_cols, num_buffer_length); } } diff --git a/src/util.c b/src/util.c index cd87d5c..8a7924f 100644 --- a/src/util.c +++ b/src/util.c @@ -91,11 +91,12 @@ void usage(int exit_code) { FILE *fp = (!exit_code) ? 
stdout : stderr; fprintf(fp, - "Usage: ml train [Options] JSON_FILE\n" + "Usage: ml train [Options] FILE\n" " or: ml predict [-o FILE] FILE\n" "\n" "Options:\n" " -h, --help Show this message\n" + " -f, --format=FORMAT File input and/or output format\n" " -a, --alpha=ALPHA Learning rate (only works with train)\n" " -e, --epochs=EPOCHS Epochs to train the model (only works with train)\n" " -o, --output=FILE Output file (only works with predict)\n" @@ -111,6 +112,7 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[]) static struct option long_opts[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'v'}, + {"format", required_argument, 0, 'f'}, {"epochs", required_argument, 0, 'e'}, {"alpha", required_argument, 0, 'a'}, {"output", required_argument, 0, 'o'}, @@ -120,7 +122,7 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[]) int c; while (1) { - c = getopt_long(argc, argv, "hvc:e:a:o:i:l:", long_opts, NULL); + c = getopt_long(argc, argv, "hvc:e:a:o:i:f:", long_opts, NULL); if (c == -1) { break; @@ -138,6 +140,9 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[]) case 'c': ml->config_filepath = optarg; break; + case 'f': + ml->file_format = optarg; + break; case 'h': usage(0); case 'v': diff --git a/src/util.h b/src/util.h index a3ef908..9523ab7 100644 --- a/src/util.h +++ b/src/util.h @@ -13,6 +13,7 @@ struct Configs { char *weights_filepath; char *config_filepath; /* cli cfgs */ + char *file_format; char *in_filepath; char *out_filepath; /* layer cfgs */ -- cgit v1.2.3-70-g09d2