diff options
author | jvech <jmvalenciae@unal.edu.co> | 2024-07-24 15:31:02 -0500 |
---|---|---|
committer | jvech <jmvalenciae@unal.edu.co> | 2024-07-24 15:31:02 -0500 |
commit | d45581c0b067b9526ce88ba9d3a1bd861f4ff7cc (patch) | |
tree | a907346b2b282437537d7f4f6b138b3efddcce22 /src | |
parent | b9deaf6ec1ba587f2b81a63c75b696c6def33436 (diff) |
add: file_read() and format integrated into main program
things implemented:
- Solved the bug that occurred when read_output is false.
- Generic Make rule added to build test executables
- format option added to the CLI
Diffstat (limited to 'src')
-rw-r--r-- | src/main.c | 78 | ||||
-rw-r--r-- | src/parse.c | 22 | ||||
-rw-r--r-- | src/util.c | 9 | ||||
-rw-r--r-- | src/util.h | 1 |
4 files changed, 22 insertions, 88 deletions
@@ -26,91 +26,20 @@ #include <json-c/json.h> #include "util.h" +#include "parse.h" #include "nn.h" #define MAX_FILE_SIZE 536870912 //1<<29; 0.5 GiB -typedef struct Array { - double *data; - size_t shape[2]; -} Array; - #define ARRAY_SIZE(x, type) sizeof(x) / sizeof(type) -static void json_read( - const char *filepath, - Array *input, Array *out, - char *out_keys[], size_t out_keys_size, - char *in_keys[], size_t in_keys_size, - bool read_output); - static void json_write( const char *filepath, Array input, Array out, char *out_keys[], size_t out_keys_size, char *in_keys[], size_t in_keys_size); -void json_read( - const char *filepath, - Array *input, Array *out, - char *out_keys[], size_t n_out_keys, - char *in_keys[], size_t n_input_keys, - bool read_output) -{ - FILE *fp = NULL; - static char fp_buffer[MAX_FILE_SIZE]; - - fp = (!strcmp(filepath, "-")) ? fopen("/dev/stdin", "r") : fopen(filepath, "r"); - - if (fp == NULL) goto json_read_error; - - size_t i = 0; - do { - if (i >= MAX_FILE_SIZE) die("json_read() Error: file size is bigger than '%zu'", i, MAX_FILE_SIZE); - fp_buffer[i] = fgetc(fp); - } while (fp_buffer[i++] != EOF); - - json_object *json_obj; - json_obj = json_tokener_parse(fp_buffer); - size_t json_obj_length = json_object_array_length(json_obj); - - input->shape[0] = (size_t)json_obj_length; - input->shape[1] = n_input_keys; - input->data = calloc(input->shape[0] * input->shape[1], sizeof(input->data[0])); - - out->shape[0] = (size_t)json_obj_length; - out->shape[1] = n_out_keys; - out->data = calloc(out->shape[0] * out->shape[1], sizeof(out->data[0])); - - if (!input->data || !out->data) goto json_read_error; - - for (int i = 0; i < json_object_array_length(json_obj); i++) { - json_object *item = json_object_array_get_idx(json_obj, i); - - for (int j = 0; j < n_input_keys; j++) { - size_t index = n_input_keys * i + j; - input->data[index] = json_object_get_double(json_object_object_get(item, in_keys[j])); - } - - if (!read_output) continue; - - 
for (int j = 0; j < n_out_keys; j++) { - size_t index = n_out_keys * i + j; - out->data[index] = json_object_get_double(json_object_object_get(item, out_keys[j])); - } - } - - json_object_put(json_obj); - fclose(fp); - - return; - -json_read_error: - perror("json_read() Error"); - exit(1); -} - void json_write( const char *filepath, Array input, Array out, @@ -204,6 +133,7 @@ int main(int argc, char *argv[]) { .alpha = 1e-5, .config_filepath = "utils/settings.cfg", .network_size = 0, + .file_format = NULL, .out_filepath = NULL, }; @@ -223,7 +153,7 @@ int main(int argc, char *argv[]) { Array X, y; if (!strcmp("train", argv[0])) { - json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, true); + file_read(argv[1], &X, &y, ml_configs.input_keys, ml_configs.n_input_keys, ml_configs.label_keys, ml_configs.n_label_keys, true, ml_configs.file_format); nn_network_init_weights(network, ml_configs.network_size, X.shape[1], true); nn_network_train( network, ml_configs.network_size, @@ -235,7 +165,7 @@ int main(int argc, char *argv[]) { nn_network_write_weights(ml_configs.weights_filepath, network, ml_configs.network_size); fprintf(stderr, "weights saved on '%s'\n", ml_configs.weights_filepath); } else if (!strcmp("predict", argv[0])) { - json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, false); + file_read(argv[1], &X, &y, ml_configs.input_keys, ml_configs.n_input_keys, ml_configs.label_keys, ml_configs.n_label_keys, false, ml_configs.file_format); nn_network_init_weights(network, ml_configs.network_size, X.shape[1], false); nn_network_read_weights(ml_configs.weights_filepath, network, ml_configs.network_size); nn_network_predict(y.data, y.shape, X.data, X.shape, network, ml_configs.network_size); diff --git a/src/parse.c b/src/parse.c index 8d31da5..18668ec 100644 --- a/src/parse.c +++ b/src/parse.c @@ -152,20 +152,16 @@ void csv_read( 
in_cols = ecalloc(n_in_cols, sizeof(size_t)); csv_keys2cols(in_cols, in_keys, n_in_cols); - if (read_output) { - out_cols = ecalloc(n_out_cols, sizeof(size_t)); - csv_keys2cols(out_cols, out_keys, n_out_cols); - } + out_cols = ecalloc(n_out_cols, sizeof(size_t)); + csv_keys2cols(out_cols, out_keys, n_out_cols); input->shape[0] = 1; input->shape[1] = n_in_cols; input->data = ecalloc(input->shape[1], sizeof(double)); - if (read_output) { - out->shape[0] = 1; - out->shape[1] = n_out_cols; - out->data = ecalloc(input->shape[1], sizeof(double)); - } + out->shape[0] = 1; + out->shape[1] = n_out_cols; + out->data = ecalloc(out->shape[1], sizeof(double)); fgets(line_buffer, 1024, fp); for (line_ptr = line_buffer; *line_ptr != '\0'; line_ptr++) { @@ -178,7 +174,9 @@ void csv_read( csv_readline_values(num_buffer, num_buffer_length, line_buffer, 1, separator); csv_columns_select(input->data + line * input->shape[1], num_buffer, in_cols, n_in_cols, num_buffer_length); - if (read_output) csv_columns_select(out->data + line * out->shape[1], num_buffer, out_cols, n_out_cols, num_buffer_length); + if (read_output) { + csv_columns_select(out->data + line * out->shape[1], num_buffer, out_cols, n_out_cols, num_buffer_length); + } for (line = 1; fgets(line_buffer, 1024, fp) != NULL; line++) { csv_readline_values(num_buffer, num_buffer_length, line_buffer, line+1, separator); @@ -187,9 +185,9 @@ void csv_read( input->data = erealloc(input->data, input->shape[0] * input->shape[1] * sizeof(double)); csv_columns_select(input->data + line * input->shape[1], num_buffer, in_cols, n_in_cols, num_buffer_length); + out->shape[0]++; + out->data = erealloc(out->data, out->shape[0] * out->shape[1] * sizeof(double)); if (read_output) { - out->shape[0]++; - out->data = erealloc(out->data, out->shape[0] * out->shape[1] * sizeof(double)); csv_columns_select(out->data + line * out->shape[1], num_buffer, out_cols, n_out_cols, num_buffer_length); } } @@ -91,11 +91,12 @@ void usage(int exit_code) { FILE 
*fp = (!exit_code) ? stdout : stderr; fprintf(fp, - "Usage: ml train [Options] JSON_FILE\n" + "Usage: ml train [Options] FILE\n" " or: ml predict [-o FILE] FILE\n" "\n" "Options:\n" " -h, --help Show this message\n" + " -f, --format=FORMAT File input and/or output format\n" " -a, --alpha=ALPHA Learning rate (only works with train)\n" " -e, --epochs=EPOCHS Epochs to train the model (only works with train)\n" " -o, --output=FILE Output file (only works with predict)\n" @@ -111,6 +112,7 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[]) static struct option long_opts[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'v'}, + {"format", required_argument, 0, 'f'}, {"epochs", required_argument, 0, 'e'}, {"alpha", required_argument, 0, 'a'}, {"output", required_argument, 0, 'o'}, @@ -120,7 +122,7 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[]) int c; while (1) { - c = getopt_long(argc, argv, "hvc:e:a:o:i:l:", long_opts, NULL); + c = getopt_long(argc, argv, "hvc:e:a:o:i:f:", long_opts, NULL); if (c == -1) { break; @@ -138,6 +140,9 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[]) case 'c': ml->config_filepath = optarg; break; + case 'f': + ml->file_format = optarg; + break; case 'h': usage(0); case 'v': @@ -13,6 +13,7 @@ struct Configs { char *weights_filepath; char *config_filepath; /* cli cfgs */ + char *file_format; char *in_filepath; char *out_filepath; /* layer cfgs */ |