aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author jvech <jmvalenciae@unal.edu.co> 2024-08-06 14:29:42 -0500
committer jvech <jmvalenciae@unal.edu.co> 2024-08-06 14:29:42 -0500
commit ebd66e65bf18574fa8905d7b0ae3fbb85bfc9e06 (patch)
tree da128cfa54b20abbff670c89278f0005b0f128cb /src
parent ce0001538820d819bf965a24ffbb6f6e6269859c (diff)
add: file parsing improved
Things implemented:
* json_read() must die if the key does not exist or the value type is wrong.
* On the predict command, input values should be shown exactly as they were given.
* A float precision CLI option should be added.
Diffstat (limited to 'src')
-rw-r--r-- src/main.c 4
-rw-r--r-- src/nn.c 10
-rw-r--r-- src/parse.c 72
-rw-r--r-- src/parse.h 3
-rw-r--r-- src/util.c 12
-rw-r--r-- src/util.h 1
6 files changed, 74 insertions, 28 deletions
diff --git a/src/main.c b/src/main.c
index 216d8d4..38f26ad 100644
--- a/src/main.c
+++ b/src/main.c
@@ -95,6 +95,7 @@ int main(int argc, char *argv[]) {
.config_filepath = "utils/settings.cfg",
.network_size = 0,
.only_out = false,
+ .decimal_precision = -1,
.file_format = NULL,
.out_filepath = NULL,
};
@@ -146,7 +147,8 @@ int main(int argc, char *argv[]) {
file_write(ml_configs.out_filepath, X, y,
ml_configs.input_keys, ml_configs.n_input_keys,
ml_configs.label_keys, ml_configs.n_label_keys,
- !ml_configs.only_out, ml_configs.file_format);
+ !ml_configs.only_out, ml_configs.file_format,
+ ml_configs.decimal_precision);
} else usage(1);
nn_network_free_weights(network, ml_configs.network_size);
diff --git a/src/nn.c b/src/nn.c
index 4927dc6..916803e 100644
--- a/src/nn.c
+++ b/src/nn.c
@@ -154,7 +154,7 @@ void nn_backward(
}
for (size_t sample = 0; sample < input_shape[0]; sample++) {
- for (size_t l = network_size - 1; l >= 0 && l < network_size; l--) {
+ for (size_t l = network_size - 1; l < network_size; l--) {
size_t weights_shape[2] = {network[l].input_nodes, network[l].neurons};
if (l == network_size - 1) {
double *zout = Zout[l] + sample * network[l].neurons;
@@ -328,6 +328,7 @@ void nn_network_read_weights(char *filepath, Layer *network, size_t network_size
return;
nn_network_read_weights_error:
+ fclose(fp);
die("nn_network_read_weights() Error: "
"number of read objects does not match with expected ones");
}
@@ -357,14 +358,14 @@ void nn_network_write_weights(char *filepath, Layer *network, size_t network_siz
return;
nn_network_write_weights_error:
+ fclose(fp);
die("nn_network_write_weights() Error: "
"number of written objects does not match with number of objects");
}
void nn_network_init_weights(Layer layers[], size_t nmemb, size_t n_inputs, bool fill_random)
{
- int i;
- size_t prev_size = n_inputs;
+ size_t i, prev_size = n_inputs;
for (i = 0; i < nmemb; i++) {
@@ -390,7 +391,8 @@ nn_layers_calloc_weights_error:
void nn_network_free_weights(Layer layers[], size_t nmemb)
{
- for (int i = 0; i < nmemb; i++) {
+ size_t i;
+ for (i = 0; i < nmemb; i++) {
free(layers[i].weights);
free(layers[i].bias);
}
diff --git a/src/parse.c b/src/parse.c
index c9b17ca..cea595b 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -49,14 +49,16 @@ static void json_write(
Array input, Array out,
char *in_keys[], size_t in_keys_size,
char *out_keys[], size_t out_keys_size,
- bool write_input
+ bool write_input,
+ int decimal_precision
);
static void csv_write(
FILE *fp,
Array input, Array out,
bool write_input,
- char separator
+ char separator,
+ int decimal_precision
);
static void csv_columns_select(
@@ -112,7 +114,8 @@ void file_write(
char *in_keys[], size_t n_in_keys,
char *out_keys[], size_t n_out_keys,
bool write_input,
- char *file_format)
+ char *file_format,
+ int decimal_precision)
{
FILE *fp;
@@ -129,9 +132,9 @@ void file_write(
if (fp == NULL) die("file_write() Error:");
- if (!strcmp(file_format, "json")) json_write(fp, input, out, in_keys, n_in_keys, out_keys, n_out_keys, write_input);
- else if (!strcmp(file_format, "csv")) csv_write(fp, input, out, write_input, ',');
- else if (!strcmp(file_format, "tsv")) csv_write(fp, input, out, write_input, '\t');
+ if (!strcmp(file_format, "json")) json_write(fp, input, out, in_keys, n_in_keys, out_keys, n_out_keys, write_input, decimal_precision);
+ else if (!strcmp(file_format, "csv")) csv_write(fp, input, out, write_input, ',', decimal_precision);
+ else if (!strcmp(file_format, "tsv")) csv_write(fp, input, out, write_input, '\t', decimal_precision);
else {
die("file_write() Error: unable to write %s files", file_format);
}
@@ -147,6 +150,8 @@ void json_read(
{
static char fp_buffer[MAX_FILE_SIZE];
size_t i, j, json_obj_length, index;
+ json_object *json_obj, *item, *value;
+ json_type obj_type;
if (fp == NULL) goto json_read_error;
@@ -157,10 +162,14 @@ void json_read(
fp_buffer[i] = fgetc(fp);
} while (fp_buffer[i++] != EOF);
- json_object *json_obj;
json_obj = json_tokener_parse(fp_buffer);
+ if (!json_object_is_type(json_obj, json_type_array)) {
+ die("json_read() Error: unexpected JSON data received, expecting an array");
+ }
json_obj_length = json_object_array_length(json_obj);
+
+
input->shape[0] = (size_t)json_obj_length;
input->shape[1] = n_input_keys;
input->data = calloc(input->shape[0] * input->shape[1], sizeof(input->data[0]));
@@ -172,18 +181,42 @@ void json_read(
if (!input->data || !out->data) goto json_read_error;
for (i = 0; i < json_object_array_length(json_obj); i++) {
- json_object *item = json_object_array_get_idx(json_obj, i);
+ item = json_object_array_get_idx(json_obj, i);
+
+ if (!json_object_is_type(item, json_type_object)) {
+ die("json_read() Error: unexpected JSON data received, expecting an object");
+ }
for (j = 0; j < n_input_keys; j++) {
- index = n_input_keys * i + j;
- input->data[index] = json_object_get_double(json_object_object_get(item, in_keys[j]));
+ value = json_object_object_get(item, in_keys[j]);
+ obj_type = json_object_get_type(value);
+ switch (obj_type) {
+ case json_type_double:
+ case json_type_int:
+ index = n_input_keys * i + j;
+ input->data[index] = json_object_get_double(value);
+ break;
+ default:
+ die("json_read() Error: unexpected JSON data received, expecting a number");
+ break;
+ }
}
if (!read_output) continue;
for (j = 0; j < n_out_keys; j++) {
- index = n_out_keys * i + j;
- out->data[index] = json_object_get_double(json_object_object_get(item, out_keys[j]));
+ value = json_object_object_get(item, out_keys[j]);
+ obj_type = json_object_get_type(value);
+ switch (obj_type) {
+ case json_type_double:
+ case json_type_int:
+ index = n_out_keys * i + j;
+ out->data[index] = json_object_get_double(value);
+ break;
+ default:
+ die("json_read() Error: unexpected JSON data received, expecting a number");
+ break;
+ }
}
}
@@ -266,7 +299,8 @@ void json_write(
Array input, Array out,
char *in_keys[], size_t in_keys_size,
char *out_keys[], size_t out_keys_size,
- bool write_input)
+ bool write_input,
+ int decimal_precision)
{
fprintf(fp, "[\n");
@@ -286,13 +320,13 @@ void json_write(
if (write_input) {
for (size_t j = 0; j < input.shape[1]; j++) {
size_t index = input.shape[1] * i + j;
- fprintf(fp, " \"%s\": %lf,\n", in_keys[j], input.data[index]);
+ fprintf(fp, " \"%s\": %g,\n", in_keys[j], input.data[index]);
}
}
for (size_t j = 0; j < out.shape[1]; j++) {
size_t index = out.shape[1] * i + j;
- fprintf(fp, " \"%s\": %lf", out_keys[j], out.data[index]);
+ fprintf(fp, " \"%s\": %.*g", out_keys[j], decimal_precision, out.data[index]);
if (j == out.shape[1] - 1) fprintf(fp, "\n");
else fprintf(fp, ",\n");
@@ -308,20 +342,20 @@ void csv_write(
FILE *fp,
Array input, Array out,
bool write_input,
- char separator
- )
+ char separator,
+ int decimal_precision)
{
size_t line, col, index;
for (line = 0; line < input.shape[0]; line++) {
if (write_input) {
for (col = 0; col < input.shape[1]; col++) {
index = input.shape[1] * line + col;
- fprintf(fp, "%lf%c", input.data[index], separator);
+ fprintf(fp, "%g%c", input.data[index], separator);
}
}
for (col = 0; col < out.shape[1]; col++) {
index = out.shape[1] * line + col;
- fprintf(fp, "%lf", out.data[index]);
+ fprintf(fp, "%.*g", decimal_precision, out.data[index]);
if (col == out.shape[1] - 1) continue;
fprintf(fp, "%c", separator);
}
diff --git a/src/parse.h b/src/parse.h
index d8aeada..18130c7 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -25,7 +25,8 @@ void file_write(
char *in_keys[], size_t n_in_keys,
char *out_keys[], size_t n_out_keys,
bool write_input,
- char *file_format);
+ char *file_format,
+ int decimal_precision);
char * file_format_infer(char *filename);
#endif
diff --git a/src/util.c b/src/util.c
index 8fa8a87..4621836 100644
--- a/src/util.c
+++ b/src/util.c
@@ -76,7 +76,7 @@ char *e_strdup(const char *s)
void version()
{
- printf("ml 0.1\n");
+ printf("ml 0.2.0\n");
printf( "Copyright (C) 2023 jvech\n\n"
"This program is free software: you can redistribute it and/or modify\n"
"it under the terms of the GNU General Public License as published by\n"
@@ -92,7 +92,7 @@ void usage(int exit_code)
FILE *fp = (!exit_code) ? stdout : stderr;
fprintf(fp,
"Usage: ml train [Options] FILE\n"
- " or: ml predict [-Ohv] [-f FORMAT] [-o FILE] FILE\n"
+ " or: ml predict [-Ohv] [-f FORMAT] [-o FILE] [-p INT] FILE\n"
"\n"
"Options:\n"
" -h, --help Show this message\n"
@@ -102,6 +102,8 @@ void usage(int exit_code)
" -o, --output=FILE Output file (only works with predict)\n"
" -O, --only-out Don't show input fields (only works with predict)\n"
" -c, --config=FILE Configuration filepath [default=~/.config/ml/ml.cfg]\n"
+ " -p, --precision=INT Decimals output precision (only works with predict)\n"
+ " [default=auto]\n"
"\n"
);
exit(exit_code);
@@ -119,12 +121,13 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[])
{"output", required_argument, 0, 'o'},
{"config", required_argument, 0, 'c'},
{"only-out", no_argument, 0, 'O'},
+ {"precision", required_argument, 0, 'p'},
{0, 0, 0, 0 },
};
int c;
while (1) {
- c = getopt_long(argc, argv, "hvOc:e:a:o:i:f:", long_opts, NULL);
+ c = getopt_long(argc, argv, "hvOc:e:a:o:i:f:p:", long_opts, NULL);
if (c == -1) {
break;
@@ -148,6 +151,9 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[])
case 'O':
ml->only_out = true;
break;
+ case 'p':
+ ml->decimal_precision = (!strcmp("auto", optarg))? -1: (int)atoi(optarg);
+ break;
case 'h':
usage(0);
break;
diff --git a/src/util.h b/src/util.h
index dbaae15..6ae9bab 100644
--- a/src/util.h
+++ b/src/util.h
@@ -17,6 +17,7 @@ struct Configs {
char *file_format;
char *in_filepath;
char *out_filepath;
+ int decimal_precision;
bool only_out;
/* layer cfgs */
size_t network_size;
Feel free to download, copy and edit any repo