From 65926438256c1ed46993e1c8611597af5a9c23f1 Mon Sep 17 00:00:00 2001 From: jvech Date: Wed, 7 Aug 2024 10:06:35 -0500 Subject: add: CLI improvements and small documentation updates Things done: * config path should search config file in the following order: cli option, environment, xdg_path * Implement a retrain command. * when you require more keys than the ones available in the input, stop the program. --- .gitignore | 1 + README.md | 9 +++++++-- doc/ml.1 | 9 ++++++--- src/main.c | 25 ++++++++++++++++++------- src/parse.c | 7 ++++++- src/util.c | 2 +- utils/settings.cfg | 2 +- 7 files changed, 40 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 8ec2d49..4b5e2c4 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ utils/*.gpi *.bin utils/*.py tests/*.png +issues.txt diff --git a/README.md b/README.md index 1e53f55..413d64a 100644 --- a/README.md +++ b/README.md @@ -26,15 +26,20 @@ sudo make uninstall ## Usage ``` -Usage: ml train [Options] JSON_FILE - or: ml predict [-o FILE] FILE +Usage: ml [re]train [Options] FILE + or: ml predict [-Ohv] [-f FORMAT] [-o FILE] [-p INT] FILE Options: -h, --help Show this message + -f, --format=FORMAT Define input or output FILE format if needed -a, --alpha=ALPHA Learning rate (only works with train) -e, --epochs=EPOCHS Epochs to train the model (only works with train) -o, --output=FILE Output file (only works with predict) + -O, --only-out Don't show input fields (only works with predict) -c, --config=FILE Configuration filepath [default=~/.config/ml/ml.cfg] + -p, --precision=INT Decimals output precision (only works with predict) + [default=auto] + Examples: diff --git a/doc/ml.1 b/doc/ml.1 index 0bc536d..81a9c1b 100644 --- a/doc/ml.1 +++ b/doc/ml.1 @@ -4,13 +4,13 @@ ml \- manual page for ml 0.2.0 .SH SYNOPSIS .B ml -\fI\,train \/\fR[\fI\,Options\/\fR] \fI\,FILE\/\fR +[\fI\,re\/\fR]\fI\,train \/\fR[\fI\,Options\/\fR] \fI\,FILE\/\fR .br .B ml \fI\,predict \/\fR[\fI\,-Ohv\/\fR] [\fI\,-f FORMAT\/\fR] [\fI\,-o FILE\/\fR] [\fI\,-p INT\/\fR] \fI\,FILE\/\fR .SH DESCRIPTION -ml is a simple neural network maker made to train and predict JSON data, -it is suitable to work on classification problems. +ml is a simple neural network maker made to train and predict over JSON, CSV +and TSV data, it is suitable to work on classification problems. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -37,6 +37,9 @@ Configuration filepath [default=~/.config/ml/ml.cfg] \fB\-p\fR, \fB\-\-precision\fR=\fI\,INT\/\fR Decimals output precision (only works with predict) [default=auto] +.SH ENVIRONMENT +ML_CONFIG_PATH + Set the configuration filepath .SH FILES ~/.config/ml/ml.cfg File path for network configuration, here you can setup the network diff --git a/src/main.c b/src/main.c index 38f26ad..22737dc 100644 --- a/src/main.c +++ b/src/main.c @@ -51,7 +51,7 @@ void load_config(struct Configs *cfg, int n_args, ...) } else break; } va_end(ap); - die("load_config() Error:"); + die("load_config('%s') Error:", filepath); } Layer * load_network(struct Configs cfg) @@ -88,11 +88,11 @@ struct Cost load_loss(struct Configs cfg) } int main(int argc, char *argv[]) { - char default_config_path[512]; + char default_config_path[512], *env_config_path; struct Configs ml_configs = { .epochs = 100, .alpha = 1e-5, - .config_filepath = "utils/settings.cfg", + .config_filepath = "", .network_size = 0, .only_out = false, .decimal_precision = -1, @@ -103,9 +103,15 @@ int main(int argc, char *argv[]) { // First past to check if --config option was put util_load_cli(&ml_configs, argc, argv); optind = 1; + // Load configs with different possible paths sprintf(default_config_path, "%s/%s", getenv("HOME"), ".config/ml/ml.cfg"); - load_config(&ml_configs, 2, ml_configs.config_filepath, default_config_path); + env_config_path = (getenv("ML_CONFIG_PATH"))? getenv("ML_CONFIG_PATH"):""; + + load_config(&ml_configs, 3, + ml_configs.config_filepath, + env_config_path, + default_config_path); // re-read cli options again, to overwrite file configuration options util_load_cli(&ml_configs, argc, argv); @@ -115,12 +121,17 @@ int main(int argc, char *argv[]) { Layer *network = load_network(ml_configs); Array X, y; - if (!strcmp("train", argv[0])) { + if (!strcmp("train", argv[0]) || !strcmp("retrain", argv[0])) { file_read(argv[1], &X, &y, ml_configs.input_keys, ml_configs.n_input_keys, ml_configs.label_keys, ml_configs.n_label_keys, true, ml_configs.file_format); - nn_network_init_weights(network, ml_configs.network_size, X.shape[1], true); + if (!strcmp("train", argv[0])) { + nn_network_init_weights(network, ml_configs.network_size, X.shape[1], true); + } else if (!strcmp("retrain", argv[0])) { + nn_network_init_weights(network, ml_configs.network_size, X.shape[1], false); + nn_network_read_weights(ml_configs.weights_filepath, network, ml_configs.network_size); + } nn_network_train( network, ml_configs.network_size, X.data, X.shape, @@ -139,7 +150,7 @@ int main(int argc, char *argv[]) { nn_network_read_weights(ml_configs.weights_filepath, network, ml_configs.network_size); nn_network_predict(y.data, y.shape, X.data, X.shape, network, ml_configs.network_size); - // If neither output and file_format defined use input to define the format + // If neither output and file_format defined use input to define the output format if (!ml_configs.file_format && !ml_configs.out_filepath) { ml_configs.file_format = file_format_infer(ml_configs.in_filepath); } diff --git a/src/parse.c b/src/parse.c index cea595b..a06f0f3 100644 --- a/src/parse.c +++ b/src/parse.c @@ -187,6 +187,11 @@ void json_read( die("json_read() Error: unexpected JSON data received, expecting an object"); } + if ((size_t)json_object_object_length(item) < n_input_keys + n_out_keys) { + die("json_read() Error: the number of keys required is greater " + "than the keys available in the object:\n%s", + json_object_to_json_string_ext(item, JSON_C_TO_STRING_PRETTY)); + } for (j = 0; j < n_input_keys; j++) { value = json_object_object_get(item, in_keys[j]); obj_type = json_object_get_type(value); @@ -517,7 +522,7 @@ int main(int argc, char *argv[]) { // use input format if format variable is not defined format = (!format && !strcmp(out_file, "-")) ? file_format_infer(in_file) : format; - file_write(out_file, X, y, in_cols, n_in_cols, out_cols, n_out_cols, true, format); + file_write(out_file, X, y, in_cols, n_in_cols, out_cols, n_out_cols, true, format, -1); for (i = 0; i < n_in_cols; i++) free(in_cols[i]); for (i = 0; i < n_out_cols; i++) free(out_cols[i]); diff --git a/src/util.c b/src/util.c index 4621836..9a00aa3 100644 --- a/src/util.c +++ b/src/util.c @@ -91,7 +91,7 @@ void usage(int exit_code) { FILE *fp = (!exit_code) ? stdout : stderr; fprintf(fp, - "Usage: ml train [Options] FILE\n" + "Usage: ml [re]train [Options] FILE\n" " or: ml predict [-Ohv] [-f FORMAT] [-o FILE] [-p INT] FILE\n" "\n" "Options:\n" diff --git a/utils/settings.cfg b/utils/settings.cfg index 83a0b62..d820262 100644 --- a/utils/settings.cfg +++ b/utils/settings.cfg @@ -6,7 +6,7 @@ weights_path = utils/weights.bin inputs = x, y labels = z -; activation options (relu, sigmoid, softplus, leaky_relu) +; activation options (relu, sigmoid, softplus, leaky_relu, linear, tanh) [layer] neurons=10 -- cgit v1.2.3-70-g09d2