aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjvech <jmvalenciae@unal.edu.co>2024-07-24 15:31:02 -0500
committerjvech <jmvalenciae@unal.edu.co>2024-07-24 15:31:02 -0500
commitd45581c0b067b9526ce88ba9d3a1bd861f4ff7cc (patch)
treea907346b2b282437537d7f4f6b138b3efddcce22
parentb9deaf6ec1ba587f2b81a63c75b696c6def33436 (diff)
add: file_read() and format integrated into main program
Things implemented: - fixed the bug that occurred when read_output is false. - added a generic Make rule to build test executables. - added the format option to the CLI.
-rw-r--r--Makefile5
-rw-r--r--doc/ml.17
-rw-r--r--src/main.c78
-rw-r--r--src/parse.c22
-rw-r--r--src/util.c9
-rw-r--r--src/util.h1
6 files changed, 31 insertions, 91 deletions
diff --git a/Makefile b/Makefile
index 4521115..7ca4091 100644
--- a/Makefile
+++ b/Makefile
@@ -48,9 +48,12 @@ run: build
@jq -r '.[] | [values[] as $$val | $$val] | @tsv' data/sample_data.json > data/sample_data.tsv
@gnuplot utils/plot.gpi
+test_%: src/%.c $(OBJDIR)
+ $(shell sed -n 's/.*compile: clang/clang/;/clang/p' $<)
+
debug: build
gdb -x utils/commands.gdb --tui --args ${BIN} train data/xor.json -e 100
@#gdb -x utils/commands.gdb --tui --args ${BIN} predict data/sample_data.json
clean:
- @rm $(OBJS) $(OBJDIR) -rv
+ @rm $(OBJDIR) -rv
diff --git a/doc/ml.1 b/doc/ml.1
index babac62..6450709 100644
--- a/doc/ml.1
+++ b/doc/ml.1
@@ -1,10 +1,10 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3.
-.TH ML "1" "October 2023" "ml 0.1" "User Commands"
+.TH ML "1" "July 2024" "ml 0.1" "User Commands"
.SH NAME
ml \- manual page for ml 0.1
.SH SYNOPSIS
.B ml
-\fI\,train \/\fR[\fI\,Options\/\fR] \fI\,JSON_FILE\/\fR
+\fI\,train \/\fR[\fI\,Options\/\fR] \fI\,FILE\/\fR
.br
.B ml
\fI\,predict \/\fR[\fI\,-o FILE\/\fR] \fI\,FILE\/\fR
@@ -16,6 +16,9 @@ it is suitable to work on classification problems.
\fB\-h\fR, \fB\-\-help\fR
Show this message
.TP
+\fB\-f\fR, \fB\-\-format\fR=\fI\,FORMAT\/\fR
+File input and/or output format
+.TP
\fB\-a\fR, \fB\-\-alpha\fR=\fI\,ALPHA\/\fR
Learning rate (only works with train)
.TP
diff --git a/src/main.c b/src/main.c
index dab8bd9..e692756 100644
--- a/src/main.c
+++ b/src/main.c
@@ -26,91 +26,20 @@
#include <json-c/json.h>
#include "util.h"
+#include "parse.h"
#include "nn.h"
#define MAX_FILE_SIZE 536870912 //1<<29; 0.5 GiB
-typedef struct Array {
- double *data;
- size_t shape[2];
-} Array;
-
#define ARRAY_SIZE(x, type) sizeof(x) / sizeof(type)
-static void json_read(
- const char *filepath,
- Array *input, Array *out,
- char *out_keys[], size_t out_keys_size,
- char *in_keys[], size_t in_keys_size,
- bool read_output);
-
static void json_write(
const char *filepath,
Array input, Array out,
char *out_keys[], size_t out_keys_size,
char *in_keys[], size_t in_keys_size);
-void json_read(
- const char *filepath,
- Array *input, Array *out,
- char *out_keys[], size_t n_out_keys,
- char *in_keys[], size_t n_input_keys,
- bool read_output)
-{
- FILE *fp = NULL;
- static char fp_buffer[MAX_FILE_SIZE];
-
- fp = (!strcmp(filepath, "-")) ? fopen("/dev/stdin", "r") : fopen(filepath, "r");
-
- if (fp == NULL) goto json_read_error;
-
- size_t i = 0;
- do {
- if (i >= MAX_FILE_SIZE) die("json_read() Error: file size is bigger than '%zu'", i, MAX_FILE_SIZE);
- fp_buffer[i] = fgetc(fp);
- } while (fp_buffer[i++] != EOF);
-
- json_object *json_obj;
- json_obj = json_tokener_parse(fp_buffer);
- size_t json_obj_length = json_object_array_length(json_obj);
-
- input->shape[0] = (size_t)json_obj_length;
- input->shape[1] = n_input_keys;
- input->data = calloc(input->shape[0] * input->shape[1], sizeof(input->data[0]));
-
- out->shape[0] = (size_t)json_obj_length;
- out->shape[1] = n_out_keys;
- out->data = calloc(out->shape[0] * out->shape[1], sizeof(out->data[0]));
-
- if (!input->data || !out->data) goto json_read_error;
-
- for (int i = 0; i < json_object_array_length(json_obj); i++) {
- json_object *item = json_object_array_get_idx(json_obj, i);
-
- for (int j = 0; j < n_input_keys; j++) {
- size_t index = n_input_keys * i + j;
- input->data[index] = json_object_get_double(json_object_object_get(item, in_keys[j]));
- }
-
- if (!read_output) continue;
-
- for (int j = 0; j < n_out_keys; j++) {
- size_t index = n_out_keys * i + j;
- out->data[index] = json_object_get_double(json_object_object_get(item, out_keys[j]));
- }
- }
-
- json_object_put(json_obj);
- fclose(fp);
-
- return;
-
-json_read_error:
- perror("json_read() Error");
- exit(1);
-}
-
void json_write(
const char *filepath,
Array input, Array out,
@@ -204,6 +133,7 @@ int main(int argc, char *argv[]) {
.alpha = 1e-5,
.config_filepath = "utils/settings.cfg",
.network_size = 0,
+ .file_format = NULL,
.out_filepath = NULL,
};
@@ -223,7 +153,7 @@ int main(int argc, char *argv[]) {
Array X, y;
if (!strcmp("train", argv[0])) {
- json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, true);
+ file_read(argv[1], &X, &y, ml_configs.input_keys, ml_configs.n_input_keys, ml_configs.label_keys, ml_configs.n_label_keys, true, ml_configs.file_format);
nn_network_init_weights(network, ml_configs.network_size, X.shape[1], true);
nn_network_train(
network, ml_configs.network_size,
@@ -235,7 +165,7 @@ int main(int argc, char *argv[]) {
nn_network_write_weights(ml_configs.weights_filepath, network, ml_configs.network_size);
fprintf(stderr, "weights saved on '%s'\n", ml_configs.weights_filepath);
} else if (!strcmp("predict", argv[0])) {
- json_read(argv[1], &X, &y, ml_configs.label_keys, ml_configs.n_label_keys, ml_configs.input_keys, ml_configs.n_input_keys, false);
+ file_read(argv[1], &X, &y, ml_configs.input_keys, ml_configs.n_input_keys, ml_configs.label_keys, ml_configs.n_label_keys, false, ml_configs.file_format);
nn_network_init_weights(network, ml_configs.network_size, X.shape[1], false);
nn_network_read_weights(ml_configs.weights_filepath, network, ml_configs.network_size);
nn_network_predict(y.data, y.shape, X.data, X.shape, network, ml_configs.network_size);
diff --git a/src/parse.c b/src/parse.c
index 8d31da5..18668ec 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -152,20 +152,16 @@ void csv_read(
in_cols = ecalloc(n_in_cols, sizeof(size_t));
csv_keys2cols(in_cols, in_keys, n_in_cols);
- if (read_output) {
- out_cols = ecalloc(n_out_cols, sizeof(size_t));
- csv_keys2cols(out_cols, out_keys, n_out_cols);
- }
+ out_cols = ecalloc(n_out_cols, sizeof(size_t));
+ csv_keys2cols(out_cols, out_keys, n_out_cols);
input->shape[0] = 1;
input->shape[1] = n_in_cols;
input->data = ecalloc(input->shape[1], sizeof(double));
- if (read_output) {
- out->shape[0] = 1;
- out->shape[1] = n_out_cols;
- out->data = ecalloc(input->shape[1], sizeof(double));
- }
+ out->shape[0] = 1;
+ out->shape[1] = n_out_cols;
+ out->data = ecalloc(out->shape[1], sizeof(double));
fgets(line_buffer, 1024, fp);
for (line_ptr = line_buffer; *line_ptr != '\0'; line_ptr++) {
@@ -178,7 +174,9 @@ void csv_read(
csv_readline_values(num_buffer, num_buffer_length, line_buffer, 1, separator);
csv_columns_select(input->data + line * input->shape[1], num_buffer, in_cols, n_in_cols, num_buffer_length);
- if (read_output) csv_columns_select(out->data + line * out->shape[1], num_buffer, out_cols, n_out_cols, num_buffer_length);
+ if (read_output) {
+ csv_columns_select(out->data + line * out->shape[1], num_buffer, out_cols, n_out_cols, num_buffer_length);
+ }
for (line = 1; fgets(line_buffer, 1024, fp) != NULL; line++) {
csv_readline_values(num_buffer, num_buffer_length, line_buffer, line+1, separator);
@@ -187,9 +185,9 @@ void csv_read(
input->data = erealloc(input->data, input->shape[0] * input->shape[1] * sizeof(double));
csv_columns_select(input->data + line * input->shape[1], num_buffer, in_cols, n_in_cols, num_buffer_length);
+ out->shape[0]++;
+ out->data = erealloc(out->data, out->shape[0] * out->shape[1] * sizeof(double));
if (read_output) {
- out->shape[0]++;
- out->data = erealloc(out->data, out->shape[0] * out->shape[1] * sizeof(double));
csv_columns_select(out->data + line * out->shape[1], num_buffer, out_cols, n_out_cols, num_buffer_length);
}
}
diff --git a/src/util.c b/src/util.c
index cd87d5c..8a7924f 100644
--- a/src/util.c
+++ b/src/util.c
@@ -91,11 +91,12 @@ void usage(int exit_code)
{
FILE *fp = (!exit_code) ? stdout : stderr;
fprintf(fp,
- "Usage: ml train [Options] JSON_FILE\n"
+ "Usage: ml train [Options] FILE\n"
" or: ml predict [-o FILE] FILE\n"
"\n"
"Options:\n"
" -h, --help Show this message\n"
+ " -f, --format=FORMAT File input and/or output format\n"
" -a, --alpha=ALPHA Learning rate (only works with train)\n"
" -e, --epochs=EPOCHS Epochs to train the model (only works with train)\n"
" -o, --output=FILE Output file (only works with predict)\n"
@@ -111,6 +112,7 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[])
static struct option long_opts[] = {
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'v'},
+ {"format", required_argument, 0, 'f'},
{"epochs", required_argument, 0, 'e'},
{"alpha", required_argument, 0, 'a'},
{"output", required_argument, 0, 'o'},
@@ -120,7 +122,7 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[])
int c;
while (1) {
- c = getopt_long(argc, argv, "hvc:e:a:o:i:l:", long_opts, NULL);
+ c = getopt_long(argc, argv, "hvc:e:a:o:i:f:", long_opts, NULL);
if (c == -1) {
break;
@@ -138,6 +140,9 @@ void util_load_cli(struct Configs *ml, int argc, char *argv[])
case 'c':
ml->config_filepath = optarg;
break;
+ case 'f':
+ ml->file_format = optarg;
+ break;
case 'h':
usage(0);
case 'v':
diff --git a/src/util.h b/src/util.h
index a3ef908..9523ab7 100644
--- a/src/util.h
+++ b/src/util.h
@@ -13,6 +13,7 @@ struct Configs {
char *weights_filepath;
char *config_filepath;
/* cli cfgs */
+ char *file_format;
char *in_filepath;
char *out_filepath;
/* layer cfgs */
Feel free to download, copy and edit any repo