From 84c9508273e8e4503adae06fc999b850c22f6d2b Mon Sep 17 00:00:00 2001 From: Luke <2142748+lsh-0@users.noreply.github.com> Date: Tue, 14 Nov 2023 16:44:25 +1030 Subject: [PATCH] main.go, replaced arg parsing with builtin 'flag' --- go.mod | 7 +++---- go.sum | 13 ++++--------- main.go | 56 ++++++++++++++++++++++---------------------------------- 3 files changed, 29 insertions(+), 47 deletions(-) diff --git a/go.mod b/go.mod index 4612671..14864cb 100644 --- a/go.mod +++ b/go.mod @@ -5,13 +5,12 @@ go 1.21 require ( github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/sourcegraph/conc v0.3.0 - github.com/tidwall/gjson v1.15.0 + github.com/tidwall/gjson v1.17.0 + github.com/tidwall/sjson v1.2.5 ) require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect - github.com/tidwall/sjson v1.2.5 // indirect - go.uber.org/atomic v1.7.0 // indirect - go.uber.org/multierr v1.9.0 // indirect + go.uber.org/multierr v1.11.0 // indirect ) diff --git a/go.sum b/go.sum index 27129b4..991472a 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,3 @@ -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -7,13 +6,11 @@ github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6Ng github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/gjson v1.15.0 h1:5n/pM+v3r5ujuNl4YLZLsQ+UE5jlkLVm7jMzT5Mpolw= -github.com/tidwall/gjson v1.15.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM= +github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= @@ -21,9 +18,7 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= -go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= -go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index 87ecc83..28ae726 100644 --- a/main.go +++ b/main.go @@ -7,6 +7,7 @@ import ( "bytes" "encoding/json" "errors" + "flag" "fmt" "os" "path" @@ -15,7 +16,6 @@ import ( "runtime/pprof" "slices" "sort" - "strconv" "strings" "sync" "time" @@ -248,7 +248,7 @@ func process_files_with_feeder(buffer_size int, num_workers int, file_list []str article_chan <- read_article_data(file) } close(article_chan) - println("(done reading files)") + //println("(done reading files)") }(article_chan, &wg) // process articles from `article_chan` until it's closed. @@ -274,43 +274,31 @@ func process_files_with_feeder(buffer_size int, num_workers int, file_list []str } func do() { - var err error - args := os.Args[1:] - - // required first argument is path to an xml document or a directory of xml. - die(len(args) < 1, "first argument must be the path to api-raml schema root.") - schema_root := args[0] - die(!path_exists(schema_root), "path to api-raml directory does not exist") + schema_root_ptr := flag.String("schema-root", "", "the path to api-raml schema root") + input_path_ptr := flag.String("article-json", "", "the path to a article-json file or directory") + sample_size_ptr := flag.Int("sample-size", -1, "the number of article-json files to parse") + num_workers_ptr := flag.Int("num-workers", runtime.NumCPU(), "the number of workers to process the article-json files") + // 1k articles is about ~1.5GiB of RAM + buffer_size_ptr := flag.Int("buffer-size", 2000, "the maximum number of article-json files to keep in memory at once") + flag.Parse() + + schema_root := *schema_root_ptr + die(schema_root == "", "--schema-root is required") + die(!path_exists(schema_root), "--schema-root path does not exist. it should be a path to the api-raml.") schema_map := configure_validator(schema_root) - die(len(args) < 2, "second argument must be the path to a article-json file or directory.") - input_path := args[1] - die(!path_exists(input_path), "input path does not exist") + input_path := *input_path_ptr + die(input_path == "", "--article-json is required") + die(!path_exists(input_path), "--article-json path does not exist. it should be a path to an article-json file or a directory of article-json files.") - // optional second argument is sample size. - sample_size := -1 - if len(args) > 2 { - sample_size, err = strconv.Atoi(args[2]) - die(err != nil, "second argument (article sample size) is not an integer. use -1 for 'all' articles (default).") - die(sample_size <= 0 && sample_size != -1, "second argument must be -1 or a positive integer.") - } + sample_size := *sample_size_ptr + die(sample_size < -1, "--sample-size must be -1 or greater") - num_workers := runtime.NumCPU() - if len(args) > 3 { - num_workers, err = strconv.Atoi(args[3]) - die(err != nil, "third argument (number of workers) is not an integer.") - if num_workers <= 0 && num_workers != -1 { - fmt.Printf("third argument (number of workers) must be either -1 (unbounded) or a value > 0") - os.Exit(1) - } - } + num_workers := *num_workers_ptr + die(num_workers < -1, "--num-workers must be -1 or greater") - // the number of articles to keep in memory at once. - buffer_size := 2000 // 1k articles is about ~1.5GiB of RAM - if len(args) > 4 { - buffer_size, err = strconv.Atoi(args[4]) - die(err != nil, "fourth argument (buffer size) is not an integer.") - } + buffer_size := *buffer_size_ptr + die(buffer_size < 1, "--buffer-size must be a positive integer") if !path_is_dir(input_path) { // validate single