Skip to content

Commit

Permalink
main.go, replaced arg parsing with builtin 'flag'
Browse files (browse the repository at this point in the history)
  • Loading branch information
lsh-0 committed Nov 14, 2023
1 parent 4400171 commit 84c9508
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 47 deletions.
7 changes: 3 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@ go 1.21
require (
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
github.com/sourcegraph/conc v0.3.0
github.com/tidwall/gjson v1.15.0
github.com/tidwall/gjson v1.17.0
github.com/tidwall/sjson v1.2.5
)

require (
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/tidwall/sjson v1.2.5 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.9.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
)
13 changes: 4 additions & 9 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand All @@ -7,23 +6,19 @@ github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6Ng
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.15.0 h1:5n/pM+v3r5ujuNl4YLZLsQ+UE5jlkLVm7jMzT5Mpolw=
github.com/tidwall/gjson v1.15.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM=
github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI=
go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
56 changes: 22 additions & 34 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"bytes"
"encoding/json"
"errors"
"flag"
"fmt"
"os"
"path"
Expand All @@ -15,7 +16,6 @@ import (
"runtime/pprof"
"slices"
"sort"
"strconv"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -248,7 +248,7 @@ func process_files_with_feeder(buffer_size int, num_workers int, file_list []str
article_chan <- read_article_data(file)
}
close(article_chan)
println("(done reading files)")
//println("(done reading files)")
}(article_chan, &wg)

// process articles from `article_chan` until it's closed.
Expand All @@ -274,43 +274,31 @@ func process_files_with_feeder(buffer_size int, num_workers int, file_list []str
}

func do() {
var err error
args := os.Args[1:]

// required first argument is the path to the api-raml schema root directory.
die(len(args) < 1, "first argument must be the path to api-raml schema root.")
schema_root := args[0]
die(!path_exists(schema_root), "path to api-raml directory does not exist")
schema_root_ptr := flag.String("schema-root", "", "the path to api-raml schema root")
input_path_ptr := flag.String("article-json", "", "the path to a article-json file or directory")
sample_size_ptr := flag.Int("sample-size", -1, "the number of article-json files to parse")
num_workers_ptr := flag.Int("num-workers", runtime.NumCPU(), "the number of workers to process the article-json files")
// 1k articles use roughly 1.5GiB of RAM
buffer_size_ptr := flag.Int("buffer-size", 2000, "the maximum number of article-json files to keep in memory at once")
flag.Parse()

schema_root := *schema_root_ptr
die(schema_root == "", "--schema-root is required")
die(!path_exists(schema_root), "--schema-root path does not exist. it should be a path to the api-raml.")
schema_map := configure_validator(schema_root)

die(len(args) < 2, "second argument must be the path to a article-json file or directory.")
input_path := args[1]
die(!path_exists(input_path), "input path does not exist")
input_path := *input_path_ptr
die(input_path == "", "--article-json is required")
die(!path_exists(input_path), "--article-json path does not exist. it should be a path to an article-json file or a directory of article-json files.")

// optional third argument is the sample size.
sample_size := -1
if len(args) > 2 {
sample_size, err = strconv.Atoi(args[2])
die(err != nil, "second argument (article sample size) is not an integer. use -1 for 'all' articles (default).")
die(sample_size <= 0 && sample_size != -1, "second argument must be -1 or a positive integer.")
}
sample_size := *sample_size_ptr
die(sample_size < -1, "--sample-size must be -1 or greater")

num_workers := runtime.NumCPU()
if len(args) > 3 {
num_workers, err = strconv.Atoi(args[3])
die(err != nil, "third argument (number of workers) is not an integer.")
if num_workers <= 0 && num_workers != -1 {
fmt.Printf("third argument (number of workers) must be either -1 (unbounded) or a value > 0")
os.Exit(1)
}
}
num_workers := *num_workers_ptr
die(num_workers < -1, "--num-workers must be -1 or greater")

// the number of articles to keep in memory at once.
buffer_size := 2000 // 1k articles is about ~1.5GiB of RAM
if len(args) > 4 {
buffer_size, err = strconv.Atoi(args[4])
die(err != nil, "fourth argument (buffer size) is not an integer.")
}
buffer_size := *buffer_size_ptr
die(buffer_size < 1, "--buffer-size must be a positive integer")

if !path_is_dir(input_path) {
// validate single
Expand Down

0 comments on commit 84c9508

Please sign in to comment.