Skip to content

Commit

Permalink
cli flags and logging rearrangement
Browse files (browse the repository at this point in the history)
  • Loading branch information
bjesus committed Sep 2, 2024
1 parent 04df9f5 commit 332cd4a
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 24 deletions.
61 changes: 41 additions & 20 deletions cmd/pipet/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"fmt"
"io"
"log"
"os"
"os/exec"
Expand All @@ -20,39 +21,54 @@ func main() {
log.SetFlags(log.Lshortfile | log.Ltime)

app := &cli.App{
Name: "pipet",
Usage: "Easy web scraping CLI tool",
Name: "pipet",
Usage: "swiss-army tool for web scraping, made for hackers",
HideHelpCommand: true,
UseShortOptionHandling: true,
EnableBashCompletion: true,
ArgsUsage: "<pipet_file>",
Flags: []cli.Flag{
&cli.BoolFlag{
Name: "json",
Usage: "Output as JSON",
},
&cli.StringSliceFlag{
Name: "separator",
Usage: "Separator for text output (can be used multiple times)",
Name: "json",
Aliases: []string{"j"},
Usage: "output as JSON",
},
&cli.StringFlag{
Name: "template",
Usage: "Path to template file for output",
Name: "template",
Aliases: []string{"t"},
Usage: "path to file for template output",
},
&cli.StringSliceFlag{
Name: "separator",
Aliases: []string{"s"},
Usage: "set a separator for text output (can be used multiple times)",
},
&cli.IntFlag{
Name: "max-pages",
Value: 3,
Usage: "Maximum number of pages to scrape",
Name: "max-pages",
Value: 3,
Aliases: []string{"p"},
Usage: "maximum number of pages to scrape",
},
&cli.IntFlag{
Name: "interval",
Value: 0,
Usage: "Maximum number of pages to scrape",
Name: "interval",
Value: 0,
Aliases: []string{"i"},
Usage: "rerun pipet after X seconds, 0 to disable",
},
&cli.StringFlag{
Name: "on-change",
Usage: "Path to template file for output",
Name: "on-change",
Aliases: []string{"c"},
Usage: "a command to run when the pipet result is new",
},
&cli.BoolFlag{
Name: "verbose",
Aliases: []string{"v"},
Usage: "enable verbose logging",
},
},
Action: func(c *cli.Context) error {
if c.NArg() == 0 {
return fmt.Errorf("spec argument is required")
return fmt.Errorf("pipet file argument is required")
}
spec := c.Args().Get(0)
return runPipet(c, spec)
Expand All @@ -71,13 +87,18 @@ func runPipet(c *cli.Context, specFile string) error {
onChange := c.String("on-change")
maxPages := c.Int("max-pages")
interval := c.Int("interval")
verbose := c.Bool("verbose")

if !verbose {
log.SetOutput(io.Discard)
}

pipet := &common.PipetApp{
MaxPages: maxPages,
Separator: separators,
}

log.Println("Parsing spec file:", specFile)
log.Println("Parsing pipet file:", specFile)
err := app.ParseSpecFile(pipet, specFile)
if err != nil {
return fmt.Errorf("error parsing spec file: %w", err)
Expand Down
8 changes: 4 additions & 4 deletions parsers/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func ExecuteCurlBlock(block common.Block) (interface{}, error) {
}

// Split the command into curl and its arguments
parts, err := shlex.Split(block.Command)
parts, _ := shlex.Split(block.Command)
log.Println("Execute curl:", block.Command)
cmd := exec.Command(parts[0], parts[1:]...)
output, err := cmd.Output()
Expand All @@ -27,12 +27,12 @@ func ExecuteCurlBlock(block common.Block) (interface{}, error) {
}

isJSON := json.Valid(bytes.TrimSpace(output))
log.Println(isJSON)

if isJSON {
log.Println("it's json")
log.Println("JSON detected")
return ParseJSONQueries(output, block.Queries)
} else {
log.Println("it's HTM")
log.Println("HTML detected")
return ParseHTMLQueries(output, block.Queries)
}
}
Expand Down

0 comments on commit 332cd4a

Please sign in to comment.