Skip to content

Commit

Permalink
Added more options to monitoring; Waiting for local scrape first. (#12)
Browse files Browse the repository at this point in the history
Signed-off-by: Bartlomiej Plotka <[email protected]>
  • Loading branch information
bwplotka authored Aug 29, 2021
1 parent b28b6c2 commit f4cc6db
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 21 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,10 @@ if err != nil {
This will start Prometheus with automatic discovery, so that every new and old instrumented runnable is scraped. It also runs cAdvisor, which monitors Docker itself if `env.DockerEnvironment` is started and shows generic performance metrics per container (e.g. `container_memory_rss`). Run `OpenUserInterfaceInBrowser()` to open the Prometheus UI in your browser.

```go mdox-exec="sed -n '86,89p' examples/thanos/standalone.go"
}
// Open monitoring page with all metrics.
if err := mon.OpenUserInterfaceInBrowser(); err != nil {
return errors.Wrap(err, "open monitoring UI in browser")
}
```
To see how it works in practice, run our example code in [standalone.go](examples/thanos/standalone.go) by running `make run-example`. At the end, three UIs should open in your browser: the Thanos one, the monitoring (Prometheus) one and the tracing (Jaeger) one. In the monitoring UI you can then, for example, query Docker container metrics using the `container_memory_working_set_bytes{id!="/"}` metric, e.g.:
Expand Down
16 changes: 2 additions & 14 deletions examples/thanos/standalone.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ func deployWithMonitoring(ctx context.Context) error {
}).
Init(e2e.StartOptions{Image: "jaegertracing/all-in-one:1.25"})

jaegerConfig := fmt.Sprintf(`type: JAEGER
jaegerConfig := fmt.Sprintf(
`type: JAEGER
config:
service_name: thanos
sampler_type: const
Expand Down Expand Up @@ -98,19 +99,6 @@ config:
return nil
}

//func Heap(dir string) (err error) {
// if err := os.MkdirAll(dir, os.ModePerm); err != nil {
// return err
// }
//
// f, err := os.Create(filepath.Join(dir, "mem.pprof"))
// if err != nil {
// return err
// }
// defer errcapture.Do(&err, f.Close, "close")
// return pprof.WriteHeapProfile(f)
//}

// In order to run it, invoke make run-example from repo root or just go run it.
func main() {
g := &run.Group{}
Expand Down
39 changes: 33 additions & 6 deletions monitoring/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"strings"
"sync"
"time"

"github.com/containerd/cgroups"
Expand All @@ -35,7 +36,8 @@ type Service struct {
type listener struct {
p *e2edb.Prometheus

localAddr string
localAddr string
scrapeInterval time.Duration
}

func (l *listener) updateConfig(started map[string]e2e.Instrumented) error {
Expand All @@ -44,7 +46,7 @@ func (l *listener) updateConfig(started map[string]e2e.Instrumented) error {
cfg := promconfig.Config{
GlobalConfig: promconfig.GlobalConfig{
ExternalLabels: map[model.LabelName]model.LabelValue{"prometheus": model.LabelValue(l.p.Name())},
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeInterval: model.Duration(l.scrapeInterval),
},
}

Expand Down Expand Up @@ -106,6 +108,7 @@ func (l *listener) OnRunnableChange(started []e2e.Runnable) error {

// opt collects the settings that Option functions can adjust before Start
// deploys the monitoring service.
type opt struct {
// currentProcessAsContainer is toggled by WithCurrentProcessAsContainer.
// NOTE(review): presumably makes Start place the current process PID into
// cgroups so it is reported like a container; confirm against Start.
currentProcessAsContainer bool
// scrapeInterval is handed to the listener and used as the global
// ScrapeInterval of the generated Prometheus configuration.
scrapeInterval time.Duration
}

// WithCurrentProcessAsContainer makes Start put current process PID into cgroups and organize
Expand All @@ -117,12 +120,19 @@ func WithCurrentProcessAsContainer() func(*opt) {
}
}

// WithScrapeInterval returns an option that sets how often the monitoring
// Prometheus scrapes metrics. When the option is not supplied, Start uses
// a 5s interval.
func WithScrapeInterval(interval time.Duration) func(*opt) {
	apply := func(cfg *opt) {
		cfg.scrapeInterval = interval
	}
	return apply
}

// Option customizes the monitoring service; Start applies every supplied
// Option to its settings, in order, before deploying anything.
type Option func(*opt)

// Start deploys monitoring service which deploys Prometheus that monitors all registered InstrumentedServices
// in environment.
func Start(env e2e.Environment, opts ...Option) (_ *Service, err error) {
opt := opt{}
opt := opt{scrapeInterval: 5 * time.Second}
for _, o := range opts {
o(&opt)
}
Expand All @@ -136,7 +146,13 @@ func Start(env e2e.Environment, opts ...Option) (_ *Service, err error) {
)

m := http.NewServeMux()
m.Handle("/metrics", promhttp.HandlerFor(metrics, promhttp.HandlerOpts{}))
h := promhttp.HandlerFor(metrics, promhttp.HandlerOpts{})
o := sync.Once{}
scraped := make(chan struct{})
m.Handle("/metrics", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
o.Do(func() { close(scraped) })
h.ServeHTTP(w, req)
}))

// Listen on all addresses, since we need to connect to it from docker container.
list, err := net.Listen("tcp", "0.0.0.0:0")
Expand All @@ -154,7 +170,7 @@ func Start(env e2e.Environment, opts ...Option) (_ *Service, err error) {
if err != nil {
return nil, err
}
l := &listener{p: p, localAddr: net.JoinHostPort(env.HostAddr(), port)}
l := &listener{p: p, localAddr: net.JoinHostPort(env.HostAddr(), port), scrapeInterval: opt.scrapeInterval}
if err := l.updateConfig(map[string]e2e.Instrumented{}); err != nil {
return nil, err
}
Expand All @@ -172,7 +188,18 @@ func Start(env e2e.Environment, opts ...Option) (_ *Service, err error) {
if err := newCadvisor(env, "cadvisor", path...).Start(); err != nil {
return nil, err
}
return &Service{p: p}, e2e.StartAndWaitReady(p)

if err := e2e.StartAndWaitReady(p); err != nil {
return nil, err
}

select {
case <-time.After(2 * time.Minute):
return nil, errors.New("Prometheus failed to scrape local endpoint after 2 minutes, check monitoring Prometheus logs")
case <-scraped:
}

return &Service{p: p}, nil
}

func (s *Service) OpenUserInterfaceInBrowser() error {
Expand Down

0 comments on commit f4cc6db

Please sign in to comment.