-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcisco_collector.go
233 lines (199 loc) · 8.09 KB
/
cisco_collector.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
package main
import (
"gitlab.com/wobcom/cisco-exporter/local_pools"
"sync"
"time"
"gitlab.com/wobcom/cisco-exporter/aaa"
"gitlab.com/wobcom/cisco-exporter/bgp"
"gitlab.com/wobcom/cisco-exporter/collector"
"gitlab.com/wobcom/cisco-exporter/config"
"gitlab.com/wobcom/cisco-exporter/connector"
"gitlab.com/wobcom/cisco-exporter/cpu"
"gitlab.com/wobcom/cisco-exporter/environment"
"gitlab.com/wobcom/cisco-exporter/interfaces"
"gitlab.com/wobcom/cisco-exporter/memory"
"gitlab.com/wobcom/cisco-exporter/mpls"
"gitlab.com/wobcom/cisco-exporter/nat"
"gitlab.com/wobcom/cisco-exporter/optics-ios"
"gitlab.com/wobcom/cisco-exporter/optics-nxos"
"gitlab.com/wobcom/cisco-exporter/optics-xe"
"gitlab.com/wobcom/cisco-exporter/pppoe"
"gitlab.com/wobcom/cisco-exporter/users"
"gitlab.com/wobcom/cisco-exporter/vlans"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
// prefix is prepended to the name of every metric descriptor built in init.
const prefix = "cisco_"
// Descriptors of the exporter's own metrics. They are declared here and
// assigned in init.
var (
// upDesc: whether the scrape of a target was successful (label: target).
upDesc *prometheus.Desc
// versionDesc: operating system information (labels: target, os_name).
versionDesc *prometheus.Desc
// errorsDesc: error count of a scrape (labels: target, collector).
errorsDesc *prometheus.Desc
// retryCountDesc: retries of a collector (labels: target, collector).
retryCountDesc *prometheus.Desc
// scrapeCollectorDurationDesc: scrape duration (labels: target, collector).
scrapeCollectorDurationDesc *prometheus.Desc
// scrapeDurationDesc: scrape duration for one target (label: target).
scrapeDurationDesc *prometheus.Desc
)
// init builds the descriptors of the exporter's own metrics. Every metric
// name is the shared prefix followed by the metric-specific suffix; none of
// the descriptors carries constant labels.
func init() {
	// describe creates a descriptor for prefix+name with the given help text
	// and variable label names.
	describe := func(name, help string, labelNames ...string) *prometheus.Desc {
		return prometheus.NewDesc(prefix+name, help, labelNames, nil)
	}

	upDesc = describe("up", "Scrape of target was successful", "target")
	versionDesc = describe("version_info", "Information about the running operating system", "target", "os_name")
	retryCountDesc = describe("retry_total", "Counts the retries of a collector", "target", "collector")
	errorsDesc = describe("collector_errors", "Error counter of a scrape by collector and target", "target", "collector")
	scrapeDurationDesc = describe("collector_duration_seconds", "Duration of a collector scrape for one target", "target")
	scrapeCollectorDurationDesc = describe("collect_duration_seconds", "Duration of a scrape by collector and target", "target", "collector")
}
// CiscoCollector bundles all available Collectors and runs them against multiple devices.
// Its Describe and Collect methods have the shape expected of a Prometheus collector.
type CiscoCollector struct {
// devices lists the targets to scrape; Collect starts one goroutine per entry.
devices []string
// deviceGroups is not populated by newCiscoCollector.
deviceGroups []*config.DeviceGroupConfig
// connectionManager hands out the connection used to build each collect context.
connectionManager *connector.SSHConnectionManager
// collectors maps a collector's name to the collector; Describe iterates over it.
collectors map[string]collector.Collector
// collectorsForDevice maps a target to the collectors that are run for it.
collectorsForDevice map[string][]collector.Collector
}
// newCiscoCollector creates a CiscoCollector for the given targets.
//
// It instantiates every known collector, registers the OS-independent ones by
// their name, and then resolves the collector names enabled in each target's
// device group into the list of collectors to run for that target. The name
// "optics" is special: it is not registered by name but resolved to the
// NX-OS, IOS or IOS-XE optics collector according to the device group's OS
// version.
//
// Enabled collector names that cannot be resolved are logged and skipped, so
// a misconfigured name never prevents the remaining collectors from running.
func newCiscoCollector(targets []string, connectionManager *connector.SSHConnectionManager) *CiscoCollector {
	memoryCollector := memory.NewCollector()
	cpuCollector := cpu.NewCollector()
	environmentCollector := environment.NewCollector()
	interfaceCollector := interfaces.NewCollector()
	vlanCollector := vlans.NewCollector()
	bgpCollector := bgp.NewCollector()
	opticsIOSCollector := opticsios.NewCollector()
	opticsXECollector := opticsxe.NewCollector()
	opticsNXOSCollector := opticsnxos.NewCollector()
	aaaCollector := aaa.NewCollector()
	usersCollector := users.NewCollector()
	pppoeCollector := pppoe.NewCollector()
	mplsCollector := mpls.NewCollector()
	natCollector := nat.NewCollector()
	poolCollector := local_pools.NewCollector()

	// Collectors that are selected purely by name, independent of the OS.
	collectors := make(map[string]collector.Collector)
	for _, col := range []collector.Collector{
		memoryCollector,
		cpuCollector,
		environmentCollector,
		interfaceCollector,
		vlanCollector,
		bgpCollector,
		aaaCollector,
		usersCollector,
		pppoeCollector,
		mplsCollector,
		natCollector,
		poolCollector,
	} {
		collectors[col.Name()] = col
	}

	collectorsForDevice := make(map[string][]collector.Collector, len(targets))
	for _, target := range targets {
		deviceGroup := configuration.GetDeviceGroup(target)
		for _, collectorName := range deviceGroup.EnabledCollectors {
			col, found := collectors[collectorName]
			if !found {
				if collectorName != "optics" {
					log.Errorf("Configured collector '%s' for device '%s'. No such collector", collectorName, target)
					continue
				}
				// The optics collector depends on the operating system.
				switch deviceGroup.OSVersion {
				case config.NXOS:
					col = opticsNXOSCollector
				case config.IOS:
					col = opticsIOSCollector
				case config.IOSXE:
					col = opticsXECollector
				default:
					// Report the misconfiguration instead of dropping the
					// collector without any trace.
					log.Errorf("Configured collector 'optics' for device '%s', but there is no optics collector for OS version '%s'", target, deviceGroup.OSVersion.String())
					continue
				}
			}
			if col != nil {
				collectorsForDevice[target] = append(collectorsForDevice[target], col)
			}
		}
	}

	return &CiscoCollector{
		devices:             targets,
		connectionManager:   connectionManager,
		collectors:          collectors,
		collectorsForDevice: collectorsForDevice,
	}
}
// Describe writes the descriptors of the exporter's own metrics to ch and
// then lets every collector registered in c.collectors describe itself on the
// same channel. It returns after the last descriptor has been sent.
func (c *CiscoCollector) Describe(ch chan<- *prometheus.Desc) {
	ownDescs := []*prometheus.Desc{
		upDesc,
		versionDesc,
		retryCountDesc,
		errorsDesc,
		scrapeDurationDesc,
		scrapeCollectorDurationDesc,
	}
	for _, desc := range ownDescs {
		ch <- desc
	}

	for name := range c.collectors {
		c.collectors[name].Describe(ch)
	}
}
// Collect scrapes all devices concurrently, one goroutine per device, and
// writes their metrics to the channel. It returns once every device has
// finished.
func (c *CiscoCollector) Collect(ch chan<- prometheus.Metric) {
	var pending sync.WaitGroup
	for i := range c.devices {
		device := c.devices[i]
		pending.Add(1)
		// The device group is looked up here, before the goroutine starts.
		go c.collectForDevice(device, configuration.GetDeviceGroup(device), ch, &pending)
	}
	pending.Wait()
}
// createCollectContext obtains a connection to target from the connection
// manager and wraps it in a fresh CollectContext for a single collector run.
//
// The returned context sends metrics to ch, carries target as its only label
// value, and has newly created, unbuffered Errors and Done channels.
//
// If no connection can be obtained, it returns nil and the underlying error
// wrapped with the name of the device.
func (c *CiscoCollector) createCollectContext(target string, deviceGroupConfig *config.DeviceGroupConfig, ch chan<- prometheus.Metric) (*collector.CollectContext, error) {
	connection, err := c.connectionManager.GetConnection(target, deviceGroupConfig)
	if err != nil {
		// Wrapf already appends the cause to the message, so err must not be
		// formatted into the message a second time.
		return nil, errors.Wrapf(err, "Could not get connection for device %s", target)
	}

	return &collector.CollectContext{
		Connection:  connection,
		LabelValues: []string{target},
		Metrics:     ch,
		Errors:      make(chan error),
		Done:        make(chan struct{}),
	}, nil
}
// runCollector starts the collector's Collect in its own goroutine and waits
// until the context's Done channel fires. Every error received on the
// context's Errors channel in the meantime is logged and gathered.
//
// It returns the gathered errors; the slice is empty (but not nil) when the
// collector reported none.
func runCollector(collector collector.Collector, collectorContext *collector.CollectContext) []error {
	reported := []error{}

	go collector.Collect(collectorContext)

	for {
		select {
		case err := <-collectorContext.Errors:
			log.Errorf("Error while running collector %s on device %s: %v", collector.Name(), collectorContext.Connection.Target, err)
			reported = append(reported, err)
		case <-collectorContext.Done:
			return reported
		}
	}
}
// collectForDevice runs all collectors configured for target, one after the
// other, and writes their metrics to ch. wg is marked done when it returns.
//
// Each collector gets at most two attempts; the second one only happens if
// the first could not obtain a collect context or reported errors. After the
// attempts, the collector's error count and duration are emitted. If the
// scrape timeout has elapsed before an attempt starts, the function returns
// immediately without emitting the metrics of the collector in progress.
//
// In every case the target's scrape duration, up state and version info are
// emitted on return. The up state reflects whether the most recent attempt
// to create a collect context succeeded.
func (c *CiscoCollector) collectForDevice(target string, deviceGroup *config.DeviceGroupConfig, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
	defer wg.Done()

	deviceUp := 1.0
	scrapeStart := time.Now()

	// Registered after wg.Done, hence executed before it: the target-level
	// metrics are sent before the wait group is released.
	defer func() {
		ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(scrapeStart).Seconds(), target)
		ch <- prometheus.MustNewConstMetric(upDesc, prometheus.GaugeValue, deviceUp, target)
		ch <- prometheus.MustNewConstMetric(versionDesc, prometheus.GaugeValue, 2, target, deviceGroup.OSVersion.String())
	}()

	for _, current := range c.collectorsForDevice[target] {
		collectorStart := time.Now()
		var errorCount float64

		for attempt, succeeded := 0, false; attempt < 2 && !succeeded; attempt++ {
			if time.Since(scrapeStart) > *scrapeTimeout {
				log.Errorf("Ran into scrape timeout for device %s", target)
				return
			}

			collectContext, err := c.createCollectContext(target, deviceGroup, ch)
			if err != nil {
				deviceUp = 0
				log.Errorf("Could not create CollectContext for device %s: %v", target, err)
				continue
			}
			deviceUp = 1

			collectorErrs := runCollector(current, collectContext)
			errorCount += float64(len(collectorErrs))
			succeeded = len(collectorErrs) == 0
		}

		labelValues := []string{target, current.Name()}
		// Take the duration before sending, as the sends may block.
		collectorSeconds := time.Since(collectorStart).Seconds()
		ch <- prometheus.MustNewConstMetric(errorsDesc, prometheus.GaugeValue, errorCount, labelValues...)
		ch <- prometheus.MustNewConstMetric(scrapeCollectorDurationDesc, prometheus.GaugeValue, collectorSeconds, labelValues...)
	}
}