Skip to content

Commit cb5bdc4

Browse files
committed
parser: Clean up nginx-combined with splitFields()
1 parent db202f6 commit cb5bdc4

File tree

2 files changed

+36
-84
lines changed

2 files changed

+36
-84
lines changed

pkg/parser/common.go

+17-8
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ package parser
22

33
import (
44
"bytes"
5-
"errors"
5+
"fmt"
66
"time"
77
)
88

@@ -37,25 +37,34 @@ func findEndingDoubleQuote(data []byte) int {
3737

3838
func splitFields(line []byte) ([][]byte, error) {
3939
res := make([][]byte, 0, 16)
40+
loop:
4041
for baseIdx := 0; baseIdx < len(line); {
41-
if line[baseIdx] == '"' {
42+
switch line[baseIdx] {
43+
case '"':
4244
quoteIdx := findEndingDoubleQuote(line[baseIdx+1:])
4345
if quoteIdx == -1 {
44-
return res, errors.New("unexpected format: unbalanced quotes")
46+
return res, fmt.Errorf("unexpected format: unbalanced quotes [ at %d", baseIdx)
4547
}
4648
res = append(res, line[baseIdx+1:baseIdx+quoteIdx+1])
4749
baseIdx += quoteIdx + 2
48-
if line[baseIdx] == ' ' {
49-
baseIdx++
50+
case '[':
51+
closingIdx := bytes.IndexByte(line[baseIdx+1:], ']')
52+
if closingIdx == -1 {
53+
return res, fmt.Errorf("unexpected format: unmatched [ at %d", baseIdx)
5054
}
51-
} else {
55+
res = append(res, line[baseIdx+1:baseIdx+closingIdx+1])
56+
baseIdx += closingIdx + 2
57+
default:
5258
spaceIdx := bytes.IndexByte(line[baseIdx:], ' ')
5359
if spaceIdx == -1 {
5460
res = append(res, line[baseIdx:])
55-
break
61+
break loop
5662
}
5763
res = append(res, line[baseIdx:baseIdx+spaceIdx])
58-
baseIdx += spaceIdx + 1
64+
baseIdx += spaceIdx
65+
}
66+
if baseIdx < len(line) && line[baseIdx] == ' ' {
67+
baseIdx++
5968
}
6069
}
6170
return res, nil

pkg/parser/nginx-combined.go

+19-76
Original file line number | Diff line number | Diff line change
@@ -36,46 +36,24 @@ func init() {
3636
})
3737
}
3838

39-
func ParseNginxCombined(line []byte) (LogItem, error) {
40-
baseIdx := 0
41-
// get the first -
42-
delimIndex := bytes.IndexByte(line, '-')
43-
if delimIndex == -1 {
44-
return LogItem{}, errors.New("unexpected format: no - (empty identity)")
45-
}
46-
47-
clientIP := line[:delimIndex-1]
48-
baseIdx = delimIndex + 1
49-
// get time within [$time_local]
50-
leftBracketIndex := bytes.IndexByte(line[baseIdx:], '[')
51-
if leftBracketIndex == -1 {
52-
return LogItem{}, errors.New("unexpected format: no [ (datetime)")
39+
func ParseNginxCombined(line []byte) (logItem LogItem, err error) {
40+
fields, err := splitFields(line)
41+
if err != nil {
42+
return logItem, err
5343
}
54-
rightBracketIndex := bytes.IndexByte(line[baseIdx+leftBracketIndex+1:], ']')
55-
if rightBracketIndex == -1 {
56-
return LogItem{}, errors.New("unexpected format: no ] (datetime)")
44+
if len(fields) != 9 {
45+
return logItem, fmt.Errorf("invalid format: expected 9 fields, got %d", len(fields))
5746
}
5847

59-
localTimeByte := line[baseIdx+leftBracketIndex+1 : baseIdx+leftBracketIndex+rightBracketIndex+1]
60-
// localTime, err := time.Parse("02/Jan/2006:15:04:05 -0700", string(localTimeByte))
61-
// if err != nil {
62-
// return LogItem{}, err
63-
// }
64-
localTime := clfDateParse(localTimeByte)
65-
baseIdx += leftBracketIndex + rightBracketIndex + 2
66-
67-
// get URL within first "$request"
68-
leftQuoteIndex := bytes.IndexByte(line[baseIdx:], '"')
69-
if leftQuoteIndex == -1 {
70-
return LogItem{}, errors.New("unexpected format: no \" (request)")
71-
}
72-
rightQuoteIndex := findEndingDoubleQuote(line[baseIdx+leftQuoteIndex+1:])
73-
if rightQuoteIndex == -1 {
74-
return LogItem{}, errors.New("unexpected format: no \" after first \" (request)")
48+
if string(fields[1]) != "-" {
49+
return logItem, errors.New("unexpected format: no - (empty identity)")
7550
}
7651

77-
url := line[baseIdx+leftQuoteIndex+1 : baseIdx+leftQuoteIndex+rightQuoteIndex+1]
78-
baseIdx += leftQuoteIndex + rightQuoteIndex + 2
52+
logItem.Client = string(fields[0])
53+
logItem.Time = clfDateParse(fields[3])
54+
55+
requestLine := fields[4]
56+
url := requestLine
7957
// strip HTTP method in url
8058
spaceIndex := bytes.IndexByte(url, ' ')
8159
if spaceIndex == -1 {
@@ -91,51 +69,16 @@ func ParseNginxCombined(line []byte) (LogItem, error) {
9169
} else {
9270
url = url[:spaceIndex]
9371
}
72+
logItem.URL = string(url)
9473

95-
// get size ($body_bytes_sent)
96-
baseIdx += 1
97-
leftSpaceIndex := bytes.IndexByte(line[baseIdx:], ' ')
98-
if leftSpaceIndex == -1 {
99-
return LogItem{}, errors.New("unexpected format: no space after $request (code)")
100-
}
101-
rightSpaceIndex := bytes.IndexByte(line[baseIdx+leftSpaceIndex+1:], ' ')
102-
if rightSpaceIndex == -1 {
103-
return LogItem{}, errors.New("unexpected format: no space after $body_bytes_sent (size)")
104-
}
105-
sizeBytes := line[baseIdx+leftSpaceIndex+1 : baseIdx+leftSpaceIndex+rightSpaceIndex+1]
106-
size, err := strconv.ParseUint(string(sizeBytes), 10, 64)
74+
sizeBytes := fields[6]
75+
logItem.Size, err = strconv.ParseUint(string(sizeBytes), 10, 64)
10776
if err != nil {
108-
return LogItem{}, err
77+
return logItem, err
10978
}
110-
baseIdx += leftSpaceIndex + rightSpaceIndex + 2
11179

112-
// skip referer
113-
leftQuoteIndex = bytes.IndexByte(line[baseIdx:], '"')
114-
if leftQuoteIndex == -1 {
115-
return LogItem{}, errors.New("unexpected format: no \" (referer)")
116-
}
117-
rightQuoteIndex = findEndingDoubleQuote(line[baseIdx+leftQuoteIndex+1:])
118-
if rightQuoteIndex == -1 {
119-
return LogItem{}, errors.New("unexpected format: no \" after first \" (referer)")
120-
}
121-
baseIdx += 1 + leftQuoteIndex + rightQuoteIndex + 2
122-
// get UA
123-
leftQuoteIndex = bytes.IndexByte(line[baseIdx:], '"')
124-
if leftQuoteIndex == -1 {
125-
return LogItem{}, errors.New("unexpected format: no \" (user-agent)")
126-
}
127-
rightQuoteIndex = findEndingDoubleQuote(line[baseIdx+leftQuoteIndex+1:])
128-
if rightQuoteIndex == -1 {
129-
return LogItem{}, errors.New("unexpected format: no \" after first \" (user-agent)")
130-
}
131-
userAgent := line[baseIdx+leftQuoteIndex+1 : baseIdx+leftQuoteIndex+rightQuoteIndex+1]
132-
return LogItem{
133-
Size: size,
134-
Client: string(clientIP),
135-
Time: localTime,
136-
URL: string(url),
137-
Useragent: string(userAgent),
138-
}, nil
80+
logItem.Useragent = string(fields[8])
81+
return
13982
}
14083

14184
var nginxCombinedRe = regexp.MustCompile(

0 commit comments

Comments
 (0)