-
Notifications
You must be signed in to change notification settings - Fork 63
/
Copy pathrequest.go
186 lines (168 loc) · 4.49 KB
/
request.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
// Copyright 2015 andeya Author. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package surfer
import (
"bytes"
"fmt"
"io"
"math/rand"
"net/http"
"net/url"
"strings"
"time"
)
// Downloader identifiers and the default values applied to a Request.
const (
SurfID = 0 // identifier of the Surf downloader
PhomtomJsID = 1 // identifier of the PhantomJS downloader
DefaultMethod = "GET" // default request method
DefaultDialTimeout = 2 * time.Minute // default timeout for connecting to the server
DefaultConnTimeout = 2 * time.Minute // default download timeout
DefaultTryTimes = 3 // default maximum number of download attempts
DefaultRetryPause = 2 * time.Second // default pause before a download is retried
)
// Request contains the necessary prerequisite information.
//
// prepare normalizes the exported fields in place and derives the unexported
// url and proxy values from Url and Proxy.
type Request struct {
// url (required)
Url string
// parsed form of Url; set by prepare and reset to nil by writeback
url *url.URL
// GET POST HEAD (defaults to GET); upper-cased by prepare
Method string
// http header; allocated by prepare when nil
Header http.Header
// whether to use cookies; set through the Spider's EnableCookie
EnableCookie bool
// request body interface; prepare calls its SetBody method when non-nil
Body body
// body reader; reset to nil by prepare before Body.SetBody runs, and
// replaced by renewBody with a fresh reader over bodyBytes when non-nil
body io.Reader
// raw body bytes; returned by ReadBody
bodyBytes []byte
// dial tcp: i/o timeout
// (prepare turns a negative value into 0 and zero into DefaultDialTimeout)
DialTimeout time.Duration
// WSARecv tcp: i/o timeout
// (prepare turns a negative value into 0 and zero into DefaultConnTimeout)
ConnTimeout time.Duration
// the maximum number of download attempts
// (prepare replaces zero with DefaultTryTimes)
TryTimes int
// how long to pause before a retry
// (prepare replaces zero or negative values with DefaultRetryPause)
RetryPause time.Duration
// maximum number of redirects
// when RedirectTimes equals 0, redirects are unlimited
// when RedirectTimes is less than 0, redirects are not allowed
RedirectTimes int
// the proxy host used for the download
Proxy string
// parsed form of Proxy; set by prepare when Proxy is non-empty
proxy *url.URL
// ID of the downloader to use
// 0: Surf downloader, high concurrency, with a full set of control features
// 1: PhantomJS downloader, strong at getting past anti-crawling defenses, but slow and low concurrency
// (prepare maps every value other than PhomtomJsID to SurfID)
DownloaderID int
// NOTE(review): not referenced in the visible part of this file —
// presumably the HTTP client used to send the request; confirm.
client *http.Client
}
// prepare normalizes the request in place before it is used.
//
// It encodes Url into r.url and writes the resulting string back to Url,
// parses Proxy when one is given, fills in defaults for the timeouts, retry
// settings, downloader ID, header map and method, picks a User-Agent, and
// finally lets Body (when non-nil) install the request body.
//
// It returns the first error reported by UrlEncode, url.Parse or
// Body.SetBody, and nil otherwise.
//
// NOTE(review): r.proxy is left untouched when Proxy is empty and
// r.bodyBytes is left untouched when Body is nil, so values from an earlier
// prepare may survive on a reused Request — confirm this is intended.
func (r *Request) prepare() error {
	// r.url receives UrlEncode's result even when it reports an error.
	encoded, err := UrlEncode(r.Url)
	r.url = encoded
	if err != nil {
		return err
	}
	r.Url = r.url.String()

	if r.Proxy != "" {
		// r.proxy is overwritten even when parsing fails.
		r.proxy, err = url.Parse(r.Proxy)
		if err != nil {
			return err
		}
	}

	// Negative timeouts become 0; unset (zero) timeouts take the default.
	switch {
	case r.DialTimeout < 0:
		r.DialTimeout = 0
	case r.DialTimeout == 0:
		r.DialTimeout = DefaultDialTimeout
	}
	switch {
	case r.ConnTimeout < 0:
		r.ConnTimeout = 0
	case r.ConnTimeout == 0:
		r.ConnTimeout = DefaultConnTimeout
	}

	if r.TryTimes == 0 {
		r.TryTimes = DefaultTryTimes
	}
	if r.RetryPause <= 0 {
		r.RetryPause = DefaultRetryPause
	}

	// Every ID other than PhomtomJsID selects the Surf downloader.
	if r.DownloaderID != PhomtomJsID {
		r.DownloaderID = SurfID
	}

	if r.Header == nil {
		r.Header = make(http.Header)
	}

	switch {
	case !r.EnableCookie:
		// Without cookies the User-Agent is always replaced by a randomly
		// chosen common one.
		agents := UserAgents["common"]
		r.Header.Set("User-Agent", agents[rand.Intn(len(agents))])
	case len(r.Header["User-Agent"]) == 0:
		// With cookies a caller-supplied User-Agent is kept; otherwise the
		// first common one is used.
		r.Header.Set("User-Agent", UserAgents["common"][0])
	}

	if r.Method != "" {
		r.Method = strings.ToUpper(r.Method)
	} else {
		r.Method = DefaultMethod
	}

	r.body = nil
	if r.Body == nil {
		return nil
	}
	return r.Body.SetBody(r)
}
// renewBody replaces an existing body reader with a fresh reader over
// bodyBytes. A request whose body reader is nil is left unchanged.
func (r *Request) renewBody() {
	if r.body == nil {
		return
	}
	r.body = bytes.NewReader(r.bodyBytes)
}
// ReadBody returns the raw bytes of the request body.
//
// When the request has not been prepared yet (r.url is nil), prepare is run
// first; if it fails, ReadBody returns a nil slice and that error.
func (r *Request) ReadBody() ([]byte, error) {
	if r.url != nil {
		return r.bodyBytes, nil
	}
	if err := r.prepare(); err != nil {
		return nil, err
	}
	return r.bodyBytes, nil
}
// writeback writes this request's method, header and host back onto
// resp.Request and returns resp.
//
// A nil resp is replaced by an empty http.Response, and a missing
// resp.Request or resp.Header is allocated, so the returned response and its
// Request are never nil. Afterwards r.url is reset to nil, which makes the
// next ReadBody call run prepare again.
//
// If r.url is already nil (for example prepare has not run, or writeback was
// already called for this request), the host is left untouched instead of
// dereferencing a nil pointer.
func (r *Request) writeback(resp *http.Response) *http.Response {
	if resp == nil {
		resp = new(http.Response)
	}
	if resp.Request == nil {
		resp.Request = new(http.Request)
	}
	if resp.Header == nil {
		resp.Header = make(http.Header)
	}

	resp.Request.Method = r.Method
	resp.Request.Header = r.Header
	if r.url != nil {
		resp.Request.Host = r.url.Host
	}

	// Mark the request as unprepared.
	r.url = nil
	return resp
}
// checkRedirect is used as the value of http.Client.CheckRedirect.
//
// The decision depends on r.RedirectTimes:
//   - equal to 0: every redirect is allowed;
//   - less than 0: every redirect is rejected;
//   - greater than 0: the redirect is rejected once len(via) has reached
//     RedirectTimes.
func (r *Request) checkRedirect(req *http.Request, via []*http.Request) error {
	limit := r.RedirectTimes
	switch {
	case limit == 0:
		return nil
	case limit < 0:
		return fmt.Errorf("not allow redirects")
	case len(via) >= limit:
		return fmt.Errorf("stopped after %v redirects", limit)
	default:
		return nil
	}
}