-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
67 lines (48 loc) · 1.32 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"regexp"
"strconv"
"time"
"github.com/gocolly/colly"
)
// request is a single entry scraped from the request listing. Exported
// fields are written to the JSON output in declaration order.
type request struct {
	// URL is the absolute link to the entry; Title is the link's text.
	URL, Title string
	// commentsString keeps the raw comment-counter text. It is unexported,
	// so encoding/json leaves it out of the output.
	commentsString string
	// CommentCount is the number parsed from the comment-counter text.
	CommentCount int
	// Date is the submission timestamp of the entry.
	Date time.Time
	// State is the text of the entry's status message.
	State string
	// CrawledAt records when the entry was scraped.
	CrawledAt time.Time
}
// parseCommentCount returns the first match of re in text converted to an
// int. It returns an error when re does not match anywhere in text or when
// the matched text cannot be converted by strconv.Atoi.
func parseCommentCount(re *regexp.Regexp, text string) (int, error) {
	digits := re.FindString(text)
	if digits == "" {
		return 0, fmt.Errorf("no comment count in %q", text)
	}
	return strconv.Atoi(digits)
}

// main crawls the first pages of the request listing, turns every <article>
// element into a request, and writes the collected entries as indented JSON
// to ./out/requests.json.
//
// Scraping is best-effort: a field that cannot be parsed is reported and left
// at its zero value, and a page that cannot be visited is reported and
// skipped. Failures to encode or write the result are reported as well.
func main() {
	const (
		baseURL    = "https://www.offenedaten-wuppertal.de"
		pageCount  = 6
		outputPath = "./out/requests.json"
	)

	// Kept non-nil so that an empty crawl is encoded as [] rather than null.
	requests := []request{}
	countRe := regexp.MustCompile("[0-9]+")

	c := colly.NewCollector()
	c.OnHTML("article", func(e *colly.HTMLElement) {
		var (
			r   request
			err error
		)

		dateString := e.ChildAttr("div.submitted > span", "content")
		r.Date, err = time.Parse(time.RFC3339, dateString)
		if err != nil {
			fmt.Println(err)
		}

		r.URL = fmt.Sprintf("%s%s", baseURL, e.ChildAttr("h2 > a", "href"))
		r.Title = e.ChildText("h2 > a")
		r.State = e.ChildText("span.status-message")

		// Query the counter once and reuse the text for both fields. An
		// article whose counter has no digits must not abort the crawl, so
		// the count stays 0 and the problem is only reported.
		r.commentsString = e.ChildText("div.comment-counts")
		r.CommentCount, err = parseCommentCount(countRe, r.commentsString)
		if err != nil {
			fmt.Println(err)
		}

		r.CrawledAt = time.Now()
		requests = append(requests, r)
	})

	for page := 0; page < pageCount; page++ {
		pageURL := fmt.Sprintf("%s/daten/anfragen?page=%d", baseURL, page)
		if err := c.Visit(pageURL); err != nil {
			fmt.Printf("visiting %s: %v\n", pageURL, err)
		}
	}

	out, err := json.MarshalIndent(requests, "", " ")
	if err != nil {
		fmt.Printf("encoding requests: %v\n", err)
		return
	}
	if err := ioutil.WriteFile(outputPath, out, 0644); err != nil {
		fmt.Printf("writing %s: %v\n", outputPath, err)
	}
}