File tree 6 files changed +32
-12
lines changed
6 files changed +32
-12
lines changed Original file line number Diff line number Diff line change
1
+ # Wiki Index
2
+ This project has been created to play around with the Wikipedia dataset.
3
+ The goal is to calculate the distance between two pages, that is, how many clicks are required
4
+ to get from one page to the other by only using the links on each page.
5
+
6
+ First, download the dataset, e.g. from [here](https://meta.wikimedia.org/wiki/Data_dump_torrents). Next, build and run the index:
7
+
8
+ ```bash
9
+ go build
10
+ ./WikiIndex -i ~/Downloads/simplewiki-20170820-pages-meta-current.xml.bz2
11
+ ```
12
+
13
+ After that, you can reach the web interface at [http://localhost:8080](http://localhost:8080).
Original file line number Diff line number Diff line change 8
8
"github.com/pkg/errors"
9
9
)
10
10
11
- func (a * App ) Serve () error {
11
+ func (a * App ) Serve (address string ) error {
12
12
r := gin .Default ()
13
13
14
14
r .GET ("/" , a .Root ())
@@ -17,7 +17,7 @@ func (a *App) Serve() error {
17
17
r .GET ("/longest" , a .Longest ())
18
18
//r.GET("/loooongest", a.LongestOverall())
19
19
20
- return r .Run (":8080" )
20
+ return r .Run (address )
21
21
}
22
22
23
23
func (a * App ) Root () gin.HandlerFunc {
Original file line number Diff line number Diff line change @@ -7,6 +7,7 @@ require (
7
7
github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4
8
8
github.com/gin-gonic/gin v1.4.0
9
9
github.com/gosimple/slug v1.9.0
10
+ github.com/jessevdk/go-flags v1.4.0
10
11
github.com/pkg/errors v0.8.1
11
12
github.com/stretchr/testify v1.3.0
12
13
)
Original file line number Diff line number Diff line change @@ -32,6 +32,8 @@ github.com/gosimple/slug v1.9.0/go.mod h1:AMZ+sOVe65uByN3kgEyf9WEBKBCSS+dJjMX9x4
32
32
github.com/hashicorp/go-syslog v1.0.0 /go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4 =
33
33
github.com/hashicorp/golang-lru v0.0.0-20180201235237-0fb14efe8c47 /go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8 =
34
34
github.com/hpcloud/tail v1.0.0 /go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU =
35
+ github.com/jessevdk/go-flags v1.4.0 h1:4IU2WS7AumrZ/40jfhf4QVDMsQwqA7VEHozFRrGARJA =
36
+ github.com/jessevdk/go-flags v1.4.0 /go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI =
35
37
github.com/jimstudt/http-authentication v0.0.0-20140401203705-3eca13d6893a /go.mod h1:wK6yTYYcgjHE1Z1QtXACPDjcFJyBskHEdagmnq3vsP8 =
36
38
github.com/json-iterator/go v1.1.6 /go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU =
37
39
github.com/juju/errors v0.0.0-20181118221551-089d3ea4e4d5 h1:rhqTjzJlm7EbkELJDKMTU7udov+Se0xZkWmugr6zGok =
Original file line number Diff line number Diff line change @@ -7,14 +7,13 @@ import (
7
7
"log"
8
8
"os"
9
9
10
- _ "github.com/flosch/pongo2 "
10
+ "github.com/jessevdk/go-flags "
11
11
)
12
12
13
-
14
- //const filename = "testdata/enwiki-20190101-pages-articles-multistream.xml.bz2"
15
- const filename = "testdata/simplewiki-20170820-pages-meta-current.xml.bz2"
16
- //const filename = "testdata/sample.xml"
17
-
13
+ var options struct {
14
+ Filename string `short:"i" long:"input" description:"Wikipedia XML dump (something ending in .xml.bz2)" required:"true"`
15
+ Address string `short:"h" long:"http" description:"HTTP address" default:":8080"`
16
+ }
18
17
19
18
func main () {
20
19
if err := run (); err != nil {
@@ -23,9 +22,14 @@ func main() {
23
22
}
24
23
25
24
func run () error {
25
+ _ , err := flags .Parse (& options )
26
+ if err != nil {
27
+ os .Exit (1 )
28
+ }
29
+
26
30
app := app .New ()
27
31
28
- f , err := os .Open (filename )
32
+ f , err := os .Open (options . Filename )
29
33
if err != nil {
30
34
return err
31
35
}
@@ -39,5 +43,5 @@ func run() error {
39
43
f .Close ()
40
44
}()
41
45
42
- return app .Serve ()
43
- }
46
+ return app .Serve (options . Address )
47
+ }
Original file line number Diff line number Diff line change @@ -50,7 +50,7 @@ <h5 class="card-title">Least referenced page</h5>
50
50
51
51
</ div >
52
52
53
- < h4 > What can you do here?</ h4 >
53
+ < h4 class =" mb-3 " > What can you do here?</ h4 >
54
54
55
55
< div class ="row ">
56
56
You can’t perform that action at this time.
0 commit comments