Merge pull request #9 from p1ass/change-type
Change core implements
p1ass authored Aug 17, 2019
2 parents 08bf46b + 4f876b1 commit b891886
Showing 13 changed files with 143 additions and 307 deletions.
11 changes: 3 additions & 8 deletions README.md
@@ -13,12 +13,12 @@ go get -u github.com/p1ass/feeder
```go
import "github.com/p1ass/feeder"

func fetch(){
func crawl(){
rssCrawler := feeder.NewRSSCrawler("https://example.com/rss")
qiitaCrawler := feeder.NewQiitaCrawler("https://qiita.com/api/v2/users/plus_kyoto/items")

// Crawl data using goroutine.
items := feeder.Crawl(rssCrawler, qiitaCrawler)
items, err := feeder.Crawl(rssCrawler, qiitaCrawler)

feed := &feeder.Feed{
Title: "My feeds",
@@ -90,18 +90,13 @@ func (crawler *qiitaCrawler) Fetch() (*feeder.Items, error) {
}

func convertQiitaToItem(q *qiitaResponse) *feeder.Item {
length := utf8string.NewString(q.Body).RuneCount()
maxLength := 200
if length < 200 {
maxLength = length
}

i := &feeder.Item{
Title: q.Title,
Link: &feeder.Link{Href: q.URL},
Created: q.CreatedAt,
Id: q.ID,
Description: utf8string.NewString(q.Body).Slice(0, maxLength),
Description: q.Body,
}

if q.User != nil {
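After this change, `feeder.Crawl` returns `([]*Item, error)` instead of `*Items`, so callers must handle the error themselves. A minimal sketch of an updated caller, assuming only the API surface visible in this diff (the `log.Fatal` handling and the exact feed fields used are illustrative):

```go
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/p1ass/feeder"
)

func main() {
	rssCrawler := feeder.NewRSSCrawler("https://example.com/rss")

	// Crawl now returns the crawled items and an error instead of *Items.
	items, err := feeder.Crawl(rssCrawler)
	if err != nil {
		log.Fatal(err) // illustrative error handling, not part of the diff
	}

	feed := &feeder.Feed{
		Title:   "My feeds",
		Created: time.Now(),
		Items:   items, // Feed.Items is now a plain []*Item
	}
	fmt.Printf("built feed %q with %d items\n", feed.Title, len(feed.Items))
}
```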
18 changes: 7 additions & 11 deletions atom_crawler.go
@@ -14,18 +14,14 @@ type atomCrawler struct {
URL string
}

// NewAtomCrawler returns atomCrawler
func NewAtomCrawler(url string) Crawler {
return &atomCrawler{URL: url}
}

// Deprecated: Use NewAtomCrawler instead of NewAtomFetcher
func NewAtomFetcher(url string) Fetcher {
return &atomCrawler{URL: url}
}

// Fetch is ...
func (fetcher *atomCrawler) Fetch() (*Items, error) {
resp, err := http.Get(fetcher.URL)
// Crawl crawls entry items from an Atom feed
func (crawler *atomCrawler) Crawl() ([]*Item, error) {
resp, err := http.Get(crawler.URL)
if err != nil {
return nil, errors.Wrap(err, "Failed to get response from rss.")
}
@@ -46,7 +42,7 @@ func (fetcher *atomCrawler) Fetch() (*Items, error) {
}
items = append(items, item)
}
return &Items{items}, nil
return items, nil
}

func convertAtomEntryToItem(e *feeds.AtomEntry) (*Item, error) {
@@ -62,7 +58,7 @@ func convertAtomEntryToItem(e *feeds.AtomEntry) (*Item, error) {
i := &Item{
Title: e.Title,
Description: e.Summary.Content,
Id: e.Id,
ID: e.Id,
Created: &p,
Updated: &u,
}
@@ -85,7 +81,7 @@ func convertAtomEntryToItem(e *feeds.AtomEntry) (*Item, error) {
for _, link := range e.Links {
if link.Rel == "enclosure" {
i.Enclosure = &Enclosure{
Url: link.Href,
URL: link.Href,
Length: link.Length,
Type: link.Type,
}
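With this hunk, crawlers expose `Crawl() ([]*Item, error)` instead of `Fetch() (*Items, error)`, and `NewAtomCrawler` returns the `Crawler` interface. A custom source only has to satisfy that single method; the sketch below uses a hypothetical `staticCrawler` serving fixed items (the type and its data are assumptions, not code from this repository):

```go
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/p1ass/feeder"
)

// staticCrawler is a hypothetical Crawler that returns hard-coded items,
// useful for tests or a hand-written feed.
type staticCrawler struct {
	items []*feeder.Item
}

// Crawl satisfies the new feeder.Crawler interface.
func (c *staticCrawler) Crawl() ([]*feeder.Item, error) {
	return c.items, nil
}

func main() {
	now := time.Now()
	c := &staticCrawler{items: []*feeder.Item{{
		Title:   "Hello",
		Link:    &feeder.Link{Href: "https://example.com/hello"},
		ID:      "hello-1",
		Created: &now,
		// A non-empty Enclosure.URL makes Crawl skip the OGP lookup
		// that fetchOGP performs for items without one.
		Enclosure: &feeder.Enclosure{URL: "https://example.com/og.png", Type: "image/png", Length: "0"},
	}}}

	items, err := feeder.Crawl(c)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(len(items), "item(s) crawled")
}
```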
53 changes: 26 additions & 27 deletions atom_crawler_test.go
@@ -30,38 +30,37 @@ func TestAtomFetch(t *testing.T) {
updated, _ := time.Parse(time.RFC3339, updatedString)
publishedString := "2019-01-01T00:00:00+09:00"
published, _ := time.Parse(time.RFC3339, publishedString)
expected := &feeder.Items{
[]*feeder.Item{{
Title: "title",
Link: &feeder.Link{
Href: "http://example.com",
Rel: "alternate",
},
Source: nil,
Author: &feeder.Author{
Name: "name",
Email: "[email protected]",
},
Description: "summary_content",
Id: "id",
Updated: &updated,
Created: &published,
Enclosure: &feeder.Enclosure{
Url: "http://example.com/image.png",
Type: "image/png",
Length: "0",
},
Content: "content",
}}}
expected := []*feeder.Item{{
Title: "title",
Link: &feeder.Link{
Href: "http://example.com",
Rel: "alternate",
},
Source: nil,
Author: &feeder.Author{
Name: "name",
Email: "[email protected]",
},
Description: "summary_content",
ID: "id",
Updated: &updated,
Created: &published,
Enclosure: &feeder.Enclosure{
URL: "http://example.com/image.png",
Type: "image/png",
Length: "0",
},
Content: "content",
}}

fetcher := feeder.NewAtomCrawler(server.URL + "/feed")
got, err := fetcher.Fetch()
crawler := feeder.NewAtomCrawler(server.URL + "/feed")
got, err := crawler.Crawl()
if err != nil {
t.Fatal(err)
}

if !reflect.DeepEqual(*expected, *got) {
diffs := pretty.Diff(*expected, *got)
if !reflect.DeepEqual(expected, got) {
diffs := pretty.Diff(expected, got)
t.Log(pretty.Println(diffs))
t.Error("Failed to convert AtomEntry to Item.")

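The diff does not show how `server` is constructed; tests like this one usually stand up an `httptest.Server` that returns a canned Atom document so the crawler never touches the network. A minimal sketch of that pattern, assuming a hypothetical fixture path (`testdata/feed.atom` is not taken from the repository):

```go
package feeder_test

import (
	"net/http"
	"net/http/httptest"
)

// newAtomTestServer answers GET /feed with a fixed Atom document.
// Usage: server := newAtomTestServer(); defer server.Close()
func newAtomTestServer() *httptest.Server {
	mux := http.NewServeMux()
	mux.HandleFunc("/feed", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/atom+xml")
		http.ServeFile(w, r, "testdata/feed.atom") // hypothetical fixture path
	})
	return httptest.NewServer(mux)
}
```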
90 changes: 44 additions & 46 deletions feeder.go
@@ -1,57 +1,57 @@
package feeder

import (
"log"
"sync"
"time"

"github.com/pkg/errors"

ogp "github.com/otiai10/opengraph"
"golang.org/x/sync/errgroup"
)

// Deprecated: Fetcher is replaced by Crawler
type Fetcher interface {
Fetch() (*Items, error)
}

// Crawler is an interface for crawling
type Crawler interface {
Fetch() (*Items, error)
Crawl() ([]*Item, error)
}

// Link represents http link
type Link struct {
Href, Rel, Type, Length string
}

// Author represents entry author
type Author struct {
Name, Email string
}

// Image represents image
type Image struct {
Url, Title, Link string
URL, Title, Link string
Width, Height int
}

// Enclosure represents og link
type Enclosure struct {
Url, Length, Type string
URL, Length, Type string
}

// Item represents an entry
type Item struct {
Title string
Link *Link
Source *Link
Author *Author
Description string

Id string
ID string
Updated *time.Time
Created *time.Time
Enclosure *Enclosure
Content string
}

type Items struct {
Items []*Item
}

// Feed represents rss feed or atom feed
type Feed struct {
Title string
Link *Link
@@ -61,60 +61,60 @@ type Feed struct {
Created time.Time
Id string
Subtitle string
Items Items
Items []*Item
Copyright string
Image *Image
}

func (items *Items) Add(i *Items) {
items.Items = append(items.Items, i.Items...)
}

// Crawl is a function that crawls all sites using goroutines.
// func Crawl(fetchers ...Fetcher) *Items is deprecated
func Crawl(crawlers ...Crawler) *Items {
items := &Items{}
// func Crawl(crawlers ...Fetcher) *Items is deprecated
func Crawl(crawlers ...Crawler) ([]*Item, error) {
items := []*Item{}
mutex := sync.Mutex{}
wg := sync.WaitGroup{}

eg := errgroup.Group{}
for _, f := range crawlers {
wg.Add(1)
go func(f Crawler) {
i, err := f.Fetch()
f := f
eg.Go(func() error {
i, err := f.Crawl()
if err != nil {
log.Fatal(err)
return err
} else {
mutex.Lock()
items.Add(i)
items = append(items, i...)
mutex.Unlock()
}
wg.Done()
}(f)
return nil
})
}
if err := eg.Wait(); err != nil {
return nil, errors.Wrap(err, "failed to crawl items")
}
wg.Wait()

fetchOGP(items)
items, err := fetchOGP(items)
if err != nil {
return nil, errors.Wrap(err, "failed to fetch ogp")
}

return items
return items, nil
}

func fetchOGP(items *Items) *Items {
wg := sync.WaitGroup{}
func fetchOGP(items []*Item) ([]*Item, error) {
eg := errgroup.Group{}

for _, i := range items.Items {
wg.Add(1)
for _, i := range items {
i := i
go func() {
if i.Enclosure == nil || i.Enclosure.Url == "" {
eg.Go(func() error {
if i.Enclosure == nil || i.Enclosure.URL == "" {
og, err := ogp.Fetch(i.Link.Href)
if err != nil {
log.Printf("Failed to fetch ogp. %#v", err)
return err
}

if len(og.Image) > 0 {
image := og.Image[0]
i.Enclosure = &Enclosure{}
i.Enclosure.Url = image.URL
i.Enclosure.URL = image.URL

if image.Type != "" {
i.Enclosure.Type = image.Type
@@ -123,12 +123,10 @@ func fetchOGP(items *Items) *Items {
}
i.Enclosure.Length = "0"
}

}
wg.Done()
}()
return nil
})
}
wg.Wait()

return items
return items, nil
}
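The core of the change in feeder.go is swapping the `sync.WaitGroup` + `log.Fatal` fan-out for `errgroup.Group`, so a failing crawler now surfaces its error to the caller instead of terminating the process. Reduced to its essentials, the pattern looks like this (the `fetch` helper and URLs are illustrative, not part of the package):

```go
package main

import (
	"fmt"
	"log"
	"sync"

	"golang.org/x/sync/errgroup"
)

// fetch is a stand-in for Crawler.Crawl: it returns some results or an error.
func fetch(url string) ([]string, error) {
	return []string{url + "#item"}, nil
}

func main() {
	urls := []string{"https://example.com/a", "https://example.com/b"}

	var (
		mu      sync.Mutex
		results []string
		eg      errgroup.Group
	)
	for _, u := range urls {
		u := u // capture the loop variable, as the diff does with `f := f`
		eg.Go(func() error {
			items, err := fetch(u)
			if err != nil {
				return err // Wait returns the first non-nil error
			}
			mu.Lock()
			results = append(results, items...)
			mu.Unlock()
			return nil
		})
	}
	if err := eg.Wait(); err != nil {
		log.Fatal(err)
	}
	fmt.Println(results)
}
```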