From b3a3f4936a37e3571fa2b26da821aa986164a5cd Mon Sep 17 00:00:00 2001 From: gaarder Date: Mon, 20 Jan 2025 06:20:29 +1100 Subject: [PATCH 01/11] Update vrphub.go for Duration --- pkg/scrape/vrphub.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index b140f04cf..ba8473f50 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -7,6 +7,7 @@ import ( "path" "regexp" "strings" + "strconv" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -132,6 +133,20 @@ func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan // Duration sc.Duration = 0 + reDuration := regexp.MustCompile(`^WATCH FULL VIDEO ([0-9}+:*[0-9}*) MIN$`) + e.ForEach(`maxbutton-7 maxbutton maxbutton-get-the-full-video-now`, func(id int, e *colly.HTMLElement) { + tmpDuration, err := reDuration.FindStringSubmatch(e.Text) + if err != nil { + return + } + if tmpDuration != null { + intDuration, err := strconv.Atoi(strings.Split(tmpDuration[1],":")[0]) + if err != nil { + sc.Duration = intDuration + return + } + } + }) // There are 2 places we can find filenames from - one is in the video // previews, and one is in the trailer download section. Some posts From 306c77944f46f846a787c03ad48a80cb833d4d76 Mon Sep 17 00:00:00 2001 From: gaarder Date: Mon, 20 Jan 2025 15:45:26 +1100 Subject: [PATCH 02/11] Update vrphub.go video duration --- pkg/scrape/vrphub.go | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index ba8473f50..e25a7ca4c 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -133,17 +133,13 @@ func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan // Duration sc.Duration = 0 - reDuration := regexp.MustCompile(`^WATCH FULL VIDEO ([0-9}+:*[0-9}*) MIN$`) - e.ForEach(`maxbutton-7 maxbutton maxbutton-get-the-full-video-now`, func(id int, e *colly.HTMLElement) { - tmpDuration, err := reDuration.FindStringSubmatch(e.Text) - if err != nil { - return - } - if tmpDuration != null { + reDuration := regexp.MustCompile(`^WATCH FULL VIDEO ([0-9]+:*[0-9]*) MIN$`) + e.ForEach(`a.maxbutton-get-the-full-video-now`, func(id int, e *colly.HTMLElement) { + tmpDuration := reDuration.FindStringSubmatch(e.Text) + if tmpDuration != nil { intDuration, err := strconv.Atoi(strings.Split(tmpDuration[1],":")[0]) - if err != nil { + if err == nil { sc.Duration = intDuration - return } } }) From 58927e79191f9e8e1686235fb5894d4e0f8939e4 Mon Sep 17 00:00:00 2001 From: gaarder Date: Tue, 21 Jan 2025 06:28:54 +1100 Subject: [PATCH 03/11] gofmt --- pkg/scrape/vrphub.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index e25a7ca4c..4e6719598 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -137,7 +137,7 @@ func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan e.ForEach(`a.maxbutton-get-the-full-video-now`, func(id int, e *colly.HTMLElement) { tmpDuration := reDuration.FindStringSubmatch(e.Text) if tmpDuration != nil { - intDuration, err := strconv.Atoi(strings.Split(tmpDuration[1],":")[0]) + intDuration, err := strconv.Atoi(strings.Split(tmpDuration[1], ":")[0]) if err == nil { sc.Duration = intDuration } From 2c5c627184d24fcd3a8d1f193da397d7316515ab Mon Sep 17 00:00:00 2001 From: gaarder Date: Tue, 21 Jan 2025 06:30:26 +1100 Subject: [PATCH 04/11] gofmt2 From a68c97e4d97ce6dfa2b4c896d433b78ad6ef601f Mon Sep 17 00:00:00 2001 From: gaarder Date: Wed, 22 Jan 2025 06:33:03 +1100 Subject: [PATCH 05/11] Revert --- pkg/scrape/vrphub.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index 4e6719598..48a930ae2 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -133,16 +133,6 @@ func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan // Duration sc.Duration = 0 - reDuration := regexp.MustCompile(`^WATCH FULL VIDEO ([0-9]+:*[0-9]*) MIN$`) - e.ForEach(`a.maxbutton-get-the-full-video-now`, func(id int, e *colly.HTMLElement) { - tmpDuration := reDuration.FindStringSubmatch(e.Text) - if tmpDuration != nil { - intDuration, err := strconv.Atoi(strings.Split(tmpDuration[1], ":")[0]) - if err == nil { - sc.Duration = intDuration - } - } - }) // There are 2 places we can find filenames from - one is in the video // previews, and one is in the trailer download section. Some posts From 174e436c16ad668c0ce3aef3be78a11f2d5230c6 Mon Sep 17 00:00:00 2001 From: gaarder Date: Wed, 22 Jan 2025 06:36:35 +1100 Subject: [PATCH 06/11] Update vrphub.go --- pkg/scrape/vrphub.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index 48a930ae2..4e6719598 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -133,6 +133,16 @@ func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan // Duration sc.Duration = 0 + reDuration := regexp.MustCompile(`^WATCH FULL VIDEO ([0-9]+:*[0-9]*) MIN$`) + e.ForEach(`a.maxbutton-get-the-full-video-now`, func(id int, e *colly.HTMLElement) { + tmpDuration := reDuration.FindStringSubmatch(e.Text) + if tmpDuration != nil { + intDuration, err := strconv.Atoi(strings.Split(tmpDuration[1], ":")[0]) + if err == nil { + sc.Duration = intDuration + } + } + }) // There are 2 places we can find filenames from - one is in the video // previews, and one is in the trailer download section. Some posts From f041b165c239e0170ac2679ea7f9ac37fc42e809 Mon Sep 17 00:00:00 2001 From: gaarder Date: Wed, 22 Jan 2025 06:44:32 +1100 Subject: [PATCH 07/11] Update vrphub.go --- pkg/scrape/vrphub.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index 4e6719598..77688bd92 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -132,7 +132,6 @@ func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan }) // Duration - sc.Duration = 0 reDuration := regexp.MustCompile(`^WATCH FULL VIDEO ([0-9]+:*[0-9]*) MIN$`) e.ForEach(`a.maxbutton-get-the-full-video-now`, func(id int, e *colly.HTMLElement) { tmpDuration := reDuration.FindStringSubmatch(e.Text) From bb06d98a3d9793c61c4e6ab41f690ff2b3e4a053 Mon Sep 17 00:00:00 2001 From: gaarder Date: Thu, 23 Jan 2025 15:51:08 +1100 Subject: [PATCH 08/11] gofmt4 --- pkg/scrape/vrphub.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index 77688bd92..daeb523b9 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -6,8 +6,8 @@ import ( "net/url" "path" "regexp" - "strings" "strconv" + "strings" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -132,6 +132,7 @@ func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan }) // Duration + sc.Duration = 0 reDuration := regexp.MustCompile(`^WATCH FULL VIDEO ([0-9]+:*[0-9]*) MIN$`) e.ForEach(`a.maxbutton-get-the-full-video-now`, func(id int, e *colly.HTMLElement) { tmpDuration := reDuration.FindStringSubmatch(e.Text) From e2a3a927f5f2d28cdb508207549740a2c07ba9a3 Mon Sep 17 00:00:00 2001 From: gaarder austenite Date: Thu, 23 Jan 2025 05:03:16 +0000 Subject: [PATCH 09/11] gitpod From 0e967aadbe4989748ffc485fc350f633b906b8bc Mon Sep 17 00:00:00 2001 From: gaarder austenite Date: Thu, 23 Jan 2025 05:09:58 +0000 Subject: [PATCH 10/11] gitpod gofmt --- pkg/scrape/vrphub.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index daeb523b9..a4291fc5c 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -132,7 +132,6 @@ func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan }) // Duration - sc.Duration = 0 reDuration := regexp.MustCompile(`^WATCH FULL VIDEO ([0-9]+:*[0-9]*) MIN$`) e.ForEach(`a.maxbutton-get-the-full-video-now`, func(id int, e *colly.HTMLElement) { tmpDuration := reDuration.FindStringSubmatch(e.Text) From fcec8a1ba33673e0cb0f95428ce0773818d20c3c Mon Sep 17 00:00:00 2001 From: gaarder Date: Fri, 24 Jan 2025 15:30:54 +1100 Subject: [PATCH 11/11] Update vrphub scraper to only consider featured performers --- pkg/scrape/vrphub.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index a4291fc5c..b8eaeaee0 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -101,9 +101,13 @@ func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan // Cast sc.ActorDetails = make(map[string]models.ActorDetails) - e.ForEach(`div.td-post-header header.td-post-title span.td-post-date2 a.ftlink`, func(id int, e *colly.HTMLElement) { - sc.Cast = append(sc.Cast, e.Text) - sc.ActorDetails[e.Text] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Attr("href")} + e.ForEach(`div.td-post-header header.td-post-title span.td-post-date2`, func(id int, e *colly.HTMLElement) { + if strings.Contains(e.Text, "Featuring") { + e.ForEach(`a.ftlink`, func(id int, e *colly.HTMLElement) { + sc.Cast = append(sc.Cast, e.Text) + sc.ActorDetails[e.Text] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Attr("href")} + }) + } }) // Gallery