From 3c7cbc0615a747527eb973736f0fd68d3a9893b0 Mon Sep 17 00:00:00 2001
From: pops64
Date: Tue, 15 Oct 2024 15:54:05 -0400
Subject: [PATCH 01/15] Rebuild of Reality Lovers

The API appears to have been abandoned and now returns 403 errors. The new
scraper relies on HTML for its data. All works. Title and cover are both
grabbed from the index page, since these seem to be the most accurate and
useful data sources. On a single-scene scrape we fall back to using the
gallery and the URL for these fields.
---
 pkg/scrape/realitylovers.go | 206 ++++++++++++++++++------------------
 1 file changed, 104 insertions(+), 102 deletions(-)

diff --git a/pkg/scrape/realitylovers.go b/pkg/scrape/realitylovers.go
index ba4029cae..0486da64b 100644
--- a/pkg/scrape/realitylovers.go
+++ b/pkg/scrape/realitylovers.go
@@ -1,16 +1,13 @@
 package scrape
 
 import (
-	"fmt"
 	"regexp"
 	"strings"
-	"time"
 
-	"github.com/go-resty/resty/v2"
 	"github.com/gocolly/colly/v2"
 	"github.com/mozillazg/go-slugify"
+	"github.com/nleeper/goment"
 	"github.com/thoas/go-funk"
-	"github.com/tidwall/gjson"
 	"github.com/xbapps/xbvr/pkg/models"
 )
 
@@ -18,129 +15,134 @@ func RealityLoversSite(wg *models.ScrapeWG, updateSite bool, knownScenes []strin
 	defer wg.Done()
 	logScrapeStart(scraperID, siteID)
 
-	sceneCollector := createCollector("realitylovers.com", "engine.realitylovers.com", "tsvirtuallovers.com", "engine.tsvirtuallovers.com")
+	sceneCollector := createCollector(domain)
+	siteCollector := createCollector(domain)
 
-	sceneCollector.OnResponse(func(r *colly.Response) {
-		if r.StatusCode != 200 {
-			return
-		}
-		json := gjson.ParseBytes(r.Body)
+	// These cookies are needed for age verification.
+	siteCollector.OnRequest(func(r *colly.Request) {
+		r.Headers.Set("Cookie", "agreedToDisclaimer=true")
+	})
+
+	sceneCollector.OnRequest(func(r *colly.Request) {
+		r.Headers.Set("Cookie", "agreedToDisclaimer=true")
+	})
 
+	sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
 		sc := models.ScrapedScene{}
 		sc.ScraperID = scraperID
 		sc.SceneType = "VR"
 		sc.Studio = "RealityLovers"
 		sc.Site = siteID
-		sc.HomepageURL = r.Request.Ctx.Get("sceneURL")
-
-		// Scene ID
-		sc.SiteID = json.Get("contentId").String()
-		sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID
+		sc.SiteID = ""
+		sc.HomepageURL, _ = strings.CutSuffix(e.Request.URL.String(), "/")
 
-		sc.Title = json.Get("title").String()
-		sc.Synopsis = json.Get("description").String()
+		// Cover URL
+		coverURL := e.Request.Ctx.GetAny("coverURL").(string)
+		sc.Covers = append(sc.Covers, coverURL)
 
-		covers := json.Get("mainImages.0.imgSrcSet").String()
-		sc.Covers = append(sc.Covers, strings.Fields(covers)[0])
+		// Gallery
+		e.ForEach(`div.owl-carousel div.item`, func(id int, e *colly.HTMLElement) {
+			sc.Gallery = append(sc.Gallery, e.ChildAttr("img", "src"))
+		})
 
-		sc.Released = json.Get("releaseDate").String()
+		// In case we scrape a single scene, use one of the gallery images for the cover
+		if singleSceneURL != "" {
+			sc.Covers = append(sc.Covers, sc.Gallery[0])
+		}
 
 		// Cast
 		sc.ActorDetails = make(map[string]models.ActorDetails)
-		json.Get("starring").ForEach(func(_, star gjson.Result) bool {
-			name := star.Get("name").String()
-			sc.Cast = append(sc.Cast, name)
-			sc.ActorDetails[name] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: "https://" + domain + "/" + star.Get("uri").String()}
-			return true
-		})
-
-		// Gallery
-		json.Get("screenshots").ForEach(func(_, screenshot gjson.Result) bool {
-			imgset := screenshot.Get("galleryImgSrcSet").String()
-			images := strings.Split(imgset, ",")
-			selectedImage := ""
-			for _, image := range images {
-				parts := strings.Fields(image)
-				if selectedImage == "" {
-					selectedImage = parts[0]
-				}
-				if parts[1] == "1920w" {
-					selectedImage = parts[0]
+		e.ForEach(`table.video-description-list tbody`, func(id int, e *colly.HTMLElement) {
+			// Cast
+			e.ForEach(`tr:nth-child(1) a`, func(id int, e *colly.HTMLElement) {
+				if strings.TrimSpace(e.Text) != "" {
+					sc.Cast = append(sc.Cast, strings.TrimSpace(e.Text))
+					sc.ActorDetails[strings.TrimSpace(e.Text)] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Request.AbsoluteURL(e.Attr("href"))}
 				}
-			}
-			sc.Gallery = append(sc.Gallery, selectedImage)
-			return true
-		})
+			})
+
+			// Tags
+			e.ForEach(`tr:nth-child(2) a`, func(id int, e *colly.HTMLElement) {
+				tag := strings.TrimSpace(e.Text)
 
-		// Tags
-		json.Get("categories").ForEach(func(_, category gjson.Result) bool {
-			sc.Tags = append(sc.Tags, category.Get("name").String())
-			return true
+				// Standardize the resolution tags
+				tag, _ = strings.CutSuffix(strings.ToLower(tag), " vr porn")
+				tag, _ = strings.CutSuffix(tag, " ts")
+				sc.Tags = append(sc.Tags, tag)
+			})
+
+			// Date
+			tmpDate, _ := goment.New(strings.TrimSpace(e.ChildText(`tr:nth-child(3) td:last-child`)), "MMMM DD, YYYY")
+			sc.Released = tmpDate.Format("YYYY-MM-DD")
 		})
 
-		sc.TrailerType = "url"
-		sc.TrailerSrc = json.Get("trailerUrl").String()
+		// Synopsis
+		sc.Synopsis = strings.TrimSpace(e.ChildText("div.accordion-body"))
+
+		tmp := strings.Split(sc.HomepageURL, "/")
+
+		// Title
+		sc.Title = e.Request.Ctx.GetAny("title").(string)
+
+		// Fall back in case of single-scene scraping
+		if sc.Title == "" {
+			sc.Title = strings.ReplaceAll(tmp[len(tmp)-1], "-", " ")
+		}
+
+		// Scene ID
+		sc.SiteID = tmp[len(tmp)-2]
+
+		if sc.SiteID != "" {
+			sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID
 
-		out <- sc
+			// save only if we got a SceneID
+			out <- sc
+		}
 	})
 
-	// Request scenes via REST API
-	if singleSceneURL == "" {
-		page := 0
-		for {
-			url := fmt.Sprintf("https://engine.%s/content/videos?max=12&page=%v&pornstar=&category=&perspective=&sort=NEWEST", domain, page)
-			log.Infoln("visiting", url)
-			r, err := resty.New().R().
-				SetHeader("User-Agent", UserAgent).
-				Get(url)
-
-			if err != nil {
-				log.Errorf("Error fetching BaberoticaVR feed: %s", err)
-				logScrapeFinished(scraperID, siteID)
-				return nil
-			}
+	siteCollector.OnHTML(`a.page-link[aria-label="Next"]:not(.disabled)`, func(e *colly.HTMLElement) {
+		if !limitScraping {
+			pageURL := e.Request.AbsoluteURL(e.Attr("href"))
+			siteCollector.Visit(pageURL)
+		}
+	})
 
-			scenecnt := 0
-			if err == nil || r.StatusCode() == 200 {
-				result := gjson.Get(r.String(), "contents")
-				result.ForEach(func(key, value gjson.Result) bool {
-					scenecnt++
-					sceneURL := "https://" + domain + "/" + value.Get("videoUri").String()
-					sceneID := value.Get("id").String()
-					if !funk.ContainsString(knownScenes, sceneURL) {
-						ctx := colly.NewContext()
-						ctx.Put("sceneURL", sceneURL)
-						sceneCollector.Request("GET", "https://engine."+domain+"/content/videoDetail?contentId="+sceneID, nil, ctx, nil)
-					}
-					return true
-				})
-			}
-			if err != nil {
-				log.Errorf("Error visiting %s %s", url, err)
-			}
-			if r.StatusCode() != 200 {
-				log.Errorf("Return code visiting %s %v", url, r.StatusCode())
-			}
+	siteCollector.OnHTML(`div#gridView`, func(e *colly.HTMLElement) {
+
+		e.ForEach("div.video-grid-view", func(id int, e *colly.HTMLElement) {
 
-			if scenecnt < 12 {
-				break
+			re := regexp.MustCompile(`.+[jJ][pP][gG]`)
+			tmp := strings.Split(e.ChildAttr("img", "srcset"), ",")
+			r := re.FindStringSubmatch(tmp[len(tmp)-1])
+			coverURL := ""
+
+			if len(r) > 0 {
+				coverURL = strings.TrimSpace(r[0])
+			} else {
+				log.Warnln("Couldn't find cover img in srcset:", tmp)
 			}
-			page++
-			if limitScraping {
-				break
+
+			title := e.ChildText("p.card-title")
+
+			sceneURL := e.Request.AbsoluteURL(e.ChildAttr("a", "href"))
+
+			// If the scene exists in the database, there's no need to scrape
+			if !funk.ContainsString(knownScenes, sceneURL) {
+				ctx := colly.NewContext()
+				ctx.Put("coverURL", coverURL)
+				ctx.Put("title", title)
+				sceneCollector.Request("GET", sceneURL, nil, ctx, nil)
 			}
-			// have seen instances of status 404, so make sure we don't span will calls
-			time.Sleep(time.Second)
-		}
-	} else {
-		re := regexp.MustCompile(`.com\/vd\/(\d+)\/`)
-		match := re.FindStringSubmatch(singleSceneURL)
-		if len(match) >= 2 {
-			ctx := colly.NewContext()
-			ctx.Put("sceneURL", singleSceneURL)
-			sceneCollector.Request("GET", "https://engine."+domain+"/content/videoDetail?contentId="+match[1], nil, ctx, nil)
-		}
+		})
+	})
+
+	if singleSceneURL != "" {
+		ctx := colly.NewContext()
+		ctx.Put("coverURL", "")
+		ctx.Put("title", "")
+		sceneCollector.Request("GET", singleSceneURL, nil, ctx, nil)
+	} else {
+		siteCollector.Visit("https://" + domain + "/videos/page1")
 	}
 
 	if updateSite {

From 6d93807bdf913ba07d9107c5aa5431ae1894a43a Mon Sep 17 00:00:00 2001
From: pops64
Date: Mon, 11 Nov 2024 14:26:29 -0500
Subject: [PATCH 02/15] Final KinkVR working

File names are dropped upon migration. There appears to be no good means of
guessing the filename going forward, so no migration code is needed.

Due to the change in the URL, all scenes are re-scraped on the next update
and all scraped data is replaced with its new format.
---
 pkg/scrape/badoink.go   |   9 +--
 pkg/scrape/badoinkv2.go | 127 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+), 7 deletions(-)
 create mode 100644 pkg/scrape/badoinkv2.go

diff --git a/pkg/scrape/badoink.go b/pkg/scrape/badoink.go
index 6c18805bf..fde652299 100644
--- a/pkg/scrape/badoink.go
+++ b/pkg/scrape/badoink.go
@@ -26,8 +26,8 @@ func BadoinkSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
 	defer wg.Done()
 	logScrapeStart(scraperID, siteID)
 
-	sceneCollector := createCollector("badoinkvr.com", "babevr.com", "vrcosplayx.com", "18vr.com", "kinkvr.com")
-	siteCollector := createCollector("badoinkvr.com", "babevr.com", "vrcosplayx.com", "18vr.com", "kinkvr.com")
+	sceneCollector := createCollector("badoinkvr.com", "babevr.com", "vrcosplayx.com", "18vr.com")
+	siteCollector := createCollector("badoinkvr.com", "babevr.com", "vrcosplayx.com", "18vr.com")
 	trailerCollector := cloneCollector(sceneCollector)
 
 	commonDb, _ := models.GetCommonDB()
@@ -283,14 +283,9 @@ func BabeVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan
 	return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "babevr", "BabeVR", "https://babevr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping)
 }
 
-func KinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
-	return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "kinkvr", "KinkVR", "https://kinkvr.com/bdsm-vr-videos?order=newest", singeScrapeAdditionalInfo, limitScraping)
-}
-
 func init() {
 	registerScraper("badoinkvr", "BadoinkVR", "https://pbs.twimg.com/profile_images/618071358933610497/QaMV81nF_200x200.png", "badoinkvr.com", BadoinkVR)
 	registerScraper("18vr", "18VR", "https://pbs.twimg.com/profile_images/989481761783545856/w-iKqgqV_200x200.jpg", "18vr.com", B18VR)
 	registerScraper("vrcosplayx", "VRCosplayX", "https://pbs.twimg.com/profile_images/900675974039298049/ofMytpkQ_200x200.jpg", "vrcosplayx.com", VRCosplayX)
 	registerScraper("babevr", "BabeVR", "https://babevr.com/icons/babevr/apple-touch-icon.png", "babevr.com", BabeVR)
-	registerScraper("kinkvr", "KinkVR", "https://kinkvr.com/icons/kinkvr/apple-touch-icon.png", "kinkvr.com", KinkVR)
 }
diff --git a/pkg/scrape/badoinkv2.go b/pkg/scrape/badoinkv2.go
new file mode 100644
index 000000000..91cc4d04f
--- /dev/null
+++ b/pkg/scrape/badoinkv2.go
@@ -0,0 +1,127 @@
+package scrape
+
+import (
+	"strings"
+
+	"github.com/gocolly/colly/v2"
+	"github.com/mozillazg/go-slugify"
+	"github.com/nleeper/goment"
+	"github.com/thoas/go-funk"
+	"github.com/xbapps/xbvr/pkg/models"
+)
+
+func BadoinkSitev2(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error {
+	defer wg.Done()
+	logScrapeStart(scraperID, siteID)
+
+	sceneCollector := createCollector(domain)
+	siteCollector := createCollector(domain)
+
+	// These cookies are needed for age verification.
+	siteCollector.OnRequest(func(r *colly.Request) {
+		r.Headers.Set("Cookie", "agreedToDisclaimer=true")
+	})
+
+	sceneCollector.OnRequest(func(r *colly.Request) {
+		r.Headers.Set("Cookie", "agreedToDisclaimer=true")
+	})
+
+	sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
+		sc := models.ScrapedScene{}
+		sc.ScraperID = scraperID
+		sc.SceneType = "VR"
+		sc.Studio = "Badoink"
+		sc.Site = siteID
+		sc.SiteID = ""
+		sc.HomepageURL, _ = strings.CutSuffix(e.Request.URL.String(), "/")
+
+		// Cover URL
+		coverURL := e.ChildAttr("div#povVideoContainer dl8-video", "poster")
+		sc.Covers = append(sc.Covers, coverURL)
+
+		// Gallery
+		e.ForEach(`div.owl-carousel div.item`, func(id int, e *colly.HTMLElement) {
+			sc.Gallery = append(sc.Gallery, e.ChildAttr("img", "src"))
+		})
+
+		// In case we scrape a single scene, use one of the gallery images for the cover
+		if singleSceneURL != "" {
+			sc.Covers = append(sc.Covers, sc.Gallery[0])
+		}
+
+		// Cast
+		sc.ActorDetails = make(map[string]models.ActorDetails)
+		e.ForEach(`table.video-description-list tbody`, func(id int, e *colly.HTMLElement) {
+			// Cast
+			e.ForEach(`tr:nth-child(1) a`, func(id int, e *colly.HTMLElement) {
+				if strings.TrimSpace(e.Text) != "" {
+					sc.Cast = append(sc.Cast, strings.TrimSpace(e.Text))
+					sc.ActorDetails[strings.TrimSpace(e.Text)] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Request.AbsoluteURL(e.Attr("href"))}
+				}
+			})
+
+			// Tags
+			e.ForEach(`tr:nth-child(2) a`, func(id int, e *colly.HTMLElement) {
+				tag := strings.TrimSpace(e.Text)
+				sc.Tags = append(sc.Tags, tag)
+			})
+
+			// Date
+			tmpDate, _ := goment.New(strings.TrimSpace(e.ChildText(`tr:nth-child(3) td:last-child`)), "MMMM DD, YYYY")
+			sc.Released = tmpDate.Format("YYYY-MM-DD")
+		})
+
+		// Synopsis
+		sc.Synopsis = strings.TrimSpace(e.ChildText("div.accordion-body"))
+
+		// Title
+		sc.Title = e.ChildText("h1.page-title")
+
+		// Scene ID -- uses the ending number of the video URL instead of the ID of the directory that the video link is stored in (maintains backwards compatibility with old scenes)
+		tmp := strings.Split(sc.HomepageURL, "/")
+		siteIDstr := strings.Split(tmp[len(tmp)-1], "-")
+		sc.SiteID = siteIDstr[len(siteIDstr)-1]
+
+		if sc.SiteID != "" {
+			sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID
+
+			// save only if we got a SceneID
+			out <- sc
+		}
+	})
+
+	siteCollector.OnHTML(`a.page-link[aria-label="Next"]:not(.disabled)`, func(e *colly.HTMLElement) {
+		if !limitScraping {
+			pageURL := e.Request.AbsoluteURL(e.Attr("href"))
+			siteCollector.Visit(pageURL)
+		}
+	})
+
+	siteCollector.OnHTML(`div.video-grid-view a`, func(e *colly.HTMLElement) {
+		sceneURL := e.Request.AbsoluteURL(e.Attr("href"))
+		// If the scene exists in the database, there's no need to scrape
+		if !funk.ContainsString(knownScenes, sceneURL) {
+			sceneCollector.Visit(sceneURL)
+		}
+	})
+
+	if singleSceneURL != "" {
+		sceneCollector.Visit(singleSceneURL)
+	} else {
+		siteCollector.Visit("https://" + domain + "/videos/page1")
+	}
+
+	if updateSite {
+		updateSiteLastUpdate(scraperID)
+	}
+	logScrapeFinished(scraperID, siteID)
+	return nil
+}
+
+func KinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
+	return BadoinkSitev2(wg, updateSite, knownScenes, out, singleSceneURL, "kinkvr", "KinkVR", "kinkvr.com", singeScrapeAdditionalInfo, limitScraping)
+}
+
+func init() {
+	registerScraper("kinkvr", "KinkVR", "https://kinkvr.com/icons/kinkvr/apple-touch-icon.png", "kinkvr.com", KinkVR)
+}

From 48b8aea3e0721686f349b0f6f55ffd6ac231eaa5 Mon Sep 17 00:00:00 2001
From: pops64
Date: Tue, 12 Nov 2024 11:33:13 -0500
Subject: [PATCH 03/15] Bug fix

Bootstrap sites use a trailing slash. I remove it for internal logic, and
that is what gets saved. But when comparing new scenes against what has
already been scraped, the trailing slash caused this check to never come
up true.
---
 pkg/scrape/badoinkv2.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/scrape/badoinkv2.go b/pkg/scrape/badoinkv2.go
index 91cc4d04f..ba76791af 100644
--- a/pkg/scrape/badoinkv2.go
+++ b/pkg/scrape/badoinkv2.go
@@ -98,7 +98,7 @@ func BadoinkSitev2(wg *models.ScrapeWG, updateSite bool, knownScenes []string, o
 	})
 
 	siteCollector.OnHTML(`div.video-grid-view a`, func(e *colly.HTMLElement) {
-		sceneURL := e.Request.AbsoluteURL(e.Attr("href"))
+		sceneURL, _ := strings.CutSuffix(e.Request.AbsoluteURL(e.Attr("href")), "/")
 		// If the scene exists in the database, there's no need to scrape
 		if !funk.ContainsString(knownScenes, sceneURL) {
 			sceneCollector.Visit(sceneURL)

From 04531a349dbc6fb2ab1712e21a83b1ad30bb826f Mon Sep 17 00:00:00 2001
From: pops64
Date: Tue, 12 Nov 2024 12:05:45 -0500
Subject: [PATCH 04/15] Spin off KinkVR completely

After some digging it appears KinkVR was sold, not given a website redesign
by Badoink. This now completely separates KinkVR from Badoink.
---
 pkg/scrape/{badoinkv2.go => kinkvr.go} | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)
 rename pkg/scrape/{badoinkv2.go => kinkvr.go} (82%)

diff --git a/pkg/scrape/badoinkv2.go b/pkg/scrape/kinkvr.go
similarity index 82%
rename from pkg/scrape/badoinkv2.go
rename to pkg/scrape/kinkvr.go
index ba76791af..a8dad3acb 100644
--- a/pkg/scrape/badoinkv2.go
+++ b/pkg/scrape/kinkvr.go
@@ -10,12 +10,14 @@ import (
 	"github.com/xbapps/xbvr/pkg/models"
 )
 
-func BadoinkSitev2(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error {
+func KinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
 	defer wg.Done()
+	scraperID := "kinkvr"
+	siteID := "KinkVR"
 	logScrapeStart(scraperID, siteID)
 
-	sceneCollector := createCollector(domain)
-	siteCollector := createCollector(domain)
+	sceneCollector := createCollector("kinkvr.com")
+	siteCollector := createCollector("kinkvr.com")
 
 	// These cookies are needed for age verification.
 	siteCollector.OnRequest(func(r *colly.Request) {
@@ -33,7 +35,7 @@
 		sc.Studio = "Badoink"
 		sc.Site = siteID
 		sc.SiteID = ""
-		sc.HomepageURL, _ = strings.CutSuffix(e.Request.URL.String(), "/")
+		sc.HomepageURL = e.Request.URL.String()
 
 		// Cover URL
 		coverURL := e.ChildAttr("div#povVideoContainer dl8-video", "poster")
@@ -78,7 +80,8 @@
 		sc.Title = e.ChildText("h1.page-title")
 
 		// Scene ID -- uses the ending number of the video URL instead of the ID of the directory that the video link is stored in (maintains backwards compatibility with old scenes)
-		tmp := strings.Split(sc.HomepageURL, "/")
+		tmpUrlStr, _ := strings.CutSuffix(e.Request.URL.String(), "/")
+		tmp := strings.Split(tmpUrlStr, "/")
 		siteIDstr := strings.Split(tmp[len(tmp)-1], "-")
 		sc.SiteID = siteIDstr[len(siteIDstr)-1]
 
@@ -98,7 +101,7 @@
 	})
 
 	siteCollector.OnHTML(`div.video-grid-view a`, func(e *colly.HTMLElement) {
-		sceneURL, _ := strings.CutSuffix(e.Request.AbsoluteURL(e.Attr("href")), "/")
+		sceneURL := e.Request.AbsoluteURL(e.Attr("href"))
 		// If the scene exists in the database, there's no need to scrape
 		if !funk.ContainsString(knownScenes, sceneURL) {
 			sceneCollector.Visit(sceneURL)
@@ -108,7 +111,7 @@
 	if singleSceneURL != "" {
 		sceneCollector.Visit(singleSceneURL)
 	} else {
-		siteCollector.Visit("https://" + domain + "/videos/page1")
+		siteCollector.Visit("https://kinkvr.com/videos/page1")
 	}
 
 	if updateSite {
@@ -118,10 +121,6 @@
 	return nil
 }
 
-func KinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
-	return BadoinkSitev2(wg, updateSite, knownScenes, out, singleSceneURL, "kinkvr", "KinkVR", "kinkvr.com", singeScrapeAdditionalInfo, limitScraping)
-}
-
 func init() {
 	registerScraper("kinkvr", "KinkVR", "https://kinkvr.com/icons/kinkvr/apple-touch-icon.png", "kinkvr.com", KinkVR)
 }

From f22761efbc3c454b2d684b522ce3f798740d2bb7 Mon Sep 17 00:00:00 2001
From: pops64
Date: Wed, 27 Nov 2024 10:44:13 -0500
Subject: [PATCH 05/15] Add Auth Cookie Options
---
 pkg/api/options.go                                 |  2 ++
 pkg/config/config.go                               |  1 +
 pkg/scrape/slrstudios.go                           | 14 ++++++++++++
 .../options/sections/InterfaceAdvanced.vue         | 22 +++++++++++++++++++
 4 files changed, 39 insertions(+)

diff --git a/pkg/api/options.go b/pkg/api/options.go
index a50417ac8..c79298806 100644
--- a/pkg/api/options.go
+++ b/pkg/api/options.go
@@ -73,6 +73,7 @@ type RequestSaveOptionsAdvanced struct {
 	UseAltSrcInFileMatching bool `json:"useAltSrcInFileMatching"`
 	UseAltSrcInScriptFilters bool `json:"useAltSrcInScriptFilters"`
 	IgnoreReleasedBefore time.Time `json:"ignoreReleasedBefore"`
+	SLRAuthCookie string `json:"SLRAuthCookie"`
 }
 
 type RequestSaveOptionsFunscripts struct {
@@ -497,6 +498,7 @@ func (i ConfigResource) saveOptionsAdvanced(req *restful.Request, resp *restful.
 	config.Config.Advanced.UseAltSrcInFileMatching = r.UseAltSrcInFileMatching
 	config.Config.Advanced.UseAltSrcInScriptFilters = r.UseAltSrcInScriptFilters
 	config.Config.Advanced.IgnoreReleasedBefore = r.IgnoreReleasedBefore
+	config.Config.Advanced.SLRAuthCookie = r.SLRAuthCookie
 
 	config.SaveConfig()
 	resp.WriteHeaderAndEntity(http.StatusOK, r)
diff --git a/pkg/config/config.go b/pkg/config/config.go
index c4f4582ea..a5672d24d 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -60,6 +60,7 @@ type ObjectConfig struct {
 		UseAltSrcInFileMatching bool `default:"true" json:"useAltSrcInFileMatching"`
 		UseAltSrcInScriptFilters bool `default:"true" json:"useAltSrcInScriptFilters"`
 		IgnoreReleasedBefore time.Time `json:"ignoreReleasedBefore"`
+		SLRAuthCookie string `default:"" json:"slrAuthCookie"`
 	} `json:"advanced"`
 	Funscripts struct {
 		ScrapeFunscripts bool `default:"false" json:"scrapeFunscripts"`
diff --git a/pkg/scrape/slrstudios.go b/pkg/scrape/slrstudios.go
index 14688e981..0d62f7bae 100644
--- a/pkg/scrape/slrstudios.go
+++ b/pkg/scrape/slrstudios.go
@@ -32,6 +32,16 @@ func SexLikeReal(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
 	sceneCollector := createCollector("www.sexlikereal.com")
 	siteCollector := createCollector("www.sexlikereal.com")
 
+	if config.Config.Advanced.SLRAuthCookie != "" {
+		// This cookie is needed for authenticated requests.
+		siteCollector.OnRequest(func(r *colly.Request) {
+			r.Headers.Set("Cookie", `auth_jwt=` + config.Config.Advanced.SLRAuthCookie)
+		})
+		sceneCollector.OnRequest(func(r *colly.Request) {
+			r.Headers.Set("Cookie", `auth_jwt=` + config.Config.Advanced.SLRAuthCookie)
+		})
+	}
+
 	commonDb, _ := models.GetCommonDB()
 
 	// RegEx Patterns
@@ -335,6 +345,10 @@ func SexLikeReal(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
 		}
 	})
 
+	siteCollector.OnResponse(func(r *colly.Response) {
+		r.Save("slr.html")
+	})
+
 	siteCollector.OnHTML(`div.c-grid--scenes article`, func(e *colly.HTMLElement) {
 		sceneURL := e.Request.AbsoluteURL(e.ChildAttr("a[data-qa=scenes-grid-item-link-title]", "href"))
 
diff --git a/ui/src/views/options/sections/InterfaceAdvanced.vue b/ui/src/views/options/sections/InterfaceAdvanced.vue
index c3257b243..16ea4b68d 100644
--- a/ui/src/views/options/sections/InterfaceAdvanced.vue
+++ b/ui/src/views/options/sections/InterfaceAdvanced.vue
@@ -9,6 +9,7 @@
+
@@ -165,6 +166,19 @@
+            <div class="columns">
+              <div class="column">
+                <b-field label="SLR Auth Cookie (auth_jwt)">
+                  <b-input v-model="slrAuthCookie" type="text"></b-input>
+                </b-field>
+              </div>
+              <div class="column is-narrow">
+                <b-field label="&nbsp;">
+                  <b-button type="is-primary" @click="save">Save</b-button>
+                </b-field>
+              </div>
+ @@ -338,6 +352,14 @@ export default { }, isLoading: function () { return this.$store.state.optionsAdvanced.loading + }, + slrAuthCookie: { + get () { + return this.$store.state.optionsAdvanced.advanced.slrAuthCookie + }, + set (value) { + this.$store.state.optionsAdvanced.advanced.slrAuthCookie = value + } } } } From 8b5be30160e35e19e5383f6cc08169b705f5448f Mon Sep 17 00:00:00 2001 From: pops64 Date: Thu, 28 Nov 2024 22:56:04 -0500 Subject: [PATCH 06/15] Draft PS-Porn migration to Custom Due to a URL change on SLR it is needed to update the URL. Chose to migrate it to a custom scraper in addition to fix the URL. Will remove the scraper from the List if no scenes have been found for the PS-Porn scraper --- pkg/config/scraper_list.go | 37 ++++++++++++++++++++++++++++++++++++ pkg/config/scrapers.json | 6 ------ pkg/migrations/migrations.go | 8 ++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/pkg/config/scraper_list.go b/pkg/config/scraper_list.go index 29296522d..6078d6979 100644 --- a/pkg/config/scraper_list.go +++ b/pkg/config/scraper_list.go @@ -156,3 +156,40 @@ func SetSiteId(configList *[]ScraperConfig, customId string) { } } + +func MigrateFromOfficalToCustom(id string, url string, name string, company string, avatarUrl string) error { + + db, _ := models.GetDB() + defer db.Close() + + // Check to see if we even have PS-Porn data. Other wise there is no need to add a custom site entry + var scenes []models.Scene + db.Where("scraper_id = ?", id).Find(&scenes) + if len(scenes) < 1 { + common.Log.Infoln(`No` + name + ` Scenes found no migration needed`) + return db.Delete(&models.Site{ID: id}).Error + } + common.Log.Infoln(name + `Scenes found migration needed`) + + err := db.Model(&models.Scene{}).Where("scraper_id = ?", id).Update("needs_update", true).Error + if err != nil { + return err + } + // Load the custom scrapers + var scraperConfig ScraperList + scraperConfig.Load() + + // Data taken from offical SLR scraper, updated to fix url change + scraper := ScraperConfig{URL: url, Name: name, Company: company, AvatarUrl: avatarUrl} + // Add the to the SLR list the new custom PS-Porn site + scraperConfig.CustomScrapers.SlrScrapers = append(scraperConfig.CustomScrapers.SlrScrapers, scraper) + + // Save the new list file + fName := filepath.Join(common.AppDir, "scrapers.json") + list, _ := json.MarshalIndent(scraperConfig, "", " ") + os.WriteFile(fName, list, 0644) + + common.Log.Infoln(name + `migration complete. 
+
+	return db.Delete(&models.Site{ID: "ps-porn"}).Error
+}
diff --git a/pkg/config/scrapers.json b/pkg/config/scrapers.json
index 299dafce6..7be1c0921 100644
--- a/pkg/config/scrapers.json
+++ b/pkg/config/scrapers.json
@@ -319,12 +319,6 @@
       "company": "POV Central",
       "avatar_url": "https://mcdn.vrporn.com/files/20191125091909/POVCentralLogo.jpg"
     },
-    {
-      "url": "https://www.sexlikereal.com/studios/ps-porn",
-      "name": "PS-Porn",
-      "company": "Paula Shy",
-      "avatar_url": "https://mcdn.vrporn.com/files/20201221090642/PS-Porn-400x400.jpg"
-    },
     {
       "url": "https://www.sexlikereal.com/studios/realhotvr",
       "name": "RealHotVR",
diff --git a/pkg/migrations/migrations.go b/pkg/migrations/migrations.go
index c2b0ce823..1d200071c 100644
--- a/pkg/migrations/migrations.go
+++ b/pkg/migrations/migrations.go
@@ -2072,6 +2072,14 @@ func Migrate() {
 				return nil
 			},
 		},
+		{
+			ID: "0091-PS-Porn-Offical-Removal",
+			Migrate: func(tx *gorm.DB) error {
+
+				return config.MigrateFromOfficalToCustom("ps-porn", "https://www.sexlikereal.com/studios/ps-porn-vr", "PS-Porn", "Paula Shy", "https://mcdn.vrporn.com/files/20201221090642/PS-Porn-400x400.jpg")
+
+			},
+		},
 	})
 
 	if err := m.Migrate(); err != nil {

From b95a660609b9b9e7f3ef7c2f5a4fd891f8ccbf56 Mon Sep 17 00:00:00 2001
From: pops64
Date: Thu, 28 Nov 2024 23:00:27 -0500
Subject: [PATCH 07/15] Testing
---
 pkg/migrations/migrations.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/migrations/migrations.go b/pkg/migrations/migrations.go
index 1d200071c..f3c39677b 100644
--- a/pkg/migrations/migrations.go
+++ b/pkg/migrations/migrations.go
@@ -2073,7 +2073,7 @@ func Migrate() {
 			},
 		},
 		{
-			ID: "0091-PS-Porn-Offical-Removal",
+			ID: "0092-PS-Porn-Offical-Removal",
 			Migrate: func(tx *gorm.DB) error {
 
 				return config.MigrateFromOfficalToCustom("ps-porn", "https://www.sexlikereal.com/studios/ps-porn-vr", "PS-Porn", "Paula Shy", "https://mcdn.vrporn.com/files/20201221090642/PS-Porn-400x400.jpg")

From 7c1e2977a7031d14395ceae9731808df1e61ed4b Mon Sep 17 00:00:00 2001
From: pops64
Date: Fri, 29 Nov 2024 00:23:34 -0500
Subject: [PATCH 08/15] Working, ready for PR

Some final testing to do. Adds some sites that have main sites available so
users may set them to alts.
---
 pkg/config/scraper_list.go   | 71 ++++++++++++++++++++++++------------
 pkg/config/scrapers.json     | 20 ----------
 pkg/migrations/migrations.go |  8 +++-
 pkg/scrape/vrphub.go         |  4 +-
 4 files changed, 55 insertions(+), 48 deletions(-)

diff --git a/pkg/config/scraper_list.go b/pkg/config/scraper_list.go
index 6078d6979..405c840c9 100644
--- a/pkg/config/scraper_list.go
+++ b/pkg/config/scraper_list.go
@@ -157,7 +157,7 @@ func SetSiteId(configList *[]ScraperConfig, customId string) {
 
 }
 
-func MigrateFromOfficalToCustom(id string, url string, name string, company string, avatarUrl string) error {
+func MigrateFromOfficalToCustom(id string, url string, name string, company string, avatarUrl string, customId string, suffix string) error {
 
 	db, _ := models.GetDB()
 	defer db.Close()
@@ -165,31 +165,54 @@ func MigrateFromOfficalToCustom(id string, url string, name string, company stri
 	// Check to see if we even have PS-Porn data. Otherwise there is no need to add a custom site entry
 	var scenes []models.Scene
 	db.Where("scraper_id = ?", id).Find(&scenes)
-	if len(scenes) < 1 {
-		common.Log.Infoln(`No` + name + ` Scenes found no migration needed`)
-		return db.Delete(&models.Site{ID: id}).Error
-	}
-	common.Log.Infoln(name + `Scenes found migration needed`)
-
-	err := db.Model(&models.Scene{}).Where("scraper_id = ?", id).Update("needs_update", true).Error
-	if err != nil {
-		return err
-	}
-	// Load the custom scrapers
-	var scraperConfig ScraperList
-	scraperConfig.Load()
+	if len(scenes) != 0 {
+		common.Log.Infoln(name + ` Scenes found migration needed`)
+
+		// Update scene data to reflect change
+		for _, scene := range scenes {
+			// Needed due to the weird VRPHub scraper IDs
+			scene.ScraperId = strings.TrimPrefix(id, "vrphub-" + `-` + customId)
+			scene.Site = name + " " + suffix
+			scene.NeedsUpdate = true
+
+			err := db.Save(&scene).Error
+
+			if err != nil {
+				return err
+			}
+		}
 
-	// Data taken from official SLR scraper, updated to fix URL change
-	scraper := ScraperConfig{URL: url, Name: name, Company: company, AvatarUrl: avatarUrl}
-	// Add the new custom PS-Porn site to the SLR list
-	scraperConfig.CustomScrapers.SlrScrapers = append(scraperConfig.CustomScrapers.SlrScrapers, scraper)
+
+
+		// Load the custom scrapers
+		var scraperConfig ScraperList
+		scraperConfig.Load()
+
+		// Data taken from official SLR scraper, updated to fix URL change
+		scraper := ScraperConfig{URL: url, Name: name, Company: company, AvatarUrl: avatarUrl}
+		// Add the new custom PS-Porn site to the SLR list
+		switch customId{
+		case "slr":
+			scraperConfig.CustomScrapers.SlrScrapers = append(scraperConfig.CustomScrapers.SlrScrapers, scraper)
+		case "povr":
+			scraperConfig.CustomScrapers.PovrScrapers = append(scraperConfig.CustomScrapers.PovrScrapers, scraper)
+		case "vrporn":
+			scraperConfig.CustomScrapers.VrpornScrapers = append(scraperConfig.CustomScrapers.VrpornScrapers, scraper)
+		case "vrphub":
+			scraperConfig.CustomScrapers.VrphubScrapers = append(scraperConfig.CustomScrapers.VrphubScrapers, scraper)
+		}
+		// Save the new list file
+		fName := filepath.Join(common.AppDir, "scrapers.json")
+		list, _ := json.MarshalIndent(scraperConfig, "", " ")
+		os.WriteFile(fName, list, 0644)
 
-	// Save the new list file
-	fName := filepath.Join(common.AppDir, "scrapers.json")
-	list, _ := json.MarshalIndent(scraperConfig, "", " ")
-	os.WriteFile(fName, list, 0644)
+		common.Log.Infoln(name + ` migration complete. Please restart XBVR and run ` + name + ` scraper to complete migration`)
+
+	} else {
 
-	common.Log.Infoln(name + `migration complete. Please restart XBVR and run` + name + `scraper to complete migration`)
-
-	return db.Delete(&models.Site{ID: "ps-porn"}).Error
+		common.Log.Infoln(`No ` + name + ` Scenes found no migration needed`)
+
+	}
+
+	return db.Delete(&models.Site{ID: id}).Error
 }
diff --git a/pkg/config/scrapers.json b/pkg/config/scrapers.json
index 7be1c0921..083a72270 100644
--- a/pkg/config/scrapers.json
+++ b/pkg/config/scrapers.json
@@ -175,12 +175,6 @@
       "company": "",
       "avatar_url": ""
     },
-    {
-      "url": "https://www.sexlikereal.com/studios/fuckpassvr",
-      "name": "FuckPassVR",
-      "company": "FuckPassVR",
-      "avatar_url": "https://cdn-vr.sexlikereal.com/images/studio_creatives/logotypes/1/352/logo_crop_1635153994.png"
-    },
     {
       "url": "https://www.sexlikereal.com/studios/heathering",
       "name": "Heathering",
@@ -497,20 +491,6 @@
     }
   ],
   "vrphub": [
-    {
-      "id": "vrphub-vrhush",
-      "url": "https://vrphub.com/category/vr-hush",
-      "name": "VRHush",
-      "company": "VRHush",
-      "avatar_url": "https://cdn-nexpectation.secure.yourpornpartner.com/sites/vrh/favicon/apple-touch-icon-180x180.png"
-    },
-    {
-      "id": "vrphub-stripzvr",
-      "url": "https://vrphub.com/category/stripzvr/",
-      "name": "StripzVR - VRP Hub",
-      "company": "StripzVR",
-      "avatar_url": "https://www.stripzvr.com/wp-content/uploads/2018/09/cropped-favicon-192x192.jpg"
-    }
   ]
 }
diff --git a/pkg/migrations/migrations.go b/pkg/migrations/migrations.go
index f3c39677b..9c0cf5834 100644
--- a/pkg/migrations/migrations.go
+++ b/pkg/migrations/migrations.go
@@ -2073,10 +2073,14 @@ func Migrate() {
 			},
 		},
 		{
-			ID: "0092-PS-Porn-Offical-Removal",
+			ID: "0081-Offical-Site-Removals-With-Main-Site-Aviable",
 			Migrate: func(tx *gorm.DB) error {
 
-				return config.MigrateFromOfficalToCustom("ps-porn", "https://www.sexlikereal.com/studios/ps-porn-vr", "PS-Porn", "Paula Shy", "https://mcdn.vrporn.com/files/20201221090642/PS-Porn-400x400.jpg")
+				err := config.MigrateFromOfficalToCustom("ps-porn", "https://www.sexlikereal.com/studios/ps-porn-vr", "PS-Porn", "Paula Shy", "https://mcdn.vrporn.com/files/20201221090642/PS-Porn-400x400.jpg", "slr", "(SLR)")
+				err = config.MigrateFromOfficalToCustom("fuckpassvr", "https://www.sexlikereal.com/studios/fuckpassvr", "FuckPassVR", "FuckPassVR", "https://cdn-vr.sexlikereal.com/images/studio_creatives/logotypes/1/352/logo_crop_1635153994.png", "slr", "(SLR)")
+				err = config.MigrateFromOfficalToCustom("vrphub-vrhush", "https://vrphub.com/category/vr-hush", "VRHush", "VRHush", "https://cdn-nexpectation.secure.yourpornpartner.com/sites/vrh/favicon/apple-touch-icon-180x180.png", "vrphub", "(VRP Hub)")
+				err = config.MigrateFromOfficalToCustom("vrphub-stripzvr", "https://vrphub.com/category/stripzvr/", "StripzVR - VRP Hub", "StripzVR", "https://www.stripzvr.com/wp-content/uploads/2018/09/cropped-favicon-192x192.jpg", "vrphub", "(VRP Hub)")
+				return err
 
 			},
 		},
diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go
index e461fbb7e..b140f04cf 100644
--- a/pkg/scrape/vrphub.go
+++ b/pkg/scrape/vrphub.go
@@ -273,9 +273,9 @@ func init() {
 	scrapers.Load()
 	for _, scraper := range scrapers.XbvrScrapers.VrphubScrapers {
 		switch scraper.ID {
-		case "vrphub-vrhush":
+		case "vr-hush-vrphub":
 			addVRPHubScraper(scraper.ID, scraper.Name, scraper.Company, scraper.AvatarUrl, false, scraper.URL, vrhushCallback)
-		case "vrphub-stripzvr":
+		case "stripzvr-vrphub":
 			addVRPHubScraper(scraper.ID, scraper.Name, scraper.Company, scraper.AvatarUrl, false, scraper.URL, stripzvrCallback)
 		}
 		addVRPHubScraper(scraper.ID, scraper.Name, scraper.Company, scraper.AvatarUrl, false, scraper.URL, noop)
From 8899e9305eed2d77cfbfdef3ad102b79c8212e8f Mon Sep 17 00:00:00 2001
From: pops64
Date: Fri, 29 Nov 2024 00:41:48 -0500
Subject: [PATCH 09/15] Go Fmt
---
 pkg/config/scraper_list.go   | 16 +++++++---------
 pkg/migrations/migrations.go |  4 ++--
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/pkg/config/scraper_list.go b/pkg/config/scraper_list.go
index 405c840c9..53774a117 100644
--- a/pkg/config/scraper_list.go
+++ b/pkg/config/scraper_list.go
@@ -158,20 +158,20 @@ func SetSiteId(configList *[]ScraperConfig, customId string) {
 }
 
 func MigrateFromOfficalToCustom(id string, url string, name string, company string, avatarUrl string, customId string, suffix string) error {
-
+
 	db, _ := models.GetDB()
 	defer db.Close()
-
+
 	// Check to see if we even have PS-Porn data. Otherwise there is no need to add a custom site entry
 	var scenes []models.Scene
 	db.Where("scraper_id = ?", id).Find(&scenes)
 	if len(scenes) != 0 {
 		common.Log.Infoln(name + ` Scenes found migration needed`)
-
+
 		// Update scene data to reflect change
-		for _, scene := range scenes {
-			// Needed due to the weird VRPHub scraper IDs
-			scene.ScraperId = strings.TrimPrefix(id, "vrphub-" + `-` + customId)
+		for _, scene := range scenes {
+			// Needed due to the weird VRPHub scraper IDs
+			scene.ScraperId = strings.TrimPrefix(id, "vrphub-"+`-`+customId)
 			scene.Site = name + " " + suffix
 			scene.NeedsUpdate = true
diff --git a/pkg/migrations/migrations.go b/pkg/migrations/migrations.go
index 9c0cf5834..639d4424a 100644
--- a/pkg/migrations/migrations.go
+++ b/pkg/migrations/migrations.go
@@ -2075,13 +2075,13 @@ func Migrate() {
 		{
 			ID: "0081-Offical-Site-Removals-With-Main-Site-Aviable",
 			Migrate: func(tx *gorm.DB) error {
-
+
 				err := config.MigrateFromOfficalToCustom("ps-porn", "https://www.sexlikereal.com/studios/ps-porn-vr", "PS-Porn", "Paula Shy", "https://mcdn.vrporn.com/files/20201221090642/PS-Porn-400x400.jpg", "slr", "(SLR)")
 				err = config.MigrateFromOfficalToCustom("fuckpassvr", "https://www.sexlikereal.com/studios/fuckpassvr", "FuckPassVR", "FuckPassVR", "https://cdn-vr.sexlikereal.com/images/studio_creatives/logotypes/1/352/logo_crop_1635153994.png", "slr", "(SLR)")
 				err = config.MigrateFromOfficalToCustom("vrphub-vrhush", "https://vrphub.com/category/vr-hush", "VRHush", "VRHush", "https://cdn-nexpectation.secure.yourpornpartner.com/sites/vrh/favicon/apple-touch-icon-180x180.png", "vrphub", "(VRP Hub)")
 				err = config.MigrateFromOfficalToCustom("vrphub-stripzvr", "https://vrphub.com/category/stripzvr/", "StripzVR - VRP Hub", "StripzVR", "https://www.stripzvr.com/wp-content/uploads/2018/09/cropped-favicon-192x192.jpg", "vrphub", "(VRP Hub)")
 				return err
-
+
 			},
 		},

From d46bdf44498bbd08d9e79aa4871d85b14c7a3e7c Mon Sep 17 00:00:00 2001
From: pops64
Date: Fri, 29 Nov 2024 00:44:43 -0500
Subject: [PATCH 10/15] Clean Up
---
 pkg/config/scrapers.json | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/pkg/config/scrapers.json b/pkg/config/scrapers.json
index 083a72270..d7f34df48 100644
--- a/pkg/config/scrapers.json
+++ b/pkg/config/scrapers.json
@@ -490,7 +490,6 @@
       "avatar_url": "https://mcdn.vrporn.com/files/20200421094123/vrclubz_logo_NEW-400x400_webwhite.png"
     }
   ],
-  "vrphub": [
-  ]
+  "vrphub": []
 }

From 9ccaad643f71d48806e395df3441e597cbc2ce5f Mon Sep 17 00:00:00 2001
From: pops64
Date: Fri, 29 Nov 2024 01:38:51 -0500
Subject: [PATCH 11/15] Fixes in Auth Cookie
---
 ui/src/store/optionsAdvanced.js                     | 3 +++
 ui/src/views/options/sections/InterfaceAdvanced.vue | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/ui/src/store/optionsAdvanced.js b/ui/src/store/optionsAdvanced.js
index 971252bd2..2bab657b4 100644
--- a/ui/src/store/optionsAdvanced.js
+++ b/ui/src/store/optionsAdvanced.js
@@ -13,6 +13,7 @@ const state = {
     useAltSrcInFileMatching: true,
     useAltSrcInScriptFilters: true,
     ignoreReleasedBefore: null,
+    SLRAuthCookie: '',
   }
 }
 
@@ -34,6 +35,7 @@ const actions = {
       state.advanced.useAltSrcInFileMatching = data.config.advanced.useAltSrcInFileMatching
      state.advanced.useAltSrcInScriptFilters = data.config.advanced.useAltSrcInScriptFilters
       state.advanced.ignoreReleasedBefore = data.config.advanced.ignoreReleasedBefore
+      state.advanced.SLRAuthCookie = data.config.advanced.SLRAuthCookie
       state.loading = false
     })
   },
@@ -52,6 +54,7 @@ const actions = {
       state.advanced.useAltSrcInFileMatching = data.useAltSrcInFileMatching
       state.advanced.useAltSrcInScriptFilters = data.useAltSrcInScriptFilters
       state.advanced.ignoreReleasedBefore = data.ignoreReleasedBefore
+      state.advanced.SLRAuthCookie = data.SLRAuthCookie
       state.loading = false
     })
   }
diff --git a/ui/src/views/options/sections/InterfaceAdvanced.vue b/ui/src/views/options/sections/InterfaceAdvanced.vue
index 16ea4b68d..b52810f58 100644
--- a/ui/src/views/options/sections/InterfaceAdvanced.vue
+++ b/ui/src/views/options/sections/InterfaceAdvanced.vue
@@ -166,11 +166,12 @@
+
-                <b-field label="SLR Auth Cookie (auth_jwt)">
-                  <b-input v-model="slrAuthCookie" type="text"></b-input>
+                <b-field label="SLR Auth Cookie (auth_jwt)">
+                  <b-input v-model="slrAuthCookie" type="text" placeholder="auth_jwt cookie value"></b-input>
                   <b-button type="is-primary" @click="save">Save</b-button>

From c4abbdf617ccacfb7149b738963530bc6ee7fd3b Mon Sep 17 00:00:00 2001
From: pops64
Date: Fri, 29 Nov 2024 03:12:04 -0500
Subject: [PATCH 12/15] RL Fixes

Untested, but should work.
---
 pkg/scrape/realitylovers.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/scrape/realitylovers.go b/pkg/scrape/realitylovers.go
index 0486da64b..9a7548c36 100644
--- a/pkg/scrape/realitylovers.go
+++ b/pkg/scrape/realitylovers.go
@@ -34,7 +34,7 @@ func RealityLoversSite(wg *models.ScrapeWG, updateSite bool, knownScenes []strin
 		sc.Studio = "RealityLovers"
 		sc.Site = siteID
 		sc.SiteID = ""
-		sc.HomepageURL, _ = strings.CutSuffix(e.Request.URL.String(), "/")
+		sc.HomepageURL = e.Request.URL.String()
 
 		// Cover URL
 		coverURL := e.Request.Ctx.GetAny("coverURL").(string)
@@ -79,7 +79,7 @@
 		// Synopsis
 		sc.Synopsis = strings.TrimSpace(e.ChildText("div.accordion-body"))
 
-		tmp := strings.Split(sc.HomepageURL, "/")
+		tmp := strings.Split(strings.TrimSuffix(sc.HomepageURL, "/"), "/")
 
 		// Title
 		sc.Title = e.Request.Ctx.GetAny("title").(string)

From 3f13352a62ec9efdd48d8a5fc2b0c29dd6d5291a Mon Sep 17 00:00:00 2001
From: pops64
Date: Fri, 29 Nov 2024 13:58:08 -0500
Subject: [PATCH 13/15] Update Favicon URL
---
 pkg/scrape/fuckpassvr.go | 2 +-
 pkg/scrape/sexbabesvr.go | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/scrape/fuckpassvr.go b/pkg/scrape/fuckpassvr.go
index 34963bf86..fa42a6796 100644
--- a/pkg/scrape/fuckpassvr.go
+++ b/pkg/scrape/fuckpassvr.go
@@ -130,5 +130,5 @@ func FuckPassVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
 }
 
 func init() {
-	registerScraper("fuckpassvr-native", "FuckPassVR", "https://www.fuckpassvr.com/_nuxt/img/logo_bw.1fac7d1.png", "fuckpassvr.com", FuckPassVR)
+	registerScraper("fuckpassvr-native", "FuckPassVR", "https://www.fuckpassvr.com/favicon.png", "fuckpassvr.com", FuckPassVR)
 }
diff --git a/pkg/scrape/sexbabesvr.go b/pkg/scrape/sexbabesvr.go
index 864c7063f..1e353d851 100644
--- a/pkg/scrape/sexbabesvr.go
+++ b/pkg/scrape/sexbabesvr.go
@@ -141,5 +141,5 @@ func SexBabesVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
 }
 
 func init() {
-	registerScraper("sexbabesvr", "SexBabesVR", "https://sexbabesvr.com/assets/front/assets/logo.png", "sexbabesvr.com", SexBabesVR)
+	registerScraper("sexbabesvr", "SexBabesVR", "https://sexbabesvr.com/static/images/favicons/favicon-32x32.png", "sexbabesvr.com", SexBabesVR)
 }

From 558a6b53ab992e95e8a7272a4385486a09f2e2fe Mon Sep 17 00:00:00 2001
From: pops64
Date: Fri, 29 Nov 2024 15:27:46 -0500
Subject: [PATCH 14/15] Favicon
---
 pkg/scrape/kinkvr.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/scrape/kinkvr.go b/pkg/scrape/kinkvr.go
index a8dad3acb..a50bb779a 100644
--- a/pkg/scrape/kinkvr.go
+++ b/pkg/scrape/kinkvr.go
@@ -122,5 +122,5 @@ func KinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan
 }
 
 func init() {
-	registerScraper("kinkvr", "KinkVR", "https://kinkvr.com/icons/kinkvr/apple-touch-icon.png", "kinkvr.com", KinkVR)
+	registerScraper("kinkvr", "KinkVR", "https://static.rlcontent.com/shared/KINK/skins/web-10/branding/favicon.png", "kinkvr.com", KinkVR)
 }

From 2dc288fd22a946a1152fcb3daec19d390f47e11d Mon Sep 17 00:00:00 2001
From: pops64
Date: Sat, 21 Dec 2024 12:01:36 -0500
Subject: [PATCH 15/15] Fix for Title not being scraped

Title was no longer being found; fixed.
Also removed the " - VR Porn" and " Scene" suffixes being applied to recent
scenes.
---
 pkg/scrape/vrspy.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pkg/scrape/vrspy.go b/pkg/scrape/vrspy.go
index b81baf52e..659d3a641 100644
--- a/pkg/scrape/vrspy.go
+++ b/pkg/scrape/vrspy.go
@@ -62,7 +62,8 @@ func VRSpy(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<
 
 		sc.SceneID = scraperID + "-" + sc.SiteID
 
-		sc.Title = e.ChildText(`.video-content .header-container .video-title .section-header-container`)
+		sc.Title = strings.TrimSuffix(strings.TrimSuffix(e.ChildText(`div.video-title .section-header-container`), " Scene"), " - VR Porn")
+
 		sc.Synopsis = e.ChildText(`.video-description-container`)
 
 		sc.Tags = e.ChildTexts(`.video-categories .chip`)