diff --git a/.github/workflows/goreleaser.yaml b/.github/workflows/goreleaser.yaml new file mode 100644 index 0000000..e6bec59 --- /dev/null +++ b/.github/workflows/goreleaser.yaml @@ -0,0 +1,30 @@ +name: goreleaser +on: + push: + tags: + - "*" + +permissions: + contents: write + +jobs: + goreleaser: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.22.x' + check-latest: true + - name: Run GoReleaser + uses: goreleaser/goreleaser-action@v5 + with: + distribution: goreleaser + version: latest + args: release --clean + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.goreleaser.yaml b/.goreleaser.yaml new file mode 100644 index 0000000..0ea6588 --- /dev/null +++ b/.goreleaser.yaml @@ -0,0 +1,39 @@ +project_name: seads +before: + hooks: + - go mod tidy +builds: + - env: + - CGO_ENABLED=0 + goos: + - linux + - windows + - darwin + goarch: + - amd64 + - arm64 + mod_timestamp: "{{ .CommitTimestamp }}" + main: ./cmd/seads/ + +archives: + - format: tar.gz + name_template: >- + {{ .ProjectName }}_ + {{- title .Os }}_ + {{- if eq .Arch "amd64" }}x86_64 + {{- else if eq .Arch "386" }}i386 + {{- else }}{{ .Arch }}{{ end }} + {{- if .Arm }}v{{ .Arm }}{{ end }} + format_overrides: + - goos: windows + format: zip +checksum: + name_template: 'checksums.txt' +snapshot: + name_template: "{{ incpatch .Version }}-next" +changelog: + sort: asc + filters: + exclude: + - '^docs:' + - '^test:' \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f7fb444 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM golang:alpine AS builder +WORKDIR /app +COPY go.mod go.sum ./ +RUN go mod download +COPY . . 
+RUN go build -o /app/seads ./cmd/seads/ + +FROM alpine:latest +WORKDIR /app +COPY --from=builder /app/seads /app/seads +ENTRYPOINT ["/app/seads"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..78cf715 --- /dev/null +++ b/README.md @@ -0,0 +1,163 @@ +# seads - Search Engine ADs Scanner + +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) +[![GoDoc Card](https://godoc.org/github.com/andpalmier/seads?status.svg)](https://godoc.org/github.com/andpalmier/seads) +[![Go Report Card](https://goreportcard.com/badge/github.com/andpalmier/seads)](https://goreportcard.com/report/github.com/andpalmier/seads) +[![follow on X](https://img.shields.io/twitter/follow/andpalmier?style=social&logo=x)](https://x.com/intent/follow?screen_name=andpalmier) + +`seads` (Search Engine ADs Scanner) is a utility designed to automatically detect advertisements displayed on the most popular search engines when searching for user-submitted keywords. + +For a comprehensive guide on how to use `seads`, please refer to [this blog post](https://andpalmier.com/posts/seads/). + +![seads](https://github.com/andpalmier/seads/blob/main/img/seads.gif?raw=true) + +## Features: +- **Automated reporting**: Easily send reports of findings via email, Slack, or Telegram. +- **Concurrent Search**: Specify multiple headless instances to gather as many ads as possible concurrently. +- **Screenshot Support**: Capture screenshots of ads found in search engines for evidence. +- **Docker Support**: Install seads without affecting your local setup using Docker. + +## Known limitations: +- Due to the nature of search engine ads, a single search may not reveal all ads. Using concurrent headless browsers might slow down detection but ensures comprehensive ad gathering. +- Notifications via Slack and Telegram have character limits. Messages exceeding the limit won't be sent. + +## Installation + +### Download binary + +You can download `seads` from the [releases section](https://github.com/andpalmier/seads/releases).
+ +### Using Go install + +You can compile it from source by running: + +``` +go install github.com/andpalmier/seads/cmd/seads@latest +``` + +### Using Docker: + +Clone the GitHub repo and run Docker: + +``` +docker build -t seads . +docker run -it -v "$(pwd)":/mnt seads -h +``` + +## Usage + +You can run `seads` with the following flags: + +``` + -config string (REQUIRED) + path to config file (default "config.yaml"). + -concurrency int + number of concurrent headless browsers (default 4). + -cleanlinks + print clear links in output (links will remain defanged in notifications). + -notify + notify if unexpected domains are found. + -screenshot string + path to store screenshots (if empty, the screenshot feature will be disabled). +``` + +Example: + +``` +seads -config config.yaml -notify +``` + +Docker example: + +``` +docker run -it -v "$(pwd)":/mnt seads -config /mnt/config.yaml -notify +``` + +## How to use + +After installing `seads`, create a `config.yaml` file with the following structure: + +```yaml +mail: + host: MAILHOST + port: 587 + username: USERNAME + password: PASSWORD + from: FROMADDRESS + recipients: [RECIPIENTADDRESS#1,RECIPIENTADDRESS#2] + +slack: + token: SLACKTOKEN + channels: [CHANNEL#1,CHANNEL#2] + +telegram: + token: TELEGRAMTOKEN + chatid: [CHATID#1,CHATID#2] + +queries: + - query: "ipad" + expected-domains: [apple.com, amazon.com] + + - query: "as roma" + expected-domains: [] +``` + +The field `expected-domains` is used to specify domains we are expecting to appear in the ads of search engines while searching for the specified keywords. +Domains in `expected-domains` will still appear in the output of `seads`, but won’t be sent in the notification. 
+ +Run `seads` with the following command: + +``` +seads -config config.yaml -screenshot scr -notify +``` + +output example: + +![seads](https://github.com/andpalmier/seads/blob/main/img/seads.gif?raw=true) + +screenshots example: + +![seads_yahoo_apple](https://github.com/andpalmier/seads/blob/main/img/example-yahoo-apple.png?raw=true) + +notification example: + +``` +Here are the "unexpected domains" found during the last execution of seads: + +Message creation date: 2024-03-12 22:28:14 + +* Search engine: Yahoo + Search term: apple + Domain: reparaturpc[.]ch + Full link: www[.]https://reparaturpc[.]ch/de/?msclkid=75c3ce8f8942156ac179ab7f41a03704 + +* Search engine: Yahoo + Search term: apple + Domain: fust[.]ch + Full link: https://www[.]fust[.]ch/de/marken/apple[.]html?&msclkid=a836011a07061ba4052864eacfe7d0fd&utm_source=bing&utm_medium=cpc&utm_campaign=Bing%20-%20NBrand%20-%20S%20-%20D%20-%20MM%20PC%20Marke%20Apple&utm_term=apple&utm_content=1_Apple%3D2_undefined%C2%A63_Nbrand&gclid=a836011a07061ba4052864eacfe7d0fd&gclsrc=3p[.]ds + +* Search engine: Yahoo + Search term: apple + Domain: jobs[.]ch + Full link: https://www[.]jobs[.]ch/en/vacancies/?term=apple&utm_source=bing&utm_medium=search&utm_campaign=wb:jobs|tg:b2c|cn:ww|lg:en|ct:search,nonbrand,company|cd:company|mg:job-application|pd:y|tt:cpc|gt:keyword,nonbrand,company|gd:company&msclkid=17ac4d7d0b0616628f40288dc3e79a46&utm_term=apple&utm_content=gt%3Akeyword,nonbrand,company%7Cgd%3Acompany + +* Search engine: Yahoo + Search term: apple + Domain: amazon[.]com + Full link: https://www[.]amazon[.]com/s?k=applwe&adgrpid=1344703557775981&hvadid=84044278562817&hvbmt=be&hvdev=c&hvlocphy=3322&hvnetw=o&hvqmt=e&hvtargid=kwd-84044521042995%3Aloc-175&hydadcr=29387_14610683&tag=mh0b-20&ref=pd_sl_7xha1yy51_e + + +This message was automatically sent by seads (www.github.com/andpalmier/seads) +``` + + +## 3rd party libraries + +- Rod: [GitHub repo](https://github.com/go-rod/rod), 
[documentation](https://go-rod.github.io/) +- Shoutrrr: [GitHub repo](https://github.com/containrrr/shoutrrr), [documentation](https://containrrr.dev/shoutrrr/v0.8/) +- Fatih/color: [GitHub repo](https://github.com/fatih/color), [Go reference](https://pkg.go.dev/github.com/fatih/color) + +## Next steps + +- [ ] Add flag to allow submission of User-Agent string to be used in headless browsers +- [ ] Add more search engines diff --git a/cmd/seads/bing.go b/cmd/seads/bing.go new file mode 100644 index 0000000..6fc5920 --- /dev/null +++ b/cmd/seads/bing.go @@ -0,0 +1,50 @@ +package main + +import ( + "fmt" + "github.com/go-rod/rod" + "path/filepath" + "time" +) + +// getBingAds searches for ads on Bing for a given encoded string +func getBingAds(encoded string) ([]string, error) { + var ads []string + + // Create a new Rod browser instance + browser := rod.New().MustConnect().MustIncognito() + page := browser.MustPage() + defer browser.MustClose() + + wait := page.MustWaitNavigation() + // Open Bing search page and search for encoded string + page.MustNavigate(seURLs["Bing"] + encoded) + wait() + + // Get ad links from the search results + adList, err := page.Elements(`li.b_adTop a.b_restorableLink`) + if err != nil { + return nil, err + } + + // Open ads link in a new page to get URL + for _, ad := range adList { + href, err := ad.Attribute("href") + if err != nil { + return nil, err + } + adPage := browser.MustPage(*href) + defer adPage.Close() + wait := adPage.MustWaitNavigation() + wait() + ads = append(ads, adPage.MustInfo().URL) + } + + // Capture a screenshot if ads are found and screenshot path is provided + if len(ads) > 0 && len(*screenshotPath) > 0 { + filename := fmt.Sprintf("bing-%s-%d.png", encoded, time.Now().UnixNano()) + page.MustWaitStable().MustScreenshotFullPage(filepath.Join(*screenshotPath, filename)) + } + + return ads, nil +} diff --git a/cmd/seads/config.go b/cmd/seads/config.go new file mode 100644 index 0000000..a5bbb91 --- /dev/null +++ 
b/cmd/seads/config.go @@ -0,0 +1,36 @@ +package main + +import ( + "gopkg.in/yaml.v2" + "os" +) + +// Query represents a search query +type Query struct { + SearchTerm string `yaml:"query"` + ExpectedDomains []string `yaml:"expected-domains"` +} + +// Config holds the overall configuration +type Config struct { + TelegramNotifier *TelegramNotifier `yaml:"telegram"` + SlackNotifier *SlackNotifier `yaml:"slack"` + MailNotifier *MailNotifier `yaml:"mail"` + Queries []Query `yaml:"queries"` +} + +// parseConfig parses the specified config file +func parseConfig(configPath string) (Config, error) { + data, err := os.ReadFile(configPath) + if err != nil { + return Config{}, err + } + + var config Config + err = yaml.Unmarshal(data, &config) + if err != nil { + return Config{}, err + } + + return config, nil +} diff --git a/cmd/seads/ddg.go b/cmd/seads/ddg.go new file mode 100644 index 0000000..166246c --- /dev/null +++ b/cmd/seads/ddg.go @@ -0,0 +1,50 @@ +package main + +import ( + "fmt" + "github.com/go-rod/rod" + "path/filepath" + "time" +) + +// getDuckDuckGoAds searches for ads on DuckDuckGo for a given encoded string +func getDuckDuckGoAds(encoded string) ([]string, error) { + var ads []string + + // Create a new Rod browser instance + browser := rod.New().MustConnect().MustIncognito() + page := browser.MustPage() + defer browser.MustClose() + + wait := page.MustWaitNavigation() + // Open DuckDuckGo search page and search for encoded string + page.MustNavigate(seURLs["DuckDuckGo"] + encoded) + wait() + + // Get ad links from the search results + adList, err := page.Elements("li[data-layout=\"ad\"] a[data-testid=\"result-extras-url-link\"]") + if err != nil { + return nil, err + } + + // Open ads link in a new page to get URL + for _, ad := range adList { + href, err := ad.Attribute("href") + if err != nil { + return nil, err + } + adPage := browser.MustPage(*href) + defer adPage.Close() + wait := adPage.MustWaitNavigation() + wait() + ads = append(ads, 
adPage.MustInfo().URL) + } + + // Capture a screenshot if ads are found and screenshot path is provided + if len(ads) > 0 && len(*screenshotPath) > 0 { + filename := fmt.Sprintf("duckduckgo-%s-%d.png", encoded, time.Now().UnixNano()) + page.MustWaitStable().MustScreenshotFullPage(filepath.Join(*screenshotPath, filename)) + } + + return ads, nil +} diff --git a/cmd/seads/google.go b/cmd/seads/google.go new file mode 100644 index 0000000..e5f71ac --- /dev/null +++ b/cmd/seads/google.go @@ -0,0 +1,54 @@ +package main + +import ( + "fmt" + "github.com/go-rod/rod" + "path/filepath" + "time" +) + +// getGoogleAds searches for ads on Google for a given encoded string +func getGoogleAds(encoded string) ([]string, error) { + var ads []string + + // Create a new Rod browser instance + browser := rod.New().MustConnect().MustIncognito() + page := browser.MustPage() + defer browser.MustClose() + + wait := page.MustWaitNavigation() + page.MustNavigate(seURLs["Google"] + encoded) + wait() + + cookieButton := page.MustElements(`button#W0wltc`) + if len(cookieButton) > 0 { + cookieButton[0].MustClick() + } + + // Open Google search page and search for encoded string + adList, err := page.Elements(`div#tads a.sVXRqc`) + if err != nil { + return nil, err + } + + // Open ads link in a new page to get URL + for _, ad := range adList { + href, err := ad.Attribute("href") + if err != nil { + return nil, err + } + adPage := browser.MustPage(*href) + defer adPage.Close() + wait := adPage.MustWaitNavigation() + wait() + ads = append(ads, adPage.MustInfo().URL) + } + + // Capture a screenshot if ads are found and screenshot path is provided + if len(ads) > 0 && len(*screenshotPath) > 0 { + filename := fmt.Sprintf("google-%s-%d.png", encoded, time.Now().UnixNano()) + page.MustWaitStable().MustScreenshotFullPage(filepath.Join(*screenshotPath, filename)) + } + + return ads, nil +} diff --git a/cmd/seads/main.go b/cmd/seads/main.go new file mode 100644 index 0000000..4c9ed42 --- /dev/null +++ 
b/cmd/seads/main.go @@ -0,0 +1,98 @@ +package main + +import ( + "flag" + "fmt" + "log" +) + +var ( + configPath = flag.String("config", "config.yaml", "path to config file") + consumers = flag.Int("concurrency", 4, "number of concurrent headless browsers") + screenshotPath = flag.String("screenshot", "", "path to store screenshots (if empty, the screenshot feature will be disabled)") + cleanLinks = flag.Bool("cleanlinks", false, "print clear links in output (links will remain defanged in notifications)") + notify = flag.Bool("notify", false, "notify if unexpected domains are found (requires notifications fields in config.yaml)") + seURLs = map[string]string{ + "Google": "https://www.google.com/search?q=", + "Bing": "https://www.bing.com/search?q=", + "Yahoo": "https://search.yahoo.com/search?q=", + "DuckDuckGo": "https://duckduckgo.com/?ia=web&?q=", + } + sf = []SearchFunctions{ + {Name: "Google", Function: getGoogleAds}, + {Name: "Bing", Function: getBingAds}, + {Name: "Yahoo", Function: getYahooAds}, + {Name: "DuckDuckGo", Function: getDuckDuckGoAds}, + } +) + +// SearchFunc holds the search engine name and its corresponding function +type SearchFunctions struct { + Name string + Function func(string) ([]string, error) +} + +// ResultAd represents an ad result +type ResultAd struct { + Domain string + Link string +} + +// search return the ads found in the search engines for the specified config +func search(config Config) []string { + var toNotify []string + + for _, query := range config.Queries { + fmt.Printf("Searching for: '%s'\n", query.SearchTerm) + + for _, engine := range sf { + ads := searchAds(engine.Function, query, engine.Name) + resultAds, _ := GetResultAdsFromURLs(ads) + fmt.Println() + fmt.Printf("* Searching ads for '%s' on %s: ", + query.SearchTerm, engine.Name) + if len(resultAds) == 0 { + italic.Println("no ads found") + } else { + fmt.Println() + for _, resultAd := range resultAds { + if isExpectedDomain(resultAd.Domain, 
query.ExpectedDomains) { + printExpectedDomain(resultAd) + } else { + printUnexpectedDomain(resultAd) + if *notify { + toNotify = append(toNotify, formatNotification(engine.Name, + query.SearchTerm, resultAd)) + } + } + } + } + } + fmt.Println() + } + return toNotify +} + +func main() { + + flag.Parse() + + config, err := parseConfig(*configPath) + if err != nil { + if *configPath == "config.yaml" { + log.Fatalf("no config file found at config.yaml, please be sure to use " + + "-config to specify the config file path") + } + log.Fatalf("error parsing config file: %v\n", err) + } + + toNotify := search(config) + + fmt.Println() + + if *notify && len(toNotify) > 0 { + config.notify(toNotify) + } + + fmt.Println() +} diff --git a/cmd/seads/notify.go b/cmd/seads/notify.go new file mode 100644 index 0000000..1f5af75 --- /dev/null +++ b/cmd/seads/notify.go @@ -0,0 +1,121 @@ +package main + +import ( + "fmt" + "github.com/containrrr/shoutrrr" + "time" +) + +// Notifier interface used for notification channels +type Notifier interface { + SendMessage(message string) error +} + +// TelegramNotifier holds configurations for sending the message on Telegram +type TelegramNotifier struct { + Token string `yaml:"token"` + ChatId []string `yaml:"chatid"` +} + +// SendMessage sends the specified message on Telegram +func (tn *TelegramNotifier) SendMessage(message string) error { + chats := tn.ChatId[0] + if len(tn.ChatId) > 1 { + for _, mr := range tn.ChatId[1:] { + chats += "," + mr + } + } + + url := fmt.Sprintf("telegram://%s@telegram?channels=%s", tn.Token, chats) + return shoutrrr.Send(url, message) +} + +// SlackNotifier holds configurations for sending the message on Slack +type SlackNotifier struct { + Token string `yaml:"token"` + Channels []string `yaml:"channels"` +} + +// SendMessage sends the specified message on Slack +func (sn *SlackNotifier) SendMessage(message string) error { + channels := sn.Channels[0] + if len(sn.Channels) > 1 { + for _, mr := range 
sn.Channels[1:] { + channels += "," + mr + } + } + + url := fmt.Sprintf("slack://%s@%s", sn.Token, channels) + return shoutrrr.Send(url, message) +} + +// MailNotifier holds configurations for sending the message via email +type MailNotifier struct { + Host string `yaml:"host"` + Port string `yaml:"port"` + Username string `yaml:"username"` + Password string `yaml:"password"` + Auth string `yaml:"auth"` + From string `yaml:"from"` + Recipients []string `yaml:"recipients"` +} + +// SendMessage sends the specified message via email +func (mn *MailNotifier) SendMessage(message string) error { + mailrecipients := mn.Recipients[0] + if len(mn.Recipients) > 1 { + for _, mr := range mn.Recipients[1:] { + mailrecipients += "," + mr + } + } + url := fmt.Sprintf("smtp://%s:%s@%s:%s/?from=%s&to=%s&subject=seadscan notification", + mn.Username, mn.Password, mn.Host, mn.Port, mn.From, mailrecipients) + return shoutrrr.Send(url, message) +} + +// notify creates the message to be sent and sends it using the specified notification services +func (config *Config) notify(toSend []string) { + message := createMessage(toSend) + + notifiers := []Notifier{} + if config.SlackNotifier != nil { + notifiers = append(notifiers, config.SlackNotifier) + } + if config.TelegramNotifier != nil { + notifiers = append(notifiers, config.TelegramNotifier) + } + if config.MailNotifier != nil { + notifiers = append(notifiers, config.MailNotifier) + } + + notificationSent := false + + for _, notifier := range notifiers { + err := notifier.SendMessage(message) + if err != nil { + fmt.Printf("error sending message via notifier: %v\n", err) + continue + } + notificationSent = true + } + if notificationSent { + fmt.Println("notifications sent!") + } +} + +// createMessage assembles the message to be sent over the specified notification channels +func createMessage(toSend []string) string { + message := "Here are the \"unexpected domains\" found during the last execution of seads:\n\n " + + "Message creation 
date: " + time.Now().Format(time.DateTime) + "\n\n" + for _, s := range toSend { + message += s + "\n" + } + message += "\nThis message was automatically sent by seads (www.github.com/andpalmier/seads)" + return message +} + +// formatNotification formats the notification message +func formatNotification(engineName, searchTerm string, resultAd ResultAd) string { + return fmt.Sprintf("* Search engine: %s\n\tSearch term: %s\n\tDomain: %s\n\tFull link: %s\n", + engineName, searchTerm, DefangURL(resultAd.Domain), DefangURL(resultAd.Link)) +} diff --git a/cmd/seads/utils.go b/cmd/seads/utils.go new file mode 100644 index 0000000..133b50d --- /dev/null +++ b/cmd/seads/utils.go @@ -0,0 +1,151 @@ +package main + +import ( + "errors" + "fmt" + "github.com/fatih/color" + "log" + "net/url" + "strings" + "sync" +) + +var ( + green = color.New(color.FgGreen) + italic = color.New(color.Italic) + red = color.New(color.FgRed) +) + +// removeDuplicates removes ads with same domain from the given list +func removeDuplicates(ads []string) ([]string, error) { + var results []string + seen := make(map[string]struct{}) + + for _, adURL := range ads { + adURL = normalizeURL(adURL) + parsedURL, err := url.Parse(adURL) + if err != nil { + return nil, err + } + domain := parsedURL.Host + if _, ok := seen[domain]; !ok { + results = append(results, adURL) + seen[domain] = struct{}{} + } + } + return results, nil +} + +// normalizeURL normalizes an ad URL by adding "www." and "https://" if missing +func normalizeURL(adURL string) string { + if !strings.Contains(adURL, "www.") { + adURL = "www." 
+ adURL + } + if !strings.Contains(adURL, "https://") { + adURL = "https://" + adURL + } + return adURL +} + +// EncodeString encodes an input string +func EncodeString(input string) string { + return url.QueryEscape(input) +} + +// DefangURL prevents a URL from being clickable +func DefangURL(url string) string { + return strings.ReplaceAll(url, ".", "[.]") +} + +// ExtractDomainFromURL extracts domain from a URL +func ExtractDomainFromURL(inputURL string) (string, error) { + if !strings.Contains(inputURL, "https") { + inputURL = "https://" + inputURL + } + parsedURL, err := url.Parse(inputURL) + if err != nil { + return "", err + } + host := parsedURL.Host + return strings.TrimPrefix(host, "www."), nil +} + +// GetResultAdsFromURLs gets ResultAd list from a list of ads +func GetResultAdsFromURLs(ads []string) ([]ResultAd, error) { + var results []ResultAd + uniqueAds, err := removeDuplicates(ads) + if err != nil { + return results, err + } + for _, ad := range uniqueAds { + domain, err := ExtractDomainFromURL(ad) + if err != nil { + return nil, errors.New("cannot get domain from following URL: " + ad) + } + results = append(results, ResultAd{domain, ad}) + } + return results, nil +} + +// isExpectedDomain checks if the domainAd is in the expectedDomains list +func isExpectedDomain(domainAd string, expectedDomains []string) bool { + for _, domain := range expectedDomains { + if domainAd == domain { + return true + } + } + return false +} + +// printExpectedDomain prints the expected domain +func printExpectedDomain(resultAd ResultAd) { + green.Printf("[+] expected domain: ") + if *cleanLinks { + fmt.Printf("%s => %s\n", resultAd.Domain, resultAd.Link) + } else { + fmt.Printf("%s => %s\n", DefangURL(resultAd.Domain), DefangURL(resultAd.Link)) + } +} + +// printUnexpectedDomain prints the unexpected domain +func printUnexpectedDomain(resultAd ResultAd) { + red.Printf("[!] 
unexpected domain: ") + if *cleanLinks { + fmt.Printf("%s => %s\n", resultAd.Domain, resultAd.Link) + } else { + fmt.Printf("%s => %s\n", DefangURL(resultAd.Domain), DefangURL(resultAd.Link)) + } +} + +// searchAds searches ads using a specific engine function +func searchAds(engineFunc func(string) ([]string, error), query Query, engineName string) []string { + encoded := EncodeString(query.SearchTerm) + + adsFoundChan := make(chan []string, *consumers) + var wg sync.WaitGroup + + searchFunc := func(i int) { + defer wg.Done() + ads, err := engineFunc(encoded) + if err != nil { + log.Printf("Error searching %s ad: %v", engineName, err) + return + } + adsFoundChan <- ads + } + + for i := 0; i < *consumers; i++ { + wg.Add(1) + go searchFunc(i) + } + + wg.Wait() + close(adsFoundChan) + + adsFound := make([]string, 0) + for ads := range adsFoundChan { + adsFound = append(adsFound, ads...) + } + + return adsFound +} diff --git a/cmd/seads/yahoo.go b/cmd/seads/yahoo.go new file mode 100644 index 0000000..e2bd779 --- /dev/null +++ b/cmd/seads/yahoo.go @@ -0,0 +1,56 @@ +package main + +import ( + "fmt" + "github.com/go-rod/rod" + "path/filepath" + "time" +) + +// getYahooAds searches for ads on Yahoo for a given encoded string +func getYahooAds(encoded string) ([]string, error) { + var ads []string + + // Create a new Rod browser instance + browser := rod.New().MustConnect().MustIncognito() + page := browser.MustPage() + defer browser.MustClose() + + wait := page.MustWaitNavigation() + // Open Yahoo search page and scroll to click "reject cookie" button if present + page.MustNavigate(seURLs["Yahoo"] + encoded) + wait() + + scrollButtons := page.MustElements(`button#scroll-down-btn`) + if len(scrollButtons) > 0 { + scrollButtons[0].MustClick() + page.MustElement(`button[value="reject"`).MustClick() + } + + // Get ad links from the search results + adList, err := page.Elements(`ol.searchCenterTopAds a[data-matarget="ad"]`) + if err != nil { + return nil, err + } + + // Open 
ads link in a new page to get URL + for _, ad := range adList { + href, err := ad.Attribute("href") + if err != nil { + return nil, err + } + adPage := browser.MustPage(*href) + defer adPage.Close() + wait := adPage.MustWaitNavigation() + wait() + ads = append(ads, adPage.MustInfo().URL) + } + + // Capture a screenshot if ads are found and screenshot path is provided + if len(ads) > 0 && len(*screenshotPath) > 0 { + filename := fmt.Sprintf("yahoo-%s-%d.png", encoded, time.Now().UnixNano()) + page.MustWaitStable().MustScreenshotFullPage(filepath.Join(*screenshotPath, filename)) + } + + return ads, nil +} diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..eb43e2d --- /dev/null +++ b/config.yaml @@ -0,0 +1,22 @@ +mail: + host: MAILHOST + port: 587 + username: USERNAME + password: PASSWORD + from: FROMADDRESS + recipients: [RECIPIENTADDRESS#1,RECIPIENTADDRESS#2] + +slack: + token: SLACKTOKEN + channels: [CHANNEL#1,CHANNEL#2] + +telegram: + token: TELEGRAMTOKEN + chatid: [CHATID#1,CHATID#2] + +queries: + - query: "ipad" + expected-domains: [apple.com, amazon.com] + + - query: "as roma" + expected-domains: [] diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..0b9550e --- /dev/null +++ b/go.mod @@ -0,0 +1,21 @@ +module github.com/andpalmier/seads + +go 1.22 + +require ( + github.com/containrrr/shoutrrr v0.8.0 + github.com/fatih/color v1.16.0 + github.com/go-rod/rod v0.114.7 + gopkg.in/yaml.v2 v2.4.0 +) + +require ( + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ysmood/fetchup v0.2.3 // indirect + github.com/ysmood/goob v0.4.0 // indirect + github.com/ysmood/got v0.34.1 // indirect + github.com/ysmood/gson v0.7.3 // indirect + github.com/ysmood/leakless v0.8.0 // indirect + golang.org/x/sys v0.16.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..924bde5 --- /dev/null +++ b/go.sum @@ -0,0 +1,59 @@ +github.com/containrrr/shoutrrr v0.8.0 
h1:mfG2ATzIS7NR2Ec6XL+xyoHzN97H8WPjir8aYzJUSec= +github.com/containrrr/shoutrrr v0.8.0/go.mod h1:ioyQAyu1LJY6sILuNyKaQaw+9Ttik5QePU8atnAdO2o= +github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= +github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= +github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= +github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-rod/rod v0.114.7 h1:h4pimzSOUnw7Eo41zdJA788XsawzHjJMyzCE3BrBww0= +github.com/go-rod/rod v0.114.7/go.mod h1:aiedSEFg5DwG/fnNbUOTPMTTWX3MRj6vIs/a684Mthw= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/jarcoal/httpmock v1.3.0 h1:2RJ8GP0IIaWwcC9Fp2BmVi8Kog3v2Hn7VXM3fTd+nuc= +github.com/jarcoal/httpmock v1.3.0/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU= +github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts= +github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= +github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= +github.com/ysmood/fetchup v0.2.3 h1:ulX+SonA0Vma5zUFXtv52Kzip/xe7aj4vqT5AJwQ+ZQ= +github.com/ysmood/fetchup v0.2.3/go.mod h1:xhibcRKziSvol0H1/pj33dnKrYyI2ebIvz5cOOkYGns= +github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ= +github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18= +github.com/ysmood/gop v0.0.2 h1:VuWweTmXK+zedLqYufJdh3PlxDNBOfFHjIZlPT2T5nw= +github.com/ysmood/gop v0.0.2/go.mod h1:rr5z2z27oGEbyB787hpEcx4ab8cCiPnKxn0SUHt6xzk= +github.com/ysmood/got v0.34.1 h1:IrV2uWLs45VXNvZqhJ6g2nIhY+pgIG1CUoOcqfXFl1s= +github.com/ysmood/got v0.34.1/go.mod h1:yddyjq/PmAf08RMLSwDjPyCvHvYed+WjHnQxpH851LM= +github.com/ysmood/gotrace v0.6.0 h1:SyI1d4jclswLhg7SWTL6os3L1WOKeNn/ZtzVQF8QmdY= +github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM= +github.com/ysmood/gson v0.7.3 h1:QFkWbTH8MxyUTKPkVWAENJhxqdBa4lYTQWqZCiLG6kE= +github.com/ysmood/gson v0.7.3/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= +github.com/ysmood/leakless v0.8.0 h1:BzLrVoiwxikpgEQR0Lk8NyBN5Cit2b1z+u0mgL4ZJak= +github.com/ysmood/leakless v0.8.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ= +golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= +golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.16.0 
h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= +golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4= +golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s= +google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/img/example-yahoo-apple.png b/img/example-yahoo-apple.png new file mode 100644 index 0000000..4c45669 Binary files /dev/null and b/img/example-yahoo-apple.png differ diff --git a/img/seads.gif b/img/seads.gif new file mode 100644 index 0000000..f28a4d2 Binary files /dev/null and b/img/seads.gif differ