Skip to content
This repository has been archived by the owner on Apr 27, 2024. It is now read-only.

Commit

Permalink
Merge pull request #41 from john-doherty/bot-detection
Browse files Browse the repository at this point in the history
Added bot detection
  • Loading branch information
john-doherty authored Mar 11, 2024
2 parents 304142a + 2889d14 commit 3f598a3
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 4 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ Name | Type | Description
`dev` | `boolean` | `true` if event captured when running locally
`ad` | `object` | object containing online ad click ids
`utm` | `object` | object containing UTM tracking values
`bot` | `boolean` | `true` if the script is running in a known bot or crawler (detected from the user agent)

## Contributing

Expand Down
4 changes: 2 additions & 2 deletions dist/mixpanel-lite.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "mixpanel-lite",
"version": "1.5.10",
"version": "1.6.0",
"description": "A lightweight alternative to mixpanel-js with offline support for PWAs",
"main": "src/mixpanel-lite.js",
"scripts": {
Expand Down
70 changes: 70 additions & 0 deletions src/mixpanel-lite.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@
eventData.properties.dev = true;
}

var isBotRunner = isBot();
if (isBotRunner) {
eventData.properties.bot = true;
}

// save the event
transactions.add(eventData);

Expand Down Expand Up @@ -727,6 +732,71 @@
return false;
}

/**
 * Checks if a user agent is from a known bot
 *
 * The check is a case-insensitive substring match against a fixed list of
 * bot/crawler signatures, so it only flags agents that identify themselves.
 *
 * @param {string} [userAgent] The user agent string to check. When omitted (or not a string),
 *                             navigator.userAgent is used if a navigator object is available.
 * @return {boolean} True if the user agent is from a known bot, false otherwise.
 */
function isBot(userAgent) {

    var rawUa = '';

    if (typeof userAgent === 'string') {
        rawUa = userAgent;
    }
    else if (typeof navigator !== 'undefined' && navigator && navigator.userAgent) {
        // guard the lookup so environments without a navigator object do not throw
        rawUa = navigator.userAgent;
    }

    // signatures are stored lowercase, so normalise the input once
    var ua = String(rawUa).toLowerCase();

    // NOTE: some broad entries ('slurp', 'yandex', 'spider', 'crawler') already cover
    // more specific ones in this list; the specific ones are kept for readability
    var botUAs = [
        'ahrefsbot',
        'ahrefssiteaudit',
        'baiduspider',
        'bingbot',
        'bingpreview',
        'chrome-lighthouse',
        'facebookexternal',
        'petalbot',
        'pinterest',
        'screaming frog',
        'yahoo! slurp',
        'yandexbot',
        'adsbot-google',
        'apis-google',
        'duplexweb-google',
        'feedfetcher-google',
        'google favicon',
        'google web preview',
        'google-read-aloud',
        'googlebot',
        'googleweblight',
        'mediapartners-google',
        'storebot-google',
        'twitterbot',
        'coccocbot',
        'duckduckbot',
        'barkrowler',
        'applebot',
        'tweetmemebot',
        'bitlybot',
        'linkdexbot',
        'linkedinbot',
        'mj12bot',
        'seznambot',
        'slurp',
        'sogou',
        'teoma',
        'yandex',
        'alexabot',
        'semrushbot',
        'ubot',
        'crawler',
        'spider',
        'exabot'
    ];

    for (var i = 0; i < botUAs.length; i++) {
        if (ua.indexOf(botUAs[i]) !== -1) {
            return true;
        }
    }

    return false;
}

/**
* Checks if the script is running locally
* @returns {boolean} true if running locally, otherwise false
Expand Down
131 changes: 131 additions & 0 deletions tests/mixpanel-lite-bot-spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
var path = require('path');
var puppeteer = require('puppeteer');
var utils = require('./utils');

// test page loaded into the browser before every spec
var url = 'file://' + path.join(__dirname, 'environment.html');
var page = null;
var browser = null;

describe('mixpanel-lite: .bot', function () {

    // every user agent here must be flagged as a bot; each one gets its own spec
    // so a failure always names the offending user agent and runs are reproducible
    var botUserAgents = [
        'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)',
        'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
        'DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)',
        'Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)',
        'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)',
        'Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)',
        'Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)',
        'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
    ];

    /**
     * Sets the mixpanel token, sends a single event and resolves with the decoded
     * payload of the first request made to the /track endpoint
     * @param {string} token - mixpanel token to initialise with
     * @param {string} eventName - name of the event to send
     * @returns {Promise<object>} decoded JSON payload of the captured /track request
     */
    async function sendEventAndCapturePayload(token, eventName) {

        // setup mixpanel
        await utils.setMixpanelToken(page, token);

        // listen for track requests (must be registered before the event is sent)
        var trackRequests = utils.waitForPuppeteerRequests(page, 1, 'https://api.mixpanel.com/track');

        // send event
        await utils.sendMixpanelEvent(page, eventName);

        // now wait for requests to be sent
        var results = await trackRequests;

        // decode the data and convert to JSON object so we can inspect
        return utils.getJsonPayloadFromMixpanelUrl(results[0].url);
    }

    // create a new browser instance before each test
    beforeEach(async function () {

        // launch a new browser instance
        browser = await puppeteer.launch({
            headless: true,
            args: ['--no-sandbox', '--disable-setuid-sandbox']
        });

        // get page
        page = (await browser.pages())[0];

        // navigate to the test page
        await page.goto(url);
    });

    afterEach(async function () {

        try {
            if (page) {
                // clear anything the library persisted during the test
                await page.evaluate(function () {
                    localStorage.removeItem('mixpanel-lite');
                });

                await utils.sleep(250);
            }
        }
        finally {
            // always close the browser, even if the cleanup above failed
            if (browser) {
                await browser.close();
            }

            page = null;
            browser = null;
        }
    });

    botUserAgents.forEach(function (botUserAgent) {

        it('should send .bot=true to /track endpoint if bot detected (' + botUserAgent + ')', async function () {

            var now = (new Date()).getTime();
            var token = 'test-token-' + now;
            var eventName = 'test-event-' + now;

            // override navigator.userAgent so the page reports a bot user agent;
            // awaited so the override is in place before any event is sent
            await page.evaluate(function (ua) {
                Object.defineProperty(navigator, 'userAgent', {
                    get: function () { return ua; }
                });
            }, botUserAgent);

            await page.setRequestInterception(true);

            var eventPayload = await sendEventAndCapturePayload(token, eventName);

            // check the tracking data we sent is correct
            expect(eventPayload?.event).toEqual(eventName);
            expect(eventPayload?.properties?.bot).toEqual(true);
        });
    });

    it('should NOT send .bot=true to /track endpoint if not a bot', async function () {

        var now = (new Date()).getTime();
        var token = 'test-token-' + now;
        var eventName = 'test-event-' + now;

        await page.setRequestInterception(true);

        // make the headless browser look like a regular desktop browser
        await page.evaluate(function () {

            // override navigator.userAgent with a regular desktop Chrome user agent
            Object.defineProperty(navigator, 'userAgent', {
                get: function () { return 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'; }
            });

            // return false for `navigator.webdriver` check
            Object.defineProperty(navigator, 'webdriver', {
                get: function () { return false; }
            });
        });

        var eventPayload = await sendEventAndCapturePayload(token, eventName);

        // check the tracking data we sent is correct
        expect(eventPayload?.event).toEqual(eventName);
        expect(eventPayload?.properties?.bot).toBeUndefined();
    });
});

0 comments on commit 3f598a3

Please sign in to comment.