EthanBrodeur · thomasgales · Apr 19, 2020 · Apr 23, 2020
diff --git a/README.md b/README.md
@@ -5,9 +5,17 @@ Backs up running data from logarun.com to an Excel file with BeautifulSoup + Pan
 # Usage
 To pull data, run the following in terminal when inside the Save-A-Log directory:
 ```
-$ python logarunPull.py myUsername
+$ python logarunPull.py myUsername [-d number of days back]
 ```
+
+Script will prompt user to input the account's password. If the account is public, just leave blank.
+
 Additional modifiers for date ranges, output formatting forthcoming.
 
 # Notes
-Your Logarun profile needs to be public for the script to execute.
+Usage requires the following to be installed:
+- FireFox
+- BeautifulSoup4
+- numpy
+- pandas
+- geckodriver
diff --git a/logarun_pull.py b/logarun_pull.py
@@ -4,29 +4,41 @@
 import argparse
 import csv
 from datetime import datetime, timedelta
+
+from getpass import getpass
+from selenium import webdriver
+from selenium.webdriver.firefox.options import Options
+
 import re
 import sys
 
 from bs4 import BeautifulSoup
 import numpy as np
 import pandas as pd
-import urllib2
+try:
+    # For Python 3.0 and later
+    from urllib.request import urlopen
+except ImportError:
+    # Fall back to Python 2's urllib2
+    from urllib2 import urlopen
 
 
 def main():
 	parser = argparse.ArgumentParser(description="Query logarun.com for information about a user's training history.")
-	parser.add_argument('username', metavar='u', help='Username to query')
-	parser.add_argument('daysBack', metavar='db', help='Number of days to go back, from today', default=1, type=int)
+	parser.add_argument('username', help='Username to query')
+	parser.add_argument('-d', '--daysBack', help='Number of days to go back, from today', default=1, type=int)
 	args=parser.parse_args()
 
 	# Import username from command line
-	if args.username == "":
+	if args.username == "" or args.username is None:
 		print("Terminating. You need to supply a username.")
 		sys.exit()
 	print("username = " + args.username)
-	print('days back = %s', args.daysBack)
+	print('days back =', args.daysBack)
 	current_day = datetime.today()
 
+	password = getpass("Password (if account is public, leave blank):")
+
 
 	# BUILD URL
 	# A Logarun URl looks like:
@@ -37,42 +49,54 @@ def main():
 	# args.daysBack = 50
 	index = pd.date_range(current_day - timedelta(days=args.daysBack), periods=args.daysBack, freq='D')
 
-	headers =["Date", "Log Title", "Log Note", "Activity", "Activity Distance", "Activity Type", "Activity Time", "Activity Pace", "Comments"]
+	headers = ["Date", "Log Title", "Log Note", "Activity", "Activity Distance", "Activity Type", "Activity Time", "Activity Pace", "Comments"]
 	df = pd.DataFrame(columns=headers)
 
+	driver = None
+	if password != '':
+		driver = attempt_login(args.username, password)
+		if not logged_in(driver):
+			print("Login failed. Are you sure your username and password are correct?")
+			driver.quit()
+			return
+
 	while args.daysBack >= 0:
 
 		url_query = base_URL + args.username + date_format(current_day)
 		print("We will query: " + url_query)
 
+		soup = None
 		# Paging logarun.com... this is what takes awhile
+		if (password != ''):
+			driver.get(url_query)
+			soup = BeautifulSoup(driver.page_source, 'html.parser')
+		else:
+			try:
+				page = urlopen(url_query)
+				soup = BeautifulSoup(page, 'html.parser')
+			except URLError:
+				print('error was ' + URLError)
+
 		try:
-			page = urllib2.urlopen(url_query)
-		except URLError:
-			print('error was ' + URLError)
-
-		soup = BeautifulSoup(page, 'html.parser')
-
-		try:
-			df.append(get_activity(unicode("Bike"), soup, current_day))
+			df.append(get_activity(str("Bike"), soup, current_day))
 		except TypeError:
 			pass
 			# print("bike didn't happen that day")
 
 		try:
-			df = df.append(get_activity(unicode("Run"), soup, current_day))
+			df = df.append(get_activity(str("Run"), soup, current_day))
 		except TypeError:
 			pass
 			#print("run didn't happen that day")
 
 		try:
-			df.append(get_activity(unicode("Swim"), soup, current_day))
+			df.append(get_activity(str("Swim"), soup, current_day))
 		except TypeError:
 			#print("swim didn't happen that day")
 			pass
 
 		try:
-			df.append(get_activity(unicode("Elliptical"), soup, current_day))
+			df.append(get_activity(str("Elliptical"), soup, current_day))
 		except TypeError:
 			pass
 			#print("elliptical didn't happen that day")
@@ -83,7 +107,11 @@ def main():
 		args.daysBack -= 1
 		current_day = subtract_day(current_day)
 
+	if password != '':
+		driver.quit()
 	df.to_csv("myLog.csv", index = False)
+	print('Output log to myLog.csv.')
+
 
 """Utility Functions"""
 def date_format(date):
@@ -104,6 +132,11 @@ def grab_comments(soup, date):
 	list_of_comments = []
 	dates = []
 	for line in comments.findAll('li'):
+		# When user is logged in, logarun includes a <li> tag with class='add' for the "add comment" button.
+		# For actual comments, logarun does not include a class attribute for the <li> tag.
+		addComment = line.get('class')
+		if (addComment is not None and line.get('class')[0] == 'add'):
+			continue
 		author = line.find('a').text
 		comment = line.find_all('p')[-1].text
 		full_comment = author + ': ' + comment
@@ -120,7 +153,7 @@ def get_activity(activity_string, soup, date):
 	"""pull an activity from a day.	
 	activity_string: "Run", "Bike", "Elliptical", etc.
 	"""
-	if not isinstance(activity_string, basestring):
+	if not isinstance(activity_string, str):
 		print("error! Didn't pass a string to get_activity")
 		print('you passed:')
 		print(type(activity_string))
@@ -167,14 +200,47 @@ def get_activity(activity_string, soup, date):
 			'Activity Type' : activity_types,
 			'Activity Time' : activity_times,
 			'Activity Pace' : activity_paces}) 
-		# print (df)
 		return df
 
 	else:
 		return 0
 
-
-if __name__ == "__main__":
-	main()
 
+def logged_in(driver):
+	"""
+	Checks if a user is logged in.
+
+	:param driver: webdriver. Function will test if driver is logged into logarun.
+	:returns: True if logged in, false otherwise.
+	"""
+	driver.get("http://www.logarun.com")
+	soup = BeautifulSoup(driver.page_source, 'html.parser')
+	searchBar = soup.find('div', attrs={'class': 'topRight'})
+	for element in searchBar.findAll('a'):
+		title = element.get('title')
+		if (title is not None and title == 'Log into your account.'):
+			return False
+	return True
+
+
+def attempt_login(username, password):
+	"""
+	-Credit to https://github.com/adzienis/Lazy_Runner for this function. For some odd
+		reason, their python script goes from strava TO logarun...
+	-For some reason webdriver is able to understand logarun's mangled HTML (missing closing tags)
+		but Mechanize could not. 
+
+	:returns: webdriver. May or may not be successfully logged into logarun.
+	"""
+	options = Options()
+	options.headless = True
+	driver = webdriver.Firefox(options=options)
+	driver.get("http://www.logarun.com/logon.aspx")
+	driver.find_element_by_id("LoginName").send_keys(username)
+	driver.find_element_by_id("Password").send_keys(password)
+	driver.find_element_by_id("LoginNow").click()
+	return driver
 
+
+if __name__ == "__main__":
+	main()