168 changes: 104 additions & 64 deletions tumbdl.sh
@@ -27,110 +27,150 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# along with this program. If not, see <https://www.gnu.org/licenses/>.

url=$1
targetDir=$2

# global curl options
# to disable progress bar, replace with -s
# to enable verbose mode, add -v
curlOptions='--progress-bar'
# to disable progress bar, replace with '-s'
# to enable verbose mode, add '-v'
curlOptions=( '--progress-bar' )
userAgent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0'

# check usage
if [ $# -ne 2 ]; then
echo "Usage: tumbdl [URL] [DIR]"
echo ""
echo "URL: URL of tumblelog, e.g. prostbote.tumblr.com"
echo "DIR: directory to put images in, e.g. prostbote"
exit
HELP='Usage: tumbdl [-c file] [-u string] URL DIR
URL: URL of tumblelog, e.g. prostbote.tumblr.com
DIR: directory to put images in, e.g. prostbote

Safemode options:
-c <cookie file>
-u <user agent>
Safemode works with a login cookie; part of this cookie is a hash that includes your user agent, so you need to supply both. Using Firefox ESR and an add-on, you can get both of these things.
You can get your cookies by installing:
Firefox ESR: https://www.mozilla.org/en-US/firefox/organizations/all/
Add-on: https://addons.mozilla.org/en-US/firefox/addon/export-cookies/
Log into Tumblr, then from the "Tools" menu, select "Export cookies". Use the full path to the cookie file for the -c argument.

You can get your user agent by opening this URL:
https://www.google.com/search?q=what+is+my+user+agent+string'
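# Example safemode invocation (illustrative only: the cookie path is a
# placeholder; the user agent and blog name are the default/example values above):
#   ./tumbdl.sh -c /path/to/cookies.txt \
#     -u 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0' \
#     prostbote.tumblr.com prostbote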
cookieJar=True
while getopts c:u:h opts; do
case "${opts}" in
h)
echo -e "${HELP}"
exit 0
;;
c)
cookieFile="${OPTARG}"
cookieJar=False
if [ ! -e "${cookieFile}" ]; then
echo "Unable to find ${cookieFile}, please check and try again" >&2
exit 1
fi
;;
u)
userAgent="${OPTARG}"
;;
esac
done
shift $(( OPTIND - 1))
url="${1}"
targetDir="${2}"
if [ "${#}" -ne 2 ]; then
echo -e "${HELP}"
exit 1
fi

# sanitize input url
url=$(echo "$url" | sed 's/\/$//g')
url=$( sed 's/\/$//g' <<< "${url}")

# create target dir
mkdir "$targetDir"
touch "$targetDir/articles.txt"
mkdir "${targetDir}"
touch "${targetDir}/articles.txt"

# create cookie jar (not really needed atm)
cookieFile="$(mktemp 2>/dev/null || mktemp -t 'mytmpdir')"

if [ -z "${cookieFile}" ]; then
cookieFile="$(mktemp 2>/dev/null || mktemp -t 'mytmpdir')"
fi
# get first archive page
archiveLink="/archive/"

# loop over archive pages
endOfArchive=0
while [[ $endOfArchive -ne 1 ]]
while [[ ${endOfArchive} -ne 1 ]]
do
# get archive page
archivePage=$(curl $curlOptions -c $cookieFile --referer "http://$url" -A "$userAgent" "$url$archiveLink")
echo "Retrieving archive page $url$archiveLink..."
# If we are passed a cookie file, just use it right off, don't init it with -c
if [ "${cookieJar}" == 'True' ]; then
archivePage=$(curl "${curlOptions[@]}" -c "${cookieFile}" --referer "https://${url}" -A "${userAgent}" "${url}${archiveLink}")
else
archivePage=$(curl "${curlOptions[@]}" -b "${cookieFile}" --referer "https://${url}" -A "${userAgent}" "${url}${archiveLink}")
fi
echo "Retrieving archive page ${url}${archiveLink}..."

# extract links to posts
monthPosts=$(echo "$archivePage" | grep -o -P "/post/[0-9]*.*?\"" | sed 's/"//g')
monthPosts=$( grep -o -P "/post/[0-9]*.*?\"" <<< "${archivePage}" | sed 's/"//g')

# process all posts on this archive page
for postURL in $(echo "$monthPosts")
for postURL in ${monthPosts}
do
# check if post page has already been processed before
if grep -Fxq "$postURL" "$targetDir/articles.txt"
if grep -Fxq "${postURL}" "${targetDir}/articles.txt"
then
echo "Already got $url$postURL, skipping."
echo "Already got ${url}${postURL}, skipping."
else
# get the image links (can be multiple images in sets)
echo "Retrieving post $url$postURL..."
postPage=$(curl $curlOptions -b $cookieFile --referer "http://$url$archiveLink" -A "$userAgent" "$url$postURL")
imageLinks=$(echo "$postPage" | grep -o -P "http[s]*://([0-9]*.)?media\.tumblr\.com/([A-Za-z0-9]*/)?tumblr_[A-Za-z0-9]*_[0-9]*\.[a-z]*" | sort | uniq)
echo "Retrieving post ${url}${postURL}..."
postPage=$(curl "${curlOptions[@]}" -b "${cookieFile}" --referer "https://${url}${archiveLink}" -A "${userAgent}" "${url}${postURL}")
imageLinks=$( grep -o -P "http[s]*://([0-9]*.)?media\.tumblr\.com/([A-Za-z0-9]*/)?tumblr_[A-Za-z0-9]*_[0-9]*\.[a-z]*" <<< "${postPage}" | sort | uniq)
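# (illustrative example of a URL the pattern above matches; the hash and
#  filename are made up: https://64.media.tumblr.com/0a1b2c3d/tumblr_abc123_1280.jpg)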
# remove resolution info from image filename
baseImages=$(echo "$imageLinks" | grep -o "tumblr_.*$" | sed 's/_[0-9]*\.\w*//g' | uniq)
baseImages=$( grep -o "tumblr_.*$" <<< "${imageLinks}" | sed 's/_[0-9]*\.\w*//g' | uniq)
# if we encounter any download errors, don't mark the post as archived
curlError=0

# determine the highest available resolution and download image
if [ ! -z "$baseImages" ]
if [ ! -z "${baseImages}" ]
then

for image in $(echo "$baseImages")
for image in ${baseImages}
do
# get the image name of image with highest resolution
maxResImage=$(echo "$imageLinks" | grep -o "$image.*" | sort -n | head -n 1)
maxResImage=$( grep -o "${image}.*" <<< "${imageLinks}" | sort -n | head -n 1)
# get full image url
maxResImageURL=$(echo "$imageLinks" | grep "$maxResImage")
maxResImageURL=$( grep "${maxResImage}" <<< "${imageLinks}")
# download image (if it doesn't exist)
if [ -e "$targetDir/$maxResImage" ]
if [ -e "${targetDir}/${maxResImage}" ]
then
echo "Image exists, skipping."
else
echo "Downloading image $maxResImageURL..."
curl $curlOptions -b $cookieFile --referer "http://$url$postURL" -A "$userAgent" -o "$targetDir/$maxResImage" "$maxResImageURL"
if [ ! 0 -eq $? ]; then curlError=1; fi;
echo "Downloading image ${maxResImageURL}..."
if ! curl "${curlOptions[@]}" -b "${cookieFile}" --referer "https://${url}${postURL}" -A "${userAgent}" -o "${targetDir}/${maxResImage}" "${maxResImageURL}" ;then
curlError=1
fi
fi
done
else
# no images found, check for video links
echo "No images found, checking for videos"

# check for tumblr hosted videos
videoPlayers=$(echo "$postPage" | grep -o -P "http[s]*://www.tumblr.com/video/.*/[0-9]*/[0-9]*/" | sort | uniq)
for video in $(echo "$videoPlayers")
videoPlayers=$( grep -o -P "http[s]*://www.tumblr.com/video/.*/[0-9]*/[0-9]*/" <<< "${postPage}" | sort | uniq)
for video in ${videoPlayers}
do
echo "Found tumblr-hosted video $video"
echo "Found tumblr-hosted video ${video}"
# get video link and type
videoSource=$(curl $curlOptions -b $cookieFile --referer "http://$url$postURL" -A "$userAgent" "$video" | grep -o -P "<source src=\"http[s]*://[^.]*.tumblr.com/video_file/.*?>")
videoSource=$(curl "${curlOptions[@]}" -b "${cookieFile}" --referer "https://${url}${postURL}" -A "${userAgent}" "${video}" | grep -o -P "<source src=\"http[s]*://[^.]*.tumblr.com/video_file/.*?>")
# get video url
videoURL=$(echo "$videoSource" | grep -o -P "http[s]*://[^.]*.tumblr.com/video_file/[[:0-9A-Za-z]*/]*[0-9]*/tumblr_[A-Za-z0-9]*")
videoURL=$( grep -o -P "http[s]*://[^.]*.tumblr.com/video_file/[[:0-9A-Za-z]*/]*[0-9]*/tumblr_[A-Za-z0-9]*" <<< "${videoSource}" )
# construct filename with extension from type string
videoFile=$(echo "$videoSource" | grep -o -P "tumblr_.*?>" | sed -e 's/<source src=\"//g' -e 's/\" type=\"video\//./g' -e 's/\">//g' -e 's/\//\_/g')
videoFile=$( grep -o -P "tumblr_.*?>" <<< "${videoSource}" | sed -e 's/<source src=\"//g' -e 's/\" type=\"video\//./g' -e 's/\">//g' -e 's/\//\_/g')
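# (illustrative example, assuming a source tag such as
#  <source src="https://www.tumblr.com/video_file/t:Ab12/123456789/tumblr_abc123" type="video/mp4">,
#  the sed chain above would yield videoFile=tumblr_abc123.mp4)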
# download video (if it doesn't exist)
if [ -e "$targetDir/$videoFile" ]
if [ -e "${targetDir}/${videoFile}" ]
then
echo "Video exists, skipping."
else
echo "Downloading video $videoURL"
curl $curlOptions -L -b $cookieFile --referer "http://$url$postURL" -A "$userAgent" -o "$targetDir/$videoFile" "$videoURL"
if [ ! 0 -eq $? ]; then curlError=1; fi;
echo "Downloading video ${videoURL}"
if ! curl "${curlOptions[@]}" -L -b "${cookieFile}" --referer "https://${url}${postURL}" -A "${userAgent}" -o "${targetDir}/${videoFile}" "${videoURL}"; then
curlError=1
fi
fi
done
# check if youtube-dl is available
@@ -139,29 +179,29 @@ do
# gather embedded video urls
otherSource=""
# check for instagram video
otherSource=$(echo "$otherSource"; echo "$postPage" | grep -o -P "http[s]*://www.instagram.com/p/[A-Za-z0-9]*")
otherSource="${otherSource} $( grep -o -P "http[s]*://www.instagram.com/p/[A-Za-z0-9]*" <<< "${postPage}")"
# check for youtube video
otherSource=$(echo "$otherSource"; echo "$postPage" | grep -o -P "http[s]*://www.youtube.com/embed/.*?\?" | sed 's/\?//g')
otherSource="${otherSource} $( grep -o -P "http[s]*://www.youtube.com/embed/.*?\?" <<< "${postPage}" | sed 's/\?//g' )"
# check for vine
otherSource=$(echo "$otherSource"; echo "$postPage" | grep -o -P "http[s]*://vine.co/v/.*?/")
otherSource="${otherSource} $( grep -o -P "http[s]*://vine.co/v/.*?/" <<< "${postPage}" )"
# check for vimeo
otherSource=$(echo "$otherSource"; echo "$postPage" | grep -o -P "http[s]*://player.vimeo.com/video/[0-9]*")
otherSource="${otherSource} $( grep -o -P "http[s]*://player.vimeo.com/video/[0-9]*" <<< "${postPage}" )"
# check for dailymotion
otherSource=$(echo "$otherSource"; echo "$postPage" | grep -o -P "http[s]*://www.dailymotion.com/embed/video/[A-Za-z0-9]*")
otherSource="${otherSource} $( grep -o -P "http[s]*://www.dailymotion.com/embed/video/[A-Za-z0-9]*" <<< "${postPage}" )"
# check for brightcove
otherSource=$(echo "$otherSource"; echo "$postPage" | grep -o -P "http[s]*://players.brightcove.net/.*/index.html\?videoId=[0-9]*")
otherSource="${otherSource} $( grep -o -P "http[s]*://players.brightcove.net/.*/index.html\?videoId=[0-9]*" <<< "${postPage}" )"
# add expressions for other video sites here like this:
#otherSource=$(echo "$otherSource"; echo "$postPage" | grep -o "http[s]*://www.example.com/embed/video/[A-Za-z0-9]*")
#otherSource="${otherSource} $( grep -o "http[s]*://www.example.com/embed/video/[A-Za-z0-9]*" <<< "${postPage}" )"

# if video links were found, try youtube-dl
if [ ! -z $otherSource ]
if [ ! -z "${otherSource}" ]
then
for otherVid in $(echo "$otherSource")
for otherVid in ${otherSource}
do
echo "Found embedded video $otherVid, attempting download via youtube-dl..."
youtube-dl "$otherVid" -o "$targetDir/%(title)s_%(duration)s.%(ext)s" -ciw
# if error occurs, don't mark post as archived
if [ ! 0 -eq $? ]; then curlError=1; fi;
if ! youtube-dl "$otherVid" -o "${targetDir}/%(title)s_%(duration)s.%(ext)s" -ciw ; then
curlError=1
fi
done
else
echo "No videos found, moving on."
@@ -172,23 +212,23 @@ do
fi

# if no error occurred, enter page as downloaded
if [[ $curlError -eq 0 ]]
if [[ ${curlError} -eq 0 ]]
then
echo "$postURL" >> "$targetDir/articles.txt"
echo "${postURL}" >> "${targetDir}/articles.txt"
else
echo "Some error occured during downloading. No articles.txt entry created."
fi

fi
done
# get link to next archive page
archiveLink=$(echo "$archivePage" | grep -o -P "id=\"next_page_link\" href=\".*?\"" | sed -e 's/id=\"next_page_link\" href=\"//g' -e 's/\"//g')
archiveLink=$(echo "${archivePage}" | grep -o -P "id=\"next_page_link\" href=\".*?\"" | sed -e 's/id=\"next_page_link\" href=\"//g' -e 's/\"//g')
# check if we are at the end of the archive (no link is returned)
if [ -z "$archiveLink" ]
if [ -z "${archiveLink}" ]
then
endOfArchive=1
echo "Reached the last archive page. Done!"
else
echo "Next archive page: $url$archiveLink"
echo "Next archive page: ${url}${archiveLink}"
fi
done