From b93cc10b1627ef8cc2ba6646b239f85bf97814a6 Mon Sep 17 00:00:00 2001 From: u0028003 Date: Wed, 28 Apr 2021 10:04:23 -0600 Subject: [PATCH] Modified how projects are listed, how the help menu is accessed, and a check to only make URLs for files that haven't been downloaded to enable iterative syncing. --- README.md | 15 ++++----- pom.xml | 8 ++--- .../hci/bioinfo/SBApps/ProjectDownloader.java | 33 ++++++++++++------- .../bioinfo/SBApps/ProjectDownloaderTest.java | 2 +- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 2db4007..a149aac 100644 --- a/README.md +++ b/README.md @@ -3,17 +3,16 @@ Tools for working with the Seven Bridges bioinformatics platform. ## ProjectDownloader
- u0028003$ java -jar ~/Code/SBApps/target/ProjectDownloader_0.2.jar -h
-
-Args: SBApps/ProjectDownloader: -h
-
+u0028003$ java -jar ~/Code/SBApps/target/ProjectDownloader-0.3.jar 
 **************************************************************************************
-**                             Project Downloader: Feb 2021                         **
+**                            Project Downloader: April 2021                        **
 **************************************************************************************
 This tool downloads files from Seven Bridges Projects while maintaining their folder
-structure via the fast, multi-threaded, aria2 download utility.
+structure via the fast, multi-threaded, aria2 download utility. Files already
+downloaded are skipped so run iteratively as new files become available.
 
 Options:
+-l List visible Projects.
 -p Project ID (division/projectName) to download.
       Skip this option to just list the visible Projects.
 -r Regular expressions to select particular Project file paths to download.
@@ -34,8 +33,8 @@ Options:
 Examples assuming ~/.sevenbridges/credentials exists: 
 
 List visible Projects:
-     java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar
-List files in a Project:
+     java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -l
+List files in a particular Project:
      java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -p alana-welm/pdx
 Test Project file path regexes:
      java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -p alana-welm/pdx
diff --git a/pom.xml b/pom.xml
index f37132d..cc5cc7d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,11 +6,11 @@
 	4.0.0
 
 	edu.utah.hci.bioinfo
-	SBApps
+	ProjectDownloader
 	jar
-	0.2.1
+	0.3
 
-	SBApps
+	ProjectDownloader
 	https://github.com/HuntsmanCancerInstitute
 
 	
@@ -51,7 +51,7 @@
 							
 								
-									edu.utah.hci.bioinfo.SBApps.App
+									edu.utah.hci.bioinfo.SBApps.ProjectDownloader
 								
 							
 						
diff --git a/src/main/java/edu/utah/hci/bioinfo/SBApps/ProjectDownloader.java b/src/main/java/edu/utah/hci/bioinfo/SBApps/ProjectDownloader.java
index 154b75f..1eaa3f0 100644
--- a/src/main/java/edu/utah/hci/bioinfo/SBApps/ProjectDownloader.java
+++ b/src/main/java/edu/utah/hci/bioinfo/SBApps/ProjectDownloader.java
@@ -21,6 +21,7 @@ public class ProjectDownloader {
 	private String projectId = null;
 	private Pattern[] toKeep = null;
 	private boolean skipNoUrlFiles = false;
+	private boolean skipDownloadedFiles = true;
 	private File aria2 = null;
 	private File downloadDirectory = null;
 
@@ -63,24 +64,24 @@ private int doWork() throws Exception {
 		
 		//pull projects?
 		if (projectId == null) {
-			Util.p("No project ID provided, loading visable projects...");
+			Util.p("Listing visable Projects...");
 			if (fetchProjects()) return 0;
 			return 1;
 		}
 
-		//fetch files and folders for the project, max return per query is 100 items
+		//fetch files and folders for the project; max return per query is 100 items. Unfortunately there is no option to fetch just a subset, so all of the files in a project must be fetched, and some projects have 40K+ files!
 		Util.p("Loading files from "+projectId+"...");
 		JsonNode fj = api.query("files?offset=0&limit=100&project="+projectId, true, true);
 		if (fj == null) return 1;
 		loadSBFiles(fj);
 		
 		//build the map
-		Util.p("\nBuilding file paths...");
+		Util.p("\nBuilding file paths, comparing to what already exists...");
 		buildFilePaths();
 		if (downloadDirectory == null) return 0;
 
-		//get file urls, some of these calls will fail if the file is archived or being archived/ unarchived
-		Util.p("\nRequesting URLs...");
+		//get file urls; some of these calls will fail if the file is archived, is being archived/unarchived, or has hit some odd issue that returns no error!
+		Util.p("\nRequesting download URLs...");
 		if (requestUrls() == false) return 1;
 
 		//make aria2 download file
@@ -231,9 +232,16 @@ private void buildFilePaths() {
 				else {
 					String p = f.getPath();
 					f.setMakeUrl(false);
+
 					for (Pattern pat: toKeep) {
 						if (pat.matcher(p).matches()) {
-							f.setMakeUrl(true);
+							//check if it exists?
+							if (skipDownloadedFiles) {
+								File test = new File (downloadDirectory, p);
+								if (test.exists()) f.setMakeUrl(false);
+								else f.setMakeUrl(true);
+							}
+							else f.setMakeUrl(true);
 							break;
 						}
 					}
@@ -316,7 +324,8 @@ private ArrayList fetchFileItems(JsonNode fj) throws Exception {
 
 
 	public static void main(String[] args) {
-		new ProjectDownloader(args);
+		if (args.length == 0) printDocs();
+		else new ProjectDownloader(args);
 	}		
 
 	/**This method will process each argument and assign new varibles
@@ -338,6 +347,7 @@ public void processArgs(String[] args) throws Exception{
 					case 'c': credentialsFile = new File(args[++i]).getCanonicalFile(); break;
 					case 'a': account = args[++i]; break;
 					case 'p': projectId = args[++i]; break;
+					case 'l': break;
 					case 'r': regExToKeep = args[++i]; break;
 					case 's': skipNoUrlFiles = true; break;
 					case 'd': downloadDirectory = new File(args[++i]).getCanonicalFile(); break;
@@ -385,9 +395,11 @@ public static void printDocs(){
 				"**                            Project Downloader: April 2021                        **\n" +
 				"**************************************************************************************\n" +
 				"This tool downloads files from Seven Bridges Projects while maintaining their folder\n"+
-				"structure via the fast, multi-threaded, aria2 download utility.\n"+
+				"structure via the fast, multi-threaded, aria2 download utility. Files already\n"+
+				"downloaded are skipped so run iteratively as new files become available.\n"+
 
 				"\nOptions:\n"+
+				"-l List visible Projects.\n"+
 				"-p Project ID (division/projectName) to download.\n"+
 				"      Skip this option to just list the visible Projects.\n"+
 				"-r Regular expressions to select particular Project file paths to download.\n"+
@@ -404,12 +416,11 @@ public static void printDocs(){
 				"     archived in Glacier. Use the SB web console to unarchive them, then rerun.\n"+
 				"-e Path to the aria2 executable, see https://aria2.github.io to download and install.\n"+
 				"     Skip this option to set up a mock aria2 download.\n"+
-				"-h Print this help menu.\n"+
 
 				"\nExamples assuming ~/.sevenbridges/credentials exists: \n\n"+
 				"List visible Projects:\n"+
-				"     java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar\n"+
-				"List files in a Project:\n"+
+				"     java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -l\n"+
+				"List files in a particular Project:\n"+
 				"     java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -p alana-welm/pdx\n"+
 				"Test Project file path regexes:\n"+
 				"     java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -p alana-welm/pdx\n"+
diff --git a/src/test/java/edu/utah/hci/bioinfo/SBApps/ProjectDownloaderTest.java b/src/test/java/edu/utah/hci/bioinfo/SBApps/ProjectDownloaderTest.java
index f893b07..a87c97c 100644
--- a/src/test/java/edu/utah/hci/bioinfo/SBApps/ProjectDownloaderTest.java
+++ b/src/test/java/edu/utah/hci/bioinfo/SBApps/ProjectDownloaderTest.java
@@ -48,7 +48,7 @@ public void testTestResources() {
     public void testListProjectsDefault() {
     	
        	String[] command = new String[] {
-    			"java", "-jar", testJar.toString()
+    			"java", "-jar", testJar.toString(), "-l"
        	};
        	String[] output = Util.executeViaProcessBuilder(command, true, "\n");
     	Assert.assertTrue(Util.stringArrayToString(output, " ").contains(projectID));