Skip to content

Commit

Permalink
Modified how projects are listed, how the help menu is accessed, and a
Browse files Browse the repository at this point in the history
check to only make URLs for files that haven't been downloaded to enable
iterative syncing.
  • Loading branch information
u0028003 committed Apr 28, 2021
1 parent 190066f commit b93cc10
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 24 deletions.
15 changes: 7 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,16 @@ Tools for working with the Seven Bridges bioinformatics platform.

## ProjectDownloader
<pre>
u0028003$ java -jar ~/Code/SBApps/target/ProjectDownloader_0.2.jar -h

Args: SBApps/ProjectDownloader: -h

u0028003$ java -jar ~/Code/SBApps/target/ProjectDownloader-0.3.jar
**************************************************************************************
** Project Downloader: Feb 2021 **
** Project Downloader: April 2021 **
**************************************************************************************
This tool downloads files from Seven Bridges Projects while maintaining their folder
structure via the fast, multi-threaded, aria2 download utility.
structure via the fast, multi-threaded, aria2 download utility. Files already
downloaded are skipped so run iteratively as new files become available.

Options:
-l List visible Projects.
-p Project ID (division/projectName) to download.
Skip this option to just list the visible Projects.
-r Regular expressions to select particular Project file paths to download.
Expand All @@ -34,8 +33,8 @@ Options:
Examples assuming ~/.sevenbridges/credentials exists:

List visible Projects:
java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar
List files in a Project:
java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -l
List files in a particular Project:
java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -p alana-welm/pdx
Test Project file path regexes:
java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -p alana-welm/pdx
Expand Down
8 changes: 4 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
<modelVersion>4.0.0</modelVersion>

<groupId>edu.utah.hci.bioinfo</groupId>
<artifactId>SBApps</artifactId>
<artifactId>ProjectDownloader</artifactId>
<packaging>jar</packaging>
<version>0.2.1</version>
<version>0.3</version>

<name>SBApps</name>
<name>ProjectDownloader</name>
<url>https://github.com/HuntsmanCancerInstitute</url>

<properties>
Expand Down Expand Up @@ -51,7 +51,7 @@
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>edu.utah.hci.bioinfo.SBApps.App</mainClass>
<mainClass>edu.utah.hci.bioinfo.SBApps.ProjectDownloader</mainClass>
</transformer>
</transformers>
</configuration>
Expand Down
33 changes: 22 additions & 11 deletions src/main/java/edu/utah/hci/bioinfo/SBApps/ProjectDownloader.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public class ProjectDownloader {
private String projectId = null;
private Pattern[] toKeep = null;
private boolean skipNoUrlFiles = false;
private boolean skipDownloadedFiles = true;
private File aria2 = null;
private File downloadDirectory = null;

Expand Down Expand Up @@ -63,24 +64,24 @@ private int doWork() throws Exception {

//pull projects?
if (projectId == null) {
Util.p("No project ID provided, loading visable projects...");
Util.p("Listing visable Projects...");
if (fetchProjects()) return 0;
return 1;
}

//fetch files and folders for the project, max return per query is 100 items
//fetch files and folders for the project; max return per query is 100 items. Unfortunately we have to fetch all of the files in a project, there is no option to fetch just a subset, and some projects have 40K+ files!
Util.p("Loading files from "+projectId+"...");
JsonNode fj = api.query("files?offset=0&limit=100&project="+projectId, true, true);
if (fj == null) return 1;
loadSBFiles(fj);

//build the map
Util.p("\nBuilding file paths...");
Util.p("\nBuilding file paths, comparing to what already exists...");
buildFilePaths();
if (downloadDirectory == null) return 0;

//get file urls, some of these calls will fail if the file is archived or being archived/ unarchived
Util.p("\nRequesting URLs...");
//get file URLs; some of these calls will fail if the file is archived, is being archived/unarchived, or has hit some odd issue that reports no error!
Util.p("\nRequesting download URLs...");
if (requestUrls() == false) return 1;

//make aria2 download file
Expand Down Expand Up @@ -231,9 +232,16 @@ private void buildFilePaths() {
else {
String p = f.getPath();
f.setMakeUrl(false);

for (Pattern pat: toKeep) {
if (pat.matcher(p).matches()) {
f.setMakeUrl(true);
//check if it exists?
if (skipDownloadedFiles) {
File test = new File (downloadDirectory, p);
if (test.exists()) f.setMakeUrl(false);
else f.setMakeUrl(true);
}
else f.setMakeUrl(true);
break;
}
}
Expand Down Expand Up @@ -316,7 +324,8 @@ private ArrayList<SBFile> fetchFileItems(JsonNode fj) throws Exception {


public static void main(String[] args) {
new ProjectDownloader(args);
if (args.length == 0) printDocs();
else new ProjectDownloader(args);
}

/**This method will process each argument and assign new variables
Expand All @@ -338,6 +347,7 @@ public void processArgs(String[] args) throws Exception{
case 'c': credentialsFile = new File(args[++i]).getCanonicalFile(); break;
case 'a': account = args[++i]; break;
case 'p': projectId = args[++i]; break;
case 'l': break;
case 'r': regExToKeep = args[++i]; break;
case 's': skipNoUrlFiles = true; break;
case 'd': downloadDirectory = new File(args[++i]).getCanonicalFile(); break;
Expand Down Expand Up @@ -385,9 +395,11 @@ public static void printDocs(){
"** Project Downloader: April 2021 **\n" +
"**************************************************************************************\n" +
"This tool downloads files from Seven Bridges Projects while maintaining their folder\n"+
"structure via the fast, multi-threaded, aria2 download utility.\n"+
"structure via the fast, multi-threaded, aria2 download utility. Files already\n"+
"downloaded are skipped so run iteratively as new files become available.\n"+

"\nOptions:\n"+
"-l List visible Projects.\n"+
"-p Project ID (division/projectName) to download.\n"+
" Skip this option to just list the visible Projects.\n"+
"-r Regular expressions to select particular Project file paths to download.\n"+
Expand All @@ -404,12 +416,11 @@ public static void printDocs(){
" archived in Glacier. Use the SB web console to unarchive them, then rerun.\n"+
"-e Path to the aria2 executable, see https://aria2.github.io to download and install.\n"+
" Skip this option to set up a mock aria2 download.\n"+
"-h Print this help menu.\n"+

"\nExamples assuming ~/.sevenbridges/credentials exists: \n\n"+
"List visible Projects:\n"+
" java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar\n"+
"List files in a Project:\n"+
" java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -l\n"+
"List files in a particular Project:\n"+
" java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -p alana-welm/pdx\n"+
"Test Project file path regexes:\n"+
" java -Xmx1G -jar pathTo/ProjectDownloader_xxx.jar -p alana-welm/pdx\n"+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void testTestResources() {
public void testListProjectsDefault() {

String[] command = new String[] {
"java", "-jar", testJar.toString()
"java", "-jar", testJar.toString(), "-l"
};
String[] output = Util.executeViaProcessBuilder(command, true, "\n");
Assert.assertTrue(Util.stringArrayToString(output, " ").contains(projectID));
Expand Down

0 comments on commit b93cc10

Please sign in to comment.