-
Notifications
You must be signed in to change notification settings - Fork 403
Encode hubs #1800
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Encode hubs #1800
Changes from all commits
bd623d1
45d5d40
8329e33
f9c3e06
f33eb80
9ac1afc
a74c586
40717d0
File filter
Filter by extension
Conversations
Jump to
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,17 @@ | |
|
|
||
| package org.igv.encode; | ||
|
|
||
| import org.igv.Globals; | ||
| import org.igv.logging.LogManager; | ||
| import org.igv.logging.Logger; | ||
| import org.igv.prefs.Constants; | ||
| import org.igv.prefs.PreferencesManager; | ||
| import org.igv.ui.IGV; | ||
| import org.igv.ui.action.BrowseEncodeAction; | ||
| import org.igv.util.Pair; | ||
| import org.igv.util.ParsingUtils; | ||
|
|
||
| import javax.swing.text.NumberFormatter; | ||
| import java.awt.*; | ||
| import java.io.BufferedReader; | ||
| import java.io.IOException; | ||
|
|
@@ -12,17 +23,6 @@ | |
| import java.util.*; | ||
| import java.util.List; | ||
| import java.util.stream.Collectors; | ||
| import javax.swing.*; | ||
| import javax.swing.text.NumberFormatter; | ||
|
|
||
| import org.igv.logging.*; | ||
| import org.igv.Globals; | ||
| import org.igv.prefs.Constants; | ||
| import org.igv.prefs.PreferencesManager; | ||
| import org.igv.ui.IGV; | ||
| import org.igv.ui.action.BrowseEncodeAction; | ||
| import org.igv.util.Pair; | ||
| import org.igv.util.ParsingUtils; | ||
|
|
||
| /** | ||
| * @author Jim Robinson | ||
|
|
@@ -35,7 +35,9 @@ public class EncodeTrackChooserFactory { | |
| private static NumberFormatter numberFormatter = new NumberFormatter(); | ||
|
||
|
|
||
| private static String ENCODE_HOST = "https://www.encodeproject.org"; | ||
| private static Set<String> filteredColumns = new HashSet(Arrays.asList("ID", "Assembly", "HREF", "path")); | ||
| private static Set<String> filteredColumns = new HashSet<>(Arrays.asList( | ||
| "ID", "Assembly", "HREF", "path", | ||
| "url", "Project", "name", "color", "altColor")); | ||
|
|
||
| private static List<String> filteredExtensions = Arrays.asList("tsv", "tsv.gz"); | ||
|
|
||
|
|
@@ -53,6 +55,8 @@ public class EncodeTrackChooserFactory { | |
| static HashSet<String> ucscSupportedGenomes = new HashSet<>(Arrays.asList("hg19", "mm9")); | ||
| static HashSet<String> supportedGenomes = new HashSet<>( | ||
| Arrays.asList("ce10", "ce11", "dm3", "dm6", "GRCh38", "hg19", "mm10", "mm9")); | ||
| static HashSet<String> hicGenomes = new HashSet<>( | ||
| Arrays.asList("GRCh38", "hg19", "mm10", "mm9")); | ||
|
|
||
| /** | ||
| * Return a new or cached instance of a track chooser for the given genome and type. | ||
|
|
@@ -79,14 +83,15 @@ public synchronized static TrackChooser getInstance(String genomeId, BrowseEncod | |
| instance = new TrackChooser(parent, headings, rows, title); | ||
| instanceMap.put(key, instance); | ||
| } | ||
|
|
||
| return instance; | ||
| } | ||
|
|
||
| private static String getDialogTitle(String genomeId, BrowseEncodeAction.Type type) { | ||
|
|
||
| if (type == BrowseEncodeAction.Type.UCSC) { | ||
| return "ENCODE data hosted at UCSC (2012)"; | ||
| } else if (type == BrowseEncodeAction.Type.FOUR_DN) { | ||
| return "4DN"; | ||
| } else { | ||
| switch (type) { | ||
| case SIGNALS_CHIP: | ||
|
|
@@ -103,6 +108,10 @@ public static boolean genomeSupportedUCSC(String genomeId) { | |
| return genomeId != null && ucscSupportedGenomes.contains(getEncodeGenomeID(genomeId)); | ||
| } | ||
|
|
||
| public static boolean hicSupportedUCSC(String genomeId) { | ||
| return genomeId != null && hicGenomes.contains(getEncodeGenomeID(genomeId)); | ||
| } | ||
|
|
||
| public static boolean genomeSupported(String genomeId) { | ||
| return genomeId != null && supportedGenomes.contains(getEncodeGenomeID(genomeId)); | ||
| } | ||
|
|
@@ -128,26 +137,16 @@ private static Pair<List<String>, List<FileRecord>> getEncodeFileRecords(String | |
| if (is == null) { | ||
| return null; | ||
| } | ||
| Pair<List<String>, List<FileRecord>> headingRecordPair = parseRecords(is, type, genomeId); | ||
|
|
||
| if (IGV.hasInstance()) { | ||
| Set<String> loadedPaths = IGV.getInstance().getDataResourceLocators().stream() | ||
| .map(rl -> rl.getPath()) | ||
| .collect(Collectors.toSet()); | ||
|
|
||
| for (FileRecord fileRecord : headingRecordPair.getSecond()) { | ||
| if (loadedPaths.contains(fileRecord.getPath())) { | ||
| fileRecord.setSelected(true); | ||
| } | ||
| } | ||
| } | ||
| return headingRecordPair; | ||
| return parseRecords(is, type, genomeId); | ||
| } | ||
| } | ||
|
|
||
| private static InputStream getStreamFor(String genomeId, BrowseEncodeAction.Type type) throws IOException { | ||
| if (type == BrowseEncodeAction.Type.UCSC) { | ||
| return EncodeTrackChooserFactory.class.getResourceAsStream("encode." + genomeId + ".txt"); | ||
| } else if (type == BrowseEncodeAction.Type.FOUR_DN) { | ||
| String url = PreferencesManager.getPreferences().get(Constants.FOUR_DN_FILELIST_URL) + "4dn_" + genomeId + "_tracks.txt"; | ||
| return ParsingUtils.openInputStream(url); | ||
| } else { | ||
| String root = PreferencesManager.getPreferences().get(Constants.ENCODE_FILELIST_URL) + genomeId + "."; | ||
| String url = null; | ||
|
|
@@ -158,6 +157,9 @@ private static InputStream getStreamFor(String genomeId, BrowseEncodeAction.Type | |
| case SIGNALS_OTHER: | ||
| url = root + "signals.other.txt.gz"; | ||
| break; | ||
| case HIC: | ||
| url = root + "hic.txt.gz"; | ||
| break; | ||
| case OTHER: | ||
| url = root + "other.txt.gz"; | ||
| break; | ||
|
|
@@ -175,21 +177,27 @@ private static Pair parseRecords(InputStream is, BrowseEncodeAction.Type type, S | |
|
|
||
| String[] headers = Globals.tabPattern.split(reader.readLine()); | ||
|
|
||
| int pathColumn = type == BrowseEncodeAction.Type.UCSC ? 0 : Arrays.asList(headers).indexOf("HREF"); | ||
| int pathColumn = switch (type) { | ||
| case UCSC, FOUR_DN -> 0; | ||
| default -> Arrays.asList(headers).indexOf("HREF"); | ||
| }; | ||
|
|
||
| List<FileRecord> records = new ArrayList<>(20000); | ||
| String nextLine; | ||
| while ((nextLine = reader.readLine()) != null) { | ||
| if (!nextLine.startsWith("#")) { | ||
|
|
||
| String[] tokens = Globals.tabPattern.split(nextLine, -1); | ||
| String path = type == BrowseEncodeAction.Type.UCSC ? tokens[pathColumn] : ENCODE_HOST + tokens[pathColumn]; | ||
| String path = switch (type) { | ||
| case UCSC, FOUR_DN -> tokens[pathColumn]; | ||
| default -> ENCODE_HOST + tokens[pathColumn]; | ||
| }; | ||
|
|
||
| if (filteredExtensions.stream().anyMatch(e -> path.endsWith(e))) { | ||
| continue; | ||
| } | ||
|
|
||
| Map<String, String> attributes = new LinkedHashMap<>(); | ||
| Map<String, String> attributes = new HashMap<>(); | ||
| for (int i = 0; i < headers.length; i++) { | ||
| String value = i < tokens.length ? tokens[i] : ""; | ||
| if (value.length() > 0) { | ||
|
|
@@ -198,7 +206,6 @@ private static Pair parseRecords(InputStream is, BrowseEncodeAction.Type type, S | |
| } | ||
| final FileRecord record = new FileRecord(path, attributes); | ||
| records.add(record); | ||
|
|
||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -6,6 +6,7 @@ | |||||||||||||
| import org.igv.logging.LogManager; | ||||||||||||||
| import org.igv.logging.Logger; | ||||||||||||||
| import org.igv.util.CompressionUtils; | ||||||||||||||
| import org.igv.util.collections.CaseInsensitiveMap; | ||||||||||||||
| import org.igv.util.collections.LRUCache; | ||||||||||||||
| import org.igv.util.stream.IGVSeekableStreamFactory; | ||||||||||||||
|
|
||||||||||||||
|
|
@@ -56,7 +57,8 @@ public int getByteSize() { | |||||||||||||
| private Map<String, Long> expectedValueVectors; | ||||||||||||||
| private Map<String, Object> attributes; | ||||||||||||||
| private List<Chromosome> chromosomes = new ArrayList<>(); | ||||||||||||||
| private Map<String, Integer> chromosomeIndexMap = new HashMap<>(); | ||||||||||||||
| private Map<String, Integer> chromosomeIndexMap = new CaseInsensitiveMap<Integer>(); | ||||||||||||||
| private Integer wgResolution = null; | ||||||||||||||
| private List<Integer> bpResolutions = new ArrayList<>(); | ||||||||||||||
| private List<Integer> fragResolutions = new ArrayList<>(); | ||||||||||||||
| private Map<String, String> chrAliasTable = new HashMap<>(); | ||||||||||||||
|
|
@@ -90,7 +92,7 @@ public int getVersion() { | |||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| public String getNVIString() { | ||||||||||||||
| if(this.normVectorIndexPosition > 0 && this.normVectorIndexSize > 0) { | ||||||||||||||
| if (this.normVectorIndexPosition > 0 && this.normVectorIndexSize > 0) { | ||||||||||||||
| return this.normVectorIndexPosition + "," + this.normVectorIndexSize; | ||||||||||||||
| } else { | ||||||||||||||
| return null; | ||||||||||||||
|
|
@@ -158,17 +160,19 @@ private void readHeaderAndFooter() throws IOException { | |||||||||||||
|
|
||||||||||||||
| // chromosomes | ||||||||||||||
| this.chromosomes = new ArrayList<>(); | ||||||||||||||
| this.chromosomeIndexMap = new HashMap<>(); | ||||||||||||||
| this.chromosomeIndexMap = new CaseInsensitiveMap<>(); | ||||||||||||||
| int nChrs = bodyParser.getInt(); | ||||||||||||||
| for (int i = 0; i < nChrs; i++) { | ||||||||||||||
| String name = getString(bodyParser); | ||||||||||||||
| long size = this.version < 9 ? bodyParser.getInt() : bodyParser.getLong(); | ||||||||||||||
| Chromosome chr = new Chromosome(i, name, (int) size); | ||||||||||||||
| if ("all".equalsIgnoreCase(name)) { | ||||||||||||||
| // Whole-genome ("all") pseudo-chromosome: no additional handling is performed here | ||||||||||||||
| } | ||||||||||||||
| this.chromosomes.add(chr); | ||||||||||||||
|
|
||||||||||||||
| String canonicalName = genome == null ? name : genome.getCanonicalChrName(name); | ||||||||||||||
| chrAliasTable.put(canonicalName, name); | ||||||||||||||
|
|
||||||||||||||
| this.chromosomeIndexMap.put(name, i); | ||||||||||||||
|
|
||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // bp resolutions | ||||||||||||||
|
|
@@ -185,12 +189,6 @@ private void readHeaderAndFooter() throws IOException { | |||||||||||||
| this.fragResolutions.add(bodyParser.getInt()); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // build alias table | ||||||||||||||
| for (String chrName : chromosomeIndexMap.keySet()) { | ||||||||||||||
| String canonicalName = genome == null ? chrName : genome.getCanonicalChrName(chrName); | ||||||||||||||
| chrAliasTable.put(canonicalName, chrName); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| private void readFooter() throws IOException { | ||||||||||||||
|
|
@@ -280,7 +278,7 @@ public List<ContactRecord> getContactRecords(Region region1, | |||||||||||||
| if (block == null) continue; | ||||||||||||||
|
|
||||||||||||||
| for (ContactRecord rec : block.records) { | ||||||||||||||
| if (allRecords || (rec.bin1() >= x1 && rec.bin1() < x2 && rec.bin2() >= y1 && rec.bin2() < y2) && rec.counts() > 1) { | ||||||||||||||
| if (allRecords || (rec.bin1() >= x1 && rec.bin1() < x2 && rec.bin2() >= y1 && rec.bin2() < y2) && rec.counts() > 1) { | ||||||||||||||
| contactRecords.add(rec); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
@@ -289,6 +287,24 @@ public List<ContactRecord> getContactRecords(Region region1, | |||||||||||||
| return contactRecords; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| public int getWGResolution() { | ||||||||||||||
| if (wgResolution == null) { | ||||||||||||||
| try { | ||||||||||||||
| Integer idx = chromosomeIndexMap.get("all"); | ||||||||||||||
| if (idx == null) return -1; | ||||||||||||||
|
||||||||||||||
| if (idx == null) return -1; | |
| if (idx == null) return -1; | |
| if (idx < 0 || idx >= chromosomes.size()) { | |
| log.error("Chromosome index for 'all' is out of bounds: " + idx); | |
| return -1; | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo in comment: "psuedo-chromosome" should be "pseudo-chromosome".