Skip to content

Commit

Permalink
Develop Edition: Dos2Drs (#5163)
Browse files Browse the repository at this point in the history
Dos2Drs develop edition [BA-5967]
  • Loading branch information
ruchim authored and mcovarr committed Sep 16, 2019
1 parent 7840b6f commit b6111f6
Show file tree
Hide file tree
Showing 13 changed files with 150 additions and 142 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
version 1.0

workflow wf_level_file_size {
File input1 = "dos://wb-mock-drs-dev.storage.googleapis.com/4a3908ad-1f0b-4e2a-8a92-611f2123e8b0"
File input2 = "dos://wb-mock-drs-dev.storage.googleapis.com/0c8e7bc6-fd76-459d-947b-808b0605beb3"
File input1 = "drs://wb-mock-drs-dev.storage.googleapis.com/4a3908ad-1f0b-4e2a-8a92-611f2123e8b0"
File input2 = "drs://wb-mock-drs-dev.storage.googleapis.com/0c8e7bc6-fd76-459d-947b-808b0605beb3"

output {
Float fileSize1 = size(input1)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
# For all below 5 HCA uuids, Martha does not return a service account
"drs_usa_hca.localize_drs_with_usa.file1": "dos://service.staging.explore.data.humancellatlas.org/033c9840-c5cd-438b-b0e4-8e4cd8fc8dc6?version=2019-07-04T104122.106166Z",
"drs_usa_hca.localize_drs_with_usa.file2": "dos://service.staging.explore.data.humancellatlas.org/4defa7b0-46c2-4053-8e99-b827eed1bc96?version=2019-07-04T104122.100969Z",
"drs_usa_hca.localize_drs_with_usa.file3": "dos://service.staging.explore.data.humancellatlas.org/de5dcfc1-5aea-41ba-a7ae-e72c416cb450?version=2019-07-04T104122.092788Z",
"drs_usa_hca.localize_drs_with_usa.file4": "dos://service.staging.explore.data.humancellatlas.org/16dea2c5-e2bd-45bc-b2fd-fcac0daafc48?version=2019-07-04T104122.060634Z",
"drs_usa_hca.localize_drs_with_usa.file5": "dos://service.dev.explore.data.humancellatlas.org/7c800467-9143-402f-b965-4e7cad75c1e6?version=2019-05-26T130511.722646Z"
"drs_usa_hca.localize_drs_with_usa.file1": "drs://service.staging.explore.data.humancellatlas.org/033c9840-c5cd-438b-b0e4-8e4cd8fc8dc6?version=2019-07-04T104122.106166Z",
"drs_usa_hca.localize_drs_with_usa.file2": "drs://service.staging.explore.data.humancellatlas.org/4defa7b0-46c2-4053-8e99-b827eed1bc96?version=2019-07-04T104122.100969Z",
"drs_usa_hca.localize_drs_with_usa.file3": "drs://service.staging.explore.data.humancellatlas.org/de5dcfc1-5aea-41ba-a7ae-e72c416cb450?version=2019-07-04T104122.092788Z",
"drs_usa_hca.localize_drs_with_usa.file4": "drs://service.staging.explore.data.humancellatlas.org/16dea2c5-e2bd-45bc-b2fd-fcac0daafc48?version=2019-07-04T104122.060634Z",
"drs_usa_hca.localize_drs_with_usa.file5": "drs://service.dev.explore.data.humancellatlas.org/7c800467-9143-402f-b965-4e7cad75c1e6?version=2019-05-26T130511.722646Z"
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ class DrsCloudNioFileSystemProvider(rootConfig: Config,

override def isTransient(exception: Exception): Boolean = false

override def getScheme: String = "dos"
override def getScheme: String = "drs"

override def getHost(uriAsString: String): String = {
require(uriAsString.startsWith(s"$getScheme://"), s"Scheme does not match $getScheme")

/*
* In some cases for a URI, the host name is null. For example, for DRS urls like 'dos://dg.123/123-123-123',
* In some cases for a URI, the host name is null. For example, for DRS urls like 'drs://dg.123/123-123-123',
* even though 'dg.123' is a valid host, somehow since it does not conform to URI's standards, uri.getHost returns null. In such
* cases, authority is used instead of host. If there is no authority, use an empty string.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class DrsCloudNioRegularFileAttributes(drsPath: String, drsPathResolver: DrsPath

override def fileHash: Option[String] = {
drsPathResolver.resolveDrsThroughMartha(drsPath).map(marthaResponse => {
marthaResponse.dos.data_object.checksums.flatMap {
marthaResponse.drs.data_object.checksums.flatMap {
_.collectFirst{ case c if c.`type`.equalsIgnoreCase("md5") => c.checksum }
}
}).unsafeRunSync()
Expand All @@ -34,7 +34,7 @@ class DrsCloudNioRegularFileAttributes(drsPath: String, drsPathResolver: DrsPath
override def lastModifiedTime(): FileTime = {
val lastModifiedIO = for {
marthaResponse <- drsPathResolver.resolveDrsThroughMartha(drsPath)
lastModifiedInString <- IO.fromEither(marthaResponse.dos.data_object.updated.toRight(throwRuntimeException("updated")))
lastModifiedInString <- IO.fromEither(marthaResponse.drs.data_object.updated.toRight(throwRuntimeException("updated")))
lastModified <- convertToFileTime(lastModifiedInString)
} yield lastModified

Expand All @@ -45,7 +45,7 @@ class DrsCloudNioRegularFileAttributes(drsPath: String, drsPathResolver: DrsPath
override def size(): Long = {
val sizeIO = for {
marthaResponse <- drsPathResolver.resolveDrsThroughMartha(drsPath)
size <- IO.fromEither(marthaResponse.dos.data_object.size.toRight(throwRuntimeException("size")))
size <- IO.fromEither(marthaResponse.drs.data_object.size.toRight(throwRuntimeException("size")))
} yield size

sizeIO.unsafeRunSync()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@ case class DrsPathResolver(drsConfig: DrsConfig, httpClientBuilder: HttpClientBu

implicit lazy val urlDecoder: Decoder[Url] = deriveDecoder
implicit lazy val checksumDecoder: Decoder[ChecksumObject] = deriveDecoder
implicit lazy val dataObjectDecoder: Decoder[DosDataObject] = deriveDecoder
implicit lazy val dosObjectDecoder: Decoder[DosObject] = deriveDecoder
implicit lazy val dataObjectDecoder: Decoder[DrsDataObject] = deriveDecoder
implicit lazy val drsObjectDecoder: Decoder[DrsObject] = deriveDecoder
implicit lazy val saDataObjectDecoder: Decoder[SADataObject] = deriveDecoder
implicit lazy val marthaResponseDecoder: Decoder[MarthaResponse] = deriveDecoder
// Martha is still returning objects keyed by the obsolete "dos" terminology rather than the current term "drs".
// In order to avoid having Cromwell's case classes use the obsolete terminology that would arise from a derived
// decoder, this `forProduct2` construct instructs Circe to take the value keyed by `dos` and pass that as the
// first argument to `MarthaResponse.apply`, which happens to be the constructor parameter formally named `drs`.
implicit lazy val marthaResponseDecoder: Decoder[MarthaResponse] = Decoder.forProduct2("dos", "googleServiceAccount")(MarthaResponse.apply)

private val DrsPathToken = "${drsPath}"

Expand All @@ -48,7 +52,7 @@ case class DrsPathResolver(drsConfig: DrsConfig, httpClientBuilder: HttpClientBu
e => IO.raiseError(new RuntimeException(s"Failed to parse response from Martha into a case class. Error: ${ExceptionUtils.getMessage(e)}"))
}
}

private def executeMarthaRequest(httpPost: HttpPost): Resource[IO, HttpResponse]= {
for {
httpClient <- Resource.fromAutoCloseable(IO(httpClientBuilder.build()))
Expand Down Expand Up @@ -78,13 +82,13 @@ case class Url(url: String)

case class ChecksumObject(checksum: String, `type`: String)

case class DosDataObject(size: Option[Long],
case class DrsDataObject(size: Option[Long],
checksums: Option[Array[ChecksumObject]],
updated: Option[String],
urls: Array[Url])

case class DosObject(data_object: DosDataObject)
case class DrsObject(data_object: DrsDataObject)

case class SADataObject(data: Json)

case class MarthaResponse(dos: DosObject, googleServiceAccount: Option[SADataObject])
case class MarthaResponse(drs: DrsObject, googleServiceAccount: Option[SADataObject])
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ object DrsLocalizerMain extends IOApp {
marthaResponse <- resolveDrsThroughMartha(drsUrl, marthaUri)
_ = httpBackendConnection.close()
// Currently Martha only supports resolving DRS paths to GCS paths
gcsUrl <- extractFirstGcsUrl(marthaResponse.dos.data_object.urls)
gcsUrl <- extractFirstGcsUrl(marthaResponse.drs.data_object.urls)
exitState <- downloadFileFromGcs(gcsUrl, marthaResponse.googleServiceAccount.map(_.data.toString), downloadLoc, requesterPaysId)
} yield exitState

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,25 @@ import io.circe.generic.semiauto.deriveDecoder

object MarthaResponseJsonSupport {
implicit val urlFormat: Decoder[Url] = deriveDecoder
implicit val dataObject: Decoder[DosDataObject] = deriveDecoder
implicit val dosObjectFormat: Decoder[DosObject] = deriveDecoder
implicit val dataObject: Decoder[DrsDataObject] = deriveDecoder
implicit val drsObjectFormat: Decoder[DrsObject] = deriveDecoder
implicit val googleServiceAccountFormat: Decoder[GoogleServiceAccount] = deriveDecoder
implicit val marthaResponseFormat: Decoder[MarthaResponse] = deriveDecoder
// Martha is still returning objects keyed by the obsolete "dos" terminology rather than the current term "drs".
// In order to avoid having Cromwell's case classes use the obsolete terminology that would arise from a derived
// decoder, this `forProduct2` construct instructs Circe to take the value keyed by `dos` and pass that as the
// first argument to `MarthaResponse.apply`, which happens to be the constructor parameter formally named `drs`.
implicit val marthaResponseFormat: Decoder[MarthaResponse] = Decoder.forProduct2("dos", "googleServiceAccount")(MarthaResponse.apply)

implicit val samErrorResponseFormat: Decoder[SamErrorResponse] = deriveDecoder
implicit val samErrorResponseCodeFormat: Decoder[SamErrorResponseCode] = deriveDecoder
implicit val marthaErrorResponseFormat: Decoder[MarthaErrorResponse] = deriveDecoder
}

case class Url(url: String)
case class DosDataObject(urls: Array[Url])
case class DosObject(data_object: DosDataObject)
case class DrsDataObject(urls: Array[Url])
case class DrsObject(data_object: DrsDataObject)
case class GoogleServiceAccount(data: Json)
case class MarthaResponse(dos: DosObject, googleServiceAccount: Option[GoogleServiceAccount])
case class MarthaResponse(drs: DrsObject, googleServiceAccount: Option[GoogleServiceAccount])


case class SamErrorResponse(text: String)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class DrsPathBuilderFactory(globalConfig: Config, instanceConfig: Config, single
for {
serviceAccountJson <- serviceAccountJsonIo
//Currently, Martha only supports resolving DRS paths to GCS paths
url <- IO.fromEither(DrsResolver.extractUrlForScheme(marthaResponse.dos.data_object.urls, GcsScheme))
url <- IO.fromEither(DrsResolver.extractUrlForScheme(marthaResponse.drs.data_object.urls, GcsScheme))
readableByteChannel <- inputReadChannel(url, GcsScheme, serviceAccountJson, requesterPaysProjectIdOption)
} yield readableByteChannel
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ object DrsResolver {
drsFileSystemProvider <- toIO(drsFileSystemProviderOption, noFileSystemForDrsError)
marthaResponse <- drsFileSystemProvider.drsPathResolver.resolveDrsThroughMartha(drsPath.pathAsString)
//Currently, Martha only supports resolving DRS paths to GCS paths
relativePath <- IO.fromEither(extractUrlForScheme(marthaResponse.dos.data_object.urls, GcsScheme))
relativePath <- IO.fromEither(extractUrlForScheme(marthaResponse.drs.data_object.urls, GcsScheme))
.map(_.substring(urlProtocolLength(GcsScheme)))
.handleErrorWith(e => IO.raiseError(new RuntimeException(s"Error while resolving DRS path: $drsPath. Error: ${ExceptionUtils.getMessage(e)}")))
} yield relativePath
Expand Down
Loading

0 comments on commit b6111f6

Please sign in to comment.