Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/scala/com/github/johnynek/bazel_deps/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ scala_library(
":graph",
":resolver",
":settings_loader",
"//3rdparty/jvm/io/circe:circe_core",
"//3rdparty/jvm/io/circe:circe_jawn",
"//3rdparty/jvm/io/get_coursier:coursier",
"//3rdparty/jvm/io/get_coursier:coursier_cache",
"//3rdparty/jvm/io/get_coursier:coursier_core",
Expand Down
163 changes: 154 additions & 9 deletions src/scala/com/github/johnynek/bazel_deps/CoursierResolver.scala
Original file line number Diff line number Diff line change
@@ -1,26 +1,34 @@
package com.github.johnynek.bazel_deps

import coursier.{Dependency, ResolutionProcess, Project, Resolution}
import coursier.cache.{FileCache, CachePolicy}
import coursier.cache.{ArtifactError, CacheLocks, FileCache, CachePolicy}
import coursier.paths.{CachePath, Util => PathUtil}
import coursier.util.{Artifact, Task}
import cats.MonadError
import cats.data.{Nested, NonEmptyList, Validated, ValidatedNel}
import cats.implicits._
import coursier.LocalRepositories
import coursier.core._
import java.nio.file.Path
import java.io.File
import java.net.http.{HttpClient, HttpHeaders, HttpRequest, HttpResponse}
import java.net.{HttpURLConnection, URI, URL}
import java.nio.file.{Files, Path, StandardCopyOption}
import java.util.Locale
import org.slf4j.LoggerFactory
import io.circe.jawn.JawnParser
import io.circe.syntax._
import coursier.ivy._
import scala.collection.JavaConverters._
import scala.collection.immutable.SortedMap
import scala.util.{Failure, Try}
import scala.util.{Failure, Success, Try}
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.Duration

object CoursierResolver {
// 12 concurrent downloads
// most downloads are tiny sha downloads so try keep things alive
lazy val downloadPool = {
import java.util.concurrent.{ExecutorService, Executors, ThreadFactory}
import java.util.concurrent.{Executors, ThreadFactory}
Executors.newFixedThreadPool(
12,
// from scalaz.concurrent.Strategy.DefaultDaemonThreadFactory
Expand All @@ -37,7 +45,7 @@ object CoursierResolver {
}
}

class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, runTimeout: Duration, resolverCachePath: Path) extends NormalizingResolver[Task] {
class CoursierResolver(servers: List[DependencyServer], hashInHttpHeaders: Boolean, ec: ExecutionContext, runTimeout: Duration, resolverCachePath: Path) extends NormalizingResolver[Task] {
// TODO: add support for a local file cache other than ivy
private[this] val repos = LocalRepositories.ivy2Local :: {
val settings = SettingsLoader.settings
Expand Down Expand Up @@ -66,6 +74,18 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru
}
}

// Copied from coursier.cache.FileCache
private def auxiliaryFilePrefix(file: File): String =
s".${file.getName}__"

private def auxiliaryFile(file: File, key: String): File = {
val key0 = key.toLowerCase(Locale.ROOT).filter(_ != '-')
new File(file.getParentFile, s"${auxiliaryFilePrefix(file)}$key0")
}

// Copied from coursier.cache.internal.Downloader
private def errFile(file: File) = new File(file.getParentFile, "." + file.getName + ".error")

private[this] def makeCache() =
Option(resolverCachePath) match {
case None => FileCache()
Expand All @@ -77,6 +97,8 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru
.withPool(CoursierResolver.downloadPool)
.fetch)

private[this] val httpClient = HttpClient.newHttpClient()

private[this] val logger =
LoggerFactory.getLogger("bazel_deps.CoursierResolver")

Expand Down Expand Up @@ -131,12 +153,125 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru
digestType: DigestType,
artifact: Artifact
): Task[(Artifact, ShaValue, Long)] = {
makeCache()

val cache = makeCache()
.withCachePolicies(Seq(CachePolicy.FetchMissing))
.withPool(CoursierResolver.downloadPool)
.file(artifact)
.run
.flatMap { e =>
val artifactFile = cache.localFile(artifact.url, artifact.authentication.map(_.user))

// Error caching logic copied from coursier.cache.internal.Downloader.
// If the .pom file exists for an artifact but the artifact itself doesn't,
// assume that the artifact will always remain missing.

lazy val referenceFileOpt: Option[File] = artifact.extra.get("metadata").map { a =>
cache.localFile(a.url, a.authentication.map(_.user))
}
lazy val cacheErrors: Boolean =
(artifact.changing && artifact.extra.contains("cache-errors")) ||
referenceFileOpt.exists(_.exists())

val checkErrFile: Task[Unit] = Task { _ =>
if (errFile(artifactFile).exists()) {
Future.failed(new ArtifactError.NotFound(artifact.url, permanent = Some(true)))
} else {
Future.unit
}
}

// Some Maven repositories (like Artifactory) returns hash digests as HTTP headers in GET/HEAD requests.
// If the user opts into doing so via hashInHttpHeaders, we use a HEAD request to get the checksums and the
// file size rather than by downloading the file itself.
//
// If the HEAD request fails in a Recoverable way, we fall back to downloading the file.
case class Recoverable(e: Throwable) extends Throwable(e)

val headRequest: Task[(Artifact, ShaValue, Long)] =
if (hashInHttpHeaders) {
// Cache all HTTP headers in a JSON file named .<artifact-filename>__headers in the Coursier cache directory
// in a way similar to Coursier's FileCache.
val headersPath: Path = auxiliaryFile(artifactFile, "headers").toPath

Task.schedule(CoursierResolver.downloadPool) {
// Since we use atomic moves, we can guarantee that if the header file exists, it is complete.
if (!Files.exists(headersPath)) {
val tmp = CachePath.temporaryFile(headersPath.toFile).toPath
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we factor lines 197 - 246 into a method such as fetchHeadersToPath(...) or something.

// When creating directories in the cache directory, we need to take a "structure lock".
CacheLocks.withStructureLock(cache.location) {
PathUtil.createDirectories(tmp.getParent)
PathUtil.createDirectories(headersPath.getParent)
}
// Use JVM and file system locks to ensure that only one thread downloads the headers.
CacheLocks.withLockOr(cache.location, headersPath.toFile)(
if (!Files.exists(headersPath)) { // double-checked locking
// Download the headers.
val req = HttpRequest.newBuilder(new URI(artifact.url))
.method("HEAD", HttpRequest.BodyPublishers.noBody)
.build
val resp = httpClient.send(req, HttpResponse.BodyHandlers.discarding)
resp.statusCode() match {
case 404 =>
if (cacheErrors) {
try Files.createFile(errFile(artifactFile).toPath)
catch {
case e: java.nio.file.FileAlreadyExistsException => ()
}
} else {
// println(s"not caching error for $artifact")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we remove the commented code or use a logging api here (I think other files are using slf4j if I remember correctly).

}
throw new ArtifactError.NotFound(artifact.url, permanent = Some(true))
case 401 =>
throw new ArtifactError.Unauthorized(artifact.url, realm = None)
case sc if sc >= 400 =>
throw new ArtifactError.DownloadError(s"failed to download: $sc", None)
case _ =>
}

val headers = resp.headers
val headersMap = headers.map.asScala.map({ case (k, vs) => (k, vs.asScala) })
val headersJson = headersMap.asJson.noSpaces

// Write to a temporary file first.
val writer = Files.newBufferedWriter(tmp)
try writer.write(headersJson)
finally writer.close()

// Atomic move to the final location.
Files.move(tmp, headersPath, StandardCopyOption.ATOMIC_MOVE)
()
} else (),
// If we couldn't get the lock, try again unless the file has been created.
if (Files.exists(headersPath)) Some(())
else None
)
}
}
.flatMap(_ => resolverMonad.fromTry(
Try(Files.readString(headersPath))
.flatMap((new JawnParser).decode[Map[String, List[String]]](_) match {
case Left(error) =>
Failure(Recoverable(new RuntimeException(s"failed to parse headers file $headersPath", error)))
case Right(obj) => Success(obj)
})
.map((headerMap) => HttpHeaders.of(headerMap.map { case (k, v) => (k, v.asJava) }.asJava, { (_, _) => true }))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: You don't need (headerMap) it is the same as .map(headerMap => HttpHeaders...

.flatMap { headers =>
Try((
headers
.firstValue(digestType match {
// See also https://maven.apache.org/resolver/expected-checksums.html#non-standard-x-headers
case DigestType.Sha1 => "x-checksum-sha1"
case DigestType.Sha256 => "x-checksum-sha256"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this should be moved to a method on DigestType? so this function is: headers.firstValue(digestType.mavenHeader) or something?

Also what about x-goog-meta-checksum-sha1 and x-goog-meta-checksum-md5?

Maybe in fact we need to something like:

sealed trait DigestType {
  def getDigestInstance: MessageDigest
  def expectedHexLength: Int
  def name: String
  def headerKeys: List[String]
}

then do:

Try {
val digest = digestType
  .headerKeys
  .flatMap(key => headers.firstValue(key).asScala)
  .headOption
  .getOrElse(throw new Recoverable(new RuntimeException(s"no ${digestType} found in headers in $headersPath"))

val len = headers
  .firstValueAsLong("Content-Length")
  .orElseThrow(() =>
    Recoverable(new RuntimeException(s"no Content-Length found in headers $headersPath"))
  )

(digest, len)
}

})
.orElseThrow(() => Recoverable(new RuntimeException(s"no ${digestType} found in headers in $headersPath"))),
headers.firstValueAsLong("Content-Length")
.orElseThrow(() => Recoverable(new RuntimeException(s"no Content-Length found in headers $headersPath")))
))
}
.map { case (sha, length) => (artifact, ShaValue(sha, digestType), length) }
))
} else Task.fail(Recoverable(new RuntimeException("skipped HEAD request")))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this if for this else is very far some here... maybe it will get closer if we take the body of the if and put it in a function or method.


val downloadFile: Task[(Artifact, ShaValue, Long)] =
cache.file(artifact).run.flatMap { e =>
resolverMonad.fromTry(e match {
case Left(error) =>
// println(s"Tried to download $artifact but failed.")
Expand All @@ -147,6 +282,16 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru
}
})
}

checkErrFile
.flatMap(_ => headRequest.attempt)
.flatMap {
case Right(r) => Task.point(r)
case Left(e: Recoverable) =>
// println(s"falling back to downloading the whole file: $e")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we remove the commented code please?

downloadFile
case Left(e) => Task.fail(e)
}
}

def computeShas(
Expand Down
12 changes: 11 additions & 1 deletion src/scala/com/github/johnynek/bazel_deps/Decoders.scala
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ object Decoders {
implicit val resolverTypeDecoder: Decoder[ResolverType] = {
Decoder.decodeString.emap {
case "aether" => Right(ResolverType.Aether)
case "coursier" => Right(ResolverType.Coursier)
case "coursier" => Right(ResolverType.Coursier.empty)
case "gradle" => Right(ResolverType.Gradle.empty)
case other => Left(s"unrecogized resolverType: $other")
}
Expand All @@ -157,6 +157,8 @@ object Decoders {
Decoder.decodeBoolean.map(bool => StrictVisibility(bool))
}

implicit val coursierDecoder =
auto.exportDecoder[ResolverType.Coursier].instance
implicit val gradleDecoder =
auto.exportDecoder[ResolverType.Gradle].instance

Expand All @@ -166,6 +168,14 @@ object Decoders {
override def apply(c: HCursor): Result[Options] =
baseOptions(c).flatMap { b =>
b.resolverType match {
case Some(cr: ResolverType.Coursier) =>
// in the baseOptions the resolver type is empty
// here we parse resolverOptions as if it is the default
// decoder (using coursierDecoder) to get the Coursier options
c.get[Option[ResolverType.Coursier]]("resolverOptions").map {
case nonEmpty @ Some(_) => b.copy(resolverType = nonEmpty)
case None => b
}
case Some(g: ResolverType.Gradle) =>
// in the baseOptions the resolver type is empty
// here we parse resolverOptions as if it is the default
Expand Down
40 changes: 35 additions & 5 deletions src/scala/com/github/johnynek/bazel_deps/DepsModel.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1523,8 +1523,37 @@ object ResolverType {
case object Aether extends ResolverType("aether") {
override def optionsDoc: Option[Doc] = None
}
case object Coursier extends ResolverType("coursier") {
override def optionsDoc: Option[Doc] = None
case class Coursier(
hashInHttpHeaders: Option[Boolean]
) extends ResolverType("coursier") {
def getHashInHttpHeaders: Boolean = hashInHttpHeaders.getOrElse(false)

override def optionsDoc: Option[Doc] = {

val items = List(
(
"hashInHttpHeaders",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not really sure why we wouldn't always want to enable this if it works. Can you think of a reason? Can you document the reason here so someone reading the code can remember.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, it doesn't work with all Maven repository servers. E.g., https://repo1.maven.org/maven2/ only supplies SHA-1 checksums, so it would be a waste to enable this option if that's the repo that's getting used.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense.

hashInHttpHeaders.map(b => Doc.text(s"$b"))
),
).sortBy(_._1)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this was copied, but we don't need to sort a list of length 1. but you can leave it if you add a comment that this is here so we don't forget if we add more options.

.collect { case (k, Some(v)) => (k, v) }

// we can't pack resolvers (yet)
Some(packedYamlMap(items))
}
}

object Coursier {
def empty = Coursier(None)
implicit val coursierMonoid: Monoid[Coursier] = new Monoid[Coursier] {
override val empty = Coursier.empty

override def combine(a: Coursier, b: Coursier): Coursier = {
Coursier(
hashInHttpHeaders = b.hashInHttpHeaders.orElse(a.hashInHttpHeaders)
)
}
}
}

case class Gradle(
Expand Down Expand Up @@ -1634,14 +1663,15 @@ object ResolverType {
}
}

val default = Coursier
val default = Coursier.empty

implicit val resolverSemigroup: Semigroup[ResolverType] =
new Semigroup[ResolverType] {
override def combine(x: ResolverType, y: ResolverType): ResolverType = {
(x, y) match {
case (l: Gradle, r: Gradle) => Monoid.combine(l, r)
case (_, r) => r
case (l: Gradle, r: Gradle) => Monoid.combine(l, r)
case (l: Coursier, r: Coursier) => Monoid.combine(l, r)
case (_, r) => r
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/scala/com/github/johnynek/bazel_deps/MakeDeps.scala
Original file line number Diff line number Diff line change
Expand Up @@ -193,19 +193,19 @@ object MakeDeps {
model.getOptions.getResolvers.collect { case e: MavenServer => e },
resolverCachePath)
)
case ResolverType.Coursier =>
case c: ResolverType.Coursier =>
val ec = scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
resolve(
model,
new CoursierResolver(model.getOptions.getResolvers, ec, 3600.seconds, resolverCachePath)
new CoursierResolver(model.getOptions.getResolvers, c.getHashInHttpHeaders, ec, 3600.seconds, resolverCachePath)
)
case g: ResolverType.Gradle =>
val ec = scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

lazy val coursierResolver =
new CoursierResolver(model.getOptions.getResolvers, ec, 3600.seconds, resolverCachePath)
new CoursierResolver(model.getOptions.getResolvers, false, ec, 3600.seconds, resolverCachePath)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as above: if this works, shouldn't true be the default? Why would you set false? Maybe if you know with certainty your http service doesn't give out the headers?


val resolver = new GradleResolver(
rootPath,
Expand Down