diff --git a/src/scala/com/github/johnynek/bazel_deps/BUILD.bazel b/src/scala/com/github/johnynek/bazel_deps/BUILD.bazel index 33b65e9..cc0bfd0 100644 --- a/src/scala/com/github/johnynek/bazel_deps/BUILD.bazel +++ b/src/scala/com/github/johnynek/bazel_deps/BUILD.bazel @@ -85,6 +85,8 @@ scala_library( ":graph", ":resolver", ":settings_loader", + "//3rdparty/jvm/io/circe:circe_core", + "//3rdparty/jvm/io/circe:circe_jawn", "//3rdparty/jvm/io/get_coursier:coursier", "//3rdparty/jvm/io/get_coursier:coursier_cache", "//3rdparty/jvm/io/get_coursier:coursier_core", diff --git a/src/scala/com/github/johnynek/bazel_deps/CoursierResolver.scala b/src/scala/com/github/johnynek/bazel_deps/CoursierResolver.scala index d91daee..18da16f 100644 --- a/src/scala/com/github/johnynek/bazel_deps/CoursierResolver.scala +++ b/src/scala/com/github/johnynek/bazel_deps/CoursierResolver.scala @@ -1,18 +1,26 @@ package com.github.johnynek.bazel_deps import coursier.{Dependency, ResolutionProcess, Project, Resolution} -import coursier.cache.{FileCache, CachePolicy} +import coursier.cache.{ArtifactError, CacheLocks, FileCache, CachePolicy} +import coursier.paths.{CachePath, Util => PathUtil} import coursier.util.{Artifact, Task} import cats.MonadError import cats.data.{Nested, NonEmptyList, Validated, ValidatedNel} import cats.implicits._ import coursier.LocalRepositories import coursier.core._ -import java.nio.file.Path +import java.io.File +import java.net.http.{HttpClient, HttpHeaders, HttpRequest, HttpResponse} +import java.net.{HttpURLConnection, URI, URL} +import java.nio.file.{Files, Path, StandardCopyOption} +import java.util.Locale import org.slf4j.LoggerFactory +import io.circe.jawn.JawnParser +import io.circe.syntax._ import coursier.ivy._ +import scala.collection.JavaConverters._ import scala.collection.immutable.SortedMap -import scala.util.{Failure, Try} +import scala.util.{Failure, Success, Try} import scala.concurrent.{Await, ExecutionContext, Future} import scala.concurrent.duration.Duration @@ -20,7 +28,7 @@ object CoursierResolver { // 12 concurrent downloads // most downloads are tiny sha downloads so try keep things alive lazy val downloadPool = { - import java.util.concurrent.{ExecutorService, Executors, ThreadFactory} + import java.util.concurrent.{Executors, ThreadFactory} Executors.newFixedThreadPool( 12, // from scalaz.concurrent.Strategy.DefaultDaemonThreadFactory @@ -37,7 +45,7 @@ object CoursierResolver { } } -class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, runTimeout: Duration, resolverCachePath: Path) extends NormalizingResolver[Task] { +class CoursierResolver(servers: List[DependencyServer], hashInHttpHeaders: Boolean, ec: ExecutionContext, runTimeout: Duration, resolverCachePath: Path) extends NormalizingResolver[Task] { // TODO: add support for a local file cache other than ivy private[this] val repos = LocalRepositories.ivy2Local :: { val settings = SettingsLoader.settings @@ -66,6 +74,18 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru } } + // Copied from coursier.cache.FileCache + private def auxiliaryFilePrefix(file: File): String = + s".${file.getName}__" + + private def auxiliaryFile(file: File, key: String): File = { + val key0 = key.toLowerCase(Locale.ROOT).filter(_ != '-') + new File(file.getParentFile, s"${auxiliaryFilePrefix(file)}$key0") + } + + // Copied from coursier.cache.internal.Downloader + private def errFile(file: File) = new File(file.getParentFile, "." + file.getName + ".error") + private[this] def makeCache() = Option(resolverCachePath) match { case None => FileCache() @@ -77,6 +97,8 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru .withPool(CoursierResolver.downloadPool) .fetch) + private[this] val httpClient = HttpClient.newHttpClient() + private[this] val logger = LoggerFactory.getLogger("bazel_deps.CoursierResolver") @@ -131,12 +153,125 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru digestType: DigestType, artifact: Artifact ): Task[(Artifact, ShaValue, Long)] = { - makeCache() + + val cache = makeCache() .withCachePolicies(Seq(CachePolicy.FetchMissing)) .withPool(CoursierResolver.downloadPool) - .file(artifact) - .run - .flatMap { e => + val artifactFile = cache.localFile(artifact.url, artifact.authentication.map(_.user)) + + // Error caching logic copied from coursier.cache.internal.Downloader. + // If the .pom file exists for an artifact but the artifact itself doesn't, + // assume that the artifact will always remain missing. + + lazy val referenceFileOpt: Option[File] = artifact.extra.get("metadata").map { a => + cache.localFile(a.url, a.authentication.map(_.user)) + } + lazy val cacheErrors: Boolean = + (artifact.changing && artifact.extra.contains("cache-errors")) || + referenceFileOpt.exists(_.exists()) + + val checkErrFile: Task[Unit] = Task { _ => + if (errFile(artifactFile).exists()) { + Future.failed(new ArtifactError.NotFound(artifact.url, permanent = Some(true))) + } else { + Future.unit + } + } + + // Some Maven repositories (like Artifactory) returns hash digests as HTTP headers in GET/HEAD requests. + // If the user opts into doing so via hashInHttpHeaders, we use a HEAD request to get the checksums and the + // file size rather than by downloading the file itself. + // + // If the HEAD request fails in a Recoverable way, we fall back to downloading the file. + case class Recoverable(e: Throwable) extends Throwable(e) + + val headRequest: Task[(Artifact, ShaValue, Long)] = + if (hashInHttpHeaders) { + // Cache all HTTP headers in a JSON file named .__headers in the Coursier cache directory + // in a way similar to Coursier's FileCache. + val headersPath: Path = auxiliaryFile(artifactFile, "headers").toPath + + Task.schedule(CoursierResolver.downloadPool) { + // Since we use atomic moves, we can guarantee that if the header file exists, it is complete. + if (!Files.exists(headersPath)) { + val tmp = CachePath.temporaryFile(headersPath.toFile).toPath + // When creating directories in the cache directory, we need to take a "structure lock". + CacheLocks.withStructureLock(cache.location) { + PathUtil.createDirectories(tmp.getParent) + PathUtil.createDirectories(headersPath.getParent) + } + // Use JVM and file system locks to ensure that only one thread downloads the headers. + CacheLocks.withLockOr(cache.location, headersPath.toFile)( + if (!Files.exists(headersPath)) { // double-checked locking + // Download the headers. + val req = HttpRequest.newBuilder(new URI(artifact.url)) + .method("HEAD", HttpRequest.BodyPublishers.noBody) + .build + val resp = httpClient.send(req, HttpResponse.BodyHandlers.discarding) + resp.statusCode() match { + case 404 => + if (cacheErrors) { + try Files.createFile(errFile(artifactFile).toPath) + catch { + case e: java.nio.file.FileAlreadyExistsException => () + } + } else { + // println(s"not caching error for $artifact") + } + throw new ArtifactError.NotFound(artifact.url, permanent = Some(true)) + case 401 => + throw new ArtifactError.Unauthorized(artifact.url, realm = None) + case sc if sc >= 400 => + throw new ArtifactError.DownloadError(s"failed to download: $sc", None) + case _ => + } + + val headers = resp.headers + val headersMap = headers.map.asScala.map({ case (k, vs) => (k, vs.asScala) }) + val headersJson = headersMap.asJson.noSpaces + + // Write to a temporary file first. + val writer = Files.newBufferedWriter(tmp) + try writer.write(headersJson) + finally writer.close() + + // Atomic move to the final location. + Files.move(tmp, headersPath, StandardCopyOption.ATOMIC_MOVE) + () + } else (), + // If we couldn't get the lock, try again unless the file has been created. + if (Files.exists(headersPath)) Some(()) + else None + ) + } + } + .flatMap(_ => resolverMonad.fromTry( + Try(Files.readString(headersPath)) + .flatMap((new JawnParser).decode[Map[String, List[String]]](_) match { + case Left(error) => + Failure(Recoverable(new RuntimeException(s"failed to parse headers file $headersPath", error))) + case Right(obj) => Success(obj) + }) + .map((headerMap) => HttpHeaders.of(headerMap.map { case (k, v) => (k, v.asJava) }.asJava, { (_, _) => true })) + .flatMap { headers => + Try(( + headers + .firstValue(digestType match { + // See also https://maven.apache.org/resolver/expected-checksums.html#non-standard-x-headers + case DigestType.Sha1 => "x-checksum-sha1" + case DigestType.Sha256 => "x-checksum-sha256" + }) + .orElseThrow(() => Recoverable(new RuntimeException(s"no ${digestType} found in headers in $headersPath"))), + headers.firstValueAsLong("Content-Length") + .orElseThrow(() => Recoverable(new RuntimeException(s"no Content-Length found in headers $headersPath"))) + )) + } + .map { case (sha, length) => (artifact, ShaValue(sha, digestType), length) } + )) + } else Task.fail(Recoverable(new RuntimeException("skipped HEAD request"))) + + val downloadFile: Task[(Artifact, ShaValue, Long)] = + cache.file(artifact).run.flatMap { e => resolverMonad.fromTry(e match { case Left(error) => // println(s"Tried to download $artifact but failed.") @@ -147,6 +282,16 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru } }) } + + checkErrFile + .flatMap(_ => headRequest.attempt) + .flatMap { + case Right(r) => Task.point(r) + case Left(e: Recoverable) => + // println(s"falling back to downloading the whole file: $e") + downloadFile + case Left(e) => Task.fail(e) + } } def computeShas( diff --git a/src/scala/com/github/johnynek/bazel_deps/Decoders.scala b/src/scala/com/github/johnynek/bazel_deps/Decoders.scala index badf6c0..fefe654 100644 --- a/src/scala/com/github/johnynek/bazel_deps/Decoders.scala +++ b/src/scala/com/github/johnynek/bazel_deps/Decoders.scala @@ -136,7 +136,7 @@ object Decoders { implicit val resolverTypeDecoder: Decoder[ResolverType] = { Decoder.decodeString.emap { case "aether" => Right(ResolverType.Aether) - case "coursier" => Right(ResolverType.Coursier) + case "coursier" => Right(ResolverType.Coursier.empty) case "gradle" => Right(ResolverType.Gradle.empty) case other => Left(s"unrecogized resolverType: $other") } @@ -157,6 +157,8 @@ object Decoders { Decoder.decodeBoolean.map(bool => StrictVisibility(bool)) } + implicit val coursierDecoder = + auto.exportDecoder[ResolverType.Coursier].instance implicit val gradleDecoder = auto.exportDecoder[ResolverType.Gradle].instance @@ -166,6 +168,14 @@ object Decoders { override def apply(c: HCursor): Result[Options] = baseOptions(c).flatMap { b => b.resolverType match { + case Some(cr: ResolverType.Coursier) => + // in the baseOptions the resolver type is empty + // here we parse resolverOptions as if it is the default + // decoder (using coursierDecoder) to get the Coursier options + c.get[Option[ResolverType.Coursier]]("resolverOptions").map { + case nonEmpty @ Some(_) => b.copy(resolverType = nonEmpty) + case None => b + } case Some(g: ResolverType.Gradle) => // in the baseOptions the resolver type is empty // here we parse resolverOptions as if it is the default diff --git a/src/scala/com/github/johnynek/bazel_deps/DepsModel.scala b/src/scala/com/github/johnynek/bazel_deps/DepsModel.scala index f9a24a3..a9128df 100644 --- a/src/scala/com/github/johnynek/bazel_deps/DepsModel.scala +++ b/src/scala/com/github/johnynek/bazel_deps/DepsModel.scala @@ -1523,8 +1523,37 @@ object ResolverType { case object Aether extends ResolverType("aether") { override def optionsDoc: Option[Doc] = None } - case object Coursier extends ResolverType("coursier") { - override def optionsDoc: Option[Doc] = None + case class Coursier( + hashInHttpHeaders: Option[Boolean] + ) extends ResolverType("coursier") { + def getHashInHttpHeaders: Boolean = hashInHttpHeaders.getOrElse(false) + + override def optionsDoc: Option[Doc] = { + + val items = List( + ( + "hashInHttpHeaders", + hashInHttpHeaders.map(b => Doc.text(s"$b")) + ), + ).sortBy(_._1) + .collect { case (k, Some(v)) => (k, v) } + + // we can't pack resolvers (yet) + Some(packedYamlMap(items)) + } + } + + object Coursier { + def empty = Coursier(None) + implicit val coursierMonoid: Monoid[Coursier] = new Monoid[Coursier] { + override val empty = Coursier.empty + + override def combine(a: Coursier, b: Coursier): Coursier = { + Coursier( + hashInHttpHeaders = b.hashInHttpHeaders.orElse(a.hashInHttpHeaders) + ) + } + } } case class Gradle( @@ -1634,14 +1663,15 @@ object ResolverType { } } - val default = Coursier + val default = Coursier.empty implicit val resolverSemigroup: Semigroup[ResolverType] = new Semigroup[ResolverType] { override def combine(x: ResolverType, y: ResolverType): ResolverType = { (x, y) match { - case (l: Gradle, r: Gradle) => Monoid.combine(l, r) - case (_, r) => r + case (l: Gradle, r: Gradle) => Monoid.combine(l, r) + case (l: Coursier, r: Coursier) => Monoid.combine(l, r) + case (_, r) => r } } } diff --git a/src/scala/com/github/johnynek/bazel_deps/MakeDeps.scala b/src/scala/com/github/johnynek/bazel_deps/MakeDeps.scala index 87c1867..543a604 100644 --- a/src/scala/com/github/johnynek/bazel_deps/MakeDeps.scala +++ b/src/scala/com/github/johnynek/bazel_deps/MakeDeps.scala @@ -193,19 +193,19 @@ object MakeDeps { model.getOptions.getResolvers.collect { case e: MavenServer => e }, resolverCachePath) ) - case ResolverType.Coursier => + case c: ResolverType.Coursier => val ec = scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ resolve( model, - new CoursierResolver(model.getOptions.getResolvers, ec, 3600.seconds, resolverCachePath) + new CoursierResolver(model.getOptions.getResolvers, c.getHashInHttpHeaders, ec, 3600.seconds, resolverCachePath) ) case g: ResolverType.Gradle => val ec = scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ lazy val coursierResolver = - new CoursierResolver(model.getOptions.getResolvers, ec, 3600.seconds, resolverCachePath) + new CoursierResolver(model.getOptions.getResolvers, false, ec, 3600.seconds, resolverCachePath) val resolver = new GradleResolver( rootPath,