-
Notifications
You must be signed in to change notification settings - Fork 119
Add hashInHttpHeaders option for Coursier resolver #383
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,26 +1,34 @@ | ||
| package com.github.johnynek.bazel_deps | ||
|
|
||
| import coursier.{Dependency, ResolutionProcess, Project, Resolution} | ||
| import coursier.cache.{FileCache, CachePolicy} | ||
| import coursier.cache.{ArtifactError, CacheLocks, FileCache, CachePolicy} | ||
| import coursier.paths.{CachePath, Util => PathUtil} | ||
| import coursier.util.{Artifact, Task} | ||
| import cats.MonadError | ||
| import cats.data.{Nested, NonEmptyList, Validated, ValidatedNel} | ||
| import cats.implicits._ | ||
| import coursier.LocalRepositories | ||
| import coursier.core._ | ||
| import java.nio.file.Path | ||
| import java.io.File | ||
| import java.net.http.{HttpClient, HttpHeaders, HttpRequest, HttpResponse} | ||
| import java.net.{HttpURLConnection, URI, URL} | ||
| import java.nio.file.{Files, Path, StandardCopyOption} | ||
| import java.util.Locale | ||
| import org.slf4j.LoggerFactory | ||
| import io.circe.jawn.JawnParser | ||
| import io.circe.syntax._ | ||
| import coursier.ivy._ | ||
| import scala.collection.JavaConverters._ | ||
| import scala.collection.immutable.SortedMap | ||
| import scala.util.{Failure, Try} | ||
| import scala.util.{Failure, Success, Try} | ||
| import scala.concurrent.{Await, ExecutionContext, Future} | ||
| import scala.concurrent.duration.Duration | ||
|
|
||
| object CoursierResolver { | ||
| // 12 concurrent downloads | ||
| // most downloads are tiny sha downloads so try keep things alive | ||
| lazy val downloadPool = { | ||
| import java.util.concurrent.{ExecutorService, Executors, ThreadFactory} | ||
| import java.util.concurrent.{Executors, ThreadFactory} | ||
| Executors.newFixedThreadPool( | ||
| 12, | ||
| // from scalaz.concurrent.Strategy.DefaultDaemonThreadFactory | ||
|
|
@@ -37,7 +45,7 @@ object CoursierResolver { | |
| } | ||
| } | ||
|
|
||
| class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, runTimeout: Duration, resolverCachePath: Path) extends NormalizingResolver[Task] { | ||
| class CoursierResolver(servers: List[DependencyServer], hashInHttpHeaders: Boolean, ec: ExecutionContext, runTimeout: Duration, resolverCachePath: Path) extends NormalizingResolver[Task] { | ||
| // TODO: add support for a local file cache other than ivy | ||
| private[this] val repos = LocalRepositories.ivy2Local :: { | ||
| val settings = SettingsLoader.settings | ||
|
|
@@ -66,6 +74,18 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru | |
| } | ||
| } | ||
|
|
||
| // Copied from coursier.cache.FileCache | ||
| private def auxiliaryFilePrefix(file: File): String = | ||
| s".${file.getName}__" | ||
|
|
||
| private def auxiliaryFile(file: File, key: String): File = { | ||
| val key0 = key.toLowerCase(Locale.ROOT).filter(_ != '-') | ||
| new File(file.getParentFile, s"${auxiliaryFilePrefix(file)}$key0") | ||
| } | ||
|
|
||
| // Copied from coursier.cache.internal.Downloader | ||
| private def errFile(file: File) = new File(file.getParentFile, "." + file.getName + ".error") | ||
|
|
||
| private[this] def makeCache() = | ||
| Option(resolverCachePath) match { | ||
| case None => FileCache() | ||
|
|
@@ -77,6 +97,8 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru | |
| .withPool(CoursierResolver.downloadPool) | ||
| .fetch) | ||
|
|
||
| private[this] val httpClient = HttpClient.newHttpClient() | ||
|
|
||
| private[this] val logger = | ||
| LoggerFactory.getLogger("bazel_deps.CoursierResolver") | ||
|
|
||
|
|
@@ -131,12 +153,125 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru | |
| digestType: DigestType, | ||
| artifact: Artifact | ||
| ): Task[(Artifact, ShaValue, Long)] = { | ||
| makeCache() | ||
|
|
||
| val cache = makeCache() | ||
| .withCachePolicies(Seq(CachePolicy.FetchMissing)) | ||
| .withPool(CoursierResolver.downloadPool) | ||
| .file(artifact) | ||
| .run | ||
| .flatMap { e => | ||
| val artifactFile = cache.localFile(artifact.url, artifact.authentication.map(_.user)) | ||
|
|
||
| // Error caching logic copied from coursier.cache.internal.Downloader. | ||
| // If the .pom file exists for an artifact but the artifact itself doesn't, | ||
| // assume that the artifact will always remain missing. | ||
|
|
||
| lazy val referenceFileOpt: Option[File] = artifact.extra.get("metadata").map { a => | ||
| cache.localFile(a.url, a.authentication.map(_.user)) | ||
| } | ||
| lazy val cacheErrors: Boolean = | ||
| (artifact.changing && artifact.extra.contains("cache-errors")) || | ||
| referenceFileOpt.exists(_.exists()) | ||
|
|
||
| val checkErrFile: Task[Unit] = Task { _ => | ||
| if (errFile(artifactFile).exists()) { | ||
| Future.failed(new ArtifactError.NotFound(artifact.url, permanent = Some(true))) | ||
| } else { | ||
| Future.unit | ||
| } | ||
| } | ||
|
|
||
| // Some Maven repositories (like Artifactory) returns hash digests as HTTP headers in GET/HEAD requests. | ||
| // If the user opts into doing so via hashInHttpHeaders, we use a HEAD request to get the checksums and the | ||
| // file size rather than by downloading the file itself. | ||
| // | ||
| // If the HEAD request fails in a Recoverable way, we fall back to downloading the file. | ||
| case class Recoverable(e: Throwable) extends Throwable(e) | ||
|
|
||
| val headRequest: Task[(Artifact, ShaValue, Long)] = | ||
| if (hashInHttpHeaders) { | ||
| // Cache all HTTP headers in a JSON file named .<artifact-filename>__headers in the Coursier cache directory | ||
| // in a way similar to Coursier's FileCache. | ||
| val headersPath: Path = auxiliaryFile(artifactFile, "headers").toPath | ||
|
|
||
| Task.schedule(CoursierResolver.downloadPool) { | ||
| // Since we use atomic moves, we can guarantee that if the header file exists, it is complete. | ||
| if (!Files.exists(headersPath)) { | ||
| val tmp = CachePath.temporaryFile(headersPath.toFile).toPath | ||
| // When creating directories in the cache directory, we need to take a "structure lock". | ||
| CacheLocks.withStructureLock(cache.location) { | ||
| PathUtil.createDirectories(tmp.getParent) | ||
| PathUtil.createDirectories(headersPath.getParent) | ||
| } | ||
| // Use JVM and file system locks to ensure that only one thread downloads the headers. | ||
| CacheLocks.withLockOr(cache.location, headersPath.toFile)( | ||
| if (!Files.exists(headersPath)) { // double-checked locking | ||
| // Download the headers. | ||
| val req = HttpRequest.newBuilder(new URI(artifact.url)) | ||
| .method("HEAD", HttpRequest.BodyPublishers.noBody) | ||
| .build | ||
| val resp = httpClient.send(req, HttpResponse.BodyHandlers.discarding) | ||
| resp.statusCode() match { | ||
| case 404 => | ||
| if (cacheErrors) { | ||
| try Files.createFile(errFile(artifactFile).toPath) | ||
| catch { | ||
| case e: java.nio.file.FileAlreadyExistsException => () | ||
| } | ||
| } else { | ||
| // println(s"not caching error for $artifact") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we remove the commented code or use a logging api here (I think other files are using slf4j if I remember correctly). |
||
| } | ||
| throw new ArtifactError.NotFound(artifact.url, permanent = Some(true)) | ||
| case 401 => | ||
| throw new ArtifactError.Unauthorized(artifact.url, realm = None) | ||
| case sc if sc >= 400 => | ||
| throw new ArtifactError.DownloadError(s"failed to download: $sc", None) | ||
| case _ => | ||
| } | ||
|
|
||
| val headers = resp.headers | ||
| val headersMap = headers.map.asScala.map({ case (k, vs) => (k, vs.asScala) }) | ||
| val headersJson = headersMap.asJson.noSpaces | ||
|
|
||
| // Write to a temporary file first. | ||
| val writer = Files.newBufferedWriter(tmp) | ||
| try writer.write(headersJson) | ||
| finally writer.close() | ||
|
|
||
| // Atomic move to the final location. | ||
| Files.move(tmp, headersPath, StandardCopyOption.ATOMIC_MOVE) | ||
| () | ||
| } else (), | ||
| // If we couldn't get the lock, try again unless the file has been created. | ||
| if (Files.exists(headersPath)) Some(()) | ||
| else None | ||
| ) | ||
| } | ||
| } | ||
| .flatMap(_ => resolverMonad.fromTry( | ||
| Try(Files.readString(headersPath)) | ||
| .flatMap((new JawnParser).decode[Map[String, List[String]]](_) match { | ||
| case Left(error) => | ||
| Failure(Recoverable(new RuntimeException(s"failed to parse headers file $headersPath", error))) | ||
| case Right(obj) => Success(obj) | ||
| }) | ||
| .map((headerMap) => HttpHeaders.of(headerMap.map { case (k, v) => (k, v.asJava) }.asJava, { (_, _) => true })) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: You don't need |
||
| .flatMap { headers => | ||
| Try(( | ||
| headers | ||
| .firstValue(digestType match { | ||
| // See also https://maven.apache.org/resolver/expected-checksums.html#non-standard-x-headers | ||
| case DigestType.Sha1 => "x-checksum-sha1" | ||
| case DigestType.Sha256 => "x-checksum-sha256" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if this should be moved to a method on Also what about Maybe in fact we need to something like: sealed trait DigestType {
def getDigestInstance: MessageDigest
def expectedHexLength: Int
def name: String
def headerKeys: List[String]
}then do: Try {
val digest = digestType
.headerKeys
.flatMap(key => headers.firstValue(key).asScala)
.headOption
.getOrElse(throw new Recoverable(new RuntimeException(s"no ${digestType} found in headers in $headersPath"))
val len = headers
.firstValueAsLong("Content-Length")
.orElseThrow(() =>
Recoverable(new RuntimeException(s"no Content-Length found in headers $headersPath"))
)
(digest, len)
} |
||
| }) | ||
| .orElseThrow(() => Recoverable(new RuntimeException(s"no ${digestType} found in headers in $headersPath"))), | ||
| headers.firstValueAsLong("Content-Length") | ||
| .orElseThrow(() => Recoverable(new RuntimeException(s"no Content-Length found in headers $headersPath"))) | ||
| )) | ||
| } | ||
| .map { case (sha, length) => (artifact, ShaValue(sha, digestType), length) } | ||
| )) | ||
| } else Task.fail(Recoverable(new RuntimeException("skipped HEAD request"))) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this if for this else is very far some here... maybe it will get closer if we take the body of the if and put it in a function or method. |
||
|
|
||
| val downloadFile: Task[(Artifact, ShaValue, Long)] = | ||
| cache.file(artifact).run.flatMap { e => | ||
| resolverMonad.fromTry(e match { | ||
| case Left(error) => | ||
| // println(s"Tried to download $artifact but failed.") | ||
|
|
@@ -147,6 +282,16 @@ class CoursierResolver(servers: List[DependencyServer], ec: ExecutionContext, ru | |
| } | ||
| }) | ||
| } | ||
|
|
||
| checkErrFile | ||
| .flatMap(_ => headRequest.attempt) | ||
| .flatMap { | ||
| case Right(r) => Task.point(r) | ||
| case Left(e: Recoverable) => | ||
| // println(s"falling back to downloading the whole file: $e") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we remove the commented code please? |
||
| downloadFile | ||
| case Left(e) => Task.fail(e) | ||
| } | ||
| } | ||
|
|
||
| def computeShas( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1523,8 +1523,37 @@ object ResolverType { | |
| case object Aether extends ResolverType("aether") { | ||
| override def optionsDoc: Option[Doc] = None | ||
| } | ||
| case object Coursier extends ResolverType("coursier") { | ||
| override def optionsDoc: Option[Doc] = None | ||
| case class Coursier( | ||
| hashInHttpHeaders: Option[Boolean] | ||
| ) extends ResolverType("coursier") { | ||
| def getHashInHttpHeaders: Boolean = hashInHttpHeaders.getOrElse(false) | ||
|
|
||
| override def optionsDoc: Option[Doc] = { | ||
|
|
||
| val items = List( | ||
| ( | ||
| "hashInHttpHeaders", | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not really sure why we wouldn't always want to enable this if it works. Can you think of a reason? Can you document the reason here so someone reading the code can remember.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately, it doesn't work with all Maven repository servers. E.g., https://repo1.maven.org/maven2/ only supplies SHA-1 checksums, so it would be a waste to enable this option if that's the repo that's getting used.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. makes sense. |
||
| hashInHttpHeaders.map(b => Doc.text(s"$b")) | ||
| ), | ||
| ).sortBy(_._1) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess this was copied, but we don't need to sort a list of length 1. but you can leave it if you add a comment that this is here so we don't forget if we add more options. |
||
| .collect { case (k, Some(v)) => (k, v) } | ||
|
|
||
| // we can't pack resolvers (yet) | ||
| Some(packedYamlMap(items)) | ||
| } | ||
| } | ||
|
|
||
| object Coursier { | ||
| def empty = Coursier(None) | ||
| implicit val coursierMonoid: Monoid[Coursier] = new Monoid[Coursier] { | ||
| override val empty = Coursier.empty | ||
|
|
||
| override def combine(a: Coursier, b: Coursier): Coursier = { | ||
| Coursier( | ||
| hashInHttpHeaders = b.hashInHttpHeaders.orElse(a.hashInHttpHeaders) | ||
| ) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| case class Gradle( | ||
|
|
@@ -1634,14 +1663,15 @@ object ResolverType { | |
| } | ||
| } | ||
|
|
||
| val default = Coursier | ||
| val default = Coursier.empty | ||
|
|
||
| implicit val resolverSemigroup: Semigroup[ResolverType] = | ||
| new Semigroup[ResolverType] { | ||
| override def combine(x: ResolverType, y: ResolverType): ResolverType = { | ||
| (x, y) match { | ||
| case (l: Gradle, r: Gradle) => Monoid.combine(l, r) | ||
| case (_, r) => r | ||
| case (l: Gradle, r: Gradle) => Monoid.combine(l, r) | ||
| case (l: Coursier, r: Coursier) => Monoid.combine(l, r) | ||
| case (_, r) => r | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -193,19 +193,19 @@ object MakeDeps { | |
| model.getOptions.getResolvers.collect { case e: MavenServer => e }, | ||
| resolverCachePath) | ||
| ) | ||
| case ResolverType.Coursier => | ||
| case c: ResolverType.Coursier => | ||
| val ec = scala.concurrent.ExecutionContext.Implicits.global | ||
| import scala.concurrent.duration._ | ||
| resolve( | ||
| model, | ||
| new CoursierResolver(model.getOptions.getResolvers, ec, 3600.seconds, resolverCachePath) | ||
| new CoursierResolver(model.getOptions.getResolvers, c.getHashInHttpHeaders, ec, 3600.seconds, resolverCachePath) | ||
| ) | ||
| case g: ResolverType.Gradle => | ||
| val ec = scala.concurrent.ExecutionContext.Implicits.global | ||
| import scala.concurrent.duration._ | ||
|
|
||
| lazy val coursierResolver = | ||
| new CoursierResolver(model.getOptions.getResolvers, ec, 3600.seconds, resolverCachePath) | ||
| new CoursierResolver(model.getOptions.getResolvers, false, ec, 3600.seconds, resolverCachePath) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment as above: if this works, shouldn't true be the default? Why would you set false? Maybe if you know with certainty your http service doesn't give out the headers? |
||
|
|
||
| val resolver = new GradleResolver( | ||
| rootPath, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could we factor lines 197 - 246 into a method such as
fetchHeadersToPath(...)or something.