Skip to content

Commit

Permalink
Merge pull request #10534 from gridbugs/stop-treating-toolchains-as-p…
Browse files Browse the repository at this point in the history
…ackages-in-pkg-rules

Stop treating toolchains as packages in pkg rules
  • Loading branch information
gridbugs authored May 22, 2024
2 parents 98efedf + c740f6a commit 04fdc9c
Show file tree
Hide file tree
Showing 7 changed files with 347 additions and 242 deletions.
6 changes: 3 additions & 3 deletions src/dune_engine/scheduler.mli
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,9 @@ val cancel_current_build : unit -> unit Fiber.t

val inject_memo_invalidation : Memo.Invalidation.t -> unit Fiber.t

(** [sleep duration] wait for [duration] to elapse. Sleepers are checked for
wake up at a rate of once per 0.1 seconds. So [duration] should be at least
this long. *)
(** [sleep duration] wait for [duration] seconds to elapse. Sleepers
are checked for wake up at a rate of once per 0.1 seconds. So
[duration] should be at least this long. *)
val sleep : float -> unit Fiber.t

val stats : unit -> Dune_stats.t option Fiber.t
Expand Down
149 changes: 149 additions & 0 deletions src/dune_pkg/flock.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
open! Stdune
module Flock = Dune_util.Flock
module Scheduler = Dune_engine.Scheduler

type t =
{ flock : Flock.t
; lock_path : Path.t
}

(* Global mutable set of names, used to prevent printing "waiting for
lock" messages multiple times when multiple concurrent fibers try
to take a lock at the same time. *)
module Global_waiting_names = struct
let state = lazy (String.Table.create 1)

(* add a name to the set, returning [true] iff the name wasn't
already in the set *)
let add name =
let state = Lazy.force state in
String.Table.add state name () |> Result.is_ok
;;

let remove name =
let state = Lazy.force state in
String.Table.remove state name
;;
end

let attempt_to_lock { flock; lock_path } ~name_for_messages ~timeout_s =
let open Fiber.O in
let current_dune_pid = Unix.getpid () in
let rec loop timeout_s =
match Flock.lock_non_block flock Flock.Exclusive with
| Error e -> Fiber.return @@ Error e
| Ok `Success ->
Global_waiting_names.remove name_for_messages;
Fiber.return (Ok `Success)
| Ok `Failure -> handle_failure timeout_s
and handle_failure timeout_s =
let locked_by_pid = int_of_string (Io.read_file lock_path) in
let sleep_duration_s = 0.1 in
let remaining_duration_s = timeout_s -. sleep_duration_s in
if remaining_duration_s <= 0.0
then Fiber.return (Ok `Timeout)
else (
if locked_by_pid <> current_dune_pid && Global_waiting_names.add name_for_messages
then
(* Only print this message if the dune process that holds the
lock isn't the current process and this is the first time
that the current process has failed to take the lock since
the last time it successfully took the lock. This prevents
the situation where multiple fibers all attempt to take the
lock concurrently while it's held by another process from
causing the following message from being printed multiple
times. *)
User_message.print
(User_message.make
[ Pp.textf
"Waiting for another instance of dune (pid %d) to release the lock for \
the resource %S..."
locked_by_pid
name_for_messages
]);
let* () = Scheduler.sleep sleep_duration_s in
loop remaining_duration_s)
in
loop timeout_s
;;

let with_flock lock_path ~name_for_messages ~timeout_s ~f =
let open Fiber.O in
let parent = Path.parent_exn lock_path in
Path.mkdir_p parent;
let fd =
Unix.openfile
(Path.to_string lock_path)
[ Unix.O_CREAT; O_WRONLY; O_SHARE_DELETE; O_CLOEXEC ]
0o600
in
let out = Unix.out_channel_of_descr fd in
let flock = Flock.create fd in
let current_dune_pid = Unix.getpid () in
Fiber.finalize
~finally:(fun () ->
let+ () = Fiber.return () in
close_out out)
(fun () ->
attempt_to_lock { flock; lock_path } ~name_for_messages ~timeout_s
>>= function
| Ok `Success ->
Fiber.finalize
(fun () ->
Printf.fprintf out "%d%!" current_dune_pid;
f ())
~finally:(fun () ->
let+ () = Fiber.return () in
match Flock.unlock flock with
| Ok () ->
(* Note that after the lock has been released, we
deliberately don't delete the lock file to avoid a race
condition where other processes or fibers still need to
read the file to determine the process that held the
lock. Even though the lock has been released, other
parties may be in between timing out waiting for the
lock and reading the lock file to get the pid to
include in their error message. *)
()
| Error ue ->
Unix_error.Detailed.create ue ~syscall:"flock" ~arg:"unlock"
|> Unix_error.Detailed.raise)
| Ok `Timeout ->
let locked_by_pid = int_of_string (Io.read_file lock_path) in
if locked_by_pid == current_dune_pid
then
Code_error.raise
"timeout while waiting for flock, but flock was currently held by the \
current process"
[ "name_for_messages", Dyn.string name_for_messages ]
else
User_error.raise
~hints:
[ Pp.textf
"Another instance of dune (pid %d) currently holds the lock for the \
resource %S. If this is unexpected, terminate that process and re-run \
the command."
locked_by_pid
name_for_messages
; Pp.textf
"As a last resort, if the other instance of dune (pid %d) is no longer \
running, manually delete the lock file %s."
locked_by_pid
(Path.to_string_maybe_quoted lock_path)
]
[ Pp.textf
"Timed out after %.2f seconds while waiting for another instance of dune \
(pid %d) to release the lock on the resource %S."
timeout_s
locked_by_pid
name_for_messages
]
| Error error ->
User_error.raise
[ Pp.textf
"Failed to get a lock for the resource %S with lock file %s: %s"
name_for_messages
(Path.to_string_maybe_quoted lock_path)
(Unix.error_message error)
])
;;
23 changes: 23 additions & 0 deletions src/dune_pkg/flock.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
open! Stdune

(** [with_flock path ~name_for_messages ~timeout_s ~f] ensures mutual
exclusion for the function [f] across multiple concurrent
instances of dune, using the lock file at [path] to coordinate
between different dune instances. If the lock is not acquired
after [timeout_s] seconds then a [User_error] is raised. Pass
[infinity] to keep trying to take the lock
forever. [name_for_messages] is the name used to refer to this
lock in error messages.
Within the a dune process, this function also ensures mutual
exclusion between fibers. Note that if this function times out
waiting for the lock while the lock is held by a different fiber
of the same dune process, a [Code_error] is raised rather than a
[User_error]. If a timeout is possible, avoid allowing multiple
fibers to concurrently attempt to take a flock. *)
val with_flock
: Path.t
-> name_for_messages:string
-> timeout_s:float
-> f:(unit -> 'a Fiber.t)
-> 'a Fiber.t
69 changes: 1 addition & 68 deletions src/dune_pkg/rev_store.ml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ module Process = Dune_engine.Process
module Display = Dune_engine.Display
module Scheduler = Dune_engine.Scheduler
module Re = Dune_re
module Flock = Dune_util.Flock
open Fiber.O

module Object = struct
Expand Down Expand Up @@ -62,73 +61,7 @@ let lock_path { dir; _ } =
Path.relative parent "rev-store.lock"
;;

let rec attempt_to_lock flock lock ~max_retries =
let sleep_duration = 0.1 in
match Flock.lock_non_block flock lock with
| Error e -> Fiber.return @@ Error e
| Ok `Success -> Fiber.return (Ok `Success)
| Ok `Failure ->
if max_retries > 0
then
let* () = Scheduler.sleep sleep_duration in
attempt_to_lock flock lock ~max_retries:(max_retries - 1)
else Fiber.return (Ok `Failure)
;;

let with_flock lock_path ~f =
let open Fiber.O in
let parent = Path.parent_exn lock_path in
Path.mkdir_p parent;
let fd =
Unix.openfile
(Path.to_string lock_path)
[ Unix.O_CREAT; O_WRONLY; O_SHARE_DELETE; Unix.O_CLOEXEC ]
0o600
in
let out = Unix.out_channel_of_descr fd in
let flock = Flock.create fd in
let max_retries = 49 in
Fiber.finalize
~finally:(fun () ->
let+ () = Fiber.return () in
close_out out)
(fun () ->
attempt_to_lock flock Flock.Exclusive ~max_retries
>>= function
| Ok `Success ->
Fiber.finalize
(fun () ->
Printf.fprintf out "%d\n%!" (Unix.getpid ());
f ())
~finally:(fun () ->
let+ () = Fiber.return () in
Path.unlink_no_err lock_path;
match Flock.unlock flock with
| Ok () -> ()
| Error ue ->
Unix_error.Detailed.create ue ~syscall:"flock" ~arg:"unlock"
|> Unix_error.Detailed.raise)
| Ok `Failure ->
let pid = Io.read_file lock_path in
User_error.raise
~hints:
[ Pp.textf
"Another dune instance (pid %s) has locked the revision store. If this \
is happening in error, make sure to terminate that instance and re-run \
the command."
pid
]
[ Pp.textf "Couldn't acquire revision store lock after %d attempts" max_retries
]
| Error error ->
User_error.raise
[ Pp.textf
"Failed to get a lock for the revision store at %s: %s"
(Path.to_string_maybe_quoted lock_path)
(Unix.error_message error)
])
;;

let with_flock = Flock.with_flock ~name_for_messages:"revision store" ~timeout_s:5.0
let failure_mode = Process.Failure_mode.Return
let output_limit = Sys.max_string_length
let make_stdout () = Process.Io.make_stdout ~output_on_success:Swallow ~output_limit
Expand Down
68 changes: 55 additions & 13 deletions src/dune_pkg/toolchain.ml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ module Compiler_package = struct
:: ([ "ocaml-system"; "ocaml-variants" ] |> List.map ~f:Package_name.of_string)
;;

let is_compiler_package_by_name name =
List.exists ~f:(Package_name.equal name) package_names
;;

let constraint_ =
let open Dune_lang in
let constraint_ =
Expand Down Expand Up @@ -181,6 +185,10 @@ module Version = struct

let bin_dir t = Path.Outside_build_dir.relative (target_dir t) "bin"
let is_installed t = Path.exists (Path.outside_build_dir (target_dir t))

let flock_path t =
Path.Outside_build_dir.relative (toolchain_dir t) "lock" |> Path.outside_build_dir
;;
end

let handle_checksum_mismatch { Compiler_package.version; url; checksum } ~got_checksum =
Expand Down Expand Up @@ -285,18 +293,17 @@ let get ~log version =
| `Never -> ()
| _ -> User_message.print (User_message.make [ Pp.tag style pp ])
in
if Version.is_installed version
then (
(match log with
| `Always ->
log_print Success
@@ Pp.textf
"Version %s of the compiler toolchain is already installed in %s"
(Version.to_string version)
(Version.target_dir version |> Path.Outside_build_dir.to_string)
| _ -> ());
Fiber.return ())
else (
let print_already_installed_message () =
match log with
| `Always ->
log_print Success
@@ Pp.textf
"Version %s of the compiler toolchain is already installed in %s"
(Version.to_string version)
(Version.target_dir version |> Path.Outside_build_dir.to_string)
| _ -> ()
in
let download_build_install () =
let compiler_package = Compiler_package.of_version version in
log_print Details
@@ Pp.textf
Expand All @@ -315,5 +322,40 @@ let get ~log version =
@@ Pp.textf
"Success! Compiler toolchain version %s installed to %s."
(Version.to_string version)
(Version.target_dir version |> Path.Outside_build_dir.to_string))
(Version.target_dir version |> Path.Outside_build_dir.to_string)
in
if Version.is_installed version
then (
print_already_installed_message ();
Fiber.return ())
else
Flock.with_flock
(Version.flock_path version)
~name_for_messages:(sprintf "toolchain version %s" (Version.to_string version))
~timeout_s:infinity
~f:(fun () ->
(* Note that we deliberately check if the toolchain is
installed before and after taking the flock.
The first check prevents us from trying to take the lock if
the toolchain is installed. To build any package dune first
checks if the necessary toolchain is installed, so to build
a project with many dependencies, dune will check if the
toolchain is installed many times. If this check required
first taking a lock, multiple concurrent dune instances
would sometimes contest the lock. This isn't too bad for
performance as the lock is only held briefly, but when dune
waits on a flock it prints a message, so freqeunt, brief
lock acquisitions can lead to a lot of noise in the
output.
The second check is to handle the case where the toolchain
was installed in between the first check, and the flock
being acquired.
*)
if Version.is_installed version
then (
print_already_installed_message ();
Fiber.return ())
else download_build_install ())
;;
2 changes: 2 additions & 0 deletions src/dune_pkg/toolchain.mli
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ module Compiler_package : sig
used instead. *)
val package_names : Package_name.t list

val is_compiler_package_by_name : Package_name.t -> bool

(** Constraint to apply to the dependency solver to guarantee a
solution that's includes a version of a compiler package that's
supported by dune toolchains. *)
Expand Down
Loading

0 comments on commit 04fdc9c

Please sign in to comment.