Skip to content

Conversation

patricoferris
Copy link
Collaborator

When first providing readdir we shelved an incremental version. This PR starts the ball rolling on adding support for something like with_dir_entries. I'm opening it early to get some feedback on how it should look: should Eio.Path.walk walk the directory tree recursively, or just a single directory, leaving it to the user to descend should they want to? We should perhaps have a type entry too.

I wrote a small benchmark:

open Eio.Std

(** Local alias for [Eio.Path.( / )], so paths can be extended with
    [parent / child] without qualifying the operator. *)
let ( / ) parent child = Eio.Path.( / ) parent child

(** [dirname index] is the benchmark directory name ["dir-<index>"]. *)
let dirname (index : int) : string = Fmt.str "dir-%i" index
(** [filename index] is the benchmark file name ["file-<index>"]. *)
let filename (index : int) : string = Fmt.str "file-%i" index

(** [make_bench_dir ?depth ?width dir] populates [dir] with a chain of nested
    directories for the benchmark.

    At each of the [depth] levels (default [1]) it creates the sub-directory
    [dirname level] inside the current directory, writes [width] files
    (default [1000], named by [filename]) containing ["hello world"] into the
    current directory, and then continues inside the new sub-directory.
    The innermost sub-directory is therefore created but left empty.
    Does nothing when [depth <= 0]. *)
let make_bench_dir ?(depth=1) ?(width=1000) dir =
  (* Write a single benchmark file into [parent]. *)
  let write_file parent name =
    Eio.Path.save ~create:(`If_missing 0o644) (parent / name) "hello world"
  in
  let rec populate parent remaining =
    if remaining > 0 then begin
      let subdir = parent / dirname remaining in
      Eio.Path.mkdir ~perm:0o700 subdir;
      (* The files are written by fibers, with [~max_fibers:10]. *)
      List.init width filename
      |> Fiber.List.iter ~max_fibers:10 (write_file parent);
      populate subdir (remaining - 1)
    end
  in
  populate dir depth

(** [read_dir_walk ~f dir] is the baseline traversal built on
    [Eio.Path.read_dir] and [Eio.Path.kind].

    Starting from the empty list, it folds [f] over every entry found below
    [dir] ([dir] itself is not passed to [f]). Each entry is reported as
    [(kind, path)], where [kind] comes from [Eio.Path.kind ~follow:true].
    A directory is reported to [f] first and its contents are visited
    afterwards. *)
let read_dir_walk ~f dir =
  let rec visit acc path =
    match Eio.Path.kind ~follow:true path with
    | `Directory as kind ->
      let acc = f acc (kind, path) in
      descend acc path
    | kind -> f acc (kind, path)
  (* Fold [visit] over every name listed in [parent]. *)
  and descend acc parent =
    Eio.Path.read_dir parent
    |> List.fold_left (fun acc name -> visit acc (parent / name)) acc
  in
  descend [] dir

(** [walk ~f acc dir] folds [f] over the entries below [dir] using
    [Eio.Path.walk].

    Every entry of the sequence handed to the callback is reported to [f] as
    [(kind, dir / name)]. Entries whose kind is [`Directory] are then
    traversed recursively, threading the accumulator through.

    NOTE(review): [Eio.Path.walk] is the API proposed by this PR; this code
    assumes it yields [(kind, name)] pairs for a single directory and that
    the sequence must be consumed inside the callback -- confirm. *)
let rec walk ~f acc dir =
  let step acc (kind, name) =
    let acc = f acc (kind, dir / name) in
    match kind with
    | `Directory -> walk ~f acc (dir / name)
    | _ -> acc
  in
  Eio.Path.walk dir (fun entries -> Seq.fold_left step acc entries)

(** [time fn] runs [fn ()], traces how long the call took (the difference
    between two [Mtime_clock.now_ns] readings) and returns the result of
    [fn ()]. Nothing is traced if [fn] raises. *)
let time fn =
  let started = Mtime_clock.now_ns () in
  let result = fn () in
  let elapsed = Int64.sub (Mtime_clock.now_ns ()) started in
  Eio.traceln "Took: %a" Fmt.uint64_ns_span elapsed;
  result

(** [Eio.Path] extended with a monomorphic [t] and a [compare] function, so
    that it can be used as the argument of [Set.Make]. *)
module Path = struct
  include Eio.Path

  type t = Eio.Fs.dir_ty Eio.Path.t

  (** Orders two paths by comparing the strings returned by
      [Eio.Path.native_exn]. *)
  let compare a b =
    let key = native_exn in
    String.compare (key a) (key b)
end

(** Sets of paths, ordered by [Path.compare]. *)
module S = Set.Make (Path)

(** Benchmark entry point.

    Creates a fresh temporary directory, fills it with [make_bench_dir],
    times the [walk] traversal and then the [read_dir_walk] traversal, and
    traces the number of paths each one collected and whether the two sets
    are equal. The temporary directory is removed afterwards, including when
    the benchmark raises. *)
let () =
  Eio_main.run @@ fun env ->
  let tmpdirf = Filename.temp_dir "eio-walk-" "-bench" in
  let cleanup () =
    (* Quote the path: the temporary directory location comes from the
       environment and may contain spaces or shell metacharacters. *)
    let _ : int =
      Sys.command (Filename.quote_command "rm" [ "-r"; tmpdirf ])
    in
    ()
  in
  (* Remove the (potentially very large) tree even if the benchmark fails. *)
  Fun.protect ~finally:cleanup @@ fun () ->
  let tmpdir = env#fs / tmpdirf in
  make_bench_dir ~depth:15 ~width:20_000 tmpdir;
  let f acc (_, path) = path :: acc in
  (* [files2] comes from the new [walk]; [files1] from the baseline. *)
  let files2 = time (fun () -> walk ~f [] tmpdir) |> S.of_list in
  let files1 = time (fun () -> read_dir_walk ~f tmpdir) |> S.of_list in
  Eio.traceln "Files 1: %i\n" (S.cardinal files1);
  Eio.traceln "Files 2: %i\n" (S.cardinal files2);
  Eio.traceln "Same files? %b\n" (S.equal files1 files2)

It is far from perfect and we should take into account FS caching, but on my machine I'm getting the following.

For Eio_linux: 1.46s to 110ms
For Eio_posix: 2.02s to 1.58s

@avsm
Copy link
Contributor

avsm commented Oct 6, 2025

I like this a lot, but I'm just wondering about extending it to cover error handling and traversal strategies, as Bos does: https://erratique.ch/software/bos/doc/Bos/OS/Path/index.html#fold

With these optional args, you can control which files to skip and what to do with errors mid-traversal.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants