Skip to content

Commit bb5028a

Browse files
committed
Complete exercise 16.4
1 parent b226c97 commit bb5028a

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

chapter_16/README.md

+7
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ $ ./emd5sum emd5sum
3131

3232
**4. Use the `lib_find` module to find all `.jpg` files on your computer. Check for identical files by computing the MD5 sum of each file and comparing the computed sums.**
3333

34+
Solution in the `exercise_5/` directory.
35+
36+
```bash
37+
# Run the escript. It uses lib_find to locate every .jpg file and prints each group of files that share the same MD5 sum.
38+
$ ./find_duplicate_jpegs
39+
```
40+
3441
**5. Write a caching mechanism that computes the MD5 sum of a file and stores it with the last modified time of the file. When the sum is requested check if the file has changed and return the cached sum if it hasn't.**
3542

3643
Solution in the `exercise_5/` directory.
+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/usr/bin/env escript
2+
3+
-include_lib("kernel/include/file.hrl").
4+
5+
-import(lists, [reverse/1]).
6+
7+
%% Prints the one-line invocation hint to standard output.
print_usage() ->
    Usage = "./find_duplicate_jpegs~n",
    io:format(Usage, []).
9+
10+
%% Escript entry point.
%%
%% With no arguments: walks the file system from "/", hashes every file whose
%% full path matches the "*.jpg" wildcard, prints each group of files that
%% share an MD5 digest, and exits with status 0.
%% With any arguments: prints the usage line and exits with status 1.
main([]) ->
    % The pattern is a shell wildcard, not a regular expression: files/3
    % converts it with xmerl_regexp:sh_to_awk/1, which escapes "." itself.
    % Writing "\\." here would demand a literal backslash in the file name.
    Files = files(<<"/">>, "*.jpg", true),

    % Group file names by content hash: #{Hash => [Filename]}
    Groups = lists:foldl(fun(Filename, Acc) ->
                                 add_file_hash(Filename, Acc)
                         end, #{}, Files),

    % Keep only the hashes shared by at least two files
    Duplicates = [Group || {_Hash, [_, _ | _]} = Group <- maps:to_list(Groups)],

    % Print duplicate file names
    lists:foreach(fun(Group) -> print_duplicates(Group) end, Duplicates),
    halt(0);
main(_) ->
    % Print usage message if invalid number of arguments is passed in
    print_usage(),
    halt(1).

%% Adds Filename to the group keyed by the MD5 digest of its contents.
%% A file that cannot be read (e.g. permission denied, or removed while the
%% scan is running) is skipped so a single bad file does not abort the scan.
add_file_hash(Filename, Groups) ->
    case file:read_file(Filename) of
        {ok, Data} ->
            Hash = erlang:md5(Data),
            maps:update_with(Hash,
                             fun(Filenames) -> [Filename | Filenames] end,
                             [Filename],
                             Groups);
        {error, _Reason} ->
            Groups
    end.

%% Prints one group of identical files: the digest in hex, then one file
%% name per line.
print_duplicates({Hash, Filenames}) ->
    Arguments = [hex_string(Hash), lists:join("\n", Filenames)],
    io:format("Duplicate files with content hashed as ~s:~n~s~n~n", Arguments).
42+
43+
% From https://stackoverflow.com/questions/3768197/erlang-ioformatting-a-binary-to-hex
44+
% We need to format each byte in the hex binary as a set of two ascii
45+
% characters in order to construct a printable string of characters.
46+
%% Renders a binary as hexadecimal text.
%% Returns a list with one io_lib:format/2 result per input byte; each result
%% is that byte as two upper-case, zero-padded hex digits.
hex_string(HexBin) ->
    [byte_to_hex(Byte) || <<Byte>> <= HexBin].

%% Formats a single byte (0..255) as two upper-case hex digits.
byte_to_hex(Byte) ->
    io_lib:format("~2.16.0B", [Byte]).
48+
49+
% Code from the lib_find module
50+
%% ---
51+
%% Excerpted from "Programming Erlang, Second Edition",
52+
%% published by The Pragmatic Bookshelf.
53+
%% Copyrights apply to this code. It may not be used to create training material,
54+
%% courses, books, articles, and the like. Contact us if you are in doubt.
55+
%% We make no guarantees that this code is fit for any purpose.
56+
%% Visit http://www.pragmaticprogrammer.com/titles/jaerlang2 for more book information.
57+
%%---
58+
59+
%% Returns the full paths of the regular files under Dir whose path matches
%% the shell-style wildcard Re, in the order they were encountered.
%% Flag is handed to files/5 as its Recursive argument (true descends into
%% subdirectories, false stays in Dir).
files(Dir, Re, Flag) ->
    Pattern = xmerl_regexp:sh_to_awk(Re),
    Collect = fun(Path, Paths) -> [Path | Paths] end,
    % Matches are accumulated by prepending, so restore discovery order.
    Matches = files(Dir, Pattern, Flag, Collect, []),
    lists:reverse(Matches).
62+
63+
%% Folds Fun over the matching files found in Dir, starting from Acc.
%% A directory that cannot be listed contributes nothing: Acc is returned
%% unchanged.
files(Dir, Reg, Recursive, Fun, Acc) ->
    Listing = file:list_dir(Dir),
    case Listing of
        {error, _} ->
            Acc;
        {ok, Entries} ->
            find_files(Entries, Dir, Reg, Recursive, Fun, Acc)
    end.
68+
69+
%% Walks the directory entries of Dir one at a time, threading the
%% accumulator through:
%%   * a regular file whose full path matches Reg is passed to Fun;
%%   * a directory is descended into only when Recursive is true;
%%   * anything file_type/1 reports as error is ignored.
find_files([], _Dir, _Reg, _Recursive, _Fun, Acc) ->
    Acc;
find_files([Entry | Rest], Dir, Reg, Recursive, Fun, Acc0) ->
    Path = filename:join([Dir, Entry]),
    Acc1 =
        case file_type(Path) of
            regular ->
                case re:run(Path, Reg, [{capture, none}]) of
                    match -> Fun(Path, Acc0);
                    nomatch -> Acc0
                end;
            directory ->
                case Recursive of
                    true -> files(Path, Reg, Recursive, Fun, Acc0);
                    false -> Acc0
                end;
            error ->
                Acc0
        end,
    find_files(Rest, Dir, Reg, Recursive, Fun, Acc1).
93+
94+
%% Classifies File as regular | directory | error.
%%
%% Uses file:read_link_info/1 so that symbolic links are NOT followed: a link
%% is reported as error and therefore skipped by the directory walk. This
%% keeps a link and its target from being listed as two identical files and
%% stops the walk from re-entering directories through link cycles.
%% Any other file type, and any failure to stat File, also yields error.
file_type(File) ->
    case file:read_link_info(File) of
        {ok, #file_info{type = regular}} -> regular;
        {ok, #file_info{type = directory}} -> directory;
        _ -> error
    end.

0 commit comments

Comments
 (0)