-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathxml_parser_encoder.exs
120 lines (97 loc) · 3.51 KB
/
xml_parser_encoder.exs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
defmodule Xml do
  @moduledoc """
  Minimal XML-to-term parser for small, well-formed documents.

  Mapping rules:

    * An element becomes a one-entry map, `%{"tag" => value}`.
    * `value` is `""` for an empty element, the bare child when the element has
      exactly one child (text or element), and a list when it has several.
    * Lists hold children most-recently-parsed first (reverse document order);
      this is the ordering the inline test-suite at the bottom of the file asserts.
    * When an element carries attributes its value is wrapped as
      `%{attribs: %{"name" => "value"}, vals: value}`.

  Not supported: self-closing tags, comments, CDATA, processing instructions,
  entity decoding, and `>` inside attribute values. Whitespace between tags is
  kept as ordinary text.
  """

  @typedoc "A parsed node: text, an element map, or a list of sibling nodes."
  @type value :: String.t() | map() | [value()]

  @doc """
  Parses `xml` and returns its term representation (see the module doc).

  Raises `MatchError` when a closing tag does not match the element that is
  currently open, and `ArgumentError` for truncated input (an element that is
  never closed, an opening tag without `>`) or a stray closing tag.

  ## Examples

      iex> Xml.parse("<root>value</root>")
      %{"root" => "value"}

      iex> Xml.parse(~s(<a href="http">b</a>))
      %{"a" => %{attribs: %{"href" => "http"}, vals: "b"}}
  """
  @spec parse(binary()) :: value()
  def parse(xml) when is_binary(xml) do
    {nodes, ""} = parse_nodes(xml, nil, [])
    simplify(nodes)
  end

  # Collapses a child list: none -> "", exactly one -> that child, several -> the list.
  defp simplify([]), do: ""
  defp simplify([only]), do: only
  defp simplify([_, _ | _] = many), do: many

  # parse_nodes(input, open_tag, acc) consumes sibling nodes until the closing tag
  # of `open_tag` (or end of input at the top level, where `open_tag` is nil).
  # Returns {nodes, rest}; nodes are prepended, so the newest comes first.
  defp parse_nodes("", nil, acc), do: {acc, ""}

  defp parse_nodes("", open_tag, _acc) do
    raise ArgumentError, "missing closing tag for <#{open_tag}>"
  end

  defp parse_nodes("</" <> _rest, nil, _acc) do
    raise ArgumentError, "closing tag without a matching opening tag"
  end

  defp parse_nodes("</" <> rest, open_tag, acc) do
    closing = open_tag <> ">"
    # Assertive match: a mismatched end tag is reported as a MatchError.
    true = String.starts_with?(rest, closing)
    # Drop exactly one occurrence of the closing tag; String.trim_leading/2 would
    # also swallow any immediately following text that happens to repeat it.
    skip = byte_size(closing)
    {acc, binary_part(rest, skip, byte_size(rest) - skip)}
  end

  defp parse_nodes("<" <> rest, open_tag, acc) do
    {head, body} = split_tag_head(rest)
    {name, attribs} = parse_tag_head(head)
    {children, remainder} = parse_nodes(body, name, [])
    element = %{name => wrap_attribs(attribs, simplify(children))}
    parse_nodes(remainder, open_tag, [element | acc])
  end

  # Anything else is text running up to the next "<" (or to the end of input).
  # The clauses above guarantee the input is non-empty and does not start with
  # "<", so the text is never empty and every call makes progress.
  defp parse_nodes(text_and_rest, open_tag, acc) do
    case String.split(text_and_rest, "<", parts: 2) do
      [text, rest] -> parse_nodes("<" <> rest, open_tag, [text | acc])
      [text] -> parse_nodes("", open_tag, [text | acc])
    end
  end

  # Splits what follows "<" into the tag head (name plus attribute text) and the
  # remaining input after the terminating ">".
  defp split_tag_head(after_bracket) do
    case String.split(after_bracket, ">", parts: 2) do
      [head, body] -> {head, body}
      [_unterminated] -> raise ArgumentError, "opening tag is missing its closing \">\""
    end
  end

  # Extracts the tag name (everything before the first whitespace) and a map of
  # the name="value" pairs found in the tag head.
  defp parse_tag_head(head) do
    [name | _attr_text] = String.split(head, [" ", "\t", "\n", "\r"], parts: 2)

    # Every pair must be preceded by whitespace, so the tag name itself is never
    # mistaken for an attribute.
    attribs =
      ~r/\s+(\w+)="?([^>"]*)"?/u
      |> Regex.scan(head, capture: :all_but_first)
      |> Map.new(fn [key, value] -> {key, value} end)

    {name, attribs}
  end

  # Elements without attributes keep their bare value.
  defp wrap_attribs(attribs, value) when map_size(attribs) == 0, do: value
  defp wrap_attribs(attribs, value), do: %{attribs: attribs, vals: value}
end
# Run this inline suite with "elixir #{__ENV__.file} test".
# The suite is only defined and started when the first CLI argument is "test".
if List.first(System.argv()) == "test" do
  ExUnit.start()

  defmodule XmlTest do
    use ExUnit.Case, async: true

    test "parses just a root node with a value" do
      assert Xml.parse("<root>value</root>") == %{"root" => "value"}
    end

    test "parses a root node with no value" do
      assert Xml.parse("<a></a>") == %{"a" => ""}
    end

    test "parses two nested tags with a value" do
      assert Xml.parse("<a><b>c</b></a>") == %{"a" => %{"b" => "c"}}
    end

    # Siblings are expected most-recent-first, i.e. in reverse document order.
    test "parses multiple identical tags" do
      assert Xml.parse("<a><b>foo</b><b>bar</b></a>") ==
               %{
                 "a" => [
                   %{"b" => "bar"},
                   %{"b" => "foo"}
                 ]
               }
    end

    test "parses an attribute" do
      assert Xml.parse("<a href=\"http\">b</a>") ==
               %{"a" => %{:attribs => %{"href" => "http"}, :vals => "b"}}
    end

    # The expectation follows the conventions pinned by the tests above:
    # children are listed most-recent-first, an element without attributes maps
    # straight to its value, a single child is stored bare in :vals, and the
    # `id` attribute stays with the element that declares it ("foo").
    # NOTE(review): placing the trailing "text" node in the child list is an
    # assumption; no other test covers mixed content.
    test "parses multiple identical tags with attributes" do
      assert Xml.parse("<a href=\"http\"><b id=\"1\">foo</b><b>bar</b>text</a>") ==
               %{
                 "a" => %{
                   attribs: %{"href" => "http"},
                   vals: [
                     "text",
                     %{"b" => "bar"},
                     %{"b" => %{attribs: %{"id" => "1"}, vals: "foo"}}
                   ]
                 }
               }
    end

    test "raises if end tag doesn't match start tag" do
      assert_raise MatchError, fn -> Xml.parse("<root>value</rot>") end
    end
  end
end