Skip to content

Commit 9b3c2d7

Browse files
authored
Parsing yaml metadata headers in markdown files (#191)
This PR updates our markdown parser to support parsing YAML metadata headers, and adds tests to verify that the YAML features we'd like to use (such as arrays, nested properties, and dotted keys) work as expected.
1 parent f18f249 commit 9b3c2d7

File tree

4 files changed

+81
-13
lines changed

4 files changed

+81
-13
lines changed

go.mod

+2
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,12 @@ require (
4242
github.com/rogpeppe/go-internal v1.10.0 // indirect
4343
github.com/spf13/pflag v1.0.5 // indirect
4444
github.com/yuin/goldmark-emoji v1.0.1 // indirect
45+
github.com/yuin/goldmark-meta v1.1.0 // indirect
4546
golang.org/x/net v0.17.0 // indirect
4647
golang.org/x/sync v0.1.0 // indirect
4748
golang.org/x/term v0.16.0 // indirect
4849
golang.org/x/text v0.14.0 // indirect
4950
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
51+
gopkg.in/yaml.v2 v2.4.0 // indirect
5052
gopkg.in/yaml.v3 v3.0.1 // indirect
5153
)

go.sum

+3
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ github.com/yuin/goldmark v1.5.4 h1:2uY/xC0roWy8IBEGLgB1ywIoEJFGmRrX21YQcvGZzjU=
9494
github.com/yuin/goldmark v1.5.4/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
9595
github.com/yuin/goldmark-emoji v1.0.1 h1:ctuWEyzGBwiucEqxzwe0SOYDXPAucOrE9NQC18Wa1os=
9696
github.com/yuin/goldmark-emoji v1.0.1/go.mod h1:2w1E6FEWLcDQkoTE+7HU6QF1F6SLlNGjRIBbIZQFqkQ=
97+
github.com/yuin/goldmark-meta v1.1.0 h1:pWw+JLHGZe8Rk0EGsMVssiNb/AaPMHfSRszZeUeiOUc=
98+
github.com/yuin/goldmark-meta v1.1.0/go.mod h1:U4spWENafuA7Zyg+Lj5RqK/MF+ovMYtBvXi1lBb2VP0=
9799
golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
98100
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
99101
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
@@ -123,6 +125,7 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
123125
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
124126
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
125127
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
128+
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
126129
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
127130
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
128131
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

internal/parsers/markdown.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"github.com/Azure/InnovationEngine/internal/logging"
1111
"github.com/yuin/goldmark"
12+
meta "github.com/yuin/goldmark-meta"
1213
"github.com/yuin/goldmark/ast"
1314
"github.com/yuin/goldmark/extension"
1415
"github.com/yuin/goldmark/parser"
@@ -17,7 +18,7 @@ import (
1718
)
1819

1920
var markdownParser = goldmark.New(
20-
goldmark.WithExtensions(extension.GFM),
21+
goldmark.WithExtensions(extension.GFM, meta.New(meta.WithStoresInDocument())),
2122
goldmark.WithParserOptions(
2223
parser.WithAutoHeadingID(),
2324
parser.WithBlockParsers(),
@@ -33,6 +34,10 @@ func ParseMarkdownIntoAst(source []byte) ast.Node {
3334
return document
3435
}
3536

37+
func ExtractYamlMetadataFromAst(node ast.Node) map[string]interface{} {
38+
return node.OwnerDocument().Meta()
39+
}
40+
3641
// The representation of an expected output block in a markdown file. This is
3742
// for scenarios that have expected output that should be validated against the
3843
// actual output.
@@ -237,7 +242,6 @@ func convertScenarioVariablesToMap(variableBlock string) map[string]string {
237242
variableMap[key] = value
238243
}
239244
}
240-
241245
}
242246

243247
return variableMap

internal/parsers/markdown_test.go

+70-11
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ func TestParsingMarkdownHeaders(t *testing.T) {
1010
markdown := []byte(`# Hello World`)
1111
document := ParseMarkdownIntoAst(markdown)
1212
title, err := ExtractScenarioTitleFromAst(document, markdown)
13-
1413
if err != nil {
1514
t.Errorf("Error parsing title: %s", err)
1615
}
@@ -24,7 +23,6 @@ func TestParsingMarkdownHeaders(t *testing.T) {
2423
markdown := []byte("# Hello World \n # Hello again")
2524
document := ParseMarkdownIntoAst(markdown)
2625
title, err := ExtractScenarioTitleFromAst(document, markdown)
27-
2826
if err != nil {
2927
t.Errorf("Error parsing title: %s", err)
3028
}
@@ -50,8 +48,60 @@ func TestParsingMarkdownHeaders(t *testing.T) {
5048
})
5149
}
5250

53-
func TestParsingMarkdownCodeBlocks(t *testing.T) {
51+
func TestParsingYamlMetadata(t *testing.T) {
52+
t.Run("Markdown with valid yaml metadata", func(t *testing.T) {
53+
markdown := []byte(`---
54+
key: value
55+
array: [1, 2, 3]
56+
---
57+
`)
58+
59+
document := ParseMarkdownIntoAst(markdown)
60+
metadata := ExtractYamlMetadataFromAst(document)
61+
62+
if metadata["key"] != "value" {
63+
t.Errorf("Metadata is wrong: %v", metadata)
64+
}
65+
66+
array := metadata["array"].([]interface{})
67+
if array[0] != 1 || array[1] != 2 || array[2] != 3 {
68+
t.Errorf("Metadata is wrong: %v", metadata)
69+
}
70+
})
71+
72+
t.Run("Markdown without yaml metadata", func(t *testing.T) {
73+
markdown := []byte(`# Hello World.`)
74+
document := ParseMarkdownIntoAst(markdown)
75+
metadata := ExtractYamlMetadataFromAst(document)
76+
77+
if len(metadata) != 0 {
78+
t.Errorf("Metadata should be empty")
79+
}
80+
})
81+
82+
t.Run("yaml with nested properties", func(t *testing.T) {
83+
markdown := []byte(`---
84+
nested:
85+
key: value
86+
key.value: otherValue
87+
---
88+
`)
5489

90+
document := ParseMarkdownIntoAst(markdown)
91+
metadata := ExtractYamlMetadataFromAst(document)
92+
93+
nested := metadata["nested"].(map[interface{}]interface{})
94+
if nested["key"] != "value" {
95+
t.Errorf("Metadata is wrong: %v", metadata)
96+
}
97+
98+
if metadata["key.value"] != "otherValue" {
99+
t.Errorf("Metadata is wrong: %v", metadata)
100+
}
101+
})
102+
}
103+
104+
func TestParsingMarkdownCodeBlocks(t *testing.T) {
55105
t.Run("Markdown with a valid bash code block", func(t *testing.T) {
56106
markdown := []byte(fmt.Sprintf("# Hello World\n ```bash\n%s\n```", "echo Hello"))
57107

@@ -74,13 +124,16 @@ func TestParsingMarkdownCodeBlocks(t *testing.T) {
74124
)
75125
}
76126
})
77-
78127
}
79128

80129
func TestParsingMarkdownExpectedSimilarty(t *testing.T) {
81-
82130
t.Run("Markdown with a expected_similarty tag using float", func(t *testing.T) {
83-
markdown := []byte(fmt.Sprintf("```bash\n%s\n```\n<!--expected_similarity=0.8-->\n```\nHello\n```\n", "echo Hello"))
131+
markdown := []byte(
132+
fmt.Sprintf(
133+
"```bash\n%s\n```\n<!--expected_similarity=0.8-->\n```\nHello\n```\n",
134+
"echo Hello",
135+
),
136+
)
84137

85138
document := ParseMarkdownIntoAst(markdown)
86139
codeBlocks := ExtractCodeBlocksFromAst(document, markdown, []string{"bash"})
@@ -92,16 +145,23 @@ func TestParsingMarkdownExpectedSimilarty(t *testing.T) {
92145
block := codeBlocks[0].ExpectedOutput
93146
expectedFloat := .8
94147
if block.ExpectedSimilarity != expectedFloat {
95-
t.Errorf("ExpectedSimilarity is wrong, got %f, expected %f", block.ExpectedSimilarity, expectedFloat)
148+
t.Errorf(
149+
"ExpectedSimilarity is wrong, got %f, expected %f",
150+
block.ExpectedSimilarity,
151+
expectedFloat,
152+
)
96153
}
97154
})
98-
99155
}
100156

101157
func TestParsingMarkdownExpectedRegex(t *testing.T) {
102-
103158
t.Run("Markdown with a expected_similarty tag using regex", func(t *testing.T) {
104-
markdown := []byte(fmt.Sprintf("```bash\n%s\n```\n<!--expected_similarity=\"Foo \\w+\"-->\n```\nFoo Bar\n```\n", "echo 'Foo Bar'"))
159+
markdown := []byte(
160+
fmt.Sprintf(
161+
"```bash\n%s\n```\n<!--expected_similarity=\"Foo \\w+\"-->\n```\nFoo Bar\n```\n",
162+
"echo 'Foo Bar'",
163+
),
164+
)
105165

106166
document := ParseMarkdownIntoAst(markdown)
107167
codeBlocks := ExtractCodeBlocksFromAst(document, markdown, []string{"bash"})
@@ -121,5 +181,4 @@ func TestParsingMarkdownExpectedRegex(t *testing.T) {
121181
t.Errorf("ExpectedRegex is wrong, got %q, expected %q", stringRegex, expectedRegex)
122182
}
123183
})
124-
125184
}

0 commit comments

Comments
 (0)