diff --git a/.travis.yml b/.travis.yml index 2f3351d7..4069f223 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,4 +14,4 @@ script: - go get -t -v ./... - diff -u <(echo -n) <(gofmt -d -s .) - go tool vet . - - go test -v -race ./... + - go test -v -race -timeout 60m ./... diff --git a/block.go b/block.go index 859c6ef4..4c23ac4b 100644 --- a/block.go +++ b/block.go @@ -127,7 +127,7 @@ func (p *Markdown) block(data []byte) { if p.isHRule(data) { p.addBlock(HorizontalRule, nil) var i int - for i = 0; i < len(data) && data[i] != '\n'; i++ { + for i = 0; i < len(data) && !iseol(data[i]); i++ { } data = data[i:] continue @@ -228,7 +228,8 @@ func (p *Markdown) prefixHeading(data []byte) int { level++ } i := skipChar(data, level, ' ') - end := skipUntilChar(data, i, '\n') + end, _ := skipUntilNewline(data, i) + end = backupWindowsNewline(end, data) skip := end id := "" if p.extensions&HeadingIDs != 0 { @@ -273,22 +274,20 @@ func (p *Markdown) isUnderlinedHeading(data []byte) int { if data[0] == '=' { i := skipChar(data, 1, '=') i = skipChar(data, i, ' ') - if i < len(data) && data[i] == '\n' { + if i < len(data) && iseol(data[i]) { return 1 } return 0 } - // test of level 2 heading if data[0] == '-' { i := skipChar(data, 1, '-') i = skipChar(data, i, ' ') - if i < len(data) && data[i] == '\n' { + if i < len(data) && iseol(data[i]) { return 2 } return 0 } - return 0 } @@ -300,7 +299,7 @@ func (p *Markdown) titleBlock(data []byte, doRender bool) int { var i int for idx, b := range splitData { if !bytes.HasPrefix(b, []byte("%")) { - i = idx // - 1 + i = idx break } } @@ -373,6 +372,7 @@ func (p *Markdown) html(data []byte, doRender bool) int { } */ + hasCRs := false // if not found, try a second pass looking for indented match // but not if tag is "ins" or "del" (following original Markdown.pl) if !found && curtag != "ins" && curtag != "del" { @@ -380,6 +380,9 @@ func (p *Markdown) html(data []byte, doRender bool) int { for i < len(data) { i++ for i < len(data) && !(data[i-1] == '<' && data[i] == '/') { + if data[i] == '\r' { + hasCRs = true + } i++ } @@ -404,13 +407,11 @@ func (p *Markdown) html(data []byte, doRender bool) int { // the end of the block has been found if doRender { // trim newlines - end := i - for end > 0 && data[end-1] == '\n' { - end-- - } - finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end])) + end := trimTrailingNewlines(data, i) + block := p.addBlock(HTMLBlock, data[:end]) + block.ContainsWindowsNewlines = hasCRs + finalizeHTMLBlock(block) } - return i } @@ -421,17 +422,15 @@ func finalizeHTMLBlock(block *Node) { // HTML comment, lax form func (p *Markdown) htmlComment(data []byte, doRender bool) int { - i := p.inlineHTMLComment(data) + i, hasCRs := p.inlineHTMLComment(data) // needs to end with a blank line if j := p.isEmpty(data[i:]); j > 0 { size := i + j if doRender { // trim trailing newlines - end := size - for end > 0 && data[end-1] == '\n' { - end-- - } + end := trimTrailingNewlines(data, size) block := p.addBlock(HTMLBlock, data[:end]) + block.ContainsWindowsNewlines = hasCRs finalizeHTMLBlock(block) } return size @@ -452,7 +451,7 @@ func (p *Markdown) htmlHr(data []byte, doRender bool) int { return 0 } i := 3 - for i < len(data) && data[i] != '>' && data[i] != '\n' { + for i < len(data) && data[i] != '>' && data[i] != '\n' && data[i] != '\r' { i++ } if i < len(data) && data[i] == '>' { @@ -461,10 +460,7 @@ func (p *Markdown) htmlHr(data []byte, doRender bool) int { size := i + j if doRender { // trim newlines - end := size - for end > 0 && data[end-1] == '\n' { - end-- - } + end := trimTrailingNewlines(data, size) finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end])) } return size @@ -520,19 +516,29 @@ func (p *Markdown) htmlFindEnd(tag string, data []byte) int { return i + skip } +// isEmpty checks if the current position in data contains a sequence of blank +// characters. I.e. whether we have a run consisting exclusively of whitespaces +// or tabs, followed by a newline. If so, it returns the number of bytes this +// blank line consumes. Otherwise returns zero. func (*Markdown) isEmpty(data []byte) int { // it is okay to call isEmpty on an empty buffer if len(data) == 0 { return 0 } - - var i int - for i = 0; i < len(data) && data[i] != '\n'; i++ { + i := 0 + for i < len(data) && !iseol(data[i]) { if data[i] != ' ' && data[i] != '\t' { return 0 } + i++ } - if i < len(data) && data[i] == '\n' { + if i < len(data) && iseol(data[i]) { + i++ + } + // extra check for Windows style newlines: if the newline in the check + // above was a '\r', and it's immediately followed by a '\n', we want to + // consume the latter as well: + if i > 0 && i < len(data) && data[i-1] == '\r' && data[i] == '\n' { i++ } return i @@ -554,7 +560,7 @@ func (*Markdown) isHRule(data []byte) bool { // the whole line must be the char or whitespace n := 0 - for i < len(data) && data[i] != '\n' { + for i < len(data) && !iseol(data[i]) { switch { case data[i] == c: n++ @@ -624,7 +630,7 @@ func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker s i++ infoStart++ - for i < len(data) && data[i] != '}' && data[i] != '\n' { + for i < len(data) && data[i] != '}' && data[i] != '\n' && data[i] != '\r' { infoLength++ i++ } @@ -658,7 +664,8 @@ func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker s if i == len(data) { return i, marker } - if i > len(data) || data[i] != '\n' { + i = skipChar(data, i, ' ') + if i >= len(data) || !iseol(data[i]) { return 0, "" } return i + 1, marker // Take newline into account. @@ -689,7 +696,8 @@ func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int { } // copy the current line - end := skipUntilChar(data, beg, '\n') + 1 + end, _ := skipUntilNewline(data, beg) + end++ // did we reach the end of the buffer without a closing marker? if end >= len(data) { @@ -728,10 +736,10 @@ func unescapeString(str []byte) []byte { func finalizeCodeBlock(block *Node) { if block.IsFenced { - newlinePos := bytes.IndexByte(block.content, '\n') + newlinePos := bytes.IndexByte(block.content, '\n') // XXX: \r firstLine := block.content[:newlinePos] rest := block.content[newlinePos+1:] - block.Info = unescapeString(bytes.Trim(firstLine, "\n")) + block.Info = unescapeString(bytes.Trim(firstLine, "\n")) // XXX: \r block.Literal = rest } else { block.Literal = block.content @@ -747,12 +755,13 @@ func (p *Markdown) table(data []byte) int { table.Unlink() return 0 } + i, _ = skipWindowsNewline(i, data) p.addBlock(TableBody, nil) for i < len(data) { pipes, rowStart := 0, i - for ; i < len(data) && data[i] != '\n'; i++ { + for ; i < len(data) && !iseol(data[i]); i++ { if data[i] == '|' { pipes++ } @@ -764,7 +773,8 @@ func (p *Markdown) table(data []byte) int { } // include the newline in data sent to tableRow - if i < len(data) && data[i] == '\n' { + i, _ = skipWindowsNewline(i, data) + if i < len(data) && iseol(data[i]) { i++ } p.tableRow(data[rowStart:i], columns, false) @@ -785,7 +795,7 @@ func isBackslashEscaped(data []byte, i int) bool { func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) { i := 0 colCount := 1 - for i = 0; i < len(data) && data[i] != '\n'; i++ { + for i = 0; i < len(data) && !iseol(data[i]); i++ { if data[i] == '|' && !isBackslashEscaped(data, i) { colCount++ } @@ -797,8 +807,9 @@ func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) } // include the newline in the data sent to tableRow + i, _ = skipWindowsNewline(i, data) j := i - if j < len(data) && data[j] == '\n' { + if j < len(data) && iseol(data[j]) { j++ } header := data[:j] @@ -807,14 +818,15 @@ func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) if data[0] == '|' { colCount-- } - if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) { + lastPipe := backtrackAnyNewlines(data, i) + if lastPipe < len(data) && data[lastPipe] == '|' && !isBackslashEscaped(data, lastPipe) { colCount-- } columns = make([]CellAlignFlags, colCount) // move on to the header underline - i++ + i = j if i >= len(data) { return } @@ -827,7 +839,7 @@ func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3 // and trailing | optional on last column col := 0 - for i < len(data) && data[i] != '\n' { + for i < len(data) && !iseol(data[i]) { dashes := 0 if data[i] == ':' { @@ -865,7 +877,7 @@ func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) } // trailing junk found after last column - if col >= colCount && i < len(data) && data[i] != '\n' { + if col >= colCount && i < len(data) && !iseol(data[i]) { return } @@ -873,7 +885,7 @@ func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) // something else found where marker was required return - case data[i] == '\n': + case iseol(data[i]): // marker is optional for the last column col++ @@ -889,7 +901,7 @@ func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) p.addBlock(TableHead, nil) p.tableRow(header, columns, true) size = i - if size < len(data) && data[size] == '\n' { + if size < len(data) && iseol(data[size]) { size++ } return @@ -910,7 +922,7 @@ func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) cellStart := i - for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { + for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && !iseol(data[i]) { i++ } @@ -975,7 +987,7 @@ func (p *Markdown) quote(data []byte) int { // Step over whole lines, collecting them. While doing that, check for // fenced code and if one's found, incorporate it altogether, // irregardless of any contents inside it - for end < len(data) && data[end] != '\n' { + for end < len(data) && !iseol(data[end]) { if p.extensions&FencedCode != 0 { if i := p.fencedCodeBlock(data[end:], false); i > 0 { // -1 to compensate for the extra end++ after the loop: @@ -985,7 +997,7 @@ func (p *Markdown) quote(data []byte) int { } end++ } - if end < len(data) && data[end] == '\n' { + if end < len(data) && iseol(data[end]) { end++ } if pre := p.quotePrefix(data[beg:]); pre > 0 { @@ -1016,16 +1028,20 @@ func (p *Markdown) codePrefix(data []byte) int { func (p *Markdown) code(data []byte) int { var work bytes.Buffer - + var hasCRs, skipped bool i := 0 for i < len(data) { beg := i - for i < len(data) && data[i] != '\n' { + for i < len(data) && !iseol(data[i]) { i++ } - if i < len(data) && data[i] == '\n' { + if i < len(data) && iseol(data[i]) { i++ } + i, skipped = skipWindowsNewline(i, data) + if skipped { + hasCRs = true + } blankline := p.isEmpty(data[beg:i]) > 0 if pre := p.codePrefix(data[beg:i]); pre > 0 { @@ -1046,18 +1062,15 @@ func (p *Markdown) code(data []byte) int { // trim all the \n off the end of work workbytes := work.Bytes() - eol := len(workbytes) - for eol > 0 && workbytes[eol-1] == '\n' { - eol-- - } + eol := trimTrailingNewlines(workbytes, len(workbytes)) if eol != len(workbytes) { work.Truncate(eol) } work.WriteByte('\n') - block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer block.IsFenced = false + block.ContainsWindowsNewlines = hasCRs finalizeCodeBlock(block) return i @@ -1244,15 +1257,22 @@ func (p *Markdown) listItem(data []byte, flags *ListType) int { // find the end of the line line := i - for i > 0 && i < len(data) && data[i-1] != '\n' { + for i > 0 && i < len(data) && !iseol(data[i-1]) { i++ } + if i > 0 && i-1 < len(data) && data[i-1] == '\r' { + i-- + } // get working buffer var raw bytes.Buffer // put the first line into the working buffer raw.Write(data[line:i]) + if i > 0 && data[i-1] != '\n' { + raw.WriteByte('\n') + } + i, _ = skipWindowsNewline(i, data) line = i // process the following lines @@ -1265,7 +1285,7 @@ gatherlines: i++ // find the end of this line - for i < len(data) && data[i-1] != '\n' { + for i < len(data) && !iseol(data[i-1]) { i++ } @@ -1293,8 +1313,8 @@ gatherlines: chunk := data[line+indentIndex : i] if p.extensions&FencedCode != 0 { - // determine if in or out of codeblock - // if in codeblock, ignore normal list processing + // determine if in or out of codeblock + // if in codeblock, ignore normal list processing _, marker := isFenceLine(chunk, nil, codeBlockMarker) if marker != "" { if codeBlockMarker == "" { @@ -1359,10 +1379,10 @@ gatherlines: if *flags&ListTypeDefinition != 0 && i < len(data)-1 { // is the next item still a part of this list? next := i - for next < len(data) && data[next] != '\n' { + for next < len(data) && !iseol(data[next]) { next++ } - for next < len(data)-1 && data[next] == '\n' { + for next < len(data)-1 && iseol(data[next]) { next++ } if i < len(data)-1 && data[i] != ':' && data[next] != ':' { @@ -1386,8 +1406,14 @@ gatherlines: raw.WriteByte('\n') } + i = backupWindowsNewline(i, data) + // add the line into the working buffer without prefix raw.Write(data[line+indentIndex : i]) + if i > 0 && i < len(data) && data[i-1] != '\n' { + raw.WriteByte('\n') + } + i, _ = skipWindowsNewline(i, data) line = i } @@ -1424,7 +1450,7 @@ gatherlines: } // render a single paragraph that has already been parsed out -func (p *Markdown) renderParagraph(data []byte) { +func (p *Markdown) renderParagraph(data []byte, hasCRs bool) { if len(data) == 0 { return } @@ -1434,19 +1460,14 @@ func (p *Markdown) renderParagraph(data []byte) { for data[beg] == ' ' { beg++ } - - end := len(data) - // trim trailing newline - if data[len(data)-1] == '\n' { - end-- - } - + end := trimTrailingNewlines(data, len(data)) // trim trailing spaces for end > beg && data[end-1] == ' ' { end-- } - p.addBlock(Paragraph, data[beg:end]) + block := p.addBlock(Paragraph, data[beg:end]) + block.ContainsWindowsNewlines = hasCRs } func (p *Markdown) paragraph(data []byte) int { @@ -1458,6 +1479,7 @@ func (p *Markdown) paragraph(data []byte) int { if p.extensions&TabSizeEight != 0 { tabSize = TabSizeDouble } + var hasCRs bool // keep going until we find something to mark the end of the paragraph for i < len(data) { // mark the beginning of the current line @@ -1469,7 +1491,7 @@ func (p *Markdown) paragraph(data []byte) int { // preceding it and report that we have consumed up to the end of that // reference: if refEnd := isReference(p, current, tabSize); refEnd > 0 { - p.renderParagraph(data[:i]) + p.renderParagraph(data[:i], hasCRs) return i + refEnd } @@ -1482,7 +1504,7 @@ func (p *Markdown) paragraph(data []byte) int { } } - p.renderParagraph(data[:i]) + p.renderParagraph(data[:i], hasCRs) return i + n } @@ -1490,7 +1512,7 @@ func (p *Markdown) paragraph(data []byte) int { if i > 0 { if level := p.isUnderlinedHeading(current); level > 0 { // render the paragraph - p.renderParagraph(data[:prev]) + p.renderParagraph(data[:prev], hasCRs) // ignore leading and trailing whitespace eol := i - 1 @@ -1511,7 +1533,7 @@ func (p *Markdown) paragraph(data []byte) int { block.HeadingID = id // find the end of the underline - for i < len(data) && data[i] != '\n' { + for i < len(data) && !iseol(data[i]) { i++ } return i @@ -1522,21 +1544,21 @@ func (p *Markdown) paragraph(data []byte) int { if p.extensions&LaxHTMLBlocks != 0 { if data[i] == '<' && p.html(current, false) > 0 { // rewind to before the HTML block - p.renderParagraph(data[:i]) + p.renderParagraph(data[:i], hasCRs) return i } } // if there's a prefixed heading or a horizontal rule after this, paragraph is over if p.isPrefixHeading(current) || p.isHRule(current) { - p.renderParagraph(data[:i]) + p.renderParagraph(data[:i], hasCRs) return i } // if there's a fenced code block, paragraph is over if p.extensions&FencedCode != 0 { if p.fencedCodeBlock(current, false) > 0 { - p.renderParagraph(data[:i]) + p.renderParagraph(data[:i], hasCRs) return i } } @@ -1555,21 +1577,20 @@ func (p *Markdown) paragraph(data []byte) int { p.oliPrefix(current) != 0 || p.quotePrefix(current) != 0 || p.codePrefix(current) != 0 { - p.renderParagraph(data[:i]) + p.renderParagraph(data[:i], hasCRs) return i } } - - // otherwise, scan to the beginning of the next line - nl := bytes.IndexByte(data[i:], '\n') - if nl >= 0 { - i += nl + 1 - } else { - i += len(data[i:]) + var wasCR bool + i, wasCR = skipUntilNewline(data, i) + i++ + if i > len(data) { + i = len(data) } + hasCRs = hasCRs || wasCR } - p.renderParagraph(data[:i]) + p.renderParagraph(data[:i], hasCRs) return i } @@ -1581,6 +1602,26 @@ func skipChar(data []byte, start int, char byte) int { return i } +func skipUntilNewline(text []byte, start int) (int, bool) { + i := start + for i < len(text) && !iseol(text[i]) { + i++ + } + wasCR := false + if i+1 < len(text) && text[i] == '\r' && text[i+1] == '\n' { + i++ + wasCR = true + } + return i, wasCR +} + +func trimTrailingNewlines(data []byte, end int) int { + for end > 0 && (data[end-1] == '\n' || data[end-1] == '\r') { + end-- + } + return end +} + func skipUntilChar(text []byte, start int, char byte) int { i := start for i < len(text) && text[i] != char { @@ -1588,3 +1629,35 @@ func skipUntilChar(text []byte, start int, char byte) int { } return i } + +func skipWindowsNewline(i int, data []byte) (int, bool) { + var skipped bool + if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { + i++ + skipped = true + } + if i > 0 && i < len(data) && data[i-1] == '\r' && data[i] == '\n' { + i++ + skipped = true + } + return i, skipped +} + +func backupWindowsNewline(i int, data []byte) int { + if i > 1 && i < len(data) && data[i-1] == '\r' && data[i] == '\n' { + return i - 1 + } + if i > 0 && i < len(data) && data[i] == '\r' { + return i + } + return i +} + +// backtrackAnyNewlines decrements i until it hits any non-newline character or +// becomes zero. +func backtrackAnyNewlines(data []byte, i int) int { + for i > 0 && i < len(data) && iseol(data[i]) { + i-- + } + return i +} diff --git a/block_test.go b/block_test.go index 5cbff6d5..36b7b31c 100644 --- a/block_test.go +++ b/block_test.go @@ -14,6 +14,7 @@ package blackfriday import ( + "encoding/hex" "strings" "testing" ) @@ -1607,8 +1608,17 @@ func TestTitleBlock_EXTENSION_TITLEBLOCK(t *testing.T) { "

" + "Some title\n" + "Another title line\n" + - "Yep, more here too" + - "

\n", + "Yep, more here too\n", + + // XXX: titleBlock implementation does not do normalization and leaves + // CRs in the output. This is not ideal, but will work for now. + "% Some title\r\n" + + "% Another title line\r\n" + + "% Yep, more here too\r\n", + "

" + + "Some title\r\n" + + "Another title line\r\n" + + "Yep, more here too

\n", } doTestsBlock(t, tests, Titleblock) } @@ -1844,3 +1854,54 @@ func TestIsFenceLine(t *testing.T) { } } } + +func TestIsEmpty(t *testing.T) { + m := Markdown{} + tests := []struct { + input string + want int + }{ + { + input: "\n", + want: 1, + }, + { + input: " \n", + want: 2, + }, + { + input: "\r\n", + want: 2, + }, + { + input: " \r\n", + want: 3, + }, + { + input: "\r", + want: 1, + }, + } + for _, test := range tests { + if got := m.isEmpty([]byte(test.input)); got != test.want { + t.Errorf("Wrong output for %q: want %v, got %v", + test.input, test.want, got) + } + } +} + +func TestRepro(t *testing.T) { + // XXX: this is a temporary test to ensure the tables fix works. It will + // later be OK to remove it because it will be covered by the usual tests, + // which will be dual-run with Unix and Windows EOLs. + s, err := hex.DecodeString("4e616d65202020207c204167650d0a2d2d2d2d2d2d2d2d7c2d2d2d2d2d2d0d0a426f6220202020207c2032370d0a416c6963652020207c203233") + if err != nil { + panic(err) + } + var tests = []string{ + string(s), + "\n\n\n\n\n\n\n\n" + + "\n\n\n\n\n\n\n\n\n\n\n
NameAge
Bob27
Alice23
\n", + } + doTestsBlock(t, tests, Tables) +} diff --git a/helpers_test.go b/helpers_test.go index 089c730e..83589aa2 100644 --- a/helpers_test.go +++ b/helpers_test.go @@ -14,6 +14,7 @@ package blackfriday import ( + "bytes" "io/ioutil" "path/filepath" "regexp" @@ -148,39 +149,48 @@ func transformLinks(tests []string, prefix string) []string { func doTestsReference(t *testing.T, files []string, flag Extensions) { params := TestParams{extensions: flag} - execRecoverableTestSuite(t, files, params, func(candidate *string) { - for _, basename := range files { - filename := filepath.Join("testdata", basename+".text") - inputBytes, err := ioutil.ReadFile(filename) - if err != nil { - t.Errorf("Couldn't open '%s', error: %v\n", filename, err) - continue - } - input := string(inputBytes) - - filename = filepath.Join("testdata", basename+".html") - expectedBytes, err := ioutil.ReadFile(filename) - if err != nil { - t.Errorf("Couldn't open '%s', error: %v\n", filename, err) - continue - } - expected := string(expectedBytes) + refTestsRunner := func(alterEOLs bool) func(*string) { + return func(candidate *string) { + for _, basename := range files { + filename := filepath.Join("testdata", basename+".text") + inputBytes, err := ioutil.ReadFile(filename) + if err != nil { + t.Errorf("Couldn't open '%s', error: %v\n", filename, err) + continue + } + var input string + if alterEOLs { + input = string(bytes.Replace(inputBytes, []byte{'\n'}, + []byte("\r\n"), -1)) + } else { + input = string(inputBytes) + } + filename = filepath.Join("testdata", basename+".html") + expectedBytes, err := ioutil.ReadFile(filename) + if err != nil { + t.Errorf("Couldn't open '%s', error: %v\n", filename, err) + continue + } + expected := string(expectedBytes) - actual := string(runMarkdown(input, params)) - if actual != expected { - t.Errorf("\n [%#v]\nExpected[%#v]\nActual [%#v]", - basename+".text", expected, actual) - } + actual := string(runMarkdown(input, params)) + if actual != expected { + t.Errorf("\n [%#v]\nExpected[%#v]\nActual [%#v]", + basename+".text", expected, actual) + } - // now test every prefix of every input to check for - // bounds checking - if !testing.Short() { - start, max := 0, len(input) - for end := start + 1; end <= max; end++ { - *candidate = input[start:end] - runMarkdown(*candidate, params) + // now test every prefix of every input to check for + // bounds checking + if !testing.Short() { + start, max := 0, len(input) + for end := start + 1; end <= max; end++ { + *candidate = input[start:end] + runMarkdown(*candidate, params) + } } } } - }) + } + execRecoverableTestSuite(t, files, params, refTestsRunner(true)) + execRecoverableTestSuite(t, files, params, refTestsRunner(false)) } diff --git a/html.go b/html.go index 284c8718..585152cf 100644 --- a/html.go +++ b/html.go @@ -399,6 +399,7 @@ func (r *HTMLRenderer) cr(w io.Writer) { var ( nlBytes = []byte{'\n'} + crnlBytes = []byte("\r\n") gtBytes = []byte{'>'} spaceBytes = []byte{' '} ) @@ -492,6 +493,22 @@ func (r *HTMLRenderer) outHRTag(w io.Writer) { } } +func getLiteral(node *Node) []byte { + hasCR := false + iterNode := node + for iterNode != nil { + if iterNode.ContainsWindowsNewlines { + hasCR = true + break + } + iterNode = iterNode.Parent + } + if hasCR { + return bytes.Replace(node.Literal, crnlBytes, nlBytes, -1) + } + return node.Literal +} + // RenderNode is a default renderer of a single node of a syntax tree. For // block nodes it will be called twice: first time with entering=true, second // time with entering=false, so that it could know when it's working on an open @@ -511,10 +528,11 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt escapeHTML(&tmp, node.Literal) r.sr.Process(w, tmp.Bytes()) } else { + literal := getLiteral(node) if node.Parent.Type == Link { - escLink(w, node.Literal) + escLink(w, literal) } else { - escapeHTML(w, node.Literal) + escapeHTML(w, literal) } } case Softbreak: @@ -656,7 +674,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt break } r.cr(w) - r.out(w, node.Literal) + r.out(w, getLiteral(node)) r.cr(w) case Heading: headingLevel := r.HTMLRendererParameters.HeadingLevelOffset + node.Level @@ -762,7 +780,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt r.cr(w) r.out(w, preTag) r.tag(w, codeTag[:len(codeTag)-1], attrs) - escapeHTML(w, node.Literal) + escapeHTML(w, getLiteral(node)) r.out(w, codeCloseTag) r.out(w, preCloseTag) if node.Parent.Type != Item { diff --git a/inline.go b/inline.go index 4ed29079..c6708ef4 100644 --- a/inline.go +++ b/inline.go @@ -74,7 +74,7 @@ func (p *Markdown) inline(currBlock *Node, data []byte) { } } if beg < len(data) { - if data[end-1] == '\n' { + if iseol(data[end-1]) { end-- } currBlock.AppendChild(text(data[beg:end])) @@ -180,8 +180,7 @@ func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) { for offset < len(data) && data[offset] == ' ' { offset++ } - - if offset < len(data) && data[offset] == '\n' { + if offset < len(data) && iseol(data[offset]) { if offset-origOffset >= 2 { return offset - origOffset + 1, NewNode(Hardbreak) } @@ -275,7 +274,7 @@ func link(p *Markdown, data []byte, offset int) (int, *Node) { // look for the matching closing bracket for level := 1; level > 0 && i < len(data); i++ { switch { - case data[i] == '\n': + case iseol(data[i]): textHasNl = true case data[i-1] == '\\': @@ -422,7 +421,7 @@ func link(p *Markdown, data []byte, offset int) (int, *Node) { for j := 1; j < txtE; j++ { switch { - case data[j] != '\n': + case !iseol(data[j]): b.WriteByte(data[j]) case data[j-1] != ' ': b.WriteByte(' ') @@ -462,9 +461,9 @@ func link(p *Markdown, data []byte, offset int) (int, *Node) { for j := 1; j < txtE; j++ { switch { - case data[j] != '\n': + case !iseol(data[j]): b.WriteByte(data[j]) - case data[j-1] != ' ': + case data[j-1] != ' ' && data[j-1] != '\r': b.WriteByte(' ') } } @@ -587,23 +586,27 @@ func link(p *Markdown, data []byte, offset int) (int, *Node) { return i, linkNode } -func (p *Markdown) inlineHTMLComment(data []byte) int { +func (p *Markdown) inlineHTMLComment(data []byte) (int, bool) { + hasCRs := false if len(data) < 5 { - return 0 + return 0, false } if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' { - return 0 + return 0, false } i := 5 // scan for an end-of-comment marker, across lines if necessary for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') { + if data[i] == '\r' { + hasCRs = true + } i++ } // no end-of-comment marker if i >= len(data) { - return 0 + return 0, false } - return i + 1 + return i + 1, hasCRs } func stripMailto(link []byte) []byte { @@ -630,7 +633,7 @@ const ( func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) { data = data[offset:] altype, end := tagLength(data) - if size := p.inlineHTMLComment(data); size > 0 { + if size, _ := p.inlineHTMLComment(data); size > 0 { end = size } if end > 2 { @@ -664,7 +667,7 @@ func escape(p *Markdown, data []byte, offset int) (int, *Node) { data = data[offset:] if len(data) > 1 { - if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' { + if p.extensions&BackslashLineBreak != 0 && iseol(data[1]) { return 2, NewNode(Hardbreak) } if bytes.IndexByte(escapeChars, data[1]) < 0 { @@ -868,7 +871,7 @@ func autoLink(p *Markdown, data []byte, offset int) (int, *Node) { * => foo http://www.pokemon.com/Pikachu_(Electric) */ - for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 { + for bufEnd >= 0 && !iseol(origData[bufEnd]) && openDelim != 0 { if origData[bufEnd] == data[linkEnd-1] { openDelim++ } @@ -1080,7 +1083,7 @@ func helperFindEmphChar(data []byte, c byte) int { i++ } i++ - for i < len(data) && (data[i] == ' ' || data[i] == '\n') { + for i < len(data) && (data[i] == ' ' || iseol(data[i])) { i++ } if i >= len(data) { diff --git a/inline_test.go b/inline_test.go index 69b70aee..df20800c 100644 --- a/inline_test.go +++ b/inline_test.go @@ -1185,3 +1185,17 @@ func BenchmarkSmartDoubleQuotes(b *testing.B) { runMarkdown("this should be normal \"quoted\" text.\n", params) } } + +func TestWindowsNewlines(t *testing.T) { + var tests = []string{ + "over *two\r\nlines* test\n", + "

over two\nlines test

\n", + + "\r\nfoo\r\n*bar*\r\n", + "

foo\nbar

\n", + + "# Hello World\r\n\r\nThis is my content.\r\n", + "

Hello World

\n\n

This is my content.

\n", + } + doTestsInline(t, tests) +} diff --git a/markdown.go b/markdown.go index f39ca50a..8cd1aefa 100644 --- a/markdown.go +++ b/markdown.go @@ -569,7 +569,7 @@ func isReference(p *Markdown, data []byte, tabSize int) int { } } idOffset := i - for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { + for i < len(data) && !iseol(data[i]) && data[i] != ']' { i++ } if i >= len(data) || data[i] != ']' { @@ -590,7 +590,7 @@ func isReference(p *Markdown, data []byte, tabSize int) int { for i < len(data) && (data[i] == ' ' || data[i] == '\t') { i++ } - if i < len(data) && (data[i] == '\n' || data[i] == '\r') { + if i < len(data) && iseol(data[i]) { i++ if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { i++ @@ -652,7 +652,7 @@ func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOff i++ } linkOffset = i - for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { + for i < len(data) && data[i] != ' ' && data[i] != '\t' && !iseol(data[i]) { i++ } linkEnd = i @@ -670,7 +670,7 @@ func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOff } // compute end-of-line - if i >= len(data) || data[i] == '\r' || data[i] == '\n' { + if i >= len(data) || iseol(data[i]) { lineEnd = i } if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { @@ -691,7 +691,7 @@ func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOff titleOffset = i // look for EOL - for i < len(data) && data[i] != '\n' && data[i] != '\r' { + for i < len(data) && !iseol(data[i]) { i++ } if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { @@ -734,7 +734,7 @@ func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, bloc // find the end of the line blockEnd = i - for i < len(data) && data[i-1] != '\n' { + for i < len(data) && data[i-1] != '\n' { // XXX: \r? i++ } @@ -753,7 +753,7 @@ gatherLines: i++ // find the end of this line - for i < len(data) && data[i-1] != '\n' { + for i < len(data) && data[i-1] != '\n' { // XXX: \r? i++ } @@ -785,7 +785,7 @@ gatherLines: blockEnd = i } - if data[blockEnd-1] != '\n' { + if data[blockEnd-1] != '\n' { // XXX: \r? raw.WriteByte('\n') } @@ -811,6 +811,10 @@ func ispunct(c byte) bool { return false } +func iseol(c byte) bool { + return c == '\n' || c == '\r' +} + // Test if a character is a whitespace character. func isspace(c byte) bool { return ishorizontalspace(c) || isverticalspace(c) diff --git a/node.go b/node.go index 51b9e8c1..74ed6450 100644 --- a/node.go +++ b/node.go @@ -128,6 +128,8 @@ type Node struct { LinkData // Populated if Type is Link TableCellData // Populated if Type is TableCell + ContainsWindowsNewlines bool // Is set to true when Literal (or content field) contains \r + content []byte // Markdown content of the block nodes open bool // Specifies an open block node that has not been finished to process yet } diff --git a/ref_test.go b/ref_test.go index 4375f540..0b94d2ba 100644 --- a/ref_test.go +++ b/ref_test.go @@ -19,15 +19,14 @@ import ( "testing" ) -func TestReference(t *testing.T) { - files := []string{ +var ( + refTestFilesBase = []string{ "Amps and angle encoding", "Auto links", "Backslash escapes", "Blockquotes with code blocks", "Code Blocks", "Code Spans", - "Hard-wrapped paragraphs with list-like lines", "Horizontal rules", "Inline HTML (Advanced)", "Inline HTML (Simple)", @@ -44,35 +43,18 @@ func TestReference(t *testing.T) { "Tabs", "Tidyness", } - doTestsReference(t, files, 0) + refTestFiles = append(refTestFilesBase, + "Hard-wrapped paragraphs with list-like lines") + refTestFilesNoEmptyLine = append(refTestFilesBase, + "Hard-wrapped paragraphs with list-like lines no empty line before block") +) + +func TestReference(t *testing.T) { + doTestsReference(t, refTestFiles, 0) } func TestReference_EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK(t *testing.T) { - files := []string{ - "Amps and angle encoding", - "Auto links", - "Backslash escapes", - "Blockquotes with code blocks", - "Code Blocks", - "Code Spans", - "Hard-wrapped paragraphs with list-like lines no empty line before block", - "Horizontal rules", - "Inline HTML (Advanced)", - "Inline HTML (Simple)", - "Inline HTML comments", - "Links, inline style", - "Links, reference style", - "Links, shortcut references", - "Literal quotes in titles", - "Markdown Documentation - Basics", - "Markdown Documentation - Syntax", - "Nested blockquotes", - "Ordered and unordered lists", - "Strong and em together", - "Tabs", - "Tidyness", - } - doTestsReference(t, files, NoEmptyLineBeforeBlock) + doTestsReference(t, refTestFilesNoEmptyLine, NoEmptyLineBeforeBlock) } // benchResultAnchor is an anchor variable to store the result of a benchmarked @@ -81,32 +63,8 @@ var benchResultAnchor string func BenchmarkReference(b *testing.B) { params := TestParams{extensions: CommonExtensions} - files := []string{ - "Amps and angle encoding", - "Auto links", - "Backslash escapes", - "Blockquotes with code blocks", - "Code Blocks", - "Code Spans", - "Hard-wrapped paragraphs with list-like lines", - "Horizontal rules", - "Inline HTML (Advanced)", - "Inline HTML (Simple)", - "Inline HTML comments", - "Links, inline style", - "Links, reference style", - "Links, shortcut references", - "Literal quotes in titles", - "Markdown Documentation - Basics", - "Markdown Documentation - Syntax", - "Nested blockquotes", - "Ordered and unordered lists", - "Strong and em together", - "Tabs", - "Tidyness", - } var tests []string - for _, basename := range files { + for _, basename := range refTestFiles { filename := filepath.Join("testdata", basename+".text") inputBytes, err := ioutil.ReadFile(filename) if err != nil {