Skip to content

Commit

Permalink
fix parsing bug and update unit tests accordingly
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielSchuette committed Sep 16, 2018
1 parent 1a54297 commit 2396259
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 54 deletions.
87 changes: 49 additions & 38 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,8 @@ func ParseEnzymesFromFile(file string) (map[string]RestrictEnzyme, error) {

Loop:
for i, n := 0, len(b); i < n; i++ {
// current char is the last char in the document => add parsed results to `enzymesMap'
if (i + 1) == n {
if itemContainer != nil {
if _, ok := enzymesMap[itemContainer.Name]; !ok {
enzymesMap[itemContainer.Name] = *itemContainer
}
}
}

// otherwise, the document is not yet fully parsed => decide what to do next
if i < len(b)-2 {
// assume that the document is not yet fully parsed => decide what to do next
if i < (n - 2) {
// if current char is a new line delimiter, decide how to proceed
if b[i] == '\n' {
switch {
Expand Down Expand Up @@ -96,43 +87,63 @@ Loop:
if !parse {
continue Loop
}
}
// if current char is a valid item delimiter '\'', perform the appropriate action
if b[i] == '\'' {
// if this '\'' is delimiting the end of a data item, add the current `dataItem'
// to the `itemContainer' field that corresponds to the current `column'
// then, increment column count and continue loop after resetting the temporary
// data item variable `dataItem' and set `openQuote' to false
if openQuote {
switch column {
case 0:
itemContainer.Name = string(dataItem)
case 1:
itemContainer.RecognitionSite = string(dataItem)
case 2:
itemContainer.NoPalinCleav = string(dataItem)
case 3:
itemContainer.ID = string(dataItem)
case 4:
itemContainer.Isoschizomeres = strings.Split(string(dataItem), ",")
}
column++
dataItem = make([]byte, 0)
openQuote = false
continue Loop
}

// if this '\'' is delimiting the start of a data item set `openQuote' to true
// and continue loop to not add the opening '\'' to the respective string
openQuote = true

// if current char is a valid item delimiter '\'', perform the appropriate action
if b[i] == '\'' {
// if this '\'' is delimiting the end of a data item, add the current `dataItem'
// to the `itemContainer' field that corresponds to the current `column'
// then, increment column count and continue loop after resetting the temporary
// data item variable `dataItem' and set `openQuote' to false
if openQuote {
switch column {
case 0:
itemContainer.Name = string(dataItem)
case 1:
itemContainer.RecognitionSite = string(dataItem)
case 2:
itemContainer.NoPalinCleav = string(dataItem)
case 3:
itemContainer.ID = string(dataItem)
case 4:
itemContainer.Isoschizomeres = strings.Split(string(dataItem), ",")
// current char is the last char in the document => add parsed results to `enzymesMap'
if (i + 1) == n {
if itemContainer != nil {
if _, ok := enzymesMap[itemContainer.Name]; !ok {
enzymesMap[itemContainer.Name] = *itemContainer
}
column++
dataItem = make([]byte, 0)
openQuote = false
continue Loop
}

// if this '\'' is delimiting the start of a data item set `openQuote' to true
// and continue loop to not add the opening '\'' to the respective string
openQuote = true
continue Loop
}

// otherwise => continue the loop
continue Loop
}

// if parser is in between quotes, append byte to current `dataItem'
if openQuote {
dataItem = append(dataItem, b[i])
}

// current char is the last char in the document and
// no other condition triggered at this point => add parsed results to `enzymesMap'
if (i + 1) == n {
if itemContainer != nil {
if _, ok := enzymesMap[itemContainer.Name]; !ok {
enzymesMap[itemContainer.Name] = *itemContainer
}
}
}
}

fmt.Printf("parsed %d of %d enzyme(s) from '%s'\n", len(enzymesMap), line, file)
Expand Down
33 changes: 18 additions & 15 deletions parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package cloningprimer

import (
"errors"
"log"
"testing"
)

Expand Down Expand Up @@ -38,10 +37,8 @@ func TestParseEnzymesFromFile(t *testing.T) {
want: map[string]RestrictEnzyme{
"AclI": {
Name: "AclI",
RecognitionSite: "invalid",
NoPalinCleav: "invalid",
ID: "invalid",
Isoschizomeres: []string{"invalid", "invalid"},
RecognitionSite: "AACGTT",
NoPalinCleav: "no",
},
},
err: nil,
Expand All @@ -52,24 +49,32 @@ func TestParseEnzymesFromFile(t *testing.T) {
want: map[string]RestrictEnzyme{
"AclI": {
Name: "AclI",
RecognitionSite: "invalid",
NoPalinCleav: "invalid",
ID: "invalid",
Isoschizomeres: []string{"invalid", "invalid"},
RecognitionSite: "AACGTT",
NoPalinCleav: "no",
ID: "A1A1",
Isoschizomeres: []string{"AclI"},
},
},
err: nil,
},
// test correct parsing of enzymes from a file with comments but no column labels: `parse3.re'
// also, two enzymes are passed instead of just one
{
in: "tests/parse3.re",
want: map[string]RestrictEnzyme{
"AclI": {
Name: "AclI",
RecognitionSite: "invalid",
NoPalinCleav: "invalid",
ID: "invalid",
Isoschizomeres: []string{"invalid", "invalid"},
RecognitionSite: "AACGTT",
NoPalinCleav: "no",
ID: "A1A1",
Isoschizomeres: []string{"AclI"},
},
"AclII": {
Name: "AclII",
RecognitionSite: "ACCGGT",
NoPalinCleav: "no",
ID: "A2A2",
Isoschizomeres: []string{"AclII", "AclIII"},
},
},
err: nil,
Expand All @@ -79,8 +84,6 @@ func TestParseEnzymesFromFile(t *testing.T) {
// loop over test cases
for _, c := range cases {
got, err := ParseEnzymesFromFile(c.in)
log.Printf("parsed len: %v, expect len: %v\n", len(got), len(c.want))
log.Printf("parsed: %v, expect: %v\n", got, c.want)

// test similarity of expected and received value
if !isSimilarMap(got, c.want) {
Expand Down
2 changes: 1 addition & 1 deletion tests/parse3.re
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
* a parsed without a problem; even without column labels!
*/
'AclI' 'AACGTT' 'no' 'A1A1' 'AclI'
'AclII' 'ACCGGT' 'no' 'A2A2' 'AclII'
'AclII' 'ACCGGT' 'no' 'A2A2' 'AclII,AclIII'

0 comments on commit 2396259

Please sign in to comment.