From 2396259252d81fb484efbbb7915f1940a26db588 Mon Sep 17 00:00:00 2001 From: Daniel Schuette Date: Sat, 15 Sep 2018 20:19:36 -0400 Subject: [PATCH] fix parsing bug and update unit tests respectively --- parse.go | 87 ++++++++++++++++++++++++++++--------------------- parse_test.go | 33 ++++++++++--------- tests/parse3.re | 2 +- 3 files changed, 68 insertions(+), 54 deletions(-) diff --git a/parse.go b/parse.go index 960b756..1b57f73 100644 --- a/parse.go +++ b/parse.go @@ -57,17 +57,8 @@ func ParseEnzymesFromFile(file string) (map[string]RestrictEnzyme, error) { Loop: for i, n := 0, len(b); i < n; i++ { - // current char is the last char in the document => add parsed results to `enzymesMap' - if (i + 1) == n { - if itemContainer != nil { - if _, ok := enzymesMap[itemContainer.Name]; !ok { - enzymesMap[itemContainer.Name] = *itemContainer - } - } - } - - // otherwise, the document is not yet fully parsed => decide what to do next - if i < len(b)-2 { + // assume that the document is not yet fully parsed => decide what to do next + if i < (n - 2) { // if current char is a new line delimiter, decide how to proceed if b[i] == '\n' { switch { @@ -96,43 +87,63 @@ Loop: if !parse { continue Loop } + } + // if current char is a valid item delimiter '\'', perform the appropriate action + if b[i] == '\'' { + // if this '\'' is delimiting the end of a data item, add the current `dataItem' + // to the `itemContainer' field that corresponds to the current `column' + // then, increment column count and continue loop after resetting the temporary + // data item variable `dataItem' and set `openQuote' to false + if openQuote { + switch column { + case 0: + itemContainer.Name = string(dataItem) + case 1: + itemContainer.RecognitionSite = string(dataItem) + case 2: + itemContainer.NoPalinCleav = string(dataItem) + case 3: + itemContainer.ID = string(dataItem) + case 4: + itemContainer.Isoschizomeres = strings.Split(string(dataItem), ",") + } + column++ + dataItem = make([]byte, 0) + openQuote = false + continue Loop + } + + // if this '\'' is delimiting the start of a data item set `openQuote' to true + // and continue loop to not add the opening '\'' to the respective string + openQuote = true - // if current char is a valid item delimiter '\'', perform the appropriate action - if b[i] == '\'' { - // if this '\'' is delimiting the end of a data item, add the current `dataItem' - // to the `itemContainer' field that corresponds to the current `column' - // then, increment column count and continue loop after resetting the temporary - // data item variable `dataItem' and set `openQuote' to false - if openQuote { - switch column { - case 0: - itemContainer.Name = string(dataItem) - case 1: - itemContainer.RecognitionSite = string(dataItem) - case 2: - itemContainer.NoPalinCleav = string(dataItem) - case 3: - itemContainer.ID = string(dataItem) - case 4: - itemContainer.Isoschizomeres = strings.Split(string(dataItem), ",") + // current char is the last char in the document => add parsed results to `enzymesMap' + if (i + 1) == n { + if itemContainer != nil { + if _, ok := enzymesMap[itemContainer.Name]; !ok { + enzymesMap[itemContainer.Name] = *itemContainer } - column++ - dataItem = make([]byte, 0) - openQuote = false - continue Loop } - - // if this '\'' is delimiting the start of a data item set `openQuote' to true - // and continue loop to not add the opening '\'' to the respective string - openQuote = true - continue Loop } + + // otherwise => continue the loop + continue Loop } // if parser is inbetween quotes, append byte to current `dataItem' if openQuote { dataItem = append(dataItem, b[i]) } + + // current char is the last char in the document and + // no other condition triggered at this point => add parsed results to `enzymesMap' + if (i + 1) == n { + if itemContainer != nil { + if _, ok := enzymesMap[itemContainer.Name]; !ok { + enzymesMap[itemContainer.Name] = *itemContainer + } + } + } } fmt.Printf("parsed %d of %d enzyme(s) from '%s'\n", len(enzymesMap), line, file) diff --git a/parse_test.go b/parse_test.go index 84d0de1..a7336cc 100644 --- a/parse_test.go +++ b/parse_test.go @@ -2,7 +2,6 @@ package cloningprimer import ( "errors" - "log" "testing" ) @@ -38,10 +37,8 @@ func TestParseEnzymesFromFile(t *testing.T) { want: map[string]RestrictEnzyme{ "AclI": { Name: "AclI", - RecognitionSite: "invalid", - NoPalinCleav: "invalid", - ID: "invalid", - Isoschizomeres: []string{"invalid", "invalid"}, + RecognitionSite: "AACGTT", + NoPalinCleav: "no", }, }, err: nil, @@ -52,24 +49,32 @@ func TestParseEnzymesFromFile(t *testing.T) { want: map[string]RestrictEnzyme{ "AclI": { Name: "AclI", - RecognitionSite: "invalid", - NoPalinCleav: "invalid", - ID: "invalid", - Isoschizomeres: []string{"invalid", "invalid"}, + RecognitionSite: "AACGTT", + NoPalinCleav: "no", + ID: "A1A1", + Isoschizomeres: []string{"AclI"}, }, }, err: nil, }, // test correct parsing of enzymes from a file with comments but no column labels: `parse2.re' + // also, two enzymes are passed instead of just one { in: "tests/parse3.re", want: map[string]RestrictEnzyme{ "AclI": { Name: "AclI", - RecognitionSite: "invalid", - NoPalinCleav: "invalid", - ID: "invalid", - Isoschizomeres: []string{"invalid", "invalid"}, + RecognitionSite: "AACGTT", + NoPalinCleav: "no", + ID: "A1A1", + Isoschizomeres: []string{"AclI"}, + }, + "AclII": { + Name: "AclII", + RecognitionSite: "ACCGGT", + NoPalinCleav: "no", + ID: "A2A2", + Isoschizomeres: []string{"AclII", "AclIII"}, }, }, err: nil, @@ -79,8 +84,6 @@ func TestParseEnzymesFromFile(t *testing.T) { // loop over test cases for _, c := range cases { got, err := ParseEnzymesFromFile(c.in) - log.Printf("parsed len: %v, expect len: %v\n", len(got), len(c.want)) - log.Printf("parsed: %v, expect: %v\n", got, c.want) // test similarity of expected and received value if !isSimilarMap(got, c.want) { diff --git a/tests/parse3.re b/tests/parse3.re index 3889bb2..c8d5bb1 100644 --- a/tests/parse3.re +++ b/tests/parse3.re @@ -4,4 +4,4 @@ * a parsed without a problem; even without column labels! */ 'AclI' 'AACGTT' 'no' 'A1A1' 'AclI' -'AclII' 'ACCGGT' 'no' 'A2A2' 'AclII' +'AclII' 'ACCGGT' 'no' 'A2A2' 'AclII,AclIII'