From 2396259252d81fb484efbbb7915f1940a26db588 Mon Sep 17 00:00:00 2001
From: Daniel Schuette <d.schuette@online.de>
Date: Sat, 15 Sep 2018 20:19:36 -0400
Subject: [PATCH] fix parsing bug and update unit tests accordingly

---
 parse.go        | 87 ++++++++++++++++++++++++++++---------------------
 parse_test.go   | 33 ++++++++++---------
 tests/parse3.re |  2 +-
 3 files changed, 68 insertions(+), 54 deletions(-)

diff --git a/parse.go b/parse.go
index 960b756..1b57f73 100644
--- a/parse.go
+++ b/parse.go
@@ -57,17 +57,8 @@ func ParseEnzymesFromFile(file string) (map[string]RestrictEnzyme, error) {
 
 Loop:
 	for i, n := 0, len(b); i < n; i++ {
-		// current char is the last char in the document => add parsed results to `enzymesMap'
-		if (i + 1) == n {
-			if itemContainer != nil {
-				if _, ok := enzymesMap[itemContainer.Name]; !ok {
-					enzymesMap[itemContainer.Name] = *itemContainer
-				}
-			}
-		}
-
-		// otherwise, the document is not yet fully parsed => decide what to do next
-		if i < len(b)-2 {
+		// assume that the document is not yet fully parsed => decide what to do next
+		if i < (n - 2) {
 			// if current char is a new line delimiter, decide how to proceed
 			if b[i] == '\n' {
 				switch {
@@ -96,43 +87,63 @@ Loop:
 			if !parse {
 				continue Loop
 			}
+		}
+		// if current char is a valid item delimiter '\'', perform the appropriate action
+		if b[i] == '\'' {
+			// if this '\'' is delimiting the end of a data item, add the current `dataItem'
+			// to the `itemContainer' field that corresponds to the current `column'
+			// then, increment column count and continue loop after resetting the temporary
+			// data item variable `dataItem' and set `openQuote' to false
+			if openQuote {
+				switch column {
+				case 0:
+					itemContainer.Name = string(dataItem)
+				case 1:
+					itemContainer.RecognitionSite = string(dataItem)
+				case 2:
+					itemContainer.NoPalinCleav = string(dataItem)
+				case 3:
+					itemContainer.ID = string(dataItem)
+				case 4:
+					itemContainer.Isoschizomeres = strings.Split(string(dataItem), ",")
+				}
+				column++
+				dataItem = make([]byte, 0)
+				openQuote = false
+				continue Loop
+			}
+
+			// if this '\'' is delimiting the start of a data item set `openQuote' to true
+			// and continue loop to not add the opening '\'' to the respective string
+			openQuote = true
 
-			// if current char is a valid item delimiter '\'', perform the appropriate action
-			if b[i] == '\'' {
-				// if this '\'' is delimiting the end of a data item, add the current `dataItem'
-				// to the `itemContainer' field that corresponds to the current `column'
-				// then, increment column count and continue loop after resetting the temporary
-				// data item variable `dataItem' and set `openQuote' to false
-				if openQuote {
-					switch column {
-					case 0:
-						itemContainer.Name = string(dataItem)
-					case 1:
-						itemContainer.RecognitionSite = string(dataItem)
-					case 2:
-						itemContainer.NoPalinCleav = string(dataItem)
-					case 3:
-						itemContainer.ID = string(dataItem)
-					case 4:
-						itemContainer.Isoschizomeres = strings.Split(string(dataItem), ",")
+			// current char is the last char in the document => add parsed results to `enzymesMap'
+			if (i + 1) == n {
+				if itemContainer != nil {
+					if _, ok := enzymesMap[itemContainer.Name]; !ok {
+						enzymesMap[itemContainer.Name] = *itemContainer
 					}
-					column++
-					dataItem = make([]byte, 0)
-					openQuote = false
-					continue Loop
 				}
-
-				// if this '\'' is delimiting the start of a data item set `openQuote' to true
-				// and continue loop to not add the opening '\'' to the respective string
-				openQuote = true
-				continue Loop
 			}
+
+			// in either case => continue the loop so the opening '\'' is not appended to `dataItem'
+			continue Loop
 		}
 
 		// if parser is inbetween quotes, append byte to current `dataItem'
 		if openQuote {
 			dataItem = append(dataItem, b[i])
 		}
+
+		// current char is the last char in the document and
+		// no other condition triggered at this point => add parsed results to `enzymesMap'
+		if (i + 1) == n {
+			if itemContainer != nil {
+				if _, ok := enzymesMap[itemContainer.Name]; !ok {
+					enzymesMap[itemContainer.Name] = *itemContainer
+				}
+			}
+		}
 	}
 
 	fmt.Printf("parsed %d of %d enzyme(s) from '%s'\n", len(enzymesMap), line, file)
diff --git a/parse_test.go b/parse_test.go
index 84d0de1..a7336cc 100644
--- a/parse_test.go
+++ b/parse_test.go
@@ -2,7 +2,6 @@ package cloningprimer
 
 import (
 	"errors"
-	"log"
 	"testing"
 )
 
@@ -38,10 +37,8 @@ func TestParseEnzymesFromFile(t *testing.T) {
 			want: map[string]RestrictEnzyme{
 				"AclI": {
 					Name:            "AclI",
-					RecognitionSite: "invalid",
-					NoPalinCleav:    "invalid",
-					ID:              "invalid",
-					Isoschizomeres:  []string{"invalid", "invalid"},
+					RecognitionSite: "AACGTT",
+					NoPalinCleav:    "no",
 				},
 			},
 			err: nil,
@@ -52,24 +49,32 @@ func TestParseEnzymesFromFile(t *testing.T) {
 			want: map[string]RestrictEnzyme{
 				"AclI": {
 					Name:            "AclI",
-					RecognitionSite: "invalid",
-					NoPalinCleav:    "invalid",
-					ID:              "invalid",
-					Isoschizomeres:  []string{"invalid", "invalid"},
+					RecognitionSite: "AACGTT",
+					NoPalinCleav:    "no",
+					ID:              "A1A1",
+					Isoschizomeres:  []string{"AclI"},
 				},
 			},
 			err: nil,
 		},
 		// test correct parsing of enzymes from a file with comments but no column labels: `parse2.re'
+		// also, two enzymes are passed instead of just one
 		{
 			in: "tests/parse3.re",
 			want: map[string]RestrictEnzyme{
 				"AclI": {
 					Name:            "AclI",
-					RecognitionSite: "invalid",
-					NoPalinCleav:    "invalid",
-					ID:              "invalid",
-					Isoschizomeres:  []string{"invalid", "invalid"},
+					RecognitionSite: "AACGTT",
+					NoPalinCleav:    "no",
+					ID:              "A1A1",
+					Isoschizomeres:  []string{"AclI"},
+				},
+				"AclII": {
+					Name:            "AclII",
+					RecognitionSite: "ACCGGT",
+					NoPalinCleav:    "no",
+					ID:              "A2A2",
+					Isoschizomeres:  []string{"AclII", "AclIII"},
 				},
 			},
 			err: nil,
@@ -79,8 +84,6 @@ func TestParseEnzymesFromFile(t *testing.T) {
 	// loop over test cases
 	for _, c := range cases {
 		got, err := ParseEnzymesFromFile(c.in)
-		log.Printf("parsed len: %v, expect len: %v\n", len(got), len(c.want))
-		log.Printf("parsed: %v, expect: %v\n", got, c.want)
 
 		// test similarity of expected and received value
 		if !isSimilarMap(got, c.want) {
diff --git a/tests/parse3.re b/tests/parse3.re
index 3889bb2..c8d5bb1 100644
--- a/tests/parse3.re
+++ b/tests/parse3.re
@@ -4,4 +4,4 @@
  * a parsed without a problem; even without column labels!
  */
 'AclI'            'AACGTT'                  'no'    'A1A1'  'AclI'
-'AclII'           'ACCGGT'                  'no'    'A2A2'  'AclII'
+'AclII'           'ACCGGT'                  'no'    'A2A2'  'AclII,AclIII'