walker76-research · johnsimmons2 · Jul 25, 2019 · Jul 25, 2019
diff --git a/requirements.txt b/requirements.txt
@@ -9,4 +9,4 @@ rq
 python-dotenv
 numpy
 datasketch
-python-dotenv
+mysql-connector-python
diff --git a/stackoversight/pipeline/pipelineoutput.py b/stackoversight/pipeline/pipelineoutput.py
@@ -50,10 +50,11 @@ def print(self, index=None):
     def form_lsh(self):
         minhash = []
 
-        for s in self.__items:
+        for list_keywords in self.__items:
             m = MinHash(num_perm=256)
-            for q in s:
-                m.update(q.encode('utf8'))
+            for result in list_keywords:
+                if result is not None:
+                    m.update(result.encode('utf8'))
             minhash.append(m)
 
         forest = MinHashLSHForest(num_perm=256)
@@ -93,7 +94,8 @@ def query(self, item=None):
 
         m = MinHash(num_perm=256)
         for s in item:
-            m.update(s.encode('utf8'))
+            if s is not None:
+                m.update(s.encode('utf8'))
         query = self.__forest.query(m, 5)
         out = np.array(query)
         self.__hash_results = query

diff --git a/stackoversight/test.py b/stackoversight/test.py
@@ -7,48 +7,52 @@
 import asyncio
 import rq
 
-# TODO: tuples instead
-
-code_base = "for i in range(1,11):\n" \
-        "   n = 1\n" \
-        "   for j in range(1, i+1):\n" \
-        "       n *= j\n" \
-        "   print('{}! = {}'.format(i, n))"
-
-code_sample = "for i in range(25,100):\n" \
-        "   n = 3\n" \
-        "   # This is a comment!\n" \
-        "   # Notice how it is found as a 'NONE'!\n" \
-        "   for j in range(2, i+5):\n" \
-        "       n *= j\n" \
-        "       # Another comment!\n" \
-        "   print('{}! = {}'.format(i, n))"
-
-code_samplet = """for i in len(array):
-    k = 12
-    j = 10
-    for n in range(1, len(array) + i):
-        j *= n
-        k = k + i
-    other_func_call()
-    print('{}! = {}'.format(i,n))
-    print('Done!')
-"""
-
-code_samplet2 = """def levenshtein(seq1, seq2):
-    size_x = len(seq1) + 1
-    size_y = len(seq2) + 1
-    matrix = np.zeros ((size_x, size_y))
-    for x in range(size_x):
-        matrix [x, 0] = x
-    for y in range(size_y):
-        matrix [0, y] = y
-    return matrix
-"""
-
-not_code = "This is an example of\nsomething that is not even a code snippet!\n" \
-            " it contains code such as: for i in range(1, 10):\n" \
-            " But it would never compile."
+samples = [
+    """
+    for i in range(1,11):
+       n = 1
+       for j in range(1, i+1):
+           n *= j
+       print('{}! = {}'.format(i, n))
+    """,
+    """
+    for i in range(25,100):
+       n = 3
+       # This is a comment!
+       # Notice how it is found as a 'NONE'!
+       for j in range(2, i+5):
+           n *= j
+           # Another comment!
+       print('{}! = {}'.format(i, n))
+    """,
+    """for i in len(array):
+        k = 12
+        j = 10
+        for n in range(1, len(array) + i):
+            j *= n
+            k = k + i
+        other_func_call()
+        print('{}! = {}'.format(i,n))
+        print('Done!')
+    """,
+    """def levenshtein(seq1, seq2):
+        size_x = len(seq1) + 1
+        size_y = len(seq2) + 1
+        matrix = np.zeros ((size_x, size_y))
+        for x in range(size_x):
+            matrix [x, 0] = x
+        for y in range(size_y):
+            matrix [0, y] = y
+        return matrix
+    """,
+    """This is an example of
+    something that is not even a code snippet!
+    it contains code such as: 
+    for i in range(1, 10):
+    But it would never compile.
+    """,
+    "isn't code"
+]
 
 # Set the pipeline steps up into the correct order
 processing_steps = [
@@ -58,16 +62,8 @@
     KeywordExtractor()
 ]
 
-snippets = [
-    code_base,
-    code_sample,
-    code_samplet,
-    code_samplet2,
-    not_code
-]
-
 pipeline = Pipeline(processing_steps)
-output = pipeline.execute_synchronous(snippets)
+output = pipeline.execute_synchronous(samples)
 output.form_lsh()
 output.set_input(output[0])
 query_out = output.query()