Skip to content

Commit f77c4ad

Browse files
authored
Merge pull request #118 from dentonmwood/master
Getting end lines from code
2 parents eb0ca20 + cf7f981 commit f77c4ad

File tree

10 files changed

+45
-45
lines changed

10 files changed

+45
-45
lines changed

cli/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"""Package containing implementation of the application's CLI (command line interface)."""
1+
"""Package containing implementation of the application's CLI (command line interface)."""

cli/app.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
def main():
1414
"""Entry point of the application."""
1515
try:
16-
# Parse command line arguments
16+
# Parse command line arguments
1717
repos, algorithm = handle_cli_args()
1818

1919
time_snap("Arguments handled; Repositories parsed")
@@ -42,8 +42,13 @@ def main():
4242
time_snap("Analysis completed")
4343

4444
# Save detection result to a JSON file.
45-
json_filename = "clones_" + \
46-
datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".json"
45+
46+
repo_0 = repos[0][repos[0].rfind('/') + 1:]
47+
json_filename = "clones_" + algorithm + "_" + repo_0 + "_"
48+
if repos[1]:
49+
repo_1 = repos[1][repos[1].rfind('/') + 1:]
50+
json_filename += repo_1 + "_"
51+
json_filename += datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".json"
4752

4853
with open(json_filename, "w") as f:
4954
f.write(result.json())

cli/args_handler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Module containing functions for handling command-line arguments supplied by the user."""
1+
"""Module containing functions for handling command-line arguments supplied by the user."""
22

33
from argparse import ArgumentParser, RawDescriptionHelpFormatter
44
from os.path import isdir
@@ -63,7 +63,7 @@ def repo_path_to_local_path(repo_path):
6363

6464
def handle_cli_args():
6565
"""
66-
Parse the command line arguments and handle them.
66+
Parse the command line arguments and handle them.
6767
6868
If there is any problem, an error message will be printed
6969
and the script will exit with a non-zero exit code.

engine/algorithms/chlorine/chlorine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def chlorine_single_repo(modules):
136136
137137
Clones must satisfy the settings at the top of this source file.
138138
Detected code clones are printed on STDOUT, including the common skeleton,
139-
path to each clones (source file path, line number, column offset),
139+
path to each clone (source file path, start and end line numbers),
140140
size of each clone (number of nodes in its syntax tree) and their
141141
similarity percentage (number of matching nodes / total number of nodes).
142142

engine/errors/user_input.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33

44
class UserInputError(Exception):
55
"""
6-
Exception representing invalid user input such as command line arguments.
6+
Exception representing invalid user input such as command line arguments.
77
88
Alternatively, this can also represented a problem caused by
9-
invalid user input further down the line.
9+
invalid user input further down the line.
1010
Simply put, the problem can / must be fixed by modifying the user input.
1111
1212
Attributes:

engine/nodes/nodeorigin.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,21 @@ class NodeOrigin:
77
88
Attributes:
99
file {string} -- Source file from which the node originates.
10-
line {int|None} -- Line number at which the node was found.
11-
col_offset {int|None} -- Column offset within the line.
12-
Number of characters on the same
13-
line before the node's token.
10+
start {int|None} -- starting line number at which the node was found.
11+
end {int|None} -- ending line number at which the node was found.
1412
1513
"""
1614

17-
def __init__(self, file_path, line=None, col_offset=None):
15+
def __init__(self, file_path, start=None, end=None):
1816
"""
1917
Initialize a new node origin instance.
2018
2119
Arguments:
2220
file_path {string} -- Path to the node's source file.
2321
2422
Keyword Arguments:
25-
line {int} -- Line number of node's origin. (default: {None})
26-
col_offset {int} -- Column offset of node. (default: {None})
23+
start {int} -- Starting line number of node's origin. (default: {None})
24+
end {int} -- Ending line number of node's origin. (default: {None})
2725
2826
Raises:
2927
ValueError -- When file path is None or when only one of the two
@@ -34,18 +32,18 @@ def __init__(self, file_path, line=None, col_offset=None):
3432
raise ValueError(
3533
"File path must always be set to a non-None value")
3634

37-
if (line is None) != (col_offset is None):
35+
if (start is None) != (end is None):
3836
raise ValueError(
39-
"Either both line number and column offset must be set or neither")
37+
"Either both the start and end lines must be set or neither")
4038

4139
self.file = file_path
42-
self.line = line
43-
self.col_offset = col_offset
40+
self.start = start
41+
self.end = end
4442

4543
def __str__(self):
4644
"""Convert the node origin into a human-readable string representation."""
47-
return self.file + (f" (L: {self.line} C: {self.col_offset})"
48-
if self.line and self.col_offset else "")
45+
return self.file + (f" ({self.start}, {self.end})"
46+
if self.start and self.end else "")
4947

5048
def __repr__(self):
5149
"""Return a string representation of the node origin."""

engine/nodes/tree.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Module containing the `TreeNode` class."""
22

33
import ast
4+
from asttokens import LineNumbers
45
from .nodeorigin import NodeOrigin
56

67
_IGNORE_CLASSES = [ast.Load, ast.Store, ast.Del,
@@ -16,7 +17,7 @@ class TreeNode:
1617
1718
Attributes:
1819
node {AST} -- Original AST node generated by Python's built-in parser.
19-
origin {NodeOrigin} -- Origin of the node (file path, line and column).
20+
origin {NodeOrigin} -- Origin of the node (file path, start and column).
2021
children {list[TreeNode]} -- List of direct children of this node.
2122
weight {int} -- Total number of nodes in this node's tree.
2223
names {list[string]} -- All names / symbols used in this node's tree.
@@ -27,25 +28,30 @@ class TreeNode:
2728
2829
"""
2930

30-
def __init__(self, node, origin_file):
31+
def __init__(self, atok, lino, node, origin_file):
3132
"""
3233
Initialize a new tree node instance.
3334
3435
Arguments:
36+
atok -- asttokens object containing the tokens for the AST
37+
lino -- asttokens LineNumbers object used to convert character offsets to line numbers
3538
node -- Single raw node produced by the Python AST parser.
3639
origin_file {string} -- Relative path to the source file.
3740
3841
"""
3942
self.node = node
40-
self.origin = NodeOrigin(origin_file, node.lineno, node.col_offset) \
41-
if node._attributes else NodeOrigin(origin_file)
43+
44+
start, end = atok.get_text_range(node)
45+
start_line, start_char = LineNumbers.offset_to_line(lino, start)
46+
end_line, end_char = LineNumbers.offset_to_line(lino, end)
47+
self.origin = NodeOrigin(origin_file, start_line, end_line)
4248

4349
# Check if this type of node can have docstring.
4450
can_have_docstring = node.__class__ in [ast.ClassDef, ast.FunctionDef]
4551

4652
# HACK: Ignore useless context-related children.
4753
# This should greatly reduce the total number of nodes.
48-
self.children = [TreeNode(n, origin_file) for n in
54+
self.children = [TreeNode(atok, lino, n, origin_file) for n in
4955
ast.iter_child_nodes(node)
5056
if n.__class__ not in _IGNORE_CLASSES and
5157
# Ignore docstrings in class and function definitions.

engine/preprocessing/module_parser.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Module containing code used for parsing of modules and nodes from Python code."""
22

33
import ast
4+
from asttokens import asttokens, LineNumbers
45
from os import listdir, path
56
from os.path import isdir, isfile, relpath
67
from ..nodes.tree import TreeNode
@@ -17,20 +18,6 @@ def _read_whole_file(file_path):
1718
return f.read()
1819

1920

20-
def _read_ast_from_file(file_path):
21-
"""
22-
Parse a module AST from the specified file.
23-
24-
Arguments:
25-
file_path {string} -- Path of file to parse the AST from.
26-
27-
Returns:
28-
AST parsed from the specified file.
29-
30-
"""
31-
return ast.parse(_read_whole_file(file_path))
32-
33-
3421
def _get_tree_node_from_file(file_path, repo_path):
3522
"""
3623
Parse a TreeNode representing the module in the specified file.
@@ -42,7 +29,10 @@ def _get_tree_node_from_file(file_path, repo_path):
4229
TreeNode -- TreeNode parsed from the specified file.
4330
4431
"""
45-
return TreeNode(_read_ast_from_file(file_path),
32+
atok = asttokens.ASTTokens(_read_whole_file(file_path), True)
33+
root = atok.tree
34+
lino = LineNumbers(atok.get_text(root))
35+
return TreeNode(atok, lino, root,
4636
relpath(file_path, repo_path))
4737

4838

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
asttokens
12
# Interface for working with Git repositories
23
gitpython
34
# "Work list" binary table in Lee's algorithm (Iodine)

web/analyzer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def _extract_patterns(conn, commit_id, modules):
4242
pattern_id = _get_pattern_id(conn, n)
4343

4444
conn.run("""INSERT INTO pattern_instances """ +
45-
"""(pattern_id, commit_id, "file", "line", col_offset) """ +
45+
"""(pattern_id, commit_id, "file", "start", end) """ +
4646
"""VALUES (%s, %s, %s, %s, %s);""",
4747
pattern_id, commit_id,
4848
n.origin.file, n.origin.line, n.origin.col_offset)
@@ -85,7 +85,7 @@ def analyze_repo(repo_info, repo_id, algorithm=OXYGEN):
8585
commit_id, c.value, c.match_weight)
8686

8787
for o, s in c.origins.items():
88-
conn.run("""INSERT INTO origins (cluster_id, file, line, col_offset, similarity) VALUES (%s, %s, %s, %s, %s);""",
88+
conn.run("""INSERT INTO origins (cluster_id, file, start, end, similarity) VALUES (%s, %s, %s, %s, %s);""",
8989
cluster_id, o.file, o.line, o.col_offset, s)
9090

9191
log.success(
@@ -121,7 +121,7 @@ def find_repo_results(conn, repo_id):
121121
for c in conn.iter_dict("""SELECT id, "value", weight FROM clusters WHERE commit_id = %s;""", commit_id):
122122
origins = {}
123123

124-
for o in conn.iter_dict("""SELECT file, line, col_offset, similarity FROM origins WHERE cluster_id = %s;""", c.id):
124+
for o in conn.iter_dict("""SELECT file, start, end, similarity FROM origins WHERE cluster_id = %s;""", c.id):
125125
origins[NodeOrigin(o.file, o.line, o.col_offset)] = o.similarity
126126

127127
clones.append(DetectedClone(c.value, c.weight, origins=origins))

0 commit comments

Comments
 (0)