Skip to content

Commit 63ee209

Browse files
committed
cli: error handling for tokens
Signed-off-by: Alexander Bezzubov <[email protected]>
1 parent 96ceb0d commit 63ee209

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

notebooks/codesearchnet-opennmt.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,13 @@ def __getitem__(self, idx: int) -> Tuple[str, str]:
7878

7979
tokens = row["code_tokens"]
8080
body_tokens = tokens[tokens.index(fn_name) + 2 :]
81-
fn_body_tokens = body_tokens[body_tokens.index("{") + 1 : len(body_tokens) - 1]
81+
try:
82+
fn_body_tokens = body_tokens[
83+
body_tokens.index("{") + 1 : len(body_tokens) - 1
84+
]
85+
except ValueError as ve: # '{' might be missing
86+
# Each lazy %-style argument needs its own placeholder; with a single '%s' and two args the logging module reports a formatting error instead of emitting this message.
logging.error("'%s' fn body extraction failed: %s", body_tokens, ve)
87+
fn_body_tokens = None
8288

8389
return (fn_name, fn_body, fn_body_tokens)
8490

@@ -91,6 +97,7 @@ def __len__(self) -> int:
9197
from functools import lru_cache
9298
from typing import List
9399

100+
94101
def split_camelcase(camel_case_identifier: str) -> List[str]:
95102
"""
96103
Split camelCase identifiers.
@@ -158,7 +165,13 @@ def main(args: Namespace) -> None:
158165
for fn_name, fn_body, fn_body_tokens in dataset:
159166
if not fn_name or not fn_body:
160167
continue
161-
src = " ".join(fn_body_tokens) if args.token_level_sources else fn_body
168+
169+
if args.token_level_sources:
170+
if not fn_body_tokens:
171+
continue
172+
src = " ".join(fn_body_tokens)
173+
else:
174+
src = fn_body
162175

163176
if args.word_level_targets:
164177
tgt = fn_name

0 commit comments

Comments
 (0)