forked from VowpalWabbit/vowpal_wabbit
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ci: expand python lint and formatting (VowpalWabbit#3800)
* ci: expand python lint and formatting * Update lint.yml * Update lint.yml
- Loading branch information
1 parent
dde8b11
commit 104cb18
Showing
19 changed files
with
284 additions
and
205 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,63 +1,71 @@ | ||
#!/usr/bin/env python | ||
# convert letter.data to letter.vw | ||
|
||
def read_letter_names(fn):
    """Read the column names for the letter data set, one name per line.

    Args:
        fn: path to the names file (e.g. ``letter.names``).

    Returns:
        A list with one entry per line of the file, in file order, with
        trailing whitespace (including the newline) removed.
    """
    with open(fn) as ins:
        ret = [line.rstrip() for line in ins]
    # Progress report on stdout, kept from the original script.
    print("Read %d names from %s" % (len(ret), fn))
    return ret
|
||
def find_pixel_start(names):
    """Return the index of the first column name that starts with ``"p_"``.

    Args:
        names: sequence of column-name strings.

    Returns:
        The 0-based position of the first name beginning with ``"p_"``.

    Raises:
        ValueError: if no name starts with ``"p_"``; the offending name list
            is attached as the second exception argument.
    """
    for i, name in enumerate(names):
        if name.startswith("p_"):
            return i
    raise ValueError("No pixel data", names)
|
||
|
||
def data2vw(ifn, train, test, names):
    """Convert a tab-separated letter data file into VW train and test files.

    Every 10th input line (by 1-based line number) is written to ``test``;
    all other lines are written to ``train``. Each output line has the form
    ``<label> 1 <letter>-<id>|Pixel <name>:<value> ...`` where ``<label>`` is
    ``ord(letter) - ord("a") + 1`` and only the pixel columns whose value is
    not the string ``"0"`` are emitted.

    Args:
        ifn: path to the input data; a name ending in ``.gz`` is read
            through ``gzip``.
        train: path of the VW training file to create.
        test: path of the VW test file to create.
        names: list of column names; must contain ``"id"``, ``"letter"`` and
            at least one name starting with ``"p_"``. Columns from the first
            ``"p_"`` name to the end are treated as pixel features.

    Raises:
        ValueError: if ``"id"``/``"letter"``/pixel columns are missing from
            ``names``, if a line's field count differs from ``len(names)``,
            or if the letter field is not exactly one character.
    """
    if ifn.endswith(".gz"):
        import gzip

        iopener = gzip.open
    else:
        iopener = open
    id_pos = names.index("id")
    letter_pos = names.index("letter")
    pixel_start = find_pixel_start(names)
    lineno = 0
    trainN = 0
    testN = 0
    # Everything is opened in text mode: gzip.open defaults to binary (bytes
    # lines cannot be split on a str tab) and writing str to a "wb" file
    # raises TypeError on Python 3. newline="\n" keeps the output bytes
    # identical to what binary mode produced (no CRLF translation).
    with iopener(ifn, "rt") as ins, open(train, "w", newline="\n") as trainS, open(
        test, "w", newline="\n"
    ) as testS:
        for line in ins:
            lineno += 1
            vals = line.rstrip().split("\t")
            if len(vals) != len(names):
                raise ValueError("Bad field count", len(vals), len(names), vals, names)
            char = vals[letter_pos]
            if len(char) != 1:
                raise ValueError("Bad letter", char)
            # Hold out every 10th line for the test set.
            if lineno % 10 == 0:
                testN += 1
                outs = testS
            else:
                trainN += 1
                outs = trainS
            parts = [
                "%d 1 %s-%s|Pixel" % (ord(char) - ord("a") + 1, char, vals[id_pos])
            ]
            # Sparse encoding: zero-valued pixels are omitted.
            parts.extend(
                " %s:%s" % (names[i], vals[i])
                for i in range(pixel_start, len(names))
                if vals[i] != "0"
            )
            parts.append("\n")
            outs.write("".join(parts))
    print(
        "Read %d lines from %s; wrote %d lines into %s and %d lines into %s"
        % (lineno, ifn, trainN, train, testN, test)
    )
|
||
|
||
# Command-line entry point: convert INPUT using the column names in NAMES and
# write the VW-formatted TRAIN and TEST files.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Convert letters.data to VW format")
    parser.add_argument("input", help="path to letter.data[.gz]")
    parser.add_argument("names", help="path to letter.names")
    # argparse %-formats help strings, so a literal percent sign must be
    # written as "%%"; a bare "%)" raises ValueError when help is rendered.
    parser.add_argument("train", help="VW train file location (90%%)")
    parser.add_argument("test", help="VW test file location (10%%)")
    args = parser.parse_args()
    data2vw(args.input, args.train, args.test, read_letter_names(args.names))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.