Skip to content
This repository has been archived by the owner on Aug 21, 2024. It is now read-only.

Commit

Permalink
clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
cdpath committed Mar 22, 2023
1 parent 1f9a30c commit 982067c
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 84 deletions.
143 changes: 70 additions & 73 deletions label_studio_converter/brush.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
pycocotools_imported = False
else:
pycocotools_imported = True


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -70,7 +70,7 @@ def access_bit(data, num):

def bytes2bit(data):
    """Get a bit string from bytes data.

    :param data: sized byte container; ``len(data) * 8`` bit positions are read
    :return: str made by concatenating ``str(access_bit(data, i))`` for every
        bit index ``i`` in ascending order
    """
    # One access_bit lookup per bit position, joined into a single string.
    return "".join([str(access_bit(data, i)) for i in range(len(data) * 8)])


def decode_rle(rle, print_params: bool = False):
Expand All @@ -86,7 +86,7 @@ def decode_rle(rle, print_params: bool = False):

if print_params:
print(
'RLE params:', num, 'values', word_size, 'word_size', rle_sizes, 'rle_sizes'
"RLE params:", num, "values", word_size, "word_size", rle_sizes, "rle_sizes"
)

i = 0
Expand All @@ -112,23 +112,23 @@ def decode_from_annotation(from_name, results):
counters = defaultdict(int)
for result in results:
key = (
'brushlabels'
if result['type'].lower() == 'brushlabels'
else ('labels' if result['type'].lower() == 'labels' else None)
"brushlabels"
if result["type"].lower() == "brushlabels"
else ("labels" if result["type"].lower() == "labels" else None)
)
if key is None or 'rle' not in result:
if key is None or "rle" not in result:
continue

rle = result['rle']
width = result['original_width']
height = result['original_height']
labels = result[key] if key in result else ['no_label']
name = from_name + '-' + '-'.join(labels)
rle = result["rle"]
width = result["original_width"]
height = result["original_height"]
labels = result[key] if key in result else ["no_label"]
name = from_name + "-" + "-".join(labels)

# result count
i = str(counters[name])
counters[name] += 1
name += '-' + i
name += "-" + i

image = decode_rle(rle)
layers[name] = np.reshape(image, [height, width, 4])[:, :, 3]
Expand All @@ -142,80 +142,78 @@ def save_brush_images_from_annotation(
from_name,
results,
out_dir,
out_format='numpy',
out_format="numpy",
):
layers = decode_from_annotation(from_name, results)
if isinstance(completed_by, dict):
email = completed_by.get('email', '')
email = completed_by.get("email", "")
else:
email = str(completed_by)
email = "".join(
x for x in email if x.isalnum() or x == '@' or x == '.'
x for x in email if x.isalnum() or x == "@" or x == "."
) # sanitize filename

for name in layers:
filename = os.path.join(
out_dir,
'task-'
"task-"
+ str(task_id)
+ '-annotation-'
+ "-annotation-"
+ str(annotation_id)
+ '-by-'
+ "-by-"
+ email
+ '-'
+ "-"
+ name,
)
image = layers[name]
logger.debug(f'Save image to {filename}')
if out_format == 'numpy':
logger.debug(f"Save image to {filename}")
if out_format == "numpy":
np.save(filename, image)
elif out_format == 'png':
elif out_format == "png":
im = Image.fromarray(image)
im.save(filename + '.png')
im.save(filename + ".png")
else:
raise Exception('Unknown output format for brush converter')
raise Exception("Unknown output format for brush converter")


def convert_task(item, out_dir, out_format="numpy"):
    """Task with multiple annotations to brush images, out_format = numpy | png.

    :param item: task dict; the keys ``"output"``, ``"id"``, ``"annotation_id"``
        and ``"completed_by"`` are read. ``item["output"]`` maps each
        ``from_name`` to its list of results.
    :param out_dir: directory handed to ``save_brush_images_from_annotation``
    :param out_format: output format handed to
        ``save_brush_images_from_annotation`` (``"numpy"`` or ``"png"``)
    """
    # Every control tag (from_name) of the task is exported separately.
    for from_name, results in item["output"].items():
        save_brush_images_from_annotation(
            item["id"],
            item["annotation_id"],
            item["completed_by"],
            from_name,
            results,
            out_dir,
            out_format,
        )


def convert_task_dir(items, out_dir, out_format="numpy"):
    """Directory with tasks and annotation to brush images, out_format = numpy | png.

    :param items: iterable of task dicts, each passed unchanged to ``convert_task``
    :param out_dir: output directory forwarded to ``convert_task``
    :param out_format: output format forwarded to ``convert_task``
    """
    for item in items:
        convert_task(item, out_dir, out_format)


def binary_mask_to_rle(binary_mask: np.ndarray):
    """Convert a binary image mask to an uncompressed COCO RLE.

    The mask is flattened in column-major (Fortran) order and the lengths of
    consecutive runs of equal values are collected.

    :param binary_mask: numpy array holding the mask values
    :return: dict with ``"counts"`` (list of run lengths) and ``"size"``
        (``list(binary_mask.shape)``)
    """
    counts = []
    for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order="F"))):
        # When the very first run has value 1, a zero-length run is emitted
        # before it so the counts always begin with the run of zeros.
        if i == 0 and value == 1:
            counts.append(0)
        # Count the run without materializing it as a list.
        counts.append(sum(1 for _ in elements))
    return {"counts": counts, "size": list(binary_mask.shape)}


def ls_rle_to_coco_rle(ls_rle, height, width):
    """Convert a Label Studio RLE to a compressed COCO RLE.

    :param ls_rle: Label Studio RLE, decoded with ``decode_rle``
    :param height: image height used to reshape the decoded data
    :param width: image width used to reshape the decoded data
    :return: the object produced by ``pycocotools.mask.frPyObjects`` with its
        ``"counts"`` entry decoded from bytes to str
    """
    ls_mask = decode_rle(ls_rle)
    # The decoded data is viewed as (height, width, 4) and only channel index 3
    # is kept (presumably the alpha channel -- TODO confirm).
    ls_mask = np.reshape(ls_mask, [height, width, 4])[:, :, 3]
    # Binarize: any non-zero value counts as foreground.
    ls_mask = np.where(ls_mask > 0, 1, 0)
    binary_mask = np.asfortranarray(ls_mask)
    coco_rle = binary_mask_to_rle(binary_mask)
    # "size" is [rows, cols] of the mask and is unpacked as the two trailing
    # arguments of frPyObjects.
    result = pycocotools.mask.frPyObjects(coco_rle, *coco_rle.get("size"))
    result["counts"] = result["counts"].decode()
    return result

Expand Down Expand Up @@ -284,98 +282,98 @@ def encode_rle(arr, wordsize=8, rle_sizes=[3, 4, 8, 16]):
"""
# Set length of array in 32 bits
num = len(arr)
numbits = f'{num:032b}'
numbits = f"{num:032b}"

# put in the wordsize in bits
wordsizebits = f'{wordsize - 1:05b}'
wordsizebits = f"{wordsize - 1:05b}"

# put rle sizes in the bits
rle_bits = ''.join([f'{x - 1:04b}' for x in rle_sizes])
rle_bits = "".join([f"{x - 1:04b}" for x in rle_sizes])

# combine it into base string
base_str = numbits + wordsizebits + rle_bits

# start with creating the rle bite string
out_str = ''
out_str = ""
for length_reeks, p, value in zip(*base_rle_encode(arr)):
# TODO: A nice to have but --> this can be optimized but works
if length_reeks == 1:
# we state with the first 0 that it has a length of 1
out_str += '0'
out_str += "0"
# We state now the index on the rle sizes
out_str += '00'
out_str += "00"

# the rle size value is 0 for an individual number
out_str += '000'
out_str += "000"

# put the value in a 8 bit string
out_str += f'{value:08b}'
state = 'single_val'
out_str += f"{value:08b}"
state = "single_val"

elif length_reeks > 1:
state = 'series'
state = "series"
# rle size = 3
if length_reeks <= 8:
# Starting with a 1 indicates that we have started a series
out_str += '1'
out_str += "1"

# index in rle size arr
out_str += '00'
out_str += "00"

# length of array to bits
out_str += f'{length_reeks - 1:03b}'
out_str += f"{length_reeks - 1:03b}"

out_str += f'{value:08b}'
out_str += f"{value:08b}"

# rle size = 4
elif 8 < length_reeks <= 16:
# Starting with a 1 indicates that we have started a series
out_str += '1'
out_str += '01'
out_str += "1"
out_str += "01"

# length of array to bits
out_str += f'{length_reeks - 1:04b}'
out_str += f"{length_reeks - 1:04b}"

out_str += f'{value:08b}'
out_str += f"{value:08b}"

# rle size = 8
elif 16 < length_reeks <= 256:
# Starting with a 1 indicates that we have started a series
out_str += '1'
out_str += "1"

out_str += '10'
out_str += "10"

# length of array to bits
out_str += f'{length_reeks - 1:08b}'
out_str += f"{length_reeks - 1:08b}"

out_str += f'{value:08b}'
out_str += f"{value:08b}"

# rle size = 16 or longer
else:
length_temp = length_reeks
while length_temp > 2**16:
# Starting with a 1 indicates that we have started a series
out_str += '1'
out_str += "1"

out_str += '11'
out_str += f'{2 ** 16 - 1:016b}'
out_str += "11"
out_str += f"{2 ** 16 - 1:016b}"

out_str += f'{value:08b}'
out_str += f"{value:08b}"
length_temp -= 2**16

# Starting with a 1 indicates that we have started a series
out_str += '1'
out_str += "1"

out_str += '11'
out_str += "11"
# length of array to bits
out_str += f'{length_temp - 1:016b}'
out_str += f"{length_temp - 1:016b}"

out_str += f'{value:08b}'
out_str += f"{value:08b}"

# make sure that we have an 8 fold lenght otherwise add 0's at the end
nzfill = 8 - len(base_str + out_str) % 8
total_str = base_str + out_str
total_str = total_str + nzfill * '0'
total_str = total_str + nzfill * "0"

rle = bits2byte(total_str)

Expand Down Expand Up @@ -410,8 +408,8 @@ def mask2rle(mask):
:param mask: uint8 or int np.array mask with len(shape) == 2 like grayscale image
:return: list of ints in RLE format
"""
assert len(mask.shape) == 2, 'mask must be 2D np.array'
assert mask.dtype == np.uint8 or mask.dtype == int, 'mask must be uint8 or int'
assert len(mask.shape) == 2, "mask must be 2D np.array"
assert mask.dtype == np.uint8 or mask.dtype == int, "mask must be uint8 or int"
array = mask.ravel()
array = np.repeat(array, 4) # must be 4 channels
rle = encode_rle(array)
Expand All @@ -430,7 +428,7 @@ def image2rle(path):
so you can mark background as black and foreground as white
:return: list of ints in RLE format
"""
with Image.open(path).convert('L') as image:
with Image.open(path).convert("L") as image:
mask = np.array((np.array(image) > 128) * 255, dtype=np.uint8)
array = mask.ravel()
array = np.repeat(array, 4)
Expand Down Expand Up @@ -487,4 +485,3 @@ def image2annotation(
result["ground_truth"] = ground_truth

return result

25 changes: 16 additions & 9 deletions label_studio_converter/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,12 @@ def add_image(images, width, height, image_id, image_path):

for label in labels:
category_name = None
for key in ['rectanglelabels', 'polygonlabels', 'brushlabels', 'labels']:
for key in [
'rectanglelabels',
'polygonlabels',
'brushlabels',
'labels',
]:
if key in label and len(label[key]) > 0:
category_name = label[key][0]
break
Expand Down Expand Up @@ -718,14 +723,16 @@ def add_image(images, width, height, image_id, image_path):
)
elif 'brushlabels' in label and brush.pycocotools_imported:
segmentation = brush.ls_rle_to_coco_rle(label["rle"], height, width)
annotations.append({
"image_id": image_id,
"segmentation": segmentation,
"area": brush.get_cocomask_area(segmentation),
"bbox": brush.get_cocomask_bounding_box(segmentation),
"iscrowd": 1,
"category_id": category_id,
})
annotations.append(
{
"image_id": image_id,
"segmentation": segmentation,
"area": brush.get_cocomask_area(segmentation),
"bbox": brush.get_cocomask_bounding_box(segmentation),
"iscrowd": 1,
"category_id": category_id,
}
)
else:
raise ValueError("Unknown label type")

Expand Down
4 changes: 2 additions & 2 deletions label_studio_converter/imports/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def convert_yolo_to_ls(
out_type="annotations",
image_root_url='/data/local-files/?d=',
image_ext='.jpg,.jpeg,.png',
image_dims: Optional[Tuple[int,int]] = None,
image_dims: Optional[Tuple[int, int]] = None,
):
"""Convert YOLO labeling to Label Studio JSON
:param input_dir: directory with YOLO where images, labels, notes.json are located
Expand Down Expand Up @@ -216,5 +216,5 @@ def add_parser(subparsers):
"case where you dataset has uniform image dimesions. e.g. `--image-dims 600 800` "
"if all your images are of dimensions width=600, height=800"
),
default=None
default=None,
)

0 comments on commit 982067c

Please sign in to comment.