clean up

HumanSignal · Mar 22, 2023 · 982067c · 982067c
1 parent 1f9a30c
commit 982067c
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 84 deletions.
diff --git a/label_studio_converter/brush.py b/label_studio_converter/brush.py
@@ -42,7 +42,7 @@
     pycocotools_imported = False
 else:
     pycocotools_imported = True
-    
+
 
 logger = logging.getLogger(__name__)
 
@@ -70,7 +70,7 @@ def access_bit(data, num):
 
 def bytes2bit(data):
     """get bit string from bytes data"""
-    return ''.join([str(access_bit(data, i)) for i in range(len(data) * 8)])
+    return "".join([str(access_bit(data, i)) for i in range(len(data) * 8)])
 
 
 def decode_rle(rle, print_params: bool = False):
@@ -86,7 +86,7 @@ def decode_rle(rle, print_params: bool = False):
 
     if print_params:
         print(
-            'RLE params:', num, 'values', word_size, 'word_size', rle_sizes, 'rle_sizes'
+            "RLE params:", num, "values", word_size, "word_size", rle_sizes, "rle_sizes"
         )
 
     i = 0
@@ -112,23 +112,23 @@ def decode_from_annotation(from_name, results):
     counters = defaultdict(int)
     for result in results:
         key = (
-            'brushlabels'
-            if result['type'].lower() == 'brushlabels'
-            else ('labels' if result['type'].lower() == 'labels' else None)
+            "brushlabels"
+            if result["type"].lower() == "brushlabels"
+            else ("labels" if result["type"].lower() == "labels" else None)
         )
-        if key is None or 'rle' not in result:
+        if key is None or "rle" not in result:
             continue
 
-        rle = result['rle']
-        width = result['original_width']
-        height = result['original_height']
-        labels = result[key] if key in result else ['no_label']
-        name = from_name + '-' + '-'.join(labels)
+        rle = result["rle"]
+        width = result["original_width"]
+        height = result["original_height"]
+        labels = result[key] if key in result else ["no_label"]
+        name = from_name + "-" + "-".join(labels)
 
         # result count
         i = str(counters[name])
         counters[name] += 1
-        name += '-' + i
+        name += "-" + i
 
         image = decode_rle(rle)
         layers[name] = np.reshape(image, [height, width, 4])[:, :, 3]
@@ -142,80 +142,78 @@ def save_brush_images_from_annotation(
     from_name,
     results,
     out_dir,
-    out_format='numpy',
+    out_format="numpy",
 ):
     layers = decode_from_annotation(from_name, results)
     if isinstance(completed_by, dict):
-        email = completed_by.get('email', '')
+        email = completed_by.get("email", "")
     else:
         email = str(completed_by)
     email = "".join(
-        x for x in email if x.isalnum() or x == '@' or x == '.'
+        x for x in email if x.isalnum() or x == "@" or x == "."
     )  # sanitize filename
 
     for name in layers:
         filename = os.path.join(
             out_dir,
-            'task-'
+            "task-"
             + str(task_id)
-            + '-annotation-'
+            + "-annotation-"
             + str(annotation_id)
-            + '-by-'
+            + "-by-"
             + email
-            + '-'
+            + "-"
             + name,
         )
         image = layers[name]
-        logger.debug(f'Save image to {filename}')
-        if out_format == 'numpy':
+        logger.debug(f"Save image to {filename}")
+        if out_format == "numpy":
             np.save(filename, image)
-        elif out_format == 'png':
+        elif out_format == "png":
             im = Image.fromarray(image)
-            im.save(filename + '.png')
+            im.save(filename + ".png")
         else:
-            raise Exception('Unknown output format for brush converter')
+            raise Exception("Unknown output format for brush converter")
 
 
-def convert_task(item, out_dir, out_format='numpy'):
+def convert_task(item, out_dir, out_format="numpy"):
     """Task with multiple annotations to brush images, out_format = numpy | png"""
-    for from_name, results in item['output'].items():
+    for from_name, results in item["output"].items():
         save_brush_images_from_annotation(
-            item['id'],
-            item['annotation_id'],
-            item['completed_by'],
+            item["id"],
+            item["annotation_id"],
+            item["completed_by"],
             from_name,
             results,
             out_dir,
             out_format,
         )
 
 
-def convert_task_dir(items, out_dir, out_format='numpy'):
+def convert_task_dir(items, out_dir, out_format="numpy"):
     """Directory with tasks and annotation to brush images, out_format = numpy | png"""
     for item in items:
         convert_task(item, out_dir, out_format)
 
 
 def binary_mask_to_rle(binary_mask: np.ndarray):
-    """ from binary image mask to uncompressed coco rle
-    """
+    """from binary image mask to uncompressed coco rle"""
     counts = []
-    for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order='F'))):
+    for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order="F"))):
         if i == 0 and value == 1:
             counts.append(0)
         counts.append(len(list(elements)))
-    return {'counts': counts, 'size': list(binary_mask.shape)}
+    return {"counts": counts, "size": list(binary_mask.shape)}
 
 
 def ls_rle_to_coco_rle(ls_rle, height, width):
-    """ from LS rle to compressed coco rle
-    """
+    """from LS rle to compressed coco rle"""
     ls_mask = decode_rle(ls_rle)
     ls_mask = np.reshape(ls_mask, [height, width, 4])[:, :, 3]
     ls_mask = np.where(ls_mask > 0, 1, 0)
     binary_mask = np.asfortranarray(ls_mask)
     coco_rle = binary_mask_to_rle(binary_mask)
-    result = pycocotools.mask.frPyObjects(coco_rle, *coco_rle.get('size'))
+    result = pycocotools.mask.frPyObjects(coco_rle, *coco_rle.get("size"))
     result["counts"] = result["counts"].decode()
     return result
 
@@ -284,98 +282,98 @@ def encode_rle(arr, wordsize=8, rle_sizes=[3, 4, 8, 16]):
     """
     # Set length of array in 32 bits
     num = len(arr)
-    numbits = f'{num:032b}'
+    numbits = f"{num:032b}"
 
     # put in the wordsize in bits
-    wordsizebits = f'{wordsize - 1:05b}'
+    wordsizebits = f"{wordsize - 1:05b}"
 
     # put rle sizes in the bits
-    rle_bits = ''.join([f'{x - 1:04b}' for x in rle_sizes])
+    rle_bits = "".join([f"{x - 1:04b}" for x in rle_sizes])
 
     # combine it into base string
     base_str = numbits + wordsizebits + rle_bits
 
     # start with creating the rle bite string
-    out_str = ''
+    out_str = ""
     for length_reeks, p, value in zip(*base_rle_encode(arr)):
         # TODO: A nice to have but --> this can be optimized but works
         if length_reeks == 1:
             # we state with the first 0 that it has a length of 1
-            out_str += '0'
+            out_str += "0"
             # We state now the index on the rle sizes
-            out_str += '00'
+            out_str += "00"
 
             # the rle size value is 0 for an individual number
-            out_str += '000'
+            out_str += "000"
 
             # put the value in a 8 bit string
-            out_str += f'{value:08b}'
-            state = 'single_val'
+            out_str += f"{value:08b}"
+            state = "single_val"
 
         elif length_reeks > 1:
-            state = 'series'
+            state = "series"
             # rle size = 3
             if length_reeks <= 8:
                 # Starting with a 1 indicates that we have started a series
-                out_str += '1'
+                out_str += "1"
 
                 # index in rle size arr
-                out_str += '00'
+                out_str += "00"
 
                 # length of array to bits
-                out_str += f'{length_reeks - 1:03b}'
+                out_str += f"{length_reeks - 1:03b}"
 
-                out_str += f'{value:08b}'
+                out_str += f"{value:08b}"
 
             # rle size = 4
             elif 8 < length_reeks <= 16:
                 # Starting with a 1 indicates that we have started a series
-                out_str += '1'
-                out_str += '01'
+                out_str += "1"
+                out_str += "01"
 
                 # length of array to bits
-                out_str += f'{length_reeks - 1:04b}'
+                out_str += f"{length_reeks - 1:04b}"
 
-                out_str += f'{value:08b}'
+                out_str += f"{value:08b}"
 
             # rle size = 8
             elif 16 < length_reeks <= 256:
                 # Starting with a 1 indicates that we have started a series
-                out_str += '1'
+                out_str += "1"
 
-                out_str += '10'
+                out_str += "10"
 
                 # length of array to bits
-                out_str += f'{length_reeks - 1:08b}'
+                out_str += f"{length_reeks - 1:08b}"
 
-                out_str += f'{value:08b}'
+                out_str += f"{value:08b}"
 
             # rle size = 16 or longer
             else:
                 length_temp = length_reeks
                 while length_temp > 2**16:
                     # Starting with a 1 indicates that we have started a series
-                    out_str += '1'
+                    out_str += "1"
 
-                    out_str += '11'
-                    out_str += f'{2 ** 16 - 1:016b}'
+                    out_str += "11"
+                    out_str += f"{2 ** 16 - 1:016b}"
 
-                    out_str += f'{value:08b}'
+                    out_str += f"{value:08b}"
                     length_temp -= 2**16
 
                 # Starting with a 1 indicates that we have started a series
-                out_str += '1'
+                out_str += "1"
 
-                out_str += '11'
+                out_str += "11"
                 # length of array to bits
-                out_str += f'{length_temp - 1:016b}'
+                out_str += f"{length_temp - 1:016b}"
 
-                out_str += f'{value:08b}'
+                out_str += f"{value:08b}"
 
     # make sure that we have an 8 fold lenght otherwise add 0's at the end
     nzfill = 8 - len(base_str + out_str) % 8
     total_str = base_str + out_str
-    total_str = total_str + nzfill * '0'
+    total_str = total_str + nzfill * "0"
 
     rle = bits2byte(total_str)
 
@@ -410,8 +408,8 @@ def mask2rle(mask):
     :param mask: uint8 or int np.array mask with len(shape) == 2 like grayscale image
     :return: list of ints in RLE format
     """
-    assert len(mask.shape) == 2, 'mask must be 2D np.array'
-    assert mask.dtype == np.uint8 or mask.dtype == int, 'mask must be uint8 or int'
+    assert len(mask.shape) == 2, "mask must be 2D np.array"
+    assert mask.dtype == np.uint8 or mask.dtype == int, "mask must be uint8 or int"
     array = mask.ravel()
     array = np.repeat(array, 4)  # must be 4 channels
     rle = encode_rle(array)
@@ -430,7 +428,7 @@ def image2rle(path):
                  so you can mark background as black and foreground as white
     :return: list of ints in RLE format
     """
-    with Image.open(path).convert('L') as image:
+    with Image.open(path).convert("L") as image:
         mask = np.array((np.array(image) > 128) * 255, dtype=np.uint8)
         array = mask.ravel()
         array = np.repeat(array, 4)
@@ -487,4 +485,3 @@ def image2annotation(
         result["ground_truth"] = ground_truth
 
     return result
-
diff --git a/label_studio_converter/converter.py b/label_studio_converter/converter.py
@@ -653,7 +653,12 @@ def add_image(images, width, height, image_id, image_path):
 
             for label in labels:
                 category_name = None
-                for key in ['rectanglelabels', 'polygonlabels', 'brushlabels', 'labels']:
+                for key in [
+                    'rectanglelabels',
+                    'polygonlabels',
+                    'brushlabels',
+                    'labels',
+                ]:
                     if key in label and len(label[key]) > 0:
                         category_name = label[key][0]
                         break
@@ -718,14 +723,16 @@ def add_image(images, width, height, image_id, image_path):
                     )
                 elif 'brushlabels' in label and brush.pycocotools_imported:
                     segmentation = brush.ls_rle_to_coco_rle(label["rle"], height, width)
-                    annotations.append({
-                        "image_id": image_id,
-                        "segmentation": segmentation,
-                        "area": brush.get_cocomask_area(segmentation),
-                        "bbox": brush.get_cocomask_bounding_box(segmentation),
-                        "iscrowd": 1,
-                        "category_id": category_id,
-                    })
+                    annotations.append(
+                        {
+                            "image_id": image_id,
+                            "segmentation": segmentation,
+                            "area": brush.get_cocomask_area(segmentation),
+                            "bbox": brush.get_cocomask_bounding_box(segmentation),
+                            "iscrowd": 1,
+                            "category_id": category_id,
+                        }
+                    )
                 else:
                     raise ValueError("Unknown label type")
 

diff --git a/label_studio_converter/imports/yolo.py b/label_studio_converter/imports/yolo.py
@@ -24,7 +24,7 @@ def convert_yolo_to_ls(
     out_type="annotations",
     image_root_url='/data/local-files/?d=',
     image_ext='.jpg,.jpeg,.png',
-    image_dims: Optional[Tuple[int,int]] = None,
+    image_dims: Optional[Tuple[int, int]] = None,
 ):
     """Convert YOLO labeling to Label Studio JSON
     :param input_dir: directory with YOLO where images, labels, notes.json are located
@@ -216,5 +216,5 @@ def add_parser(subparsers):
             "case where you dataset has uniform image dimesions. e.g. `--image-dims 600 800` "
             "if all your images are of dimensions width=600, height=800"
         ),
-        default=None
+        default=None,
     )