Merge branch 'master' into feat_export_to_coco

HumanSignal · Mar 22, 2023 · a97bdc8
2 parents b597b31 + 9c25b65
commit a97bdc8
Show file tree

Hide file tree

Showing 37 changed files with 1,217 additions and 584 deletions.
diff --git a/.github/autolabeler.yml b/.github/autolabeler.yml
@@ -1,8 +1,8 @@
 template: "Mandatory field" #https://github.com/release-drafter/release-drafter/blob/master/bin/generate-schema.js#L15
 autolabeler:
-  - label: 'breaking'
-    body:
-      - '/BREAKING CHANGE/i'
+#   - label: 'breaking'
+#     body:
+#       - '/BREAKING CHANGE/i'
   - label: 'fix'
     title:
       - '/^fix:/'

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -0,0 +1,13 @@
+name: Lint
+
+on: [pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: psf/black@stable
+        with:
+          options: "--check --skip-string-normalization"
+          src: "./label_studio_converter"
diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml
@@ -36,7 +36,7 @@ jobs:
           configuration_path: ".github/pr-title-checker-config.json"
 
       - name: "Set PR's label based on title"
-        uses: release-drafter/release-drafter@v5.22.0
+        uses: release-drafter/release-drafter@v5.23.0
         with:
           disable-releaser: true
           config-name: autolabeler.yml

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -30,10 +30,10 @@ jobs:
       collect_analytics: false
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
 
       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
           python-version: '3.7'
 

diff --git a/README.md b/README.md
@@ -21,12 +21,14 @@ Label Studio Format Converter helps you to encode labels into the format of your
 ## Examples
 
 #### JSON
-Running from the command line:
+**Running from the command line:**
+
 ```bash
-python label_studio_converter/cli.py --input examples/sentiment_analysis/completions/ --config examples/sentiment_analysis/config.xml --output tmp/output.json
+pip install -U label-studio-converter
+python label-studio-converter export -i exported_tasks.json -c examples/sentiment_analysis/config.xml -o output_dir -f CSV
 ```
 
-Running from python:
+**Running from python:**
 ```python
 from label_studio_converter import Converter
 
@@ -259,7 +261,7 @@ Corresponding annotations could be found in `tmp/voc-annotations/*.xml`:
 
 Use cases: image object detection
 
-### YOLO
+### YOLO to Label Studio converter 
 
 Usage:
 

diff --git a/label_studio_converter/audio.py b/label_studio_converter/audio.py
@@ -10,7 +10,9 @@
 logger = logging.getLogger(__name__)
 
 
-def convert_to_asr_json_manifest(input_data, output_dir, data_key, project_dir, upload_dir, download_resources):
+def convert_to_asr_json_manifest(
+    input_data, output_dir, data_key, project_dir, upload_dir, download_resources
+):
     audio_dir_rel = 'audio'
     output_audio_dir = os.path.join(output_dir, audio_dir_rel)
     ensure_dir(output_dir), ensure_dir(output_audio_dir)
@@ -19,13 +21,24 @@ def convert_to_asr_json_manifest(input_data, output_dir, data_key, project_dir,
         for item in input_data:
             audio_path = item['input'][data_key]
             try:
-                audio_path = download(audio_path, output_audio_dir, project_dir=project_dir, upload_dir=upload_dir,
-                                      return_relative_path=True, download_resources=download_resources)
-                duration = get_audio_duration(os.path.join(output_audio_dir, os.path.basename(audio_path)))
+                audio_path = download(
+                    audio_path,
+                    output_audio_dir,
+                    project_dir=project_dir,
+                    upload_dir=upload_dir,
+                    return_relative_path=True,
+                    download_resources=download_resources,
+                )
+                duration = get_audio_duration(
+                    os.path.join(output_audio_dir, os.path.basename(audio_path))
+                )
             except:
-                logger.info('Unable to download {image_path} or get audio duration. The item {item} will be skipped'.format(
-                    image_path=audio_path, item=item
-                ), exc_info=True)
+                logger.info(
+                    'Unable to download {image_path} or get audio duration. The item {item} will be skipped'.format(
+                        image_path=audio_path, item=item
+                    ),
+                    exc_info=True,
+                )
                 continue
 
             for texts in iter(item['output'].values()):
@@ -37,7 +50,7 @@ def convert_to_asr_json_manifest(input_data, output_dir, data_key, project_dir,
                 'audio_filepath': audio_path,
                 'duration': duration,
                 'text': transcript,
-                'annotator': _get_annotator(item, default='')
+                'annotator': _get_annotator(item, default=''),
             }
             json.dump(metadata, fout)
             fout.write('\n')