Commit 0ebbf2a

0.9.15
1 parent 2b41e37 commit 0ebbf2a

6 files changed: +62 additions, -27 deletions

torchstudio/datasets/genericloader.py

Lines changed: 18 additions & 13 deletions
@@ -34,14 +34,15 @@ class GenericLoader(Dataset):
         (samples in one folder: 1.ext, 2.ext, ...)
 
     extensions (str):
-        file extension to filters (such as: .jpg, .jpeg, .png, .mp3, .wav, .npy, .npz)
+        file extension to filters
+        (supported: .jpg, .jpeg, .png, .webp, .tif, .tiff, .mp3, .wav, .ogg, .flac, .npy, .npz)
 
     transforms (list):
         list of transforms to apply to the different components of each sample (use None is some components need no transform)
         (ie: [torchvision.transforms.Compose([transforms.Resize(64)]), torchaudio.transforms.Spectrogram()])
     """
 
-    def __init__(self, path:str='', classification:bool=True, separator:str='/', extensions:str='.jpg, .jpeg, .png, .mp3, .wav, .npy, .npz', transforms=[]):
+    def __init__(self, path:str='', classification:bool=True, separator:str='/', extensions:str='.jpg, .jpeg, .png, .webp, .tif, .tiff, .mp3, .wav, .ogg, .flac, .npy, .npz', transforms=[]):
         exts = tuple(extensions.replace(' ','').split(','))
         paths = []
         self.samples = []
@@ -126,27 +127,31 @@ def __init__(self, path:str='', classification:bool=True, separator:str='/', ext
                 self.samples[samples_index[sample_name]].append(path)
 
     def to_tensors(self, path:str):
-        if path.endswith('.jpg') or path.endswith('.jpeg') or path.endswith('.png'):
+        tensors = []
+        if path.endswith('.jpg') or path.endswith('.jpeg') or path.endswith('.png') or path.endswith('.webp') or path.endswith('.tif') or path.endswith('.tiff'):
             img=Image.open(path)
-            if img.mode=='1' or img.mode=='L' or img.mode=='P':
-                return [torch.from_numpy(np.array(img, dtype=np.uint8))]
-            else:
-                trans=torchvision.transforms.ToTensor()
-                return [trans(img)]
+            for i in range(img.n_frames):
+                if img.mode=='1' or img.mode=='L' or img.mode=='P':
+                    tensors.append(torch.from_numpy(np.array(img, dtype=np.uint8)))
+                else:
+                    trans=torchvision.transforms.ToTensor()
+                    tensors.append(trans(img))
+                if i<(img.n_frames-1):
+                    img.seek(img.tell()+1)
 
-        if path.endswith('.mp3') or path.endswith('.wav'):
+        if path.endswith('.mp3') or path.endswith('.wav') or path.endswith('.ogg') or path.endswith('.flac'):
             waveform, sample_rate = torchaudio.load(path)
-            return [waveform]
+            tensors.append(waveform)
 
         if path.endswith('.npy') or path.endswith('.npz'):
             arrays = np.load(path)
             if type(arrays) == dict:
-                tensors = []
                 for array in arrays:
                     tensors.append(torch.from_numpy(arrays[array]))
-                return tensors
             else:
-                return [torch.from_numpy(arrays)]
+                tensors.append(torch.from_numpy(arrays))
+
+        return tensors
 
     def __len__(self):
         return len(self.samples)
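
The updated to_tensors now returns a list of tensors and walks every frame of multi-frame image formats via Pillow's n_frames/seek API. A minimal standalone sketch of that frame-iteration pattern outside GenericLoader (the helper name, the file name and the n_frames fallback are assumptions, not part of the commit):

import numpy as np
import torch
import torchvision.transforms
from PIL import Image

def frames_to_tensors(path: str):
    """Return one tensor per frame: uint8 HW for binary/gray/palette images, float CHW otherwise."""
    tensors = []
    img = Image.open(path)
    n_frames = getattr(img, 'n_frames', 1)  # single-frame formats may not expose n_frames
    for i in range(n_frames):
        if img.mode in ('1', 'L', 'P'):
            tensors.append(torch.from_numpy(np.array(img, dtype=np.uint8)))
        else:
            tensors.append(torchvision.transforms.ToTensor()(img))
        if i < n_frames - 1:
            img.seek(img.tell() + 1)  # advance to the next frame
    return tensors

# hypothetical usage: stack = frames_to_tensors('volume_stack.tif')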

torchstudio/modeltrain.py

Lines changed: 1 addition & 1 deletion
@@ -447,7 +447,7 @@ def send_results_back():
         if error_msg:
             print("Error exporting:", error_msg, file=sys.stderr)
         else:
-            error_msg, torchscript_model = safe_exec(torch.onnx.export,{'model':torchscript_model, 'args':input_tensors, 'f':tc.decode_strings(msg_data)[0], 'opset_version':12})
+            error_msg, torchscript_model = safe_exec(torch.onnx.export,{'model':torchscript_model, 'args':input_tensors, 'f':tc.decode_strings(msg_data)[0], 'input_names': eval(tc.decode_strings(msg_data)[1]), 'output_names': eval(tc.decode_strings(msg_data)[2]), 'dynamic_axes': eval(tc.decode_strings(msg_data)[3]), 'opset_version':17})
         if error_msg:
             print("Error exporting:", error_msg, file=sys.stderr)
         else:
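
The ONNX export path now decodes input names, output names and dynamic axes from the request and targets opset 17 instead of 12. Stripped of TorchStudio's safe_exec/message plumbing, the equivalent direct call looks roughly like this (the model, file name and axis names are placeholders):

import torch

model = torch.nn.Linear(16, 4)        # placeholder model
example_input = torch.randn(1, 16)    # example input used for tracing

torch.onnx.export(
    model,
    example_input,
    "model.onnx",
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}},  # keep the batch dimension dynamic
    opset_version=17,
)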

torchstudio/pythoninstall.py

Lines changed: 13 additions & 6 deletions
@@ -1,3 +1,8 @@
+#otherwise conda install may fail
+del __file__
+__package__=None
+__spec__=None
+
 import sys
 import importlib
 import importlib.util
@@ -25,7 +30,7 @@ def init_patch(self, **kwargs):
 
 if not args.package:
     #https://edcarp.github.io/introduction-to-conda-for-data-scientists/03-using-packages-and-channels/index.html#alternative-syntax-for-installing-packages-from-specific-channels
-    conda_install=f"{args.channel}::pytorch {args.channel}::torchvision {args.channel}::torchaudio {args.channel}::torchtext"
+    conda_install=f"pytorch torchvision torchaudio torchtext"
     if (sys.platform.startswith('win') or sys.platform.startswith('linux')):
         if args.cuda:
             print("Checking the latest supported CUDA version...")
@@ -47,26 +52,28 @@ def init_patch(self, **kwargs):
             highest_cuda_string='.'.join([str(value) for value in highest_cuda_version])
             print("Using CUDA "+highest_cuda_string)
             print("")
-            conda_install+=f" {args.channel}::pytorch-cuda="+highest_cuda_string+" -c nvidia"
+            conda_install+=" pytorch-cuda="+highest_cuda_string+" -c "+args.channel+" -c nvidia"
         else:
-            conda_install+=f" {args.channel}::cpuonly"
+            conda_install+=" cpuonly -c "+args.channel
+    else:
+        conda_install+=" -c "+args.channel
     print(f"Downloading and installing {args.channel} packages...")
     print("")
-    conda_install+=" -k" #allow insecure ssl connections
     # https://stackoverflow.com/questions/41767340/using-conda-install-within-a-python-script
     (stdout_str, stderr_str, return_code_int) = Conda.run_command(Conda.Commands.INSTALL,conda_install.split(),use_exception_handler=True,stdout=sys.stdout,stderr=sys.stderr)
     if return_code_int!=0:
         exit(return_code_int)
     print("")
 
+    # onnx required for onnx export
     # datasets(+huggingface_hub) is required by hugging face hub
     # scipy required by torchvision: Caltech ImageNet SBD SVHN datasets and Inception v3 GoogLeNet models
     # pandas required by the dataset tutorial: https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
     # matplotlib-base required by torchstudio renderers
     # python-graphviz required by torchstudio graph
     # paramiko required for ssh connections (+updated cffi required on intel mac)
     # pysoundfile required by torchaudio datasets: https://pytorch.org/audio/stable/backend.html#soundfile-backend
-    conda_install="datasets scipy pandas matplotlib-base python-graphviz paramiko pysoundfile"
+    conda_install="onnx datasets scipy pandas matplotlib-base python-graphviz paramiko pysoundfile"
     if sys.platform.startswith('darwin'):
         conda_install+=" cffi"

@@ -75,7 +82,7 @@ def init_patch(self, **kwargs):
 
     print("Downloading and installing conda-forge packages...")
     print("")
-    conda_install+=" -c conda-forge -k"
+    conda_install+=" -c conda-forge"
     (stdout_str, stderr_str, return_code_int) = Conda.run_command(Conda.Commands.INSTALL,conda_install.split(),use_exception_handler=True,stdout=sys.stdout,stderr=sys.stderr)
     if return_code_int!=0:
         exit(return_code_int)
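
The channel handling switches from the per-package channel::name prefix to trailing -c flags appended once to the whole install string, and the -k (insecure SSL) flag is dropped. A sketch mirroring the script's conda.cli.python_api call, with a hypothetical package list and channel (the import alias is an assumption):

import sys
import conda.cli.python_api as Conda

channel = "pytorch"                                   # hypothetical channel name
conda_install = "pytorch torchvision -c " + channel   # channels passed as trailing -c flags

(stdout_str, stderr_str, return_code_int) = Conda.run_command(
    Conda.Commands.INSTALL,
    conda_install.split(),           # same argument form as in pythoninstall.py
    use_exception_handler=True,
    stdout=sys.stdout,
    stderr=sys.stderr)
if return_code_int != 0:
    sys.exit(return_code_int)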

torchstudio/renderers/bitmap.py

Lines changed: 10 additions & 3 deletions
@@ -20,18 +20,19 @@ class Bitmap(Renderer):
         Values can be 'viridis', 'plasma', 'inferno', 'magma', 'cividis'
     colors: List of colors for each channel for multi channels bitmaps (looped if necessary)
     rotate (int): Number of time to rotate the bitmap by 90 degree (counter-clockwise)
-    invert (bool): Invert vertical axis.
+    invert (bool): Invert vertical axis
+    normalize (bool): Normalize values
     """
-    def __init__(self, colormap='inferno', colors=['#ff0000','#00ff00','#0000ff','#ffff00','#00ffff','#ff00ff'], rotate=0, invert=False):
+    def __init__(self, colormap='inferno', colors=['#ff0000','#00ff00','#0000ff','#ffff00','#00ffff','#ff00ff'], rotate=0, invert=False, normalize=False):
         super().__init__()
         self.colormap=colormap
         self.colors=colors
         self.rotate=rotate
         self.invert=invert
+        self.normalize=normalize
 
     def render(self, title, tensor, size, dpi, shift=(0,0,0,0), scale=(1,1,1,1), input_tensors=[], target_tensor=None, labels=[]):
         #check dimensions
-        print(str(tensor.dtype))
         if len(tensor.shape)!=3 and (len(tensor.shape)!=2 or 'int' not in str(tensor.dtype)):
             print("Bitmap renderer requires a 3D tensor or 2D tensor of ints, got a "+str(len(tensor.shape))+"D tensor.", file=sys.stderr)
             return None
@@ -50,6 +51,12 @@ def render(self, title, tensor, size, dpi, shift=(0,0,0,0), scale=(1,1,1,1), inp
         if self.rotate>0:
             tensor=np.rot90(tensor, self.rotate, axes=(1, 2))
 
+        tensor=tensor.astype(np.float32)
+        if self.normalize:
+            max_value=np.amax(tensor)
+            if max_value>0:
+                tensor=tensor/max_value
+
         #apply brightness, gamma and conversion to uint8, then transform CHW to HWC
         tensor = np.multiply(np.clip(np.power(np.clip(tensor*scale[0],0,1),1/scale[3]),0,1),255).astype(np.uint8)
         tensor = tensor.transpose((1, 2, 0))
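
All three renderers gain the same pre-display step: cast to float32 and, when normalize is set, divide by the tensor's maximum so values land in [0, 1] before the brightness/gamma mapping. A standalone sketch of that step (the input array is made up):

import numpy as np

tensor = np.array([[[0, 50], [100, 200]]], dtype=np.int64)  # CHW tensor with an arbitrary value range

tensor = tensor.astype(np.float32)
normalize = True
if normalize:
    max_value = np.amax(tensor)
    if max_value > 0:              # guard against an all-zero tensor
        tensor = tensor / max_value

print(tensor)  # values now within [0, 1], ready for the gamma/brightness mapping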

torchstudio/renderers/spectrogram.py

Lines changed: 11 additions & 3 deletions
@@ -20,13 +20,15 @@ class Spectrogram(Renderer):
         Values can be 'viridis', 'plasma', 'inferno', 'magma', 'cividis'
     colors: List of colors for each channel for multi channels spectrograms (looped if necessary)
     rotate (int): Number of time to rotate the bitmap by 90 degree (counter-clockwise)
+    normalize (bool): Normalize values
     """
-    def __init__(self, colormap='inferno', colors=['#ff0000','#00ff00','#0000ff','#ffff00','#00ffff','#ff00ff'], rotate=0, invert=False):
+    def __init__(self, colormap='inferno', colors=['#ff0000','#00ff00','#0000ff','#ffff00','#00ffff','#ff00ff'], rotate=0, invert=False, normalize=False):
         super().__init__()
         self.colormap=colormap
         self.colors=colors
         self.rotate=rotate
         self.invert=invert
+        self.normalize=normalize
 
     def render(self, title, tensor, size, dpi, shift=(0,0,0,0), scale=(1,1,1,1), input_tensors=[], target_tensor=None, labels=[]):
         #check dimensions
@@ -35,8 +37,8 @@ def render(self, title, tensor, size, dpi, shift=(0,0,0,0), scale=(1,1,1,1), inp
             return None
 
         if np.iscomplexobj(tensor)==False and tensor.shape[0]%2!=0:
-            print("Spectrogram renderer requires a complex tensor or a tensor with an even number of channels", file=sys.stderr)
-            return None
+            #add missing channel (needs pairs to be interpred as complex channels)
+            tensor=np.append(tensor, np.zeros((1,tensor.shape[1],tensor.shape[2])), axis=0)
 
         #convert complex spectrogram to amplitude spectrogram
         if np.iscomplexobj(tensor):
@@ -55,6 +57,12 @@ def render(self, title, tensor, size, dpi, shift=(0,0,0,0), scale=(1,1,1,1), inp
         if self.rotate>0:
             tensor=np.rot90(tensor, self.rotate, axes=(1, 2))
 
+        tensor=tensor.astype(np.float32)
+        if self.normalize:
+            max_value=np.amax(tensor)
+            if max_value>0:
+                tensor=tensor/max_value
+
         #apply brightness, gamma and conversion to uint8, then transform CHW to HWC
         tensor = np.multiply(np.clip(np.power(tensor*scale[0],1/scale[3]),0,1),255).astype(np.uint8)
         tensor = tensor.transpose((1, 2, 0))
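
Rather than rejecting real-valued tensors with an odd channel count, the Spectrogram renderer now pads a zero channel so consecutive channel pairs can be read as complex values. A minimal sketch of that padding (the input shape is chosen arbitrarily):

import numpy as np

tensor = np.random.rand(3, 128, 64).astype(np.float32)  # 3 real channels: odd count, one channel short

if not np.iscomplexobj(tensor) and tensor.shape[0] % 2 != 0:
    # append a zero channel so channels can be paired into (real, imaginary) parts
    tensor = np.append(tensor, np.zeros((1, tensor.shape[1], tensor.shape[2])), axis=0)

print(tensor.shape)  # (4, 128, 64)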

torchstudio/renderers/volume.py

Lines changed: 9 additions & 1 deletion
@@ -21,13 +21,15 @@ class Volume(Renderer):
         Values can be 'viridis', 'plasma', 'inferno', 'magma', 'cividis'
     colors: List of colors for each channel for multi channels volumes (looped if necessary)
     rotate (int): Number of time to rotate the bitmap by 90 degree (counter-clockwise)
+    normalize (bool): Normalize values
     """
-    def __init__(self, colormap='inferno', colors=['#ff0000','#00ff00','#0000ff','#ffff00','#00ffff','#ff00ff'], rotate=0, invert=False):
+    def __init__(self, colormap='inferno', colors=['#ff0000','#00ff00','#0000ff','#ffff00','#00ffff','#ff00ff'], rotate=0, invert=False, normalize=False):
         super().__init__()
         self.colormap=colormap
         self.colors=colors
         self.rotate=rotate
         self.invert=invert
+        self.normalize=normalize
 
     def render(self, title, tensor, size, dpi, shift=(0,0,0,0), scale=(1,1,1,1), input_tensors=[], target_tensor=None, labels=[]):
         #check dimensions
@@ -52,6 +54,12 @@ def render(self, title, tensor, size, dpi, shift=(0,0,0,0), scale=(1,1,1,1), inp
         if self.rotate>0:
             tensor=np.rot90(tensor, self.rotate, axes=(1, 2))
 
+        tensor=tensor.astype(np.float32)
+        if self.normalize:
+            max_value=np.amax(tensor)
+            if max_value>0:
+                tensor=tensor/max_value
+
         #apply luminosity and conversion to uint8, then transform CHW to HWC
         tensor = np.multiply(np.clip(np.power(np.clip(tensor*scale[0],0,1),1/scale[3]),0,1),255).astype(np.uint8)
         tensor = tensor.transpose((1, 2, 0))
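
Since Bitmap, Spectrogram and Volume all expose the new keyword the same way, enabling it is just a constructor argument; a hypothetical usage sketch (module paths assumed from the file layout above):

from torchstudio.renderers.bitmap import Bitmap
from torchstudio.renderers.spectrogram import Spectrogram
from torchstudio.renderers.volume import Volume

# normalize defaults to False in every renderer; set it per renderer as needed
bitmap = Bitmap(colormap='viridis', normalize=True)
spectrogram = Spectrogram(normalize=True)
volume = Volume(rotate=1, normalize=True)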
