Pipeline test setup #41

Open · wants to merge 15 commits into base: main
25 changes: 25 additions & 0 deletions config/bbox_pipeline_config.yaml
@@ -0,0 +1,25 @@
DataLoader:
type: BoundingBoxDataLoader
params:
dataset_dir: "./data/raw_data/STARCOP_train_easy"

Processor:
type: BoundingBoxProcessor
params:
config:
get_normalization_constants: True
resize: True
normalize_dataset: True
augment_dataset: True
normalize_bbox: True
input_shape: (512, 512, 16)
normalize: True
augmentations:
- horizontal_flip
- rotate

Model:
type: BoundingBoxModel
params:
input_shape: (512, 512, 16)
max_boxes: 1
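
For orientation, here is a minimal sketch of how a component section of this config could be consumed, assuming PyYAML and the same ast.literal_eval parsing that BoundingBoxModel uses for input_shape below; file name and keys are as in this PR, everything else is illustrative:

import ast
import yaml

# Load the pipeline config and pull out the Model parameters.
with open("config/bbox_pipeline_config.yaml") as f:
    config = yaml.safe_load(f)

model_params = config["Model"]["params"]
# YAML keeps "(512, 512, 16)" as a plain string; literal_eval turns it into a tuple.
input_shape = tuple(ast.literal_eval(model_params["input_shape"]))
print(input_shape, model_params["max_boxes"])  # (512, 512, 16) 1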
12 changes: 11 additions & 1 deletion config/constants.py
@@ -1,3 +1,5 @@
from enum import Enum

IMAGE_FILE_NAMES = (
"TOA_AVIRIS_460nm.tif",
"TOA_AVIRIS_550nm.tif",
@@ -14,4 +16,12 @@
"TOA_WV3_SWIR5.tif",
"TOA_WV3_SWIR6.tif",
"TOA_WV3_SWIR7.tif",
"TOA_WV3_SWIR8.tif")
"TOA_WV3_SWIR8.tif")

class PipelineType(Enum):
TRAINING = "training"
INFERENCE = "inference"

class DatasetType(Enum):
SEGMENTATION = "segmentation"
BOUNDING_BOX = "bounding_box"
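
A small usage sketch of the new enums (member names as defined above; purely illustrative):

from config.constants import PipelineType, DatasetType

# Enum members carry their string values and can be looked up by value.
assert PipelineType.TRAINING.value == "training"
assert DatasetType("bounding_box") is DatasetType.BOUNDING_BOX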
18 changes: 18 additions & 0 deletions main.py
@@ -0,0 +1,18 @@
from pipeline.pipeline_manager import PipelineManager
from config.constants import PipelineType

if __name__ == "__main__":
config_path = "./config/bbox_pipeline_config.yaml"

print("Initializing pipeline manager...")
pipeline = PipelineManager(PipelineType.TRAINING, config_path)

print("Loading dataset...")
pipeline.data_loader.create_dataset()
data = pipeline.data_loader.get_dataset()

print("Processing dataset...")
pipeline.processor.preprocess(data)

print("Creating model...")
pipeline.model.compile()
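
A possible continuation of this script once training is wired up; a sketch only, assuming preprocess() returns the processed tf.data.Dataset and that the output directory exists (neither is confirmed by this PR):

# Hypothetical follow-up steps, not part of this PR.
processed = pipeline.processor.preprocess(data)  # assumes preprocess returns the dataset

print("Training model...")
pipeline.model.train(processed, epochs=10, batch_size=8)

print("Saving model...")
pipeline.model.save_model("./models/artifacts")  # hypothetical output directory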
Empty file added models/__init__.py
Empty file.
38 changes: 38 additions & 0 deletions models/base_model.py
@@ -0,0 +1,38 @@
from abc import ABC, abstractmethod

class BaseModel(ABC):
"""
    Abstract base class that defines the structure and required methods for all models.
"""

def __init__(self):
pass

@abstractmethod
def build_model(self):
pass

@abstractmethod
def compile(self):
pass

@abstractmethod
def load_model(self):
pass

@abstractmethod
def save_model(self):
pass

@abstractmethod
def train(self):
pass

@abstractmethod
def predict(self):
pass

@abstractmethod
def evaluate(self):
pass
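
For illustration, the smallest concrete subclass that satisfies this interface (a sketch with dummy bodies, not part of the PR):

from models.base_model import BaseModel

class DummyModel(BaseModel):
    """Minimal concrete model: every abstract method gets a trivial body."""
    def build_model(self):
        return None

    def compile(self):
        pass

    def load_model(self):
        pass

    def save_model(self):
        pass

    def train(self):
        pass

    def predict(self):
        return None

    def evaluate(self):
        return None

# BaseModel itself cannot be instantiated; DummyModel can.
model = DummyModel()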

85 changes: 85 additions & 0 deletions models/bounding_box_model.py
@@ -0,0 +1,85 @@
import yaml
import tensorflow as tf
from datetime import datetime
import ast

from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from models.base_model import BaseModel
from src.losses import iou_loss, modified_mean_squared_error

class BoundingBoxModel(BaseModel):

def __init__(self, input_shape, max_boxes, model_fn=None, model_filepath=None):
super().__init__()
self.input_shape = tuple(ast.literal_eval(input_shape)) if isinstance(input_shape, str) else input_shape
self.max_boxes = max_boxes
self.unique_id = datetime.now().strftime("%Y%m%d%H%M%S")

        if model_filepath:
            self.model = BoundingBoxModel.load_model(model_filepath)
        else:
            self.model = model_fn(self.input_shape, max_boxes) if model_fn else self.build_model(self.input_shape, max_boxes)

def build_model(self, img_shape, max_boxes):
model = tf.keras.Sequential([
tf.keras.layers.Input(shape=img_shape),

# Encoder: Convolutional layers
tf.keras.layers.Conv2D(64, (3, 3), padding="same"),
tf.keras.layers.ELU(),
tf.keras.layers.MaxPooling2D((2, 2)),

tf.keras.layers.Conv2D(128, (3, 3), padding="same"),
tf.keras.layers.ELU(),
tf.keras.layers.MaxPooling2D((2, 2)),

tf.keras.layers.Conv2D(256, (3, 3), padding="same"),
tf.keras.layers.ELU(),
tf.keras.layers.MaxPooling2D((2, 2)),

# Decoder: Convolution for bounding box regression
tf.keras.layers.Conv2D(512, (3, 3), padding="same"),
tf.keras.layers.ELU(),

# Final convolutional layer for predicting bounding boxes
tf.keras.layers.Conv2D(4 * max_boxes, (1, 1), padding="same"),
tf.keras.layers.ELU(),

# Global Average Pooling to reduce spatial dimensions
tf.keras.layers.GlobalAveragePooling2D(),

# Reshape to (batch_size, max_boxes, 4)
tf.keras.layers.Reshape((max_boxes, 4)) # We want a fixed number of bounding boxes per image
])
return model

    def compile(
        self,
        optimizer=None,
        loss=modified_mean_squared_error,
        metrics=["mae", "accuracy"],
    ):
        # Build the optimizer per call so separate model instances don't share one Adam object
        optimizer = optimizer if optimizer is not None else Adam(learning_rate=0.0001)
        self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

def train(self, train_dataset, epochs=10, batch_size=8):
train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
return self.model.fit(train_dataset, epochs=epochs)

def evaluate(self, test_data):
return self.model.evaluate(test_data)

def predict(self, x):
return self.model.predict(x)

def save_model(self, output_dir):
self.model.save(f"{output_dir}/{self.unique_id}_bbox_model.h5")
attrs_dict = {k: self.__dict__[k] for k in self.__dict__ if k != "model"}

with open(f"{output_dir}/{self.unique_id}_attrs.yaml", "w") as attrs_file:
yaml.safe_dump(attrs_dict, attrs_file)

@staticmethod
def load_model(filepath):
model = load_model(filepath, custom_objects={"iou_loss": iou_loss, "modified_mean_squared_error": modified_mean_squared_error})
return model
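
A quick smoke test of the model on random data; a sketch under the shape assumptions above, and it presumes src.losses.modified_mean_squared_error accepts (batch, max_boxes, 4) targets:

import numpy as np
import tensorflow as tf
from models.bounding_box_model import BoundingBoxModel

model = BoundingBoxModel(input_shape=(512, 512, 16), max_boxes=1)
model.compile()

# Two random images with one bounding box each, matching the declared shapes.
images = np.random.rand(2, 512, 512, 16).astype("float32")
boxes = np.random.rand(2, 1, 4).astype("float32")
dataset = tf.data.Dataset.from_tensor_slices((images, boxes))

model.train(dataset, epochs=1, batch_size=2)
preds = model.predict(images)
print(preds.shape)  # expected: (2, 1, 4)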
44 changes: 44 additions & 0 deletions pipeline/pipeline_manager.py
@@ -0,0 +1,44 @@
import yaml
import importlib

from config.constants import PipelineType

CLASS_MAPPING = {
"BoundingBoxDataLoader": "src.data_loader.bounding_box_data_loader.BoundingBoxDataLoader",
"SegmentationDataLoader": "src.data_loader.segmentation_data_loader.SegmentationDataLoader",
"BoundingBoxProcessor": "src.processor.bounding_box_processor.BoundingBoxProcessor",
"BoundingBoxModel": "models.bounding_box_model.BoundingBoxModel",
}

class PipelineManager:

    def __init__(self, pipeline_type, config_path):
        if not isinstance(pipeline_type, PipelineType):
            raise ValueError(f"Invalid pipeline type: {pipeline_type}")
        self.type = pipeline_type
self.config = self.load_config(config_path)
self.data_loader = self._load_component("DataLoader")
self.processor = self._load_component("Processor")
self.model = self._load_component("Model")

def _load_component(self, key):
"""Dynamically loads a class from CLASS_MAPPING based on the YAML config."""
if key not in self.config:
raise ValueError(f"Missing '{key}' section in config file.")

key_type = self.config[key]["type"]
class_path = CLASS_MAPPING.get(key_type) # Get class path from mapping

if not class_path:
raise ValueError(f"Unknown type '{key_type}' for '{key}' component.")

params = self.config[key].get("params", {}) # Extract parameters
module_name, class_name = class_path.rsplit(".", 1) # Split module & class
module = importlib.import_module(module_name) # Import module dynamically
cls = getattr(module, class_name) # Get class from module

return cls(**params) # Instantiate and return instance

def load_config(self, path):
with open(path, 'r') as file:
return yaml.safe_load(file)
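
By hand, _load_component("Model") amounts to the following for the YAML config shown earlier (a sketch; the class path comes from CLASS_MAPPING):

import importlib

class_path = "models.bounding_box_model.BoundingBoxModel"
params = {"input_shape": "(512, 512, 16)", "max_boxes": 1}

# Split "package.module.Class", import the module, fetch the class, instantiate it.
module_name, class_name = class_path.rsplit(".", 1)
cls = getattr(importlib.import_module(module_name), class_name)
model = cls(**params)  # equivalent to PipelineManager(...).model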
32 changes: 21 additions & 11 deletions src/data_loader.py
@@ -1,5 +1,7 @@
import os
import tensorflow as tf
from src.data_loader.bounding_box_data_loader import BoundingBoxDataLoader
from src.data_loader.segmentation_data_loader import SegmentationDataLoader

from src.image_utils import data_generator, bbox_data_generator, is_valid_bbox

@@ -103,26 +105,34 @@ def augment_dataset(image, bbox, augmentations=["none", "horizontal_flip", "vert
return tf.data.Dataset.from_tensor_slices(datasets).flat_map(lambda x: x)

if __name__ == "__main__":
# testing the shapes of the images and bboxes
dataset = create_bbox_dataset(data_dir='./data/raw_data/STARCOP_train_easy')

# Test data loader for bounding box dataset
bbox_loader = BoundingBoxDataLoader(
dataset_dir='./data/raw_data/STARCOP_train_easy',
max_boxes=1
)
bbox_loader.create_dataset()
dataset = bbox_loader.get_dataset()
# Testing the shapes of images and bounding boxes
for image, bbox in dataset.take(3):
print(f"original bounding box: {bbox}")
print(f"Original bounding box: {bbox}")
print(f"Original Image Shape: {image.shape}, Original Bbox Shape: {bbox.shape}")

# Apply augmentation
augmented_dataset = dataset.flat_map(augment_dataset)

for image, bbox in augmented_dataset.take(3):
print(f"augmented bounding box: {bbox}")
print(f"Augmented bounding box: {bbox}")
print(f"Augmented Image Shape: {image.shape}, Augmented Bbox Shape: {bbox.shape}")


# Test the create_dataset function
train_data_path = './data/raw_data/STARCOP_train_easy'
dataset = create_dataset(train_data_path)

# Fetch a few samples from the dataset
for i, data_point in enumerate(dataset.take(3)): # Verify first 3 samples
# Test data loader for segmentation dataset
segmentation_loader = SegmentationDataLoader(
dataset_dir='./data/raw_data/STARCOP_train_easy'
)
segmentation_loader.create_dataset()
dataset = segmentation_loader.get_dataset()
# Fetch and verify a few samples from the dataset
for i, data_point in enumerate(dataset.take(3)):
print(f"Sample {i + 1}:")
print("Keys:", data_point.keys())
print("Image shape:", data_point["image"].shape)
Empty file added src/data_loader/__init__.py
Empty file.
16 changes: 16 additions & 0 deletions src/data_loader/base_data_loader.py
@@ -0,0 +1,16 @@
from abc import ABC, abstractmethod

class BaseDataLoader(ABC):
    def __init__(self, dataset_dir, batch_size=32, exclude_dirs=None):
        self.dataset_dir = dataset_dir
        self.batch_size = batch_size
        self.exclude_dirs = exclude_dirs if exclude_dirs is not None else []
self.dataset = None

@abstractmethod
def create_dataset(self):
pass

def get_dataset(self):
return self.dataset

30 changes: 30 additions & 0 deletions src/data_loader/bounding_box_data_loader.py
@@ -0,0 +1,30 @@
import tensorflow as tf

from src.data_loader.base_data_loader import BaseDataLoader
from src.image_utils import bbox_data_generator

class BoundingBoxDataLoader(BaseDataLoader):
    def __init__(self, dataset_dir, batch_size=32, max_boxes=10, exclude_dirs=None):
        super().__init__(dataset_dir, batch_size, exclude_dirs)
self.max_boxes = max_boxes

    def create_dataset(self):
        """
        Builds a TensorFlow dataset of images and their bounding box labels
        and stores it in self.dataset (retrieve it with get_dataset()).

        Dataset elements:
            - Images: (512, 512, 16) float32
            - Bounding boxes: (max_boxes, 4) float32
        """
output_sig = (
tf.TensorSpec(shape=(512, 512, 16), dtype=tf.float32), # Images
tf.TensorSpec(shape=(self.max_boxes, 4), dtype=tf.float32) # Bounding boxes
)

dataset = tf.data.Dataset.from_generator(
lambda: bbox_data_generator(self.dataset_dir, self.max_boxes, self.exclude_dirs),
output_signature=output_sig
)

self.dataset = dataset
39 changes: 39 additions & 0 deletions src/data_loader/segmentation_data_loader.py
@@ -0,0 +1,39 @@
import tensorflow as tf

from src.data_loader.base_data_loader import BaseDataLoader
from src.image_utils import data_generator

class SegmentationDataLoader(BaseDataLoader):
    def __init__(self, dataset_dir, batch_size=32, exclude_dirs=None):
        super().__init__(dataset_dir, batch_size, exclude_dirs)

    def create_dataset(self):
        """
        Builds a TensorFlow dataset whose elements are dictionaries of the form
        {"image": image_data, "segmentation_mask": label_data} and stores it in
        self.dataset (retrieve it with get_dataset()).

        Dataset elements:
            - "image": (512, 512, 16) float32
            - "segmentation_mask": (512, 512, 1) float32
        """
output_sig = (
tf.TensorSpec(shape=(512, 512, 16), dtype=tf.float32), # Images
tf.TensorSpec(shape=(512, 512, 1), dtype=tf.float32) # Segmentation Masks
)

dataset = tf.data.Dataset.from_generator(
lambda: data_generator(self.dataset_dir),
output_signature=output_sig
)

# Transform dataset to dictionary format
dataset = dataset.map(
lambda img, lbl: {"image": img, "segmentation_mask": lbl},
num_parallel_calls=tf.data.AUTOTUNE
)

self.dataset = dataset
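
The dictionary elements can be unpacked back into (image, mask) tuples when a Keras model expects pairs; a sketch using the loader above:

import tensorflow as tf
from src.data_loader.segmentation_data_loader import SegmentationDataLoader

loader = SegmentationDataLoader(dataset_dir="./data/raw_data/STARCOP_train_easy")
loader.create_dataset()

# Map the dict elements to (image, mask) tuples, e.g. for model.fit.
pairs = loader.get_dataset().map(
    lambda d: (d["image"], d["segmentation_mask"]),
    num_parallel_calls=tf.data.AUTOTUNE,
)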
Empty file added src/processor/__init__.py
Empty file.