Commit ce780c2

Merge branch 'main' of https://github.com/moverseai/moai

2 parents df4f771 + 7d47896

File tree

2 files changed: +32 -11 lines


moai/serve/model.py (+4)

@@ -137,6 +137,10 @@ def initialize(self, context, extract_files=True):
             log.error(f"An error has occured while loading the model:\n{e}")
         self.model = self.model.to(self.device)
         self.model.eval()
+        if hasattr(cfg, "archive") and hasattr(cfg.archive, "model_precision"):
+            if cfg.archive.model_precision == "double":
+                log.info("Setting model to double precision.")
+                self.model.double()
         self.initialized = True
         log.info(
             f"Model ({type(self.model.model if hasattr(self.model, 'model') else self.model)}) loaded successfully."

moai/serve/streaming_optimizer.py (+28 -11)
@@ -87,14 +87,25 @@ def __to_device__(self, x):
             y = []
             for i in x:
                 if isinstance(i, torch.Tensor):
-                    y.append(i.to(self.dev))
+                    if i.dtype == torch.float32:
+                        y.append(i.to(self.dev, dtype=self.model.dtype))
+                    else:
+                        y.append(i.to(self.dev))
                 else:
                     pass
             return y
         elif isinstance(x, np.ndarray):
-            return torch.from_numpy(x).to(self.device)
+            if x.dtype == np.float32:
+                return torch.from_numpy(x).to(
+                    device=self.device, dtype=self.model.dtype
+                )
+            else:
+                return torch.from_numpy(x).to(self.device)
         elif isinstance(x, torch.Tensor):
-            return x.to(self.device)
+            if x.dtype == torch.float32:
+                return x.to(device=self.device, dtype=self.model.dtype)
+            else:
+                return x.to(self.device)

    def _get_overrides(self):
        overrides = []
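The dtype-matching transfer above is easy to check in isolation. Here is a standalone sketch of the same pattern: float32 inputs are cast to the model's parameter dtype on transfer, while other dtypes (e.g. integer labels) keep theirs. The free function and its argument names are illustrative, not moai's actual signature.

import numpy as np
import torch

def to_device(x, device, model_dtype):
    """Move x to `device`, casting float32 data to `model_dtype`."""
    if isinstance(x, list):
        y = []
        for i in x:
            if isinstance(i, torch.Tensor):
                # cast float32 tensors so inputs match the model's precision
                if i.dtype == torch.float32:
                    y.append(i.to(device, dtype=model_dtype))
                else:
                    y.append(i.to(device))
            # non-tensor items are skipped, mirroring the diff's `pass`
        return y
    elif isinstance(x, np.ndarray):
        t = torch.from_numpy(x)
        if t.dtype == torch.float32:
            return t.to(device=device, dtype=model_dtype)
        return t.to(device)
    elif isinstance(x, torch.Tensor):
        if x.dtype == torch.float32:
            return x.to(device=device, dtype=model_dtype)
        return x.to(device)
    return x

# float32 features follow a double-precision model; int64 labels are untouched
feats = np.random.rand(2, 3).astype(np.float32)
labels = torch.tensor([0, 1])
print(to_device(feats, "cpu", torch.float64).dtype)   # torch.float64
print(to_device(labels, "cpu", torch.float64).dtype)  # torch.int64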
@@ -116,6 +127,8 @@ def initialize(self, context):
         main_conf = context.manifest["model"]["modelName"].replace("_", "/")
         # set model to training true before calling the training step
         self.model.train()
+        # TODO: model ignores the precision set in the config
+        # self.model.double()
         try:
             with initialize(
                 config_path="conf/" + "/".join(main_conf.split("/")[0:-1]),
@@ -174,6 +187,9 @@ def handle(self, data: typing.Mapping[str, typing.Any], context: typing.Any):
         log.info("Streaming optimization handler called.")
         result = defaultdict(list)
         self.optimization_step = 0
+        if hasattr(self.trainer, "serve_context") and self.trainer.serve_context:
+            self.trainer.serve_context = None
+        self.trainer.serve_context = context
         self.context = context
         start_time = time.time()

@@ -189,6 +205,7 @@ def handle(self, data: typing.Mapping[str, typing.Any], context: typing.Any):
         td = self.preprocess(data)
         # get the dataloader for returned dict
         dataloader = td["dataloader"]
+        self.trainer.dataloader_length = len(dataloader)
         # iterate over the dataloader
         for batch_idx, batch in enumerate(torch.utils.data.DataLoader(dataloader)):
             self.model.optimization_step = 0
@@ -209,14 +226,14 @@ def handle(self, data: typing.Mapping[str, typing.Any], context: typing.Any):
                 # batch[key] = (
                 #     f"Running batch_idx {batch_idx} with completion percentage {float((batch_idx + 1)/len(dataloader) * 100):.2f}%."
                 # )
-                log.info(batch[key])
-                send_intermediate_predict_response(
-                    batch,
-                    self.context.request_ids,
-                    "Intermediate response from the model.",
-                    200,
-                    self.context,
-                )
+                # log.info(batch[key])
+                # send_intermediate_predict_response(
+                #     batch,
+                #     self.context.request_ids,
+                #     "Intermediate response from the model.",
+                #     200,
+                #     self.context,
+                # )
             if self.keys:
                 for k in self.keys:
                     result[k].append(batch[k])
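The call commented out above is TorchServe's streaming hook. For reference, a minimal sketch of how a handler streams partial results with send_intermediate_predict_response; the import path and argument order follow TorchServe's documented API (verify against your TorchServe version), and the handler body is purely illustrative.

from ts.protocol.otf_message_handler import send_intermediate_predict_response

def handle(data, context):
    # stream partial results back to the client before the final response
    for step in range(3):
        send_intermediate_predict_response(
            [f"step {step} done"],  # intermediate payload
            context.request_ids,    # request id map from the serving context
            "Intermediate response from the model.",
            200,                    # HTTP status code
            context,
        )
    return ["final response"]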
