"""
The implementation of GPT4TS for the partially-observed time-series forecasting task.

"""

# Created by Wenjie Du <wenjay.du@gmail.com>
# License: BSD-3-Clause

from typing import Union, Optional

import numpy as np
import torch
from torch.utils.data import DataLoader

from .core import _GPT4TS
from .data import DatasetForGPT4TS
from ..base import BaseNNForecaster
from ...data.checking import key_in_data_set
from ...optim.adam import Adam
from ...optim.base import Optimizer


class GPT4TS(BaseNNForecaster):
    """The PyTorch implementation of the GPT4TS forecasting model :cite:`zhou2023gpt4ts`.

    Parameters
    ----------
    n_steps :
        The number of time steps in the time-series data sample.

    n_features :
        The number of features in the time-series data sample.

    n_pred_steps :
        The number of steps in the forecasting time series.

    n_pred_features :
        The number of features in the forecasting time series.

    term :
        The forecasting term, which can be either 'long' or 'short'.

    patch_size :
        The size of the patch for the patching mechanism.

    patch_stride :
        The stride for the patching mechanism.

    n_layers :
        The number of hidden layers to use in GPT2.

    train_gpt_mlp :
        Whether to train the MLP in GPT2 during tuning.

    d_ffn :
        The hidden size of the feed-forward network.

    dropout :
        The dropout rate for the model.

    embed :
        The embedding method for the model.

    freq :
        The frequency of the time-series data.

    batch_size :
        The batch size for training and evaluating the model.

    epochs :
        The number of epochs for training the model.

    patience :
        The patience for the early-stopping mechanism. Given a positive integer, training will be
        stopped when the model has not performed better for that number of epochs.
        Leaving it as the default None disables early stopping.

    train_loss_func :
        The customized loss function designed by users for training the model.
        If not given, will use the default loss as claimed in the original paper.

    val_metric_func :
        The customized metric function designed by users for validating the model.
        If not given, will use the default MSE metric.

    optimizer :
        The optimizer for model training.
        If not given, will use a default Adam optimizer.

    num_workers :
        The number of subprocesses to use for data loading.
        `0` means data loading will be in the main process, i.e. there won't be subprocesses.

    device :
        The device for the model to run on. It can be a string, a :class:`torch.device` object, or a list of them.
        If not given, will try to use CUDA devices first (will use the default CUDA device if there are multiple),
        then CPUs, considering CUDA and CPU are so far the main devices for people to train ML models.
        If given a list of devices, e.g. ['cuda:0', 'cuda:1'] or [torch.device('cuda:0'), torch.device('cuda:1')],
        the model will be trained in parallel on the multiple devices (so far, parallel training is only supported
        on CUDA devices). Other devices like Google TPU and Apple Silicon accelerator MPS may be added in the future.

    saving_path :
        The path for automatically saving model checkpoints and tensorboard files (i.e. loss values recorded during
        training into a tensorboard file). Will not save if not given.

    model_saving_strategy :
        The strategy to save model checkpoints. It has to be one of [None, "best", "better", "all"].
        No model will be saved when it is set as None.
        The "best" strategy will only automatically save the best model after training finishes.
        The "better" strategy will automatically save the model during training whenever the model performs
        better than in previous epochs.
        The "all" strategy will save the model after every training epoch.

    verbose :
        Whether to print out the training logs during the training process.
    """

    def __init__(
        self,
        n_steps: int,
        n_features: int,
        n_pred_steps: int,
        n_pred_features: int,
        term: str,
        patch_size: int,
        patch_stride: int,
        n_layers: int,
        train_gpt_mlp: bool,
        d_ffn: int,
        dropout: float,
        embed: str = "fixed",
        freq: str = "h",
        batch_size: int = 32,
        epochs: int = 100,
        patience: Optional[int] = None,
        train_loss_func: Optional[dict] = None,
        val_metric_func: Optional[dict] = None,
        optimizer: Optional[Optimizer] = Adam(),
        num_workers: int = 0,
        device: Optional[Union[str, torch.device, list]] = None,
        saving_path: Optional[str] = None,
        model_saving_strategy: Optional[str] = "best",
        verbose: bool = True,
    ):
        super().__init__(
            batch_size=batch_size,
            epochs=epochs,
            patience=patience,
            train_loss_func=train_loss_func,
            val_metric_func=val_metric_func,
            num_workers=num_workers,
            device=device,
            enable_amp=True,
            saving_path=saving_path,
            model_saving_strategy=model_saving_strategy,
            verbose=verbose,
        )

        self.n_steps = n_steps
        self.n_features = n_features
        self.n_pred_steps = n_pred_steps
        self.n_pred_features = n_pred_features
        self.term = term
        self.n_layers = n_layers
        self.patch_size = patch_size
        self.patch_stride = patch_stride
        self.train_gpt_mlp = train_gpt_mlp
        self.d_ffn = d_ffn
        self.dropout = dropout
        self.embed = embed
        self.freq = freq

        # set up the model
        self.model = _GPT4TS(
            self.n_steps,
            self.n_features,
            self.n_pred_steps,
            self.n_pred_features,
            self.term,
            self.n_layers,
            self.patch_size,
            self.patch_stride,
            self.train_gpt_mlp,
            self.d_ffn,
            self.dropout,
            self.embed,
            self.freq,
        )
        self._print_model_size()
        self._send_model_to_given_device()

        # set up the optimizer
        self.optimizer = optimizer
        self.optimizer.init_optimizer(self.model.parameters())

    def _assemble_input_for_training(self, data: list) -> dict:
        (
            indices,
            X,
            missing_mask,
            X_pred,
            X_pred_missing_mask,
        ) = self._send_data_to_given_device(data)

        inputs = {
            "X": X,
            "missing_mask": missing_mask,
            "X_pred": X_pred,
            "X_pred_missing_mask": X_pred_missing_mask,
        }
        return inputs

    def _assemble_input_for_validating(self, data: list) -> dict:
        return self._assemble_input_for_training(data)

    def _assemble_input_for_testing(self, data: list) -> dict:
        (
            indices,
            X,
            missing_mask,
        ) = self._send_data_to_given_device(data)

        inputs = {
            "X": X,
            "missing_mask": missing_mask,
        }
        return inputs

    def fit(
        self,
        train_set: Union[dict, str],
        val_set: Optional[Union[dict, str]] = None,
        file_type: str = "hdf5",
    ) -> None:
        # Step 1: wrap the input data with classes Dataset and DataLoader
        training_set = DatasetForGPT4TS(
            train_set,
            file_type=file_type,
        )
        training_loader = DataLoader(
            training_set,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )
        val_loader = None
        if val_set is not None:
            if not key_in_data_set("X_pred", val_set):
                raise ValueError("val_set must contain 'X_pred' for model validation.")
            val_set = DatasetForGPT4TS(
                val_set,
                file_type=file_type,
            )
            val_loader = DataLoader(
                val_set,
                batch_size=self.batch_size,
                shuffle=False,
                num_workers=self.num_workers,
            )

        # Step 2: train the model and freeze it
        self._train_model(training_loader, val_loader)
        self.model.load_state_dict(self.best_model_dict)
        self.model.eval()  # set the model as eval status to freeze it.

        # Step 3: save the model if necessary
        self._auto_save_model_if_necessary(confirm_saving=self.model_saving_strategy == "best")

    def predict(
        self,
        test_set: Union[dict, str],
        file_type: str = "hdf5",
    ) -> dict:
        """Make forecasting predictions with the trained model for the given test samples.

        Parameters
        ----------
        test_set :
            The dataset for model testing, should be a dictionary including the key 'X',
            or a path string locating a data file.
            If it is a dict, X should be array-like with shape [n_samples, sequence length (n_steps), n_features],
            which is the time-series data for testing and can contain missing values.
            If it is a path string, the path should point to a data file, e.g. an h5 file, which contains
            key-value pairs like a dict, and it has to include the key 'X'.

        file_type :
            The type of the given file if test_set is a path string.

        Returns
        -------
        result_dict : dict
            Prediction results in a Python Dictionary for the given samples.
            It should be a dictionary including a key named 'forecasting'.

        """

        # Step 1: wrap the input data with classes Dataset and DataLoader
        self.model.eval()  # set the model as eval status to freeze it.
        test_set = DatasetForGPT4TS(
            test_set,
            return_X_pred=False,
            file_type=file_type,
        )

        test_loader = DataLoader(
            test_set,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )
        forecasting_collector = []

        # Step 2: process the data with the model
        with torch.no_grad():
            for idx, data in enumerate(test_loader):
                inputs = self._assemble_input_for_testing(data)
                results = self.model(inputs)
                forecasting_data = results["forecasting_data"]
                forecasting_collector.append(forecasting_data)

        # Step 3: output collection and return
        forecasting_data = torch.cat(forecasting_collector).cpu().detach().numpy()
        result_dict = {
            "forecasting": forecasting_data,  # [n_samples, n_pred_steps, n_pred_features]
        }
        return result_dict

    def forecast(
        self,
        test_set: Union[dict, str],
        file_type: str = "hdf5",
    ) -> np.ndarray:
        """Forecast the future of the input with the trained model.

        Parameters
        ----------
        test_set :
            The data samples for testing, should be array-like with shape [n_samples, sequence length (n_steps),
            n_features], or a path string locating a data file, e.g. an h5 file.

        file_type :
            The type of the given file if test_set is a path string.

        Returns
        -------
        array-like, shape [n_samples, n_pred_steps, n_pred_features],
            Forecasting results.
        """

        result_dict = self.predict(test_set, file_type=file_type)
        return result_dict["forecasting"]
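

if __name__ == "__main__":
    # A minimal usage sketch, not a definitive recipe: it assumes this module is executed as part of
    # its package (the relative imports above require that), that DatasetForGPT4TS accepts dataset
    # dicts with the keys "X" and "X_pred" used below (the same key checked in ``fit``), and that all
    # shapes and hyperparameters here are purely illustrative.
    rng = np.random.default_rng(2024)

    # 64 training samples: 96 observed steps with ~10% missing values, 24 steps to forecast, 7 features.
    train_X = rng.standard_normal((64, 96, 7)).astype(np.float32)
    train_X[rng.random(train_X.shape) < 0.1] = np.nan
    train_X_pred = rng.standard_normal((64, 24, 7)).astype(np.float32)
    test_X = rng.standard_normal((8, 96, 7)).astype(np.float32)

    model = GPT4TS(
        n_steps=96,
        n_features=7,
        n_pred_steps=24,
        n_pred_features=7,
        term="long",
        patch_size=16,
        patch_stride=8,
        n_layers=3,
        train_gpt_mlp=False,
        d_ffn=128,
        dropout=0.1,
        batch_size=16,
        epochs=1,
    )
    model.fit(train_set={"X": train_X, "X_pred": train_X_pred})

    # forecast() wraps predict() and returns the "forecasting" array directly.
    forecasting = model.forecast(test_set={"X": test_X})
    print(forecasting.shape)  # expected: (8, 24, 7)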