diff --git a/nemo/retriever-synthetic-data-generation/nemo_retriever_sdg/dataset.py b/nemo/retriever-synthetic-data-generation/nemo_retriever_sdg/dataset.py
index a331d3d4..5234a62e 100644
--- a/nemo/retriever-synthetic-data-generation/nemo_retriever_sdg/dataset.py
+++ b/nemo/retriever-synthetic-data-generation/nemo_retriever_sdg/dataset.py
@@ -72,15 +72,26 @@ def load_rawdoc(cls, name: str):
         """Load rawdoc format
         {"text": "...", "title": "..."}
         {"text": "...", "title": "..."}
+        or
+        {"_id": "...", "text": "...", "title": "..."}
+        {"_id": "...", "text": "...", "title": "..."}
         ...
         """
         examples = []
         for example in [json.loads(line) for line in open(name, "r")]:
-            examples.append({"paragraphs": [
-                                {"context": example["text"],
-                                 "document_id": example["title"],
-                                 "qas": []}],
-                            })
+            if "_id" in example:
+                examples.append({"paragraphs": [
+                                    {"context": example["text"],
+                                     "document_id": example["_id"],
+                                     "title": example["title"],
+                                     "qas": []}],
+                                })
+            else: 
+                examples.append({"paragraphs": [
+                                    {"context": example["text"],
+                                     "title": example["title"],
+                                     "qas": []}],
+                                })
         return cls({"data": examples, "version": "2.0"})
 
     def to_json(self, output_path: str):
@@ -174,7 +185,10 @@ def to_beir(self,
             qrels = []
             qid = 0
             for i, example in enumerate(self.data["data"]):
-                doc_id = "doc{}".format(i + 1) # "doc1", "doc2", "doc3", ... "docN"
+                if "document_id" in example['paragraphs'][0]:
+                    doc_id = example['paragraphs'][0]['document_id'] #doc_id
+                else:
+                    doc_id = "doc{}".format(i + 1) # "doc1", "doc2", "doc3", ... "docN"
                 title = example["paragraphs"][0]["document_id"] # Title
                 text = example["paragraphs"][0]["context"]
                 # TODO: Add information as metadata
diff --git a/nemo/retriever-synthetic-data-generation/notebooks/quickstart.ipynb b/nemo/retriever-synthetic-data-generation/notebooks/quickstart.ipynb
index c6007a10..2d0cff8e 100644
--- a/nemo/retriever-synthetic-data-generation/notebooks/quickstart.ipynb
+++ b/nemo/retriever-synthetic-data-generation/notebooks/quickstart.ipynb
@@ -32,6 +32,13 @@
     "{\"text\": \"The Eiffel Tower is an iron lattice tower on the Champ de Mars in Paris.\", \"title\": \"Iconic Landmark\" }\n",
     "...\n",
     "```\n",
+    "If a document already has an ID, the pipeline also accepts input in the format `{\"_id\": <document id>, \"text\": <document>, \"title\": <title>}`. The same document ID is preserved in the generated results.\n",
+    "\n",
+    "```\n",
+    "{\"_id\": \"3\", \"text\": \"The quick brown fox jumps over the lazy dog.\", \"title\": \"Classic Pangram\" }\n",
+    "{\"_id\": \"43\", \"text\": \"The Eiffel Tower is an iron lattice tower on the Champ de Mars in Paris.\", \"title\": \"Iconic Landmark\" }\n",
+    "...\n",
+    "```\n",
     "\n",
     "This repository contains a sample JSONL file `data/sample_data.jsonl`.\n",
     "\n",