sign-language-processing · cleong110 · Jun 20, 2024 · Jun 20, 2024 · Jun 21, 2024 · Jun 21, 2024
diff --git a/src/datasets/NCSLGR.json b/src/datasets/NCSLGR.json
@@ -2,7 +2,7 @@
   "pub": {
     "name": "NCSLGR",
     "year": 2007,
-    "publication": "dataset:databases2007volumes",
+    "publication": "dataset:Neidle_2020_NCSLGR_ISLRN",
     "url": "https://www.bu.edu/asllrp/ncslgr.html"
   },
   "loader": "ncslgr",
@@ -15,7 +15,7 @@
   "#items": null,
   "#samples": "1,875 sentences",
   "#signers": 4,
-  "license": "TODO",
-  "licenseUrl": null,
+  "license": "Research Attribution",
+  "licenseUrl": "https://www.bu.edu/asllrp/data-credits.html",
   "contact": "[email protected]"
 }
diff --git a/src/index.md b/src/index.md
@@ -1046,7 +1046,7 @@ are collections of annotated single signs. They are synthesized [@dataset:ebling
 contain parallel sequences of signs and spoken language.
 Available continuous sign corpora are extremely limited, containing 4-6 orders of magnitude fewer sentence pairs than similar corpora for spoken language machine translation [@arivazhagan2019massively].
 Moreover, while automatic speech recognition (ASR) datasets contain up to 50,000 hours of recordings [@pratap2020mls], the most extensive continuous sign language corpus contains only 1,150 hours, and only 50 of them are publicly available [@dataset:hanke-etal-2020-extending].
-These datasets are usually synthesized [@dataset:databases2007volumes;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event.
+These datasets are usually synthesized [@dataset:Neidle_2020_NCSLGR_ISLRN;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event.
 
 
 ###### Availability {-}

diff --git a/src/references.bib b/src/references.bib
@@ -3457,3 +3457,19 @@ @inproceedings{dataset:dal2022lsa
  url = {https://doi.org/10.1007/978-3-031-22419-5_25},
  year = {2023}
 }
+
+@inproceedings{Vogler2012ANW,
+  title = {A new web interface to facilitate access to corpora: development of the ASLLRP data access interface},
+  author = {Christian Vogler and Carol Neidle},
+  year = {2012},
+  url = {https://api.semanticscholar.org/CorpusID:58305327}
+}
+
+@misc{dataset:Neidle_2020_NCSLGR_ISLRN,
+  type = {Languageresource},
+  title = {National Center for Sign Language and Gesture Resources (NCSLGR) corpus. ISLRN 833-505-711-564-4},
+  author = {Carol Neidle and Stan Sclaroff},
+  year = {2012},
+  publisher = {Boston University},
+  url = {https://www.islrn.org/resources/833-505-711-564-4/}
+}