Merge pull request #22 from yllieth/getchildren-add-datasetid-check

yllieth · yllieth · commit 34d9b8833eee · 2015-09-18T15:09:46.000+02:00
Getchildren add datasetid check
diff --git a/dist/predicsis-jsSDK.js b/dist/predicsis-jsSDK.js
@@ -407,6 +407,7 @@ angular
   .service('Datasets', function($injector, $q, Restangular, Jobs) {
     'use strict';
     var self = this;
+    var DEFAULT_SAMPLING = 70;
 
     function dataset(id) { return Restangular.one('datasets', id); }
     function datasets() { return Restangular.all('datasets'); }
@@ -467,7 +468,7 @@ angular
      *
      * @param {String} fileName used to create the source and the dataset
      * @param {String} destFolder AWS key where the dataset has been uploaded
-     * @param {String} pathName name of the key used to provide destFolder
+     * @param {String} pathName name of the key used to provide destFolder ('key' for S3, 'object' for swift)
      * @return {Promise} Newly created dataset
      */
     this.createFromUpload = function(fileName, destFolder, pathName) {
@@ -499,6 +500,10 @@ angular
      *    <li>Idem for learning/testing filenames</li>
      *  </ul>
      *
+     * According to @sferrandiz, the <code>split()</code> function is deterministic (if you split the same dataset
+     * many times, you will always get the same result). There is an insignificant risk of non-deterministic behaviour
+     * depending on how the random generator was compiled (which can differ from one OS to another).
+     *
      * @param {String} id              Dataset id you want to split (called <em>original dataset</em>)
      * @param {String} name            Name of the original dataset (used to name its subsets)
      * @param {String} [filename=name] Name of the original datafile (used to name its subsets's datafile). If undefined, value of <kbd>name</kbd> parameter is used
@@ -511,7 +516,7 @@ angular
      */
     this.split = function(id, name, filename, sampling) {
       filename = filename || name;
-      sampling = sampling || 70;
+      sampling = sampling || DEFAULT_SAMPLING;
 
       var learn = {
         parent_dataset_id: id,
@@ -573,24 +578,39 @@ angular
      * <div><span class="badge get">get</span><code>/datasets/:learned_dataset_id</code></div>
      * <div><span class="badge get">get</span><code>/datasets/:tested_dataset_id</code></div>
      *
-     * @param {String} id Identifier of an original dataset
+     * @see https://github.com/yllieth/predicsis_ml_sdk-javascript/issues/19
+     * <em>When datasets have only 2 splits, this function works well, but the subsets array may contain
+     * multiple train/test subsets and only checking child.sampling > 0 is not enough.
+     * Indeed, when there is more than one train subset (where sampling > 0), this function
+     * will return the first one.</em>
+     *
+     * To solve that issue, we look for <code>dictionary.dataset_ids</code> instead of
+     * <code>dataset.children_dataset_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
+     * to the correct dictionary, which contains the selected target.
+     *
+     * @param {String} datasetId parent Dataset identifier
+     * @param {String} dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
      * @return {Promise}
      * <ul>
      *   <li><code>children.train</code>: learning dataset</li>
      *   <li><code>children.test</code>: testing dataset</li>
      * </ul>
      */
-    this.getChildren = function(id) {
-      return self.get(id)
-        .then(function(originalDataset) {
-          return self.all(originalDataset.children_dataset_ids);
+    this.getChildren = function(datasetId, dictionaryId) {
+      var Dictionaries = $injector.get('Dictionaries');
+
+      return Dictionaries.get(dictionaryId)
+        .then(function(dictionary) {
+          return self.all(dictionary.dataset_ids);
         })
-        .then(function(subsets) {
-          return subsets.reduce(function(memo, child) {
-            if (child.sampling > 0) {
-              memo.train = child;
-            } else {
-              memo.test = child;
+        .then(function(childrenCandidates) {
+          return childrenCandidates.reduce(function(memo, child) {
+            if (child.parent_dataset_id === datasetId) {
+              if (self.isTrainPart(child, DEFAULT_SAMPLING)) {
+                memo.train = child;
+              } else if (self.isTestPart(child, -DEFAULT_SAMPLING)) {
+                memo.test = child;
+              }
             }
 
             return memo;
@@ -723,25 +743,40 @@ angular
      * @methodOf predicsis.jsSDK.models.Datasets
      * @name isTrainPart
      * @description Tells if a dataset is a train subset.
-     * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive
+     * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive and equal to the absolute value of the given <kbd>sampling</kbd>.
+     * The given <kbd>sampling</kbd> must be between -100 and 100.
      * @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
+     * @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
+     * compute a positive one. (If you give 70, we'll use 70, and if you give -70, we'll use 70)
      * @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
      */
-    this.isTrainPart = function(dataset) {
-      return this.isChild(dataset) && dataset.sampling > 0;
+    this.isTrainPart = function(dataset, sampling) {
+      sampling = sampling || DEFAULT_SAMPLING;
+
+      return this.isChild(dataset)
+        && angular.isNumber(sampling)
+        && -100 <= sampling && sampling <= 100
+        && dataset.sampling === Math.abs(sampling);
     };
 
     /**
      * @ngdoc function
      * @methodOf predicsis.jsSDK.models.Datasets
      * @name isTestPart
      * @description Tells if a dataset is a test subset.
-     * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative
+     * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative and equal to the negated absolute value of the given <kbd>sampling</kbd>.
      * @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
+     * @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
+     * compute a negative one. (If you give 70, we'll use -70, and if you give -70, we'll use -70)
      * @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
      */
-    this.isTestPart = function(dataset) {
-      return this.isChild(dataset) && dataset.sampling < 0;
+    this.isTestPart = function(dataset, sampling) {
+      sampling = sampling || DEFAULT_SAMPLING;
+
+      return this.isChild(dataset)
+        && angular.isNumber(sampling)
+        && -100 <= sampling && sampling <= 100
+        && dataset.sampling === -Math.abs(sampling);
     };
 
     /**
@@ -753,7 +788,7 @@ angular
      * @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
      */
     this.isFormatted = function(dataset) {
-      return Boolean(dataset.header !== null) && Boolean(dataset.separator !== null)
+      return Boolean(dataset.header !== null) && Boolean(dataset.separator !== null);
     };
 
     /**
@@ -1631,7 +1666,7 @@ angular
 
       $rootScope.$broadcast('jsSDK.learn.start-retrieving-train-dataset');
 
-      return Datasets.getChildren(project.learning_dataset_id)
+      return Datasets.getChildren(project.learning_dataset_id, project.dictionary_id)
         // create preparation rules
         .then(function(children) {
           if(!children.train) {
@@ -2563,7 +2598,7 @@ angular
     var reports = function() { return Restangular.all('reports'); };
     function createClassifierEvaluationReport(project, type) {
       var Datasets = $injector.get('Datasets');
-      return Datasets.getChildren(project.learning_dataset_id)
+      return Datasets.getChildren(project.learning_dataset_id, project.dictionary_id)
         .then(function(children) {
           return self.create({
             type: 'classifier_evaluation',
@@ -3324,7 +3359,7 @@ angular
     function upload(uploadObject, xhr2, credential, file) {
 
       var endpoint = credential.signed_url;
-      var headers = {}, body = file, method = 'PUT';
+      var body = file, method = 'PUT';
 
       xhr2.open(method, endpoint, true);
 
diff --git a/dist/predicsis-jsSDK.min.js b/dist/predicsis-jsSDK.min.js
diff --git a/dist/predicsis-jsSDK.min.js.map b/dist/predicsis-jsSDK.min.js.map
diff --git a/lib/helper/uploadHelper.js b/lib/helper/uploadHelper.js
@@ -17,7 +17,7 @@ angular
     function upload(uploadObject, xhr2, credential, file) {
 
       var endpoint = credential.signed_url;
-      var headers = {}, body = file, method = 'PUT';
+      var body = file, method = 'PUT';
 
       xhr2.open(method, endpoint, true);
 
diff --git a/lib/model/Datasets.js b/lib/model/Datasets.js
@@ -361,14 +361,15 @@ angular
      * <code>dataset.children_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
      * to the good dictionary, which contains the selected target.
      *
+     * @param {String} datasetId parent Dataset identifier
      * @param {String} dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
      * @return {Promise}
      * <ul>
      *   <li><code>children.train</code>: learning dataset</li>
      *   <li><code>children.test</code>: testing dataset</li>
      * </ul>
      */
-    this.getChildren = function(dictionaryId) {
+    this.getChildren = function(datasetId, dictionaryId) {
       var Dictionaries = $injector.get('Dictionaries');
 
       return Dictionaries.get(dictionaryId)
@@ -377,10 +378,12 @@ angular
         })
         .then(function(childrenCandidates) {
           return childrenCandidates.reduce(function(memo, child) {
-            if (self.isTrainPart(child, DEFAULT_SAMPLING)) {
-              memo.train = child;
-            } else if (self.isTestPart(child, -DEFAULT_SAMPLING)) {
-              memo.test = child;
+            if (child.parent_dataset_id === datasetId) {
+              if (self.isTrainPart(child, DEFAULT_SAMPLING)) {
+                memo.train = child;
+              } else if (self.isTestPart(child, -DEFAULT_SAMPLING)) {
+                memo.test = child;
+              }
             }
 
             return memo;
@@ -558,7 +561,7 @@ angular
      * @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
      */
     this.isFormatted = function(dataset) {
-      return Boolean(dataset.header !== null) && Boolean(dataset.separator !== null)
+      return Boolean(dataset.header !== null) && Boolean(dataset.separator !== null);
     };
 
     /**
diff --git a/lib/model/Models.js b/lib/model/Models.js
@@ -269,7 +269,7 @@ angular
 
       $rootScope.$broadcast('jsSDK.learn.start-retrieving-train-dataset');
 
-      return Datasets.getChildren(project.learning_dataset_id)
+      return Datasets.getChildren(project.learning_dataset_id, project.dictionary_id)
         // create preparation rules
         .then(function(children) {
           if(!children.train) {
diff --git a/lib/model/Reports.js b/lib/model/Reports.js
@@ -71,7 +71,7 @@ angular
     var reports = function() { return Restangular.all('reports'); };
     function createClassifierEvaluationReport(project, type) {
       var Datasets = $injector.get('Datasets');
-      return Datasets.getChildren(project.learning_dataset_id)
+      return Datasets.getChildren(project.learning_dataset_id, project.dictionary_id)
         .then(function(children) {
           return self.create({
             type: 'classifier_evaluation',