Merge pull request #21 from yllieth/19

dehau · dehau · commit 1842f2611e10 · 2015-09-14T16:39:15.000+02:00
Ensure that we choose the good subsets while getChildren()
diff --git a/lib/model/Datasets.js b/lib/model/Datasets.js
@@ -180,6 +180,7 @@ angular
   .service('Datasets', function($injector, $q, Restangular, Jobs) {
     'use strict';
     var self = this;
+    var DEFAULT_SAMPLING = 70;
 
     function dataset(id) { return Restangular.one('datasets', id); }
     function datasets() { return Restangular.all('datasets'); }
@@ -272,6 +273,10 @@ angular
      *    <li>Idem for learning/testing filenames</li>
      *  </ul>
      *
+     * According to @sferrandiz, the <code>split()</code> function is deterministic (if you split the same dataset
+     * many times, you will always get the same result). There is a negligible risk of non-deterministic behaviour
+     * depending on how the random generator was compiled (which can differ from one OS to another).
+     *
      * @param {String} id              Dataset id you want to split (called <em>original dataset</em>)
      * @param {String} name            Name of the original dataset (used to name its subsets)
      * @param {String} [filename=name] Name of the original datafile (used to name its subsets's datafile). If undefined, value of <kbd>name</kbd> parameter is used
@@ -284,7 +289,7 @@ angular
      */
     this.split = function(id, name, filename, sampling) {
       filename = filename || name;
-      sampling = sampling || 70;
+      sampling = sampling || DEFAULT_SAMPLING;
 
       var learn = {
         parent_dataset_id: id,
@@ -346,23 +351,35 @@ angular
      * <div><span class="badge get">get</span><code>/datasets/:learned_dataset_id</code></div>
      * <div><span class="badge get">get</span><code>/datasets/:tested_dataset_id</code></div>
      *
-     * @param {String} id Identifier of an original dataset
+     * @see https://github.com/yllieth/predicsis_ml_sdk-javascript/issues/19
+     * <em>When datasets have only 2 splits, this function works well, but the subsets array may contain
+     * multiple train/test subsets and only checking child.sampling > 0 is not enough.
+     * Indeed, when there is more than one train subset (where sampling > 0), this function
+     * will return the first one.</em>
+     *
+     * To solve that issue, we look at <code>dictionary.dataset_ids</code> instead of
+     * <code>dataset.children_dataset_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
+     * to the right dictionary, which contains the selected target.
+     *
+     * @param {String} dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
      * @return {Promise}
      * <ul>
      *   <li><code>children.train</code>: learning dataset</li>
      *   <li><code>children.test</code>: testing dataset</li>
      * </ul>
      */
-    this.getChildren = function(id) {
-      return self.get(id)
-        .then(function(originalDataset) {
-          return self.all(originalDataset.children_dataset_ids);
+    this.getChildren = function(dictionaryId) {
+      var Dictionaries = $injector.get('Dictionaries');
+
+      return Dictionaries.get(dictionaryId)
+        .then(function(dictionary) {
+          return self.all(dictionary.dataset_ids);
         })
-        .then(function(subsets) {
-          return subsets.reduce(function(memo, child) {
-            if (child.sampling > 0) {
+        .then(function(childrenCandidates) {
+          return childrenCandidates.reduce(function(memo, child) {
+            if (self.isTrainPart(child, DEFAULT_SAMPLING)) {
               memo.train = child;
-            } else {
+            } else if (self.isTestPart(child, -DEFAULT_SAMPLING)) {
               memo.test = child;
             }
 
@@ -496,25 +513,40 @@ angular
      * @methodOf predicsis.jsSDK.models.Datasets
      * @name isTrainPart
      * @description Tells if a dataset is a train subset.
-     * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive
+     * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive and equal to the absolute value of the given <kbd>sampling</kbd>.
+     * The given <kbd>sampling</kbd> must be between -100 and 100.
      * @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
+     * @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
+     * compute a positive one. (If you give 70, we'll use 70, and if you give -70, we'll use 70)
      * @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
      */
-    this.isTrainPart = function(dataset) {
-      return this.isChild(dataset) && dataset.sampling > 0;
+    this.isTrainPart = function(dataset, sampling) {
+      sampling = sampling || DEFAULT_SAMPLING;
+
+      return this.isChild(dataset)
+        && angular.isNumber(sampling)
+        && -100 <= sampling && sampling <= 100
+        && dataset.sampling === Math.abs(sampling);
     };
 
     /**
      * @ngdoc function
      * @methodOf predicsis.jsSDK.models.Datasets
      * @name isTestPart
      * @description Tells if a dataset is a test subset.
-     * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative
+     * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative and equal to the negated absolute value of the given <kbd>sampling</kbd> (which must be between -100 and 100).
      * @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
+     * @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
+     * compute a negative one. (If you give 70, we'll use -70, and if you give -70, we'll use -70)
      * @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
      */
-    this.isTestPart = function(dataset) {
-      return this.isChild(dataset) && dataset.sampling < 0;
+    this.isTestPart = function(dataset, sampling) {
+      sampling = sampling || DEFAULT_SAMPLING;
+
+      return this.isChild(dataset)
+        && angular.isNumber(sampling)
+        && -100 <= sampling && sampling <= 100
+        && dataset.sampling === -Math.abs(sampling);
     };
 
     /**