Skip to content

Commit 5e9cad5

Browse files
committed
Ensure that we choose the right subsets in getChildren()
Instead of using `dataset.children_dataset_ids`, where all children are stored mixed together, we are now using `dictionary.dataset_ids`. Fix #19
1 parent a71f1f2 commit 5e9cad5

File tree

1 file changed

+48
-16
lines changed

1 file changed

+48
-16
lines changed

lib/model/Datasets.js

Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ angular
180180
.service('Datasets', function($injector, $q, Restangular, Jobs) {
181181
'use strict';
182182
var self = this;
183+
var DEFAULT_SAMPLING = 70;
183184

184185
function dataset(id) { return Restangular.one('datasets', id); }
185186
function datasets() { return Restangular.all('datasets'); }
@@ -268,6 +269,10 @@ angular
268269
* <li>Idem for learning/testing filenames</li>
269270
* </ul>
270271
*
272+
* According to @sferrandiz, the <code>split()</code> function is deterministic (if you ask to split a dataset
273+
* many times, you will always get the same result). There is an insignificant risk of non-deterministic behaviour
274+
* based on the compilation of the random generator (which can differ from one OS to another)
275+
*
271276
* @param {String} id Dataset id you want to split (called <em>original dataset</em>)
272277
* @param {String} name Name of the original dataset (used to name its subsets)
273278
* @param {String} [filename=name] Name of the original datafile (used to name its subsets's datafile). If undefined, value of <kbd>name</kbd> parameter is used
@@ -280,7 +285,7 @@ angular
280285
*/
281286
this.split = function(id, name, filename, sampling) {
282287
filename = filename || name;
283-
sampling = sampling || 70;
288+
sampling = sampling || DEFAULT_SAMPLING;
284289

285290
var learn = {
286291
parent_dataset_id: id,
@@ -342,23 +347,35 @@ angular
342347
* <div><span class="badge get">get</span><code>/datasets/:learned_dataset_id</code></div>
343348
* <div><span class="badge get">get</span><code>/datasets/:tested_dataset_id</code></div>
344349
*
345-
* @param {String} id Identifier of an original dataset
350+
* @see https://github.com/yllieth/predicsis_ml_sdk-javascript/issues/19
351+
* <em>When datasets have only 2 splits, this function works well, but the subsets array may contain
352+
* multiple train/test subsets, and only checking child.sampling > 0 is not enough.
353+
* Indeed, when there is more than one train subset (where sampling > 0), this function
354+
* will return the first one.</em>
355+
*
356+
* To solve that issue, we look at <code>dictionary.dataset_ids</code> instead of
357+
* <code>dataset.children_dataset_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
358+
* to the right dictionary, which contains the selected target.
359+
*
360+
* @param {String} dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
346361
* @return {Promise}
347362
* <ul>
348363
* <li><code>children.train</code>: learning dataset</li>
349364
* <li><code>children.test</code>: testing dataset</li>
350365
* </ul>
351366
*/
352-
this.getChildren = function(id) {
353-
return self.get(id)
354-
.then(function(originalDataset) {
355-
return self.all(originalDataset.children_dataset_ids);
367+
this.getChildren = function(dictionaryId) {
368+
var Dictionaries = $injector.get('Dictionaries');
369+
370+
return Dictionaries.get(dictionaryId)
371+
.then(function(dictionary) {
372+
return self.all(dictionary.dataset_ids);
356373
})
357-
.then(function(subsets) {
358-
return subsets.reduce(function(memo, child) {
359-
if (child.sampling > 0) {
374+
.then(function(childrenCandidates) {
375+
return childrenCandidates.reduce(function(memo, child) {
376+
if (self.isTrainPart(child, DEFAULT_SAMPLING)) {
360377
memo.train = child;
361-
} else {
378+
} else if (self.isTestPart(child, -DEFAULT_SAMPLING)) {
362379
memo.test = child;
363380
}
364381

@@ -492,25 +509,40 @@ angular
492509
* @methodOf predicsis.jsSDK.models.Datasets
493510
* @name isTrainPart
494511
* @description Tells if a dataset is a train subset.
495-
* <b>Note:</b> A dataset is considered as a train subset if its sampling is positive
512+
* <b>Note:</b> A dataset is considered a train subset if its sampling is positive and equal to the absolute value of the given <kbd>sampling</kbd>.
513+
* The given <kbd>sampling</kbd> must be between -100 and 100.
496514
* @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
515+
* @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
516+
* compute a positive one. (If you give 70, we'll use 70, and if you give -70, we'll use 70)
497517
* @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
498518
*/
499-
this.isTrainPart = function(dataset) {
500-
return this.isChild(dataset) && dataset.sampling > 0;
519+
this.isTrainPart = function(dataset, sampling) {
520+
sampling = sampling || DEFAULT_SAMPLING;
521+
522+
return this.isChild(dataset)
523+
&& angular.isNumber(sampling)
524+
&& -100 <= sampling && sampling <= 100
525+
&& dataset.sampling === Math.abs(sampling);
501526
};
502527

503528
/**
504529
* @ngdoc function
505530
* @methodOf predicsis.jsSDK.models.Datasets
506531
* @name isTestPart
507532
* @description Tells if a dataset is a test subset.
508-
* <b>Note:</b> A dataset is considered as a test subset if its sampling is negative
533+
* <b>Note:</b> A dataset is considered a test subset if its sampling is negative and equal to the negated absolute value of the given <kbd>sampling</kbd>.
509534
* @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
535+
* @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
536+
* compute a negative one. (If you give 70, we'll use -70, and if you give -70, we'll use -70)
510537
* @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
511538
*/
512-
this.isTestPart = function(dataset) {
513-
return this.isChild(dataset) && dataset.sampling < 0;
539+
this.isTestPart = function(dataset, sampling) {
540+
sampling = sampling || DEFAULT_SAMPLING;
541+
542+
return this.isChild(dataset)
543+
&& angular.isNumber(sampling)
544+
&& -100 <= sampling && sampling <= 100
545+
&& dataset.sampling === -Math.abs(sampling);
514546
};
515547

516548
/**

0 commit comments

Comments
 (0)