Skip to content

Commit 1842f26

Browse files
committed
Merge pull request #21 from yllieth/19
Ensure that we choose the good subsets while getChildren()
2 parents 63f6e49 + 5e9cad5 commit 1842f26

File tree

1 file changed

+48
-16
lines changed

1 file changed

+48
-16
lines changed

lib/model/Datasets.js

Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ angular
180180
.service('Datasets', function($injector, $q, Restangular, Jobs) {
181181
'use strict';
182182
var self = this;
183+
var DEFAULT_SAMPLING = 70;
183184

184185
function dataset(id) { return Restangular.one('datasets', id); }
185186
function datasets() { return Restangular.all('datasets'); }
@@ -272,6 +273,10 @@ angular
272273
* <li>Idem for learning/testing filenames</li>
273274
* </ul>
274275
*
276+
* According to @sferrandiz, the <code>split()</code> function is deterministic (if you ask to split a dataset
277+
* many times, you will always get the same result). There is an insignificant risk of non-deterministic behaviour
278+
* based on the compilation of the random generator (which can differ from one OS to another)
279+
*
275280
* @param {String} id Dataset id you want to split (called <em>original dataset</em>)
276281
* @param {String} name Name of the original dataset (used to name its subsets)
277282
* @param {String} [filename=name] Name of the original datafile (used to name its subsets' datafiles). If undefined, the value of the <kbd>name</kbd> parameter is used
@@ -284,7 +289,7 @@ angular
284289
*/
285290
this.split = function(id, name, filename, sampling) {
286291
filename = filename || name;
287-
sampling = sampling || 70;
292+
sampling = sampling || DEFAULT_SAMPLING;
288293

289294
var learn = {
290295
parent_dataset_id: id,
@@ -346,23 +351,35 @@ angular
346351
* <div><span class="badge get">get</span><code>/datasets/:learned_dataset_id</code></div>
347352
* <div><span class="badge get">get</span><code>/datasets/:tested_dataset_id</code></div>
348353
*
349-
* @param {String} id Identifier of an original dataset
354+
* @see https://github.com/yllieth/predicsis_ml_sdk-javascript/issues/19
355+
* <em>When datasets have only 2 splits, this function works fine, but the subsets array may contain
356+
* multiple train/test subsets, and only checking child.sampling > 0 is not enough.
357+
* Indeed, when there is more than one train subset (where sampling > 0), this function
358+
* will return the first one.</em>
359+
*
360+
* To solve that issue, we look at <code>dictionary.dataset_ids</code> instead of
361+
* <code>dataset.children_dataset_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
362+
* to the right dictionary, which contains the selected target.
363+
*
364+
* @param {String} dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
350365
* @return {Promise}
351366
* <ul>
352367
* <li><code>children.train</code>: learning dataset</li>
353368
* <li><code>children.test</code>: testing dataset</li>
354369
* </ul>
355370
*/
356-
this.getChildren = function(id) {
357-
return self.get(id)
358-
.then(function(originalDataset) {
359-
return self.all(originalDataset.children_dataset_ids);
371+
this.getChildren = function(dictionaryId) {
372+
var Dictionaries = $injector.get('Dictionaries');
373+
374+
return Dictionaries.get(dictionaryId)
375+
.then(function(dictionary) {
376+
return self.all(dictionary.dataset_ids);
360377
})
361-
.then(function(subsets) {
362-
return subsets.reduce(function(memo, child) {
363-
if (child.sampling > 0) {
378+
.then(function(childrenCandidates) {
379+
return childrenCandidates.reduce(function(memo, child) {
380+
if (self.isTrainPart(child, DEFAULT_SAMPLING)) {
364381
memo.train = child;
365-
} else {
382+
} else if (self.isTestPart(child, -DEFAULT_SAMPLING)) {
366383
memo.test = child;
367384
}
368385

@@ -496,25 +513,40 @@ angular
496513
* @methodOf predicsis.jsSDK.models.Datasets
497514
* @name isTrainPart
498515
* @description Tells if a dataset is a train subset.
499-
* <b>Note:</b> A dataset is considered as a train subset if its sampling is positive
516+
* <b>Note:</b> A dataset is considered as a train subset if its sampling is positive and equal to the given <kbd>sampling</kbd> value.
517+
* The given <kbd>sampling</kbd> must be between -100 and 100 (its sign is ignored).
500518
* @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
519+
* @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
520+
* compute a positive one. (If you give 70, we'll use 70, and if you give -70, we'll use 70)
501521
* @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
502522
*/
503-
this.isTrainPart = function(dataset) {
504-
return this.isChild(dataset) && dataset.sampling > 0;
523+
this.isTrainPart = function(dataset, sampling) {
524+
sampling = sampling || DEFAULT_SAMPLING;
525+
526+
return this.isChild(dataset)
527+
&& angular.isNumber(sampling)
528+
&& -100 <= sampling && sampling <= 100
529+
&& dataset.sampling === Math.abs(sampling);
505530
};
506531

507532
/**
508533
* @ngdoc function
509534
* @methodOf predicsis.jsSDK.models.Datasets
510535
* @name isTestPart
511536
* @description Tells if a dataset is a test subset.
512-
* <b>Note:</b> A dataset is considered as a test subset if its sampling is negative
537+
* <b>Note:</b> A dataset is considered as a test subset if its sampling is negative and equal to the given <kbd>sampling</kbd> value.
513538
* @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
539+
* @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
540+
* compute a negative one. (If you give 70, we'll use -70, and if you give -70, we'll use -70)
514541
* @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
515542
*/
516-
this.isTestPart = function(dataset) {
517-
return this.isChild(dataset) && dataset.sampling < 0;
543+
this.isTestPart = function(dataset, sampling) {
544+
sampling = sampling || DEFAULT_SAMPLING;
545+
546+
return this.isChild(dataset)
547+
&& angular.isNumber(sampling)
548+
&& -100 <= sampling && sampling <= 100
549+
&& dataset.sampling === -Math.abs(sampling);
518550
};
519551

520552
/**

0 commit comments

Comments
 (0)