@@ -180,6 +180,7 @@ angular
180
180
. service ( 'Datasets' , function ( $injector , $q , Restangular , Jobs ) {
181
181
'use strict' ;
182
182
var self = this ;
183
+ var DEFAULT_SAMPLING = 70 ;
183
184
184
185
function dataset ( id ) { return Restangular . one ( 'datasets' , id ) ; }
185
186
function datasets ( ) { return Restangular . all ( 'datasets' ) ; }
@@ -272,6 +273,10 @@ angular
272
273
* <li>Idem for learning/testing filenames</li>
273
274
* </ul>
274
275
*
276
+ * According to @sferrandiz, the <code>split()</code> function is deterministic (if you ask to split a dataset
277
+ * many times, you will always get the same result). There is an insignificant risk of non-deterministic behaviour
278
+ * depending on how the random generator was compiled (which can differ from one OS to another)
279
+ *
275
280
* @param {String } id Dataset id you want to split (called <em>original dataset</em>)
276
281
* @param {String } name Name of the original dataset (used to name its subsets)
277
282
* @param {String } [filename=name] Name of the original datafile (used to name its subsets' datafiles). If undefined, value of <kbd>name</kbd> parameter is used
@@ -284,7 +289,7 @@ angular
284
289
*/
285
290
this . split = function ( id , name , filename , sampling ) {
286
291
filename = filename || name ;
287
- sampling = sampling || 70 ;
292
+ sampling = sampling || DEFAULT_SAMPLING ;
288
293
289
294
var learn = {
290
295
parent_dataset_id : id ,
@@ -346,23 +351,35 @@ angular
346
351
* <div><span class="badge get">get</span><code>/datasets/:learned_dataset_id</code></div>
347
352
* <div><span class="badge get">get</span><code>/datasets/:tested_dataset_id</code></div>
348
353
*
349
- * @param {String } id Identifier of an original dataset
354
+ * @see https://github.com/yllieth/predicsis_ml_sdk-javascript/issues/19
355
+ * <em>When datasets have only 2 splits, this function works well, but the subsets array may contain
356
+ * multiple train/test subsets, and only checking child.sampling > 0 is not enough.
357
+ * Indeed, when there is more than one train subset (where sampling > 0), this function
358
+ * will return the first one.</em>
359
+ *
360
+ * To solve that issue, we look at <code>dictionary.dataset_ids</code> instead of
361
+ * <code>dataset.children_dataset_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
362
+ * to the right dictionary, which contains the selected target.
363
+ *
364
+ * @param {String } dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
350
365
* @return {Promise }
351
366
* <ul>
352
367
* <li><code>children.train</code>: learning dataset</li>
353
368
* <li><code>children.test</code>: testing dataset</li>
354
369
* </ul>
355
370
*/
356
- this . getChildren = function ( id ) {
357
- return self . get ( id )
358
- . then ( function ( originalDataset ) {
359
- return self . all ( originalDataset . children_dataset_ids ) ;
371
+ this . getChildren = function ( dictionaryId ) {
372
+ var Dictionaries = $injector . get ( 'Dictionaries' ) ;
373
+
374
+ return Dictionaries . get ( dictionaryId )
375
+ . then ( function ( dictionary ) {
376
+ return self . all ( dictionary . dataset_ids ) ;
360
377
} )
361
- . then ( function ( subsets ) {
362
- return subsets . reduce ( function ( memo , child ) {
363
- if ( child . sampling > 0 ) {
378
+ . then ( function ( childrenCandidates ) {
379
+ return childrenCandidates . reduce ( function ( memo , child ) {
380
+ if ( self . isTrainPart ( child , DEFAULT_SAMPLING ) ) {
364
381
memo . train = child ;
365
- } else {
382
+ } else if ( self . isTestPart ( child , - DEFAULT_SAMPLING ) ) {
366
383
memo . test = child ;
367
384
}
368
385
@@ -496,25 +513,40 @@ angular
496
513
* @methodOf predicsis.jsSDK.models.Datasets
497
514
* @name isTrainPart
498
515
* @description Tells if a dataset is a train subset.
499
- * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive
516
+ * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive and equal to the given <kbd>sampling</kbd> value.
517
+ * A sampling must be between 0 and 100.
500
518
* @param {Object } dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
519
+ * @param {Number } [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
520
+ * compute a positive one. (If you give 70, we'll use 70, and if you give -70, we'll use 70)
501
521
* @return {Boolean } <kbd>true</kbd> / <kbd>false</kbd>
502
522
*/
503
this.isTrainPart = function (dataset, sampling) {
  // Fall back to the default only when the caller supplied no sampling at all.
  // The former `sampling || DEFAULT_SAMPLING` also swapped an explicit 0 (which is
  // inside the accepted [-100, 100] range checked below) for the default value.
  if (sampling === undefined || sampling === null) {
    sampling = DEFAULT_SAMPLING;
  }

  // The sign given by the caller is ignored: the dataset's sampling is compared
  // against the absolute value of the requested sampling.
  return this.isChild(dataset)
    && angular.isNumber(sampling)
    && -100 <= sampling && sampling <= 100
    && dataset.sampling === Math.abs(sampling);
};
506
531
507
532
/**
508
533
* @ngdoc function
509
534
* @methodOf predicsis.jsSDK.models.Datasets
510
535
* @name isTestPart
511
536
* @description Tells if a dataset is a test subset.
512
- * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative
537
+ * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative and equal to the given <kbd>sampling</kbd> value.
513
538
* @param {Object } dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
539
+ * @param {Number } [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
540
+ * compute a negative one. (If you give 70, we'll use -70, and if you give -70, we'll use -70)
514
541
* @return {Boolean } <kbd>true</kbd> / <kbd>false</kbd>
515
542
*/
516
this.isTestPart = function (dataset, sampling) {
  // Fall back to the default only when the caller supplied no sampling at all.
  // The former `sampling || DEFAULT_SAMPLING` also swapped an explicit 0 (which is
  // inside the accepted [-100, 100] range checked below) for the default value.
  if (sampling === undefined || sampling === null) {
    sampling = DEFAULT_SAMPLING;
  }

  // The sign given by the caller is ignored: the dataset's sampling is compared
  // against the negated absolute value of the requested sampling.
  return this.isChild(dataset)
    && angular.isNumber(sampling)
    && -100 <= sampling && sampling <= 100
    && dataset.sampling === -Math.abs(sampling);
};
519
551
520
552
/**
0 commit comments