@@ -180,6 +180,7 @@ angular
180
180
. service ( 'Datasets' , function ( $injector , $q , Restangular , Jobs ) {
181
181
'use strict' ;
182
182
var self = this ;
183
+ var DEFAULT_SAMPLING = 70 ;
183
184
184
185
function dataset ( id ) { return Restangular . one ( 'datasets' , id ) ; }
185
186
function datasets ( ) { return Restangular . all ( 'datasets' ) ; }
@@ -268,6 +269,10 @@ angular
268
269
* <li>Idem for learning/testing filenames</li>
269
270
* </ul>
270
271
*
272
+ * According to @sferrandiz, the <code>split()</code> function is deterministic (if you split the same dataset
273
+ * many times, you will always get the same result). There is a negligible risk of non-deterministic behaviour
274
+ * depending on how the random generator was compiled (which can differ from one OS to another).
275
+ *
271
276
* @param {String } id Dataset id you want to split (called <em>original dataset</em>)
272
277
* @param {String } name Name of the original dataset (used to name its subsets)
273
278
* @param {String } [filename=name] Name of the original datafile (used to name its subsets' datafiles). If undefined, the value of the <kbd>name</kbd> parameter is used
@@ -280,7 +285,7 @@ angular
280
285
*/
281
286
this . split = function ( id , name , filename , sampling ) {
282
287
filename = filename || name ;
283
- sampling = sampling || 70 ;
288
+ sampling = sampling || DEFAULT_SAMPLING ;
284
289
285
290
var learn = {
286
291
parent_dataset_id : id ,
@@ -342,23 +347,35 @@ angular
342
347
* <div><span class="badge get">get</span><code>/datasets/:learned_dataset_id</code></div>
343
348
* <div><span class="badge get">get</span><code>/datasets/:tested_dataset_id</code></div>
344
349
*
345
- * @param {String } id Identifier of an original dataset
350
+ * @see https://github.com/yllieth/predicsis_ml_sdk-javascript/issues/19
351
+ * <em>When datasets have only 2 splits, this function works well, but the subsets array may contain
352
+ * multiple train/test subsets, and only checking child.sampling > 0 is not enough.
353
+ * Indeed, when there is more than one train subset (where sampling > 0), this function
354
+ * will return the first one.</em>
355
+ *
356
+ * To solve that issue, we look at <code>dictionary.dataset_ids</code> instead of
357
+ * <code>dataset.children_dataset_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
358
+ * to the right dictionary, which contains the selected target.
359
+ *
360
+ * @param {String } dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
346
361
* @return {Promise }
347
362
* <ul>
348
363
* <li><code>children.train</code>: learning dataset</li>
349
364
* <li><code>children.test</code>: testing dataset</li>
350
365
* </ul>
351
366
*/
352
- this . getChildren = function ( id ) {
353
- return self . get ( id )
354
- . then ( function ( originalDataset ) {
355
- return self . all ( originalDataset . children_dataset_ids ) ;
367
+ this . getChildren = function ( dictionaryId ) {
368
+ var Dictionaries = $injector . get ( 'Dictionaries' ) ;
369
+
370
+ return Dictionaries . get ( dictionaryId )
371
+ . then ( function ( dictionary ) {
372
+ return self . all ( dictionary . dataset_ids ) ;
356
373
} )
357
- . then ( function ( subsets ) {
358
- return subsets . reduce ( function ( memo , child ) {
359
- if ( child . sampling > 0 ) {
374
+ . then ( function ( childrenCandidates ) {
375
+ return childrenCandidates . reduce ( function ( memo , child ) {
376
+ if ( self . isTrainPart ( child , DEFAULT_SAMPLING ) ) {
360
377
memo . train = child ;
361
- } else {
378
+ } else if ( self . isTestPart ( child , - DEFAULT_SAMPLING ) ) {
362
379
memo . test = child ;
363
380
}
364
381
@@ -492,25 +509,40 @@ angular
492
509
* @methodOf predicsis.jsSDK.models.Datasets
493
510
* @name isTrainPart
494
511
* @description Tells if a dataset is a train subset.
495
- * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive
512
+ * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive and equal to the given <kbd>sampling</kbd> value.
513
+ * A sampling must be between 0 and 100.
496
514
* @param {Object } dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
515
+ * @param {Number } [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
516
+ * compute a positive one. (If you give 70, we'll use 70, and if you give -70, we'll use 70)
497
517
* @return {Boolean } <kbd>true</kbd> / <kbd>false</kbd>
498
518
*/
499
- this . isTrainPart = function ( dataset ) {
500
- return this . isChild ( dataset ) && dataset . sampling > 0 ;
519
+ this . isTrainPart = function ( dataset , sampling ) {
// A missing sampling falls back to DEFAULT_SAMPLING; because `||` is used, any falsy value (including 0) does too.
520
+ sampling = sampling || DEFAULT_SAMPLING ;
521
+
// All four conditions must hold: the dataset is a child, the sampling is a number within [-100, 100],
// and the dataset's own sampling strictly equals the absolute value of the requested sampling.
522
+ return this . isChild ( dataset )
523
+ && angular . isNumber ( sampling )
524
+ && - 100 <= sampling && sampling <= 100
// The sign of the argument is dropped here, so passing 70 or -70 checks against 70.
525
+ && dataset . sampling === Math . abs ( sampling ) ;
501
526
} ;
502
527
503
528
/**
504
529
* @ngdoc function
505
530
* @methodOf predicsis.jsSDK.models.Datasets
506
531
* @name isTestPart
507
532
* @description Tells if a dataset is a test subset.
508
- * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative
533
+ * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative and equal to the given <kbd>sampling</kbd> value.
509
534
* @param {Object } dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
535
+ * @param {Number } [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
536
+ * compute a negative one. (If you give 70, we'll use -70, and if you give -70, we'll use -70)
510
537
* @return {Boolean } <kbd>true</kbd> / <kbd>false</kbd>
511
538
*/
512
- this . isTestPart = function ( dataset ) {
513
- return this . isChild ( dataset ) && dataset . sampling < 0 ;
539
+ this . isTestPart = function ( dataset , sampling ) {
// A missing sampling falls back to DEFAULT_SAMPLING; because `||` is used, any falsy value (including 0) does too.
540
+ sampling = sampling || DEFAULT_SAMPLING ;
541
+
// All four conditions must hold: the dataset is a child, the sampling is a number within [-100, 100],
// and the dataset's own sampling strictly equals the negated absolute value of the requested sampling.
542
+ return this . isChild ( dataset )
543
+ && angular . isNumber ( sampling )
544
+ && - 100 <= sampling && sampling <= 100
// The sign of the argument is normalised to negative here, so passing 70 or -70 checks against -70.
545
+ && dataset . sampling === - Math . abs ( sampling ) ;
514
546
} ;
515
547
516
548
/**
0 commit comments