@@ -407,6 +407,7 @@ angular
407
407
. service ( 'Datasets' , function ( $injector , $q , Restangular , Jobs ) {
408
408
'use strict' ;
409
409
var self = this ;
410
+ var DEFAULT_SAMPLING = 70 ;
410
411
411
412
/**
 * Shortcut used inside the service to address one dataset resource.
 *
 * @param {String} id Dataset identifier, forwarded untouched to <code>Restangular.one</code>
 * @return {Object} Whatever <code>Restangular.one('datasets', id)</code> returns
 */
function dataset(id) {
  var resource = Restangular.one('datasets', id);

  return resource;
}
412
413
/**
 * Shortcut used inside the service to address the datasets collection.
 *
 * @return {Object} Whatever <code>Restangular.all('datasets')</code> returns
 */
function datasets() {
  var collection = Restangular.all('datasets');

  return collection;
}
@@ -467,7 +468,7 @@ angular
467
468
*
468
469
* @param {String } fileName used to create the source and the dataset
469
470
* @param {String } destFolder AWS key where the dataset has been uploaded
470
- * @param {String } pathName name of the key used to provide destFolder
471
+ * @param {String } pathName name of the key used to provide destFolder ('key' for S3, 'object' for swift)
471
472
* @return {Promise } Newly created dataset
472
473
*/
473
474
this . createFromUpload = function ( fileName , destFolder , pathName ) {
@@ -499,6 +500,10 @@ angular
499
500
* <li>Idem for learning/testing filenames</li>
500
501
* </ul>
501
502
*
503
+ * According to @sferrandiz, the <code>split()</code> function is deterministic (if you ask to split a dataset
504
+ * many times, you will always get the same result). There is an insignificant risk of non-deterministic behaviour
505
+ * based on the compilation of the random generator (which can differ from one OS to another)
506
+ *
502
507
* @param {String } id Dataset id you want to split (called <em>original dataset</em>)
503
508
* @param {String } name Name of the original dataset (used to name its subsets)
504
509
* @param {String } [filename=name] Name of the original datafile (used to name its subsets's datafile). If undefined, value of <kbd>name</kbd> parameter is used
@@ -511,7 +516,7 @@ angular
511
516
*/
512
517
this . split = function ( id , name , filename , sampling ) {
513
518
filename = filename || name ;
514
- sampling = sampling || 70 ;
519
+ sampling = sampling || DEFAULT_SAMPLING ;
515
520
516
521
var learn = {
517
522
parent_dataset_id : id ,
@@ -573,24 +578,39 @@ angular
573
578
* <div><span class="badge get">get</span><code>/datasets/:learned_dataset_id</code></div>
574
579
* <div><span class="badge get">get</span><code>/datasets/:tested_dataset_id</code></div>
575
580
*
576
- * @param {String } id Identifier of an original dataset
581
+ * @see https://github.com/yllieth/predicsis_ml_sdk-javascript/issues/19
582
+ * <em>When datasets have only 2 splits, this function works well, but the subsets array may contain
583
+ * multiple train/test subsets, and only checking child.sampling > 0 is not enough.
584
+ * Indeed, when there is more than one train subset (where sampling > 0), this function
585
+ * will return only one of them.</em>
586
+ *
587
+ * To solve that issue, we look for <code>dictionary.dataset_ids</code> instead of
588
+ * <code>dataset.children_dataset_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
589
+ * to the right dictionary, which contains the selected target.
590
+ *
591
+ * @param {String } datasetId parent Dataset identifier
592
+ * @param {String } dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
577
593
* @return {Promise }
578
594
* <ul>
579
595
* <li><code>children.train</code>: learning dataset</li>
580
596
* <li><code>children.test</code>: testing dataset</li>
581
597
* </ul>
582
598
*/
583
- this . getChildren = function ( id ) {
584
- return self . get ( id )
585
- . then ( function ( originalDataset ) {
586
- return self . all ( originalDataset . children_dataset_ids ) ;
599
+ this . getChildren = function ( datasetId , dictionaryId ) {
600
+ var Dictionaries = $injector . get ( 'Dictionaries' ) ;
601
+
602
+ return Dictionaries . get ( dictionaryId )
603
+ . then ( function ( dictionary ) {
604
+ return self . all ( dictionary . dataset_ids ) ;
587
605
} )
588
- . then ( function ( subsets ) {
589
- return subsets . reduce ( function ( memo , child ) {
590
- if ( child . sampling > 0 ) {
591
- memo . train = child ;
592
- } else {
593
- memo . test = child ;
606
+ . then ( function ( childrenCandidates ) {
607
+ return childrenCandidates . reduce ( function ( memo , child ) {
608
+ if ( child . parent_dataset_id === datasetId ) {
609
+ if ( self . isTrainPart ( child , DEFAULT_SAMPLING ) ) {
610
+ memo . train = child ;
611
+ } else if ( self . isTestPart ( child , - DEFAULT_SAMPLING ) ) {
612
+ memo . test = child ;
613
+ }
594
614
}
595
615
596
616
return memo ;
@@ -723,25 +743,40 @@ angular
723
743
* @methodOf predicsis.jsSDK.models.Datasets
724
744
* @name isTrainPart
725
745
* @description Tells if a dataset is a train subset.
726
- * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive
746
+ * <b>Note:</b> A dataset is considered as a train subset if its sampling is positive and equal to the absolute value of the given <kbd>sampling</kbd>.
747
+ * The given <kbd>sampling</kbd> must be between -100 and 100.
727
748
* @param {Object } dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
749
+ * @param {Number } [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
750
+ * compute a positive one. (If you give 70, we'll use 70, and if you give -70, we'll use 70)
728
751
* @return {Boolean } <kbd>true</kbd> / <kbd>false</kbd>
729
752
*/
730
- this . isTrainPart = function ( dataset ) {
731
- return this . isChild ( dataset ) && dataset . sampling > 0 ;
753
+ this . isTrainPart = function ( dataset , sampling ) {
754
+ sampling = sampling || DEFAULT_SAMPLING ;
755
+
756
+ return this . isChild ( dataset )
757
+ && angular . isNumber ( sampling )
758
+ && - 100 <= sampling && sampling <= 100
759
+ && dataset . sampling === Math . abs ( sampling ) ;
732
760
} ;
733
761
734
762
/**
735
763
* @ngdoc function
736
764
* @methodOf predicsis.jsSDK.models.Datasets
737
765
* @name isTestPart
738
766
* @description Tells if a dataset is a test subset.
739
- * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative
767
+ * <b>Note:</b> A dataset is considered as a test subset if its sampling is negative and equal to the opposite of the absolute value of the given <kbd>sampling</kbd>.
740
768
* @param {Object } dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
769
+ * @param {Number } [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
770
+ * compute a negative one. (If you give 70, we'll use -70, and if you give -70, we'll use -70)
741
771
* @return {Boolean } <kbd>true</kbd> / <kbd>false</kbd>
742
772
*/
743
- this . isTestPart = function ( dataset ) {
744
- return this . isChild ( dataset ) && dataset . sampling < 0 ;
773
+ this . isTestPart = function ( dataset , sampling ) {
774
+ sampling = sampling || DEFAULT_SAMPLING ;
775
+
776
+ return this . isChild ( dataset )
777
+ && angular . isNumber ( sampling )
778
+ && - 100 <= sampling && sampling <= 100
779
+ && dataset . sampling === - Math . abs ( sampling ) ;
745
780
} ;
746
781
747
782
/**
@@ -753,7 +788,7 @@ angular
753
788
* @return {Boolean } <kbd>true</kbd> / <kbd>false</kbd>
754
789
*/
755
790
this . isFormatted = function ( dataset ) {
756
- return Boolean ( dataset . header !== null ) && Boolean ( dataset . separator !== null )
791
+ return Boolean ( dataset . header !== null ) && Boolean ( dataset . separator !== null ) ;
757
792
} ;
758
793
759
794
/**
@@ -1631,7 +1666,7 @@ angular
1631
1666
1632
1667
$rootScope . $broadcast ( 'jsSDK.learn.start-retrieving-train-dataset' ) ;
1633
1668
1634
- return Datasets . getChildren ( project . learning_dataset_id )
1669
+ return Datasets . getChildren ( project . learning_dataset_id , project . dictionary_id )
1635
1670
// create preparation rules
1636
1671
. then ( function ( children ) {
1637
1672
if ( ! children . train ) {
@@ -2563,7 +2598,7 @@ angular
2563
2598
var reports = function ( ) { return Restangular . all ( 'reports' ) ; } ;
2564
2599
function createClassifierEvaluationReport ( project , type ) {
2565
2600
var Datasets = $injector . get ( 'Datasets' ) ;
2566
- return Datasets . getChildren ( project . learning_dataset_id )
2601
+ return Datasets . getChildren ( project . learning_dataset_id , project . dictionary_id )
2567
2602
. then ( function ( children ) {
2568
2603
return self . create ( {
2569
2604
type : 'classifier_evaluation' ,
@@ -3324,7 +3359,7 @@ angular
3324
3359
function upload ( uploadObject , xhr2 , credential , file ) {
3325
3360
3326
3361
var endpoint = credential . signed_url ;
3327
- var headers = { } , body = file , method = 'PUT' ;
3362
+ var body = file , method = 'PUT' ;
3328
3363
3329
3364
xhr2 . open ( method , endpoint , true ) ;
3330
3365
0 commit comments