Skip to content

Commit 34d9b88

Browse files
committed
Merge pull request #22 from yllieth/getchildren-add-datasetid-check
Getchildren add datasetid check
2 parents 1842f26 + a23cb98 commit 34d9b88

File tree

7 files changed

+72
-34
lines changed

7 files changed

+72
-34
lines changed

dist/predicsis-jsSDK.js

Lines changed: 58 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ angular
407407
.service('Datasets', function($injector, $q, Restangular, Jobs) {
408408
'use strict';
409409
var self = this;
410+
var DEFAULT_SAMPLING = 70;
410411

411412
function dataset(id) { return Restangular.one('datasets', id); }
412413
function datasets() { return Restangular.all('datasets'); }
@@ -467,7 +468,7 @@ angular
467468
*
468469
* @param {String} fileName used to create the source and the dataset
469470
* @param {String} destFolder AWS key where the dataset has been uploaded
470-
* @param {String} pathName name of the key used to provide destFolder
471+
* @param {String} pathName name of the key used to provide destFolder ('key' for S3, 'object' for swift)
471472
* @return {Promise} Newly created dataset
472473
*/
473474
this.createFromUpload = function(fileName, destFolder, pathName) {
@@ -499,6 +500,10 @@ angular
499500
* <li>Idem for learning/testing filenames</li>
500501
* </ul>
501502
*
503+
* According to @sferrandiz, the <code>split()</code> function is deterministic (if you split the same dataset
504+
* many times, you will always get the same result). There is an insignificant risk of non-deterministic behaviour
505+
* depending on how the random generator was compiled (which can differ from one OS to another).
506+
*
502507
* @param {String} id Dataset id you want to split (called <em>original dataset</em>)
503508
* @param {String} name Name of the original dataset (used to name its subsets)
504509
* @param {String} [filename=name] Name of the original datafile (used to name its subsets' datafiles). If undefined, the value of the <kbd>name</kbd> parameter is used
@@ -511,7 +516,7 @@ angular
511516
*/
512517
this.split = function(id, name, filename, sampling) {
513518
filename = filename || name;
514-
sampling = sampling || 70;
519+
sampling = sampling || DEFAULT_SAMPLING;
515520

516521
var learn = {
517522
parent_dataset_id: id,
@@ -573,24 +578,39 @@ angular
573578
* <div><span class="badge get">get</span><code>/datasets/:learned_dataset_id</code></div>
574579
* <div><span class="badge get">get</span><code>/datasets/:tested_dataset_id</code></div>
575580
*
576-
* @param {String} id Identifier of an original dataset
581+
* @see https://github.com/yllieth/predicsis_ml_sdk-javascript/issues/19
582+
* <em>When a dataset has only 2 splits, this function works well, but the subsets array may contain
583+
* multiple train/test subsets, and only checking child.sampling > 0 is not enough.
584+
* Indeed, when there is more than one train subset (where sampling > 0), this function
585+
* will return the first one.</em>
586+
*
587+
* To solve that issue, we look up <code>dictionary.dataset_ids</code> instead of
588+
* <code>dataset.children_dataset_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
589+
* to the right dictionary, which contains the selected target.
590+
*
591+
* @param {String} datasetId parent Dataset identifier
592+
* @param {String} dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
577593
* @return {Promise}
578594
* <ul>
579595
* <li><code>children.train</code>: learning dataset</li>
580596
* <li><code>children.test</code>: testing dataset</li>
581597
* </ul>
582598
*/
583-
this.getChildren = function(id) {
584-
return self.get(id)
585-
.then(function(originalDataset) {
586-
return self.all(originalDataset.children_dataset_ids);
599+
this.getChildren = function(datasetId, dictionaryId) {
600+
var Dictionaries = $injector.get('Dictionaries');
601+
602+
return Dictionaries.get(dictionaryId)
603+
.then(function(dictionary) {
604+
return self.all(dictionary.dataset_ids);
587605
})
588-
.then(function(subsets) {
589-
return subsets.reduce(function(memo, child) {
590-
if (child.sampling > 0) {
591-
memo.train = child;
592-
} else {
593-
memo.test = child;
606+
.then(function(childrenCandidates) {
607+
return childrenCandidates.reduce(function(memo, child) {
608+
if (child.parent_dataset_id === datasetId) {
609+
if (self.isTrainPart(child, DEFAULT_SAMPLING)) {
610+
memo.train = child;
611+
} else if (self.isTestPart(child, -DEFAULT_SAMPLING)) {
612+
memo.test = child;
613+
}
594614
}
595615

596616
return memo;
@@ -723,25 +743,40 @@ angular
723743
* @methodOf predicsis.jsSDK.models.Datasets
724744
* @name isTrainPart
725745
* @description Tells if a dataset is a train subset.
726-
* <b>Note:</b> A dataset is considered as a train subset if its sampling is positive
746+
* <b>Note:</b> A dataset is considered as a train subset if its sampling is positive and equal to the given <kbd>sampling</kbd> value.
747+
* The given <kbd>sampling</kbd> must be a number between -100 and 100.
727748
* @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
749+
* @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
750+
* compute a positive one. (If you give 70, we'll use 70, and if you give -70, we'll use 70)
728751
* @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
729752
*/
730-
this.isTrainPart = function(dataset) {
731-
return this.isChild(dataset) && dataset.sampling > 0;
753+
this.isTrainPart = function(dataset, sampling) {
754+
sampling = sampling || DEFAULT_SAMPLING;
755+
756+
return this.isChild(dataset)
757+
&& angular.isNumber(sampling)
758+
&& -100 <= sampling && sampling <= 100
759+
&& dataset.sampling === Math.abs(sampling);
732760
};
733761

734762
/**
735763
* @ngdoc function
736764
* @methodOf predicsis.jsSDK.models.Datasets
737765
* @name isTestPart
738766
* @description Tells if a dataset is a test subset.
739-
* <b>Note:</b> A dataset is considered as a test subset if its sampling is negative
767+
* <b>Note:</b> A dataset is considered as a test subset if its sampling is negative and equal to the given <kbd>sampling</kbd> value.
740768
* @param {Object} dataset Instance of {@link predicsis.jsSDK.models.Datasets dataset}
769+
* @param {Number} [sampling=70] You can give a positive or negative value for the <kbd>sampling</kbd>, we automatically
770+
* compute a negative one. (If you give 70, we'll use -70, and if you give -70, we'll use -70)
741771
* @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
742772
*/
743-
this.isTestPart = function(dataset) {
744-
return this.isChild(dataset) && dataset.sampling < 0;
773+
this.isTestPart = function(dataset, sampling) {
774+
sampling = sampling || DEFAULT_SAMPLING;
775+
776+
return this.isChild(dataset)
777+
&& angular.isNumber(sampling)
778+
&& -100 <= sampling && sampling <= 100
779+
&& dataset.sampling === -Math.abs(sampling);
745780
};
746781

747782
/**
@@ -753,7 +788,7 @@ angular
753788
* @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
754789
*/
755790
this.isFormatted = function(dataset) {
756-
return Boolean(dataset.header !== null) && Boolean(dataset.separator !== null)
791+
return Boolean(dataset.header !== null) && Boolean(dataset.separator !== null);
757792
};
758793

759794
/**
@@ -1631,7 +1666,7 @@ angular
16311666

16321667
$rootScope.$broadcast('jsSDK.learn.start-retrieving-train-dataset');
16331668

1634-
return Datasets.getChildren(project.learning_dataset_id)
1669+
return Datasets.getChildren(project.learning_dataset_id, project.dictionary_id)
16351670
// create preparation rules
16361671
.then(function(children) {
16371672
if(!children.train) {
@@ -2563,7 +2598,7 @@ angular
25632598
var reports = function() { return Restangular.all('reports'); };
25642599
function createClassifierEvaluationReport(project, type) {
25652600
var Datasets = $injector.get('Datasets');
2566-
return Datasets.getChildren(project.learning_dataset_id)
2601+
return Datasets.getChildren(project.learning_dataset_id, project.dictionary_id)
25672602
.then(function(children) {
25682603
return self.create({
25692604
type: 'classifier_evaluation',
@@ -3324,7 +3359,7 @@ angular
33243359
function upload(uploadObject, xhr2, credential, file) {
33253360

33263361
var endpoint = credential.signed_url;
3327-
var headers = {}, body = file, method = 'PUT';
3362+
var body = file, method = 'PUT';
33283363

33293364
xhr2.open(method, endpoint, true);
33303365

dist/predicsis-jsSDK.min.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/predicsis-jsSDK.min.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/helper/uploadHelper.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ angular
1717
function upload(uploadObject, xhr2, credential, file) {
1818

1919
var endpoint = credential.signed_url;
20-
var headers = {}, body = file, method = 'PUT';
20+
var body = file, method = 'PUT';
2121

2222
xhr2.open(method, endpoint, true);
2323

lib/model/Datasets.js

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -361,14 +361,15 @@ angular
361361
* <code>dataset.children_dataset_ids</code>. By doing so, we are 100% sure that the fetched datasets are linked
362362
* to the right dictionary, which contains the selected target.
363363
*
364+
* @param {String} datasetId parent Dataset identifier
364365
* @param {String} dictionaryId Identifier of the {@link predicsis.jsSDK.models.Dictionaries Dictionary}
365366
* @return {Promise}
366367
* <ul>
367368
* <li><code>children.train</code>: learning dataset</li>
368369
* <li><code>children.test</code>: testing dataset</li>
369370
* </ul>
370371
*/
371-
this.getChildren = function(dictionaryId) {
372+
this.getChildren = function(datasetId, dictionaryId) {
372373
var Dictionaries = $injector.get('Dictionaries');
373374

374375
return Dictionaries.get(dictionaryId)
@@ -377,10 +378,12 @@ angular
377378
})
378379
.then(function(childrenCandidates) {
379380
return childrenCandidates.reduce(function(memo, child) {
380-
if (self.isTrainPart(child, DEFAULT_SAMPLING)) {
381-
memo.train = child;
382-
} else if (self.isTestPart(child, -DEFAULT_SAMPLING)) {
383-
memo.test = child;
381+
if (child.parent_dataset_id === datasetId) {
382+
if (self.isTrainPart(child, DEFAULT_SAMPLING)) {
383+
memo.train = child;
384+
} else if (self.isTestPart(child, -DEFAULT_SAMPLING)) {
385+
memo.test = child;
386+
}
384387
}
385388

386389
return memo;
@@ -558,7 +561,7 @@ angular
558561
* @return {Boolean} <kbd>true</kbd> / <kbd>false</kbd>
559562
*/
560563
this.isFormatted = function(dataset) {
561-
return Boolean(dataset.header !== null) && Boolean(dataset.separator !== null)
564+
return Boolean(dataset.header !== null) && Boolean(dataset.separator !== null);
562565
};
563566

564567
/**

lib/model/Models.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ angular
269269

270270
$rootScope.$broadcast('jsSDK.learn.start-retrieving-train-dataset');
271271

272-
return Datasets.getChildren(project.learning_dataset_id)
272+
return Datasets.getChildren(project.learning_dataset_id, project.dictionary_id)
273273
// create preparation rules
274274
.then(function(children) {
275275
if(!children.train) {

lib/model/Reports.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ angular
7171
var reports = function() { return Restangular.all('reports'); };
7272
function createClassifierEvaluationReport(project, type) {
7373
var Datasets = $injector.get('Datasets');
74-
return Datasets.getChildren(project.learning_dataset_id)
74+
return Datasets.getChildren(project.learning_dataset_id, project.dictionary_id)
7575
.then(function(children) {
7676
return self.create({
7777
type: 'classifier_evaluation',

0 commit comments

Comments
 (0)