Skip to content

Commit 3f4a5df

Browse files
committed
Merge pull request #484 from CodeNow/SAN-773/file-hash
San 773/file hash
2 parents 841ac2a + 277d1ac commit 3f4a5df

File tree

11 files changed

+605
-125
lines changed

11 files changed

+605
-125
lines changed

lib/middlewares/passport.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ function fetchOrCreateUser (accessToken, refreshToken, profile, done) {
6161
github.user.getEmails({
6262
user: profile.id
6363
}, function (err, emails) {
64-
if (err) { cb(err); }
64+
if (err) { return cb(err); }
6565

6666
var primaryEmail = find(emails, hasProps({ primary: true }));
6767
if (!primaryEmail) {

lib/models/apis/docker.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,8 @@ Docker.prototype.startImageBuilderAndWait = function (sessionUser, version, cont
217217
dockerTag: dockerTag,
218218
buildLog: buildLogData,
219219
dockerHost: self.dockerHost,
220-
versionId: version._id
220+
versionId: version._id,
221+
completed: new Date()
221222
});
222223
var split = dockerTag.split(':');
223224
var imageName = split[0];

lib/models/mongo/context-version.js

Lines changed: 135 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ var keypather = require('keypather')();
2020
var find = require('101/find');
2121
var equals = require('101/equals');
2222
var noop = require('101/noop');
23-
// var error = require('error');
2423
var createCount = require('callback-count');
24+
var dogstatsd = require('models/datadog');
25+
2526
/**
2627
* d1 >= d2
2728
* @param {Date} d1 date1
@@ -387,7 +388,7 @@ ContextVersionSchema.methods.setBuildCompleted = function (dockerInfo, cb) {
387388
cb(Boom.badRequest('ContextVersion requires dockerImage'));
388389
}
389390
else {
390-
var now = Date.now();
391+
var now = dockerInfo.completed;
391392
ContextVersion.findOneAndUpdate({
392393
_id: contextVersion._id,
393394
'build.started': {
@@ -476,22 +477,7 @@ ContextVersionSchema.methods.dedupe = function (callback) {
476477
'build.started': { $exists: true },
477478
infraCodeVersion: contextVersion.infraCodeVersion
478479
};
479-
if (contextVersion.appCodeVersions.length) {
480-
query.$and = contextVersion.appCodeVersions.map(function (acv) {
481-
return {
482-
appCodeVersions: {
483-
$elemMatch: {
484-
lowerRepo: acv.lowerRepo,
485-
commit: acv.commit
486-
}
487-
}
488-
};
489-
});
490-
query.$and.push({appCodeVersions: { $size: contextVersion.appCodeVersions.length }});
491-
}
492-
else {
493-
query.appCodeVersions = { $size: 0 };
494-
}
480+
query = addAppCodeVersionQuery(contextVersion, query);
495481
opts = {
496482
sort : '-build.started',
497483
limit: 1
@@ -704,4 +690,135 @@ ContextVersionSchema.statics.modifyAppCodeVersionByRepo =
704690
}, cb);
705691
};
706692

693+
/**
 * Looks for another contextVersion build with the same infra-code hash and
 * the same app-code versions; if one is found, copies that build onto this
 * contextVersion instead of building again.
 *
 * Steps (run in series): hash the infraCodeVersion, persist the hash on this
 * contextVersion, look for the oldest pending duplicate, fall back to the
 * youngest completed duplicate, then copy the duplicate's build if any.
 *
 * @param {Function} callback callback(err, self) when no duplicate exists;
 *   when a duplicate is found the arguments are whatever
 *   copyBuildFromContextVersion hands to its callback
 */
ContextVersionSchema.methods.dedupeBuild = function (callback) {
  var self = this;
  var icvId = self.infraCodeVersion;
  async.waterfall([
    getHash,
    setHash,
    findPendingDupes,
    findCompletedDupes, // must be done after pending due to race
    replaceIfDupe
  ], callback);

  // compute the hash of the infraCodeVersion referenced by this contextVersion
  function getHash (cb) {
    InfraCodeVersion.findById(icvId, function (err, icv) {
      if (err) { return cb(err); }
      // findById yields null (not an error) when no document matches
      if (!icv) {
        return cb(Boom.notFound('InfraCodeVersion not found'));
      }
      icv.getHash(cb);
    });
  }

  // hash should be set here so dedupe will catch 2 builds coming at the same time
  function setHash (hash, cb) {
    self.update({
      $set: {
        'build.hash' : hash
      }
    }, function (err) {
      if (err) { return cb(err); }
      // keep the in-memory document in sync; the finders below read it
      self.build.hash = hash;
      cb();
    });
  }

  // Shared finder: first contextVersion (per `sort`) that matches this one's
  // build hash and app-code versions, excluding this build by build._id.
  // `completed` selects builds that do (true) or do not (false) have
  // build.completed set. Yields null when nothing matches.
  function findDupe (completed, sort, cb) {
    var query = {
      'build.completed': { $exists: completed },
      'build.hash': self.build.hash,
      'build._id': { $ne: self.build._id }
    };
    query = addAppCodeVersionQuery(self, query);
    var opts = {
      sort : sort,
      limit: 1
    };
    ContextVersion.find(query, null, opts, function (err, duplicates) {
      if (err) { return cb(err); }
      cb(null, duplicates.length === 0 ? null : duplicates[0]);
    });
  }

  // find the oldest pending build (excluding self) which matches hash and app-code
  function findPendingDupes (cb) {
    findDupe(false, 'build.started', cb);
  }

  // find the youngest completed build (excluding self) which matches hash and app-code
  function findCompletedDupes (pending, cb) {
    // always use oldest pending if exists
    // else use youngest completed if exists
    // else no dupe
    if (pending) {
      return cb(null, pending);
    }
    findDupe(true, '-build.started', cb);
  }

  function replaceIfDupe (dupe, cb) {
    if (dupe) {
      dogstatsd.increment('api.contextVersion.build.deduped');
      // NOTE(review): on this path the final callback receives the update()
      // results from copyBuildFromContextVersion, not `self` -- confirm
      // callers do not rely on getting the contextVersion back here.
      self.copyBuildFromContextVersion(dupe, cb);
    } else {
      dogstatsd.increment('api.contextVersion.build.noDupe');
      cb(null, self);
    }
  }
};
792+
793+
/**
 * Adds conditions to a mongo query so it only matches contextVersions whose
 * appCodeVersions are the same set (by lowerRepo + commit) as the given one.
 * The query object is modified in place and also returned.
 * @param {Object} contextVersion object with an appCodeVersions array
 *   (a missing array is treated as empty)
 * @param {Object} query mongo query to extend; clauses already present under
 *   $and are kept and the new ones are appended
 * @return {Object} the same query object
 */
function addAppCodeVersionQuery(contextVersion, query) {
  var appCodeVersions = contextVersion.appCodeVersions || [];
  if (appCodeVersions.length === 0) {
    query.appCodeVersions = { $size: 0 };
    return query;
  }
  // one $elemMatch per app-code version ...
  var clauses = appCodeVersions.map(function (acv) {
    return {
      appCodeVersions: {
        $elemMatch: {
          lowerRepo: acv.lowerRepo,
          commit: acv.commit
        }
      }
    };
  });
  // ... plus an exact size match so supersets are not treated as duplicates
  clauses.push({ appCodeVersions: { $size: appCodeVersions.length } });
  // append rather than overwrite so a caller-supplied $and is preserved
  query.$and = (query.$and || []).concat(clauses);
  return query;
}
811+
812+
/**
 * Copies the build and containerId of another contextVersion onto this one.
 * The in-memory document gets build.dupeFound flagged and its containerId
 * replaced; the persisted document gets the other version's build and
 * containerId via a $set update.
 * @param {Object} contextVersion contextVersion to copy the build from
 * @param {Function} cb callback passed straight through to update()
 */
ContextVersionSchema.methods.copyBuildFromContextVersion = function (contextVersion, cb) {
  var target = this;
  target.build.dupeFound = true;
  target.containerId = contextVersion.containerId;
  var changes = {
    'build': contextVersion.build,
    'containerId': contextVersion.containerId
  };
  target.update({ $set: changes }, cb);
};
823+
707824
var ContextVersion = module.exports = mongoose.model('ContextVersions', ContextVersionSchema);

lib/models/mongo/infra-code-version.js

Lines changed: 99 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ var last = require('101/last');
77
var isFunction = require('101/is-function');
88
var debug = require('debug')('runnable-api:infra-code-version:model');
99
var regexpQuote = require('regexp-quote');
10-
var crypto = require('crypto');
10+
var bcrypt = require('bcrypt');
11+
var jsonHash = require('json-stable-stringify');
12+
var dogstatsd = require('models/datadog');
13+
var uuid = require('uuid');
1114

1215
var path = require('path');
1316
var join = path.join;
@@ -216,38 +219,46 @@ InfraCodeVersionSchema.methods.createFs = function (data, cb) {
216219
infraCodeVersion.files.push(s3Data);
217220
var fileData = infraCodeVersion.files.pop().toJSON();
218221
var fileKey, dirKey;
219-
var attrs = {
220-
edited: true
221-
};
222+
222223
if (last(fileData.Key) === '/') {
223224
fileKey = fileData.Key.slice(0, -1);
224225
dirKey = fileData.Key;
225-
attrs.hash = hashString(data.body.toString());
226+
update();
226227
}
227228
else {
228229
fileKey = fileData.Key;
229230
dirKey = join(fileData.Key, '/');
231+
hashString(data.body, function (err, hash) {
232+
if (err) { return cb(err); }
233+
fileData.hash = hash;
234+
update();
235+
});
230236
}
237+
231238
// atomic update
232-
InfraCodeVersion.update({
233-
_id: infraCodeVersion._id,
234-
'files.Key': { $nin: [ fileKey, dirKey ] }
235-
}, {
236-
$push: {
237-
files: fileData
238-
},
239-
$set: attrs
240-
}, function (err, numUpdated) {
241-
if (err) {
242-
cb(err);
243-
}
244-
else if (numUpdated === 0) {
245-
cb(Boom.conflict('Fs at path already exists: '+fullpath));
246-
}
247-
else {
248-
cb(null, fileData);
249-
}
250-
});
239+
// Pushes fileData onto the version's files and marks it edited, but only
// when neither the file key nor the dir key is already present (the $nin
// condition makes the existence check and the push a single update).
function update () {
  var conditions = {
    _id: infraCodeVersion._id,
    'files.Key': { $nin: [ fileKey, dirKey ] }
  };
  var changes = {
    $push: {
      files: fileData
    },
    $set: {
      edited: true
    }
  };
  InfraCodeVersion.update(conditions, changes, function (err, numUpdated) {
    if (err) {
      return cb(err);
    }
    // nothing updated means the $nin condition failed: the path is taken
    if (numUpdated === 0) {
      return cb(Boom.conflict('Fs at path already exists: '+fullpath));
    }
    cb(null, fileData);
  });
}
251262
}
252263
};
253264

@@ -262,6 +273,7 @@ InfraCodeVersionSchema.methods.updateFile = function (fullpath, body, cb) {
262273
async.waterfall([
263274
findFile,
264275
updateFile,
276+
calcHash,
265277
updateModel
266278
], cb);
267279
function findFile (cb) {
@@ -282,6 +294,13 @@ InfraCodeVersionSchema.methods.updateFile = function (fullpath, body, cb) {
282294
cb(err, file, fileData);
283295
});
284296
}
297+
// Waterfall step: hash the new file body, store it on fileData.hash and
// pass (file, fileData) along to the next step.
function calcHash (file, fileData, cb) {
  hashString(body, function onHashed (hashErr, digest) {
    if (hashErr) {
      return cb(hashErr);
    }
    fileData.hash = digest;
    cb(null, file, fileData);
  });
}
285304
function updateModel (file, fileData, cb) {
286305
file.set(fileData);
287306
InfraCodeVersion.update({
@@ -290,7 +309,6 @@ InfraCodeVersionSchema.methods.updateFile = function (fullpath, body, cb) {
290309
}, {
291310
$set: {
292311
'files.$': file.toJSON(),
293-
'hash': hashString(body.toString()),
294312
edited: true
295313
}
296314
}, function (err) {
@@ -544,13 +562,66 @@ InfraCodeVersionSchema.methods.copyFilesFromSource = function (sourceInfraCodeVe
544562
sourceVersion.files,
545563
function (file, cb) {
546564
// this protects the scope of bucket
547-
bucket.copyFileFrom(file, cb);
565+
bucket.copyFileFrom(file, function(err, newFile) {
566+
if (err) { return cb(err); }
567+
newFile.hash = file.hash;
568+
cb(null, newFile);
569+
});
548570
},
549571
callback);
550572
}
551573
};
552574

553-
function hashString(data) {
554-
return crypto.createHash('md5').update(data.toString().trim()).digest('hex');
575+
/**
 * Computes one hash representing the files of this infraCodeVersion.
 * Re-fetches the document by _id, builds a map of file path -> file hash
 * (directories get the placeholder '1'), serializes the map with
 * json-stable-stringify and hashes that string.
 * If any non-directory file has no stored hash, a fresh uuid is returned
 * instead so the value never matches another version (dedupe is skipped).
 * @param {Function} cb callback(err, hash)
 */
InfraCodeVersionSchema.methods.getHash = function (cb) {
  InfraCodeVersion.findOne({
    _id: this._id
  }, function (err, infraCodeVersion) {
    if (err) { return cb(err); }
    // findOne yields null (not an error) when the document no longer exists
    if (!infraCodeVersion) {
      return cb(Boom.notFound('InfraCodeVersion not found'));
    }
    var hashMap = {};
    var invalidate = false;
    infraCodeVersion.files.forEach(function (item) {
      // keep the key from its first '/' onwards
      // NOTE(review): presumably this strips a per-version prefix so equal
      // files in different versions share a path -- confirm against the
      // code that builds Key
      var filePath = item.Key.substr(item.Key.indexOf('/'));
      if (item.isDir) {
        // ensure dirs have some hash
        hashMap[filePath] = '1';
      } else if (item.hash) {
        hashMap[filePath] = item.hash;
      } else {
        // file without hash. this should not happen.
        // skip dedupe by returning something that will never match
        invalidate = true;
      }
    });

    if (invalidate) {
      cb(null, uuid());
    } else {
      hashString(jsonHash(hashMap), cb);
    }
  });
};
607+
608+
609+
/**
 * Hashes file content so that files differing only in trailing whitespace or
 * blank lines produce the same value. The content is normalized, digested
 * with SHA-256, and the digest is hashed with bcrypt using a fixed salt, so
 * equal normalized content always yields the same hash.
 *
 * The SHA-256 step is required because bcrypt only uses the first 72 bytes
 * of its input; hashing the raw content directly would make any two inputs
 * sharing a 72-byte prefix collide. Hashes produced before this step was
 * added will not equal hashes produced with it.
 *
 * @param {String|Buffer} data content to hash; null/undefined is treated as
 *   empty content, anything else is converted with String()
 * @param {Function} cb callback(err, hash)
 */
function hashString(data, cb) {
  // local require: this is the only user of crypto in the file
  var crypto = require('crypto');
  // salt from require('bcrypt').genSaltSync(1);
  var salt = '$2a$04$fLg/VU5eeDAUARmPVfyUo.';
  var start = new Date();
  // callers may pass a Buffer (or nothing for an empty file); .replace only
  // exists on strings, so coerce first instead of throwing synchronously
  var text = (data === null || data === undefined) ? '' : String(data);
  var normalized = text
    .replace(/[\s\uFEFF\xA0]+\n/g, '\n') // trim whitespace after line
    .replace(/\n[\s\uFEFF\xA0]*\n/g, '\n') // remove blank lines
    .replace(/^[\s\uFEFF\xA0]*\n/g, '') // remove start of file blank lines
    .replace(/[\s\uFEFF\xA0]+$/g, '\n'); // single newline at end of file
  // 64 hex chars: short enough that bcrypt sees the whole digest
  var digest = crypto.createHash('sha256').update(normalized, 'utf8').digest('hex');
  bcrypt.hash(digest, salt, function (err, hash) {
    if (err) { return cb(err); }
    dogstatsd.timing('api.infraCodeVersion.hashTime', new Date()-start, 1,
      ['length:'+text.length]);
    cb(null, hash);
  });
}
624+
556625
var InfraCodeVersion = module.exports = mongoose.model('InfraCodeVersion', InfraCodeVersionSchema);
626+
627+

0 commit comments

Comments
 (0)