From 55633cc0c2ef5aaf3fe7889ba34f93ab962edc75 Mon Sep 17 00:00:00 2001 From: ZDisket <30500847+ZDisket@users.noreply.github.com> Date: Tue, 27 Feb 2024 00:14:50 -0300 Subject: [PATCH 1/3] Support v-prediction, add prediction type option, refactor Euler --- .../Schedulers/DpmPlusPlus2MScheduler.cpp | 140 ++++++++++-------- .../Schedulers/EulerAncestralScheduler.cpp | 82 +++++++--- .../Schedulers/StableDiffusionScheduler.cpp | 1 + .../Schedulers/StableDiffusionScheduler.h | 8 + .../StableDiffustionInferer.cpp | 2 +- .../MachineLearning/StableDiffustionInferer.h | 1 + 6 files changed, 146 insertions(+), 88 deletions(-) diff --git a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/DpmPlusPlus2MScheduler.cpp b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/DpmPlusPlus2MScheduler.cpp index 6a0f182..c1814b3 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/DpmPlusPlus2MScheduler.cpp +++ b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/DpmPlusPlus2MScheduler.cpp @@ -5,78 +5,92 @@ using namespace std; namespace Axodox::MachineLearning { - DpmPlusPlus2MScheduler::DpmPlusPlus2MScheduler(const StableDiffusionSchedulerOptions& options) : - StableDiffusionScheduler(options) - { - //Apply Karras sigmas - const auto rho = 7.f; - - auto sigmaMax = _sigmas.front(); - auto sigmaMin = *(_sigmas.end() - 2); - - auto invRhoMin = pow(sigmaMin, 1.f / rho); - auto invRhoMax = pow(sigmaMax, 1.f / rho); - - auto stepCount = _sigmas.size() - 1; - auto stepSize = 1.f / (stepCount - 1); - vector timesteps(_timesteps.size()); - vector sigmas(_sigmas.size()); - for (auto i = 0; i < stepCount; i++) + DpmPlusPlus2MScheduler::DpmPlusPlus2MScheduler(const StableDiffusionSchedulerOptions& options) : + StableDiffusionScheduler(options) { - auto t = i * stepSize; - sigmas[i] = pow(invRhoMax + t * (invRhoMin - invRhoMax), rho); - timesteps[i] = SigmaToTime(sigmas[i]); - } - - _sigmas = move(sigmas); - _timesteps = move(timesteps); + //Apply Karras sigmas + 
const auto rho = 7.f; - //_sigmas = { 14.614643096923828f, 10.81978988647461f, 7.9029436111450195f, 5.687837600708008f, 4.027743816375732f, 2.8015711307525635f, 1.9103621244430542f, 1.2741049528121948f, 0.8288719058036804f, 0.524259626865387f, 0.3211216926574707f, 0.18956425786018372f, 0.10719937086105347f, 0.05763240531086922f, 0.02916753850877285f, 0.f }; - //_timesteps = { 999.f, 947.6224f, 889.5464f, 823.0464f, 745.8676f, 655.3113f, 549.0170f, 427.4898f, 298.6582f, 179.8307f, 89.9427f, 36.5918f, 12.0011f, 2.8839f, 0.f }; - } + auto sigmaMax = _sigmas.front(); + auto sigmaMin = *(_sigmas.end() - 2); - Tensor DpmPlusPlus2MScheduler::ApplyStep(const Tensor& input, const Tensor& output, size_t step) - { - auto currentSigma = _sigmas[step]; - auto nextSigma = _sigmas[step + 1]; + auto invRhoMin = pow(sigmaMin, 1.f / rho); + auto invRhoMax = pow(sigmaMax, 1.f / rho); - auto predictedOriginalSample = input.BinaryOperation(output, [currentSigma](float a, float b) { return a - currentSigma * b; }); + auto stepCount = _sigmas.size() - 1; + auto stepSize = 1.f / (stepCount - 1); + vector timesteps(_timesteps.size()); + vector sigmas(_sigmas.size()); + for (auto i = 0; i < stepCount; i++) + { + auto t = i * stepSize; + sigmas[i] = pow(invRhoMax + t * (invRhoMin - invRhoMax), rho); + timesteps[i] = SigmaToTime(sigmas[i]); + } - float t = -log(currentSigma); - float tNext = -log(nextSigma); - float h = tNext - t; + _sigmas = move(sigmas); + _timesteps = move(timesteps); - Tensor denoised; - if (!_previousPredictedSample || nextSigma == 0) - { - denoised = predictedOriginalSample; } - else + + Tensor DpmPlusPlus2MScheduler::ApplyStep(const Tensor& input, const Tensor& output, size_t step) { - float hLast = t - -log(_sigmas[step - 1]); - float r = hLast / h; + auto currentSigma = _sigmas[step]; + auto nextSigma = _sigmas[step + 1]; - auto x = 1.f + 1.f / (2.f * r); - auto y = 1.f / (2.f * r); + Tensor predictedOriginalSample; - denoised = 
predictedOriginalSample.BinaryOperation(_previousPredictedSample, [=](float a, float b) { - return x * a - y * b; - }); - } + // Implement the v-prediction formula + if (_predictiontype == StableDiffusionSchedulerPredictionType::V) + { - if (nextSigma != 0) - { - _previousPredictedSample = predictedOriginalSample; - } - else - { - _previousPredictedSample.Reset(); - } + predictedOriginalSample = output.BinaryOperation(input, [currentSigma](float model_output, float sample) { + float sigmaSquaredPlusOne = currentSigma * currentSigma + 1; + return (model_output * (-currentSigma / std::sqrt(sigmaSquaredPlusOne))) + (sample / sigmaSquaredPlusOne); + }); + + } + else + { + predictedOriginalSample = input.BinaryOperation(output, [currentSigma](float a, float b) { return a - currentSigma * b; }); + + } + + float t = -log(currentSigma); + float tNext = -log(nextSigma); + float h = tNext - t; + + Tensor denoised; + if (!_previousPredictedSample || nextSigma == 0) + { + denoised = predictedOriginalSample; + } + else + { + float hLast = t - -log(_sigmas[step - 1]); + float r = hLast / h; - float x = nextSigma / currentSigma; - float y = exp(-h) - 1.f; - return input.BinaryOperation(denoised, [=](float a, float b) { - return a * x - b * y; - }); - } -} \ No newline at end of file + auto x = 1.f + 1.f / (2.f * r); + auto y = 1.f / (2.f * r); + + denoised = predictedOriginalSample.BinaryOperation(_previousPredictedSample, [=](float a, float b) { + return x * a - y * b; + }); + } + + if (nextSigma != 0) + { + _previousPredictedSample = predictedOriginalSample; + } + else + { + _previousPredictedSample.Reset(); + } + + float x = nextSigma / currentSigma; + float y = exp(-h) - 1.f; + return input.BinaryOperation(denoised, [=](float a, float b) { + return a * x - b * y; + }); + } +} diff --git a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/EulerAncestralScheduler.cpp b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/EulerAncestralScheduler.cpp index 
12627e2..fd85680 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/EulerAncestralScheduler.cpp +++ b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/EulerAncestralScheduler.cpp @@ -5,28 +5,62 @@ using namespace std; namespace Axodox::MachineLearning { - EulerAncestralScheduler::EulerAncestralScheduler(const StableDiffusionSchedulerOptions& options) : - StableDiffusionScheduler(options) - { } - - Tensor EulerAncestralScheduler::ApplyStep(const Tensor& input, const Tensor& output, size_t step) - { - auto currentSigma = _sigmas[step]; - auto nextSigma = _sigmas[step + 1]; - auto predictedOriginalSample = input.BinaryOperation(output, [currentSigma](float a, float b) { return a - currentSigma * b; }); - - //Get ancestral step - auto currentSigmaSquared = currentSigma * currentSigma; - auto nextSigmaSquared = nextSigma * nextSigma; - - float sigmaUp = min(nextSigma, - sqrt((currentSigmaSquared - nextSigmaSquared) * nextSigmaSquared / currentSigmaSquared)); - float sigmaDown = sqrt(nextSigmaSquared - sigmaUp * sigmaUp); - - //Euler method - float dt = sigmaDown - currentSigma; - auto randomNoise = Tensor::CreateRandom(input.Shape, _randoms, sigmaUp); - auto latentDelta = randomNoise.BinaryOperation(output, [dt](float a, float b) { return a + dt * b; }); - return input.BinaryOperation(latentDelta, [](float a, float b) { return a + b; }); - } + EulerAncestralScheduler::EulerAncestralScheduler(const StableDiffusionSchedulerOptions& options) : + StableDiffusionScheduler(options) + { } + + Tensor EulerAncestralScheduler::ApplyStep(const Tensor& input, const Tensor& output, size_t step) + { + auto currentSigma = _sigmas[step]; + auto nextSigma = _sigmas[step + 1]; + + Tensor predictedOriginalSample; + + if (_predictiontype == StableDiffusionSchedulerPredictionType::V) + { + + predictedOriginalSample = output.BinaryOperation(input, [currentSigma](float model_output, float sample) { + float sigmaSquaredPlusOne = currentSigma * currentSigma + 1; + 
return (model_output * (-currentSigma / std::sqrt(sigmaSquaredPlusOne))) + (sample / sigmaSquaredPlusOne); // note: keep the explicit std::sqrt here (the author marked it as vital; the reason is undocumented - TODO confirm) + }); + + } + else + { + predictedOriginalSample = input.BinaryOperation(output, [currentSigma](float a, float b) { return a - currentSigma * b; }); + + } + + + // Calculate sigma squared values for the process + auto currentSigmaSquared = currentSigma * currentSigma; + auto nextSigmaSquared = nextSigma * nextSigma; + + // Calculate sigma_up and sigma_down. NOTE(review): if sigmas decrease from step to step (nextSigma < currentSigma), nextSigmaSquared - currentSigmaSquared is negative, so sigmaUp is always 0 and no ancestral noise is added; the code removed by this patch used min(nextSigma, sqrt((currentSigmaSquared - nextSigmaSquared) * nextSigmaSquared / currentSigmaSquared)) - confirm which is intended + float sigmaUp = std::sqrt(max(0.0f, nextSigmaSquared - currentSigmaSquared)); + float sigmaDown = std::sqrt(nextSigmaSquared - sigmaUp * sigmaUp); + + // Calculate dt based on sigma changes + float dt = sigmaDown - currentSigma; + + // Derivative calculation (the 'derivative' here is conceptual, representing the reverse diffusion step) + auto derivative = input.BinaryOperation(predictedOriginalSample, [currentSigma](float inputVal, float predOriginalVal) { + return (inputVal - predOriginalVal) / currentSigma; + }); + + // Update sample with derivative and dt + auto updatedSample = input.BinaryOperation(derivative, [dt](float inputVal, float derivativeVal) { + return inputVal + derivativeVal * dt; + }); + + // Generate random noise scaled by sigmaUp + auto randomNoise = Tensor::CreateRandom(input.Shape, _randoms, sigmaUp); + + // Add noise to the updated sample + updatedSample = updatedSample.BinaryOperation(randomNoise, [](float updatedSampleVal, float noiseVal) { + return updatedSampleVal + noiseVal; + }); + + return updatedSample; + } } \ No newline at end of file diff --git a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/StableDiffusionScheduler.cpp b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/StableDiffusionScheduler.cpp index b005798..7cfc6db 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/StableDiffusionScheduler.cpp +++ 
b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/StableDiffusionScheduler.cpp @@ -81,6 +81,7 @@ namespace Axodox::MachineLearning _trainingSigmas = move(trainingSigmas); _sigmas = move(inferenceSigmas); _timesteps = move(timesteps); + _predictiontype = options.PredictionType; } std::unique_ptr StableDiffusionScheduler::Create(StableDiffusionSchedulerKind kind, const StableDiffusionSchedulerOptions& options) diff --git a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/StableDiffusionScheduler.h b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/StableDiffusionScheduler.h index bd2f745..6285b21 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/StableDiffusionScheduler.h +++ b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/StableDiffusionScheduler.h @@ -9,12 +9,19 @@ namespace Axodox::MachineLearning DpmPlusPlus2M }; + enum class StableDiffusionSchedulerPredictionType { + Epsilon, + V + }; + struct AXODOX_MACHINELEARNING_API StableDiffusionSchedulerOptions { size_t TrainStepCount = 1000; size_t InferenceStepCount = 20; float BetaAtStart = 0.00085f; float BetaAtEnd = 0.012f; + StableDiffusionSchedulerPredictionType PredictionType = StableDiffusionSchedulerPredictionType::Epsilon; + std::span BetasTrained; std::span Randoms; @@ -37,6 +44,7 @@ namespace Axodox::MachineLearning std::vector _timesteps; std::vector _trainingSigmas, _sigmas; std::span _randoms; + StableDiffusionSchedulerPredictionType _predictiontype; float SigmaToTime(float sigma) const; }; diff --git a/Axodox.MachineLearning.Shared/MachineLearning/StableDiffustionInferer.cpp b/Axodox.MachineLearning.Shared/MachineLearning/StableDiffustionInferer.cpp index 82eb0f4..5d7fb44 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/StableDiffustionInferer.cpp +++ b/Axodox.MachineLearning.Shared/MachineLearning/StableDiffustionInferer.cpp @@ -44,7 +44,7 @@ namespace Axodox::MachineLearning context.Randoms.push_back(minstd_rand{ options.Seed + 
uint32_t(i) }); } - context.Scheduler = StableDiffusionScheduler::Create(options.Scheduler, { .InferenceStepCount = options.StepCount, .Randoms = context.Randoms }); + context.Scheduler = StableDiffusionScheduler::Create(options.Scheduler, { .InferenceStepCount = options.StepCount, .PredictionType = options.PredictionType, .Randoms = context.Randoms }); //Schedule steps auto initialStep = size_t(clamp(int(options.StepCount - options.StepCount * options.DenoisingStrength - 1), 0, int(options.StepCount))); diff --git a/Axodox.MachineLearning.Shared/MachineLearning/StableDiffustionInferer.h b/Axodox.MachineLearning.Shared/MachineLearning/StableDiffustionInferer.h index e37112d..b838725 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/StableDiffustionInferer.h +++ b/Axodox.MachineLearning.Shared/MachineLearning/StableDiffustionInferer.h @@ -27,6 +27,7 @@ namespace Axodox::MachineLearning Tensor MaskInput; float DenoisingStrength = 1.f; StableDiffusionSchedulerKind Scheduler = StableDiffusionSchedulerKind::EulerAncestral; + StableDiffusionSchedulerPredictionType PredictionType = StableDiffusionSchedulerPredictionType::Epsilon; void Validate() const; }; From 537499d15a1c16f4c03c5175b6eb9bd96e6dce76 Mon Sep 17 00:00:00 2001 From: ZDisket <30500847+ZDisket@users.noreply.github.com> Date: Sun, 10 Mar 2024 21:55:50 -0300 Subject: [PATCH 2/3] Fix some SDXL models not working properly --- .../MachineLearning/TextEncoder.cpp | 18 ++++++++++++++++-- .../MachineLearning/TextEncoder.h | 5 +++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Axodox.MachineLearning.Shared/MachineLearning/TextEncoder.cpp b/Axodox.MachineLearning.Shared/MachineLearning/TextEncoder.cpp index 75b2f60..9f10622 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/TextEncoder.cpp +++ b/Axodox.MachineLearning.Shared/MachineLearning/TextEncoder.cpp @@ -24,6 +24,7 @@ namespace Axodox::MachineLearning auto metadata = OnnxModelMetadata::Create(_environment, _session); 
_has64bitInputIds = metadata.Inputs["input_ids"].Type == TensorType::Int64; _hasHiddenLayers = metadata.Outputs.contains("hidden_states.11"); + isSDXL = false; _logger.log(log_severity::information, "Loaded."); } @@ -32,10 +33,18 @@ namespace Axodox::MachineLearning { _logger.log(log_severity::information, "Running inference..."); + std::string hiddenStatesLayer = "hidden_states.11"; + + // https://github.com/huggingface/diffusers/blob/1f22c9882020cbe2cc08acfee54fab553bbb5678/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py#L387 + // SDXL has -2 (use penultimate layer) CLIP skip in diffusers for both text encoders + // without this, some models generate noise/clouds. + if (isSDXL) + hiddenStatesLayer = "hidden_states.10"; + //Bind values IoBinding bindings{ _session }; bindings.BindInput("input_ids", text.ToInt64(_has64bitInputIds).ToOrtValue()); - bindings.BindOutput(_hasHiddenLayers ? "hidden_states.11" : "last_hidden_state", _environment->MemoryInfo()); + bindings.BindOutput(_hasHiddenLayers ? hiddenStatesLayer.c_str() : "last_hidden_state", _environment->MemoryInfo()); //Run inference _session.Run({}, bindings); @@ -76,7 +85,11 @@ namespace Axodox::MachineLearning //Bind values IoBinding bindings{ _session }; bindings.BindInput("input_ids", input.ToInt64(_has64bitInputIds).ToOrtValue()); - bindings.BindOutput("hidden_states.11", _environment->MemoryInfo()); + + // https://github.com/huggingface/diffusers/blob/1f22c9882020cbe2cc08acfee54fab553bbb5678/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py#L387 + // SDXL has -2 (use penultimate layer) CLIP skip in diffusers for both text encoders + // without this, some models generate noise/clouds. 
+ bindings.BindOutput("hidden_states.31", _environment->MemoryInfo()); bindings.BindOutput("text_embeds", _environment->MemoryInfo()); //Run inference @@ -113,6 +126,7 @@ namespace Axodox::MachineLearning if (filesystem::exists(get(*source), ec)) { _textEncoder2 = make_unique(environment, source); + _textEncoder.isSDXL = true; } } diff --git a/Axodox.MachineLearning.Shared/MachineLearning/TextEncoder.h b/Axodox.MachineLearning.Shared/MachineLearning/TextEncoder.h index 324bf0c..dc34a74 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/TextEncoder.h +++ b/Axodox.MachineLearning.Shared/MachineLearning/TextEncoder.h @@ -27,6 +27,11 @@ namespace Axodox::MachineLearning bool _has64bitInputIds; bool _hasHiddenLayers; + + protected: + friend class TextEncodingProvider; + bool isSDXL; + }; class AXODOX_MACHINELEARNING_API TextEncoder2 From 4b5ecf92498779c220c2bf29bfa0a347c94eefe3 Mon Sep 17 00:00:00 2001 From: ZDisket <30500847+ZDisket@users.noreply.github.com> Date: Sun, 10 Mar 2024 21:58:46 -0300 Subject: [PATCH 3/3] Correctly implement prediction type checking --- .../MachineLearning/Schedulers/DpmPlusPlus2MScheduler.cpp | 8 ++++++-- .../Schedulers/EulerAncestralScheduler.cpp | 7 ++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/DpmPlusPlus2MScheduler.cpp b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/DpmPlusPlus2MScheduler.cpp index c1814b3..db2d775 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/DpmPlusPlus2MScheduler.cpp +++ b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/DpmPlusPlus2MScheduler.cpp @@ -40,7 +40,6 @@ namespace Axodox::MachineLearning Tensor predictedOriginalSample; - // Implement the v-prediction formula if (_predictiontype == StableDiffusionSchedulerPredictionType::V) { @@ -50,10 +49,15 @@ namespace Axodox::MachineLearning }); } - else + else if (_predictiontype == StableDiffusionSchedulerPredictionType::Epsilon) { 
predictedOriginalSample = input.BinaryOperation(output, [currentSigma](float a, float b) { return a - currentSigma * b; }); + } + else + { + throw std::invalid_argument("Unimplemented prediction type."); + } float t = -log(currentSigma); diff --git a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/EulerAncestralScheduler.cpp b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/EulerAncestralScheduler.cpp index fd85680..8148267 100644 --- a/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/EulerAncestralScheduler.cpp +++ b/Axodox.MachineLearning.Shared/MachineLearning/Schedulers/EulerAncestralScheduler.cpp @@ -25,11 +25,16 @@ namespace Axodox::MachineLearning }); } - else + else if (_predictiontype == StableDiffusionSchedulerPredictionType::Epsilon) { predictedOriginalSample = input.BinaryOperation(output, [currentSigma](float a, float b) { return a - currentSigma * b; }); } + else + { + throw std::invalid_argument("Unimplemented prediction type."); + + } // Calculate sigma squared values for the process