From cab61bcd718080998a158aaf560eae274e99892c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?=
Date: Sun, 14 Dec 2025 19:03:49 +0100
Subject: [PATCH 1/4] Add x0 Flux pred (+prepare for others)

---
Review notes (free text after the three-dash marker; not part of the commit):
 * ComVisX0Denoiser::get_scalings() was declared to return a vector but had no
   return statement. Added `return {c_skip, c_out, c_in};`, matching
   DiscreteFlowX0Denoiser and FluxFlowX0Denoiser below. The denoiser.hpp hunk
   counts, the offsets of the two later hunks and the diffstat were each
   bumped by one line to match.
 * prediction_to_str[]: "flux_flow_x0" had no trailing comma, so it fused with
   the next literal into "flux_flow_x0flux2_flow" and "flux2_flow" lost its own
   slot. Comma added.
 * NOTE(review): "ComVisX0Denoiser" looks like a typo for "CompVisX0Denoiser";
   the name is left unchanged here - confirm before anything references it.
 * NOTE(review): this copy arrived with all whitespace collapsed and with
   angle-bracket content (sender address, template arguments) missing.
   Indentation, column alignment and the position of blank context lines were
   reconstructed from the hunk counts and 4-space style. Verify against the
   tree, or apply with `git apply --ignore-whitespace`.
 * NOTE(review): template arguments were restored only on lines this patch
   adds (std::vector<float>, std::make_shared<FluxFlowX0Denoiser>), where the
   patch itself determines them. The context line `std::make_shared();` is as
   received and must be checked against the tree.
 * The "index" blob ids belong to the unfixed patch and are stale.

 denoiser.hpp         | 27 +++++++++++++++++++++++++++
 stable-diffusion.cpp | 19 +++++++++++++++----
 stable-diffusion.h   |  1 +
 3 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/denoiser.hpp b/denoiser.hpp
index 32f402786..b0b9391f1 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -481,6 +481,15 @@ struct CompVisVDenoiser : public CompVisDenoiser {
     }
 };
 
+struct ComVisX0Denoiser : public CompVisDenoiser {
+    std::vector<float> get_scalings(float sigma) override {
+        float c_skip = 0.0f;
+        float c_out  = 1.0f;
+        float c_in   = 1.0f;
+        return {c_skip, c_out, c_in};
+    }
+};
+
 struct EDMVDenoiser : public CompVisVDenoiser {
     float min_sigma = 0.002;
     float max_sigma = 120.0;
@@ -568,6 +577,15 @@ struct DiscreteFlowDenoiser : public Denoiser {
     }
 };
 
+struct DiscreteFlowX0Denoiser : public DiscreteFlowDenoiser {
+    std::vector<float> get_scalings(float sigma) override {
+        float c_skip = 0.0f;
+        float c_out  = 1.0f;
+        float c_in   = 1.0f;
+        return {c_skip, c_out, c_in};
+    }
+};
+
 float flux_time_shift(float mu, float sigma, float t) {
     return std::exp(mu) / (std::exp(mu) + std::pow((1.0 / t - 1.0), sigma));
 }
@@ -631,6 +649,15 @@ struct FluxFlowDenoiser : public Denoiser {
     }
 };
 
+struct FluxFlowX0Denoiser : public FluxFlowDenoiser {
+    std::vector<float> get_scalings(float sigma) override {
+        float c_skip = 0.0f;
+        float c_out  = 1.0f;
+        float c_in   = 1.0f;
+        return {c_skip, c_out, c_in};
+    }
+};
+
 struct Flux2FlowDenoiser : public FluxFlowDenoiser {
     Flux2FlowDenoiser() = default;
 
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 2cb588213..324a47205 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -828,7 +828,12 @@ class StableDiffusionGGML {
                     }
                 }
             } else if (sd_version_is_flux(version)) {
-                pred_type = FLUX_FLOW_PRED;
+                if (tensor_storage_map.find("model.diffusion_model.__x0__") != tensor_storage_map.end()) {
+                    pred_type = FLUX_FLOW_X0_PRED;
+                } else {
+                    pred_type = FLUX_FLOW_PRED;
+                }
+
                 if (flow_shift == INFINITY) {
                     flow_shift = 1.0f;  // TODO: validate
                     for (const auto& [name, tensor_storage] : tensor_storage_map) {
@@ -871,6 +876,11 @@ class StableDiffusionGGML {
                     denoiser = std::make_shared();
                     break;
                 }
+                case FLUX_FLOW_X0_PRED: {
+                    LOG_INFO("running in x0-prediction Flux FLOW mode");
+                    denoiser = std::make_shared<FluxFlowX0Denoiser>();
+                    break;
+                }
                 default: {
                     LOG_ERROR("Unknown predition type %i", pred_type);
                     ggml_free(ctx);
@@ -1316,9 +1326,9 @@ class StableDiffusionGGML {
         uint32_t dim = latents->ne[ggml_n_dims(latents) - 1];
 
         if (preview_mode == PREVIEW_PROJ) {
-            int64_t patch_sz                       = 1;
-            const float(*latent_rgb_proj)[channel] = nullptr;
-            float* latent_rgb_bias                 = nullptr;
+            int64_t patch_sz                        = 1;
+            const float (*latent_rgb_proj)[channel] = nullptr;
+            float* latent_rgb_bias                  = nullptr;
 
             if (dim == 128) {
                 if (sd_version_is_flux2(version)) {
@@ -2424,6 +2434,7 @@ const char* prediction_to_str[] = {
     "edm_v",
     "sd3_flow",
     "flux_flow",
+    "flux_flow_x0",
     "flux2_flow",
 };
 
diff --git a/stable-diffusion.h b/stable-diffusion.h
index e4abc8dcd..ee56099ba 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -70,6 +70,7 @@ enum prediction_t {
     EDM_V_PRED,
     FLOW_PRED,
     FLUX_FLOW_PRED,
+    FLUX_FLOW_X0_PRED,
     FLUX2_FLOW_PRED,
     PREDICTION_COUNT
 };

From b9da97cc40b83c5bdb1d6b15d01e0e8b1582adaf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?=
Date: Sun, 14 Dec 2025 19:04:06 +0100
Subject: [PATCH 2/4] Fix convert models with empty tensors

---
Review notes (free text after the three-dash marker; not part of the commit):
 * When a tensor has no data pointer, the added block asserts it has zero
   elements and points `data` at the ggml context's memory buffer before the
   tensor is handed to gguf_add_tensor().
 * NOTE(review): whitespace in this copy was collapsed. Indentation, the
   position of the blank context line after `*dst_tensor = tensor;` and the
   content of the whitespace-only removed/added line pair are reconstructed -
   verify against the tree or apply with `git apply --ignore-whitespace`.

 model.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/model.cpp b/model.cpp
index 0480efefb..131de3c32 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1731,7 +1731,14 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
             // tensor_storage.n_dims,
             // tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
             // tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
-
+
+            if (!tensor->data) {
+                GGML_ASSERT(ggml_nelements(tensor) == 0);
+                // avoid crashing the gguf writer by setting a dummy pointer for zero-sized tensors
+                LOG_DEBUG("setting dummy pointer for zero-sized tensor %s", name.c_str());
+                tensor->data = ggml_get_mem_buffer(ggml_ctx);
+            }
+
             *dst_tensor = tensor;
 
             gguf_add_tensor(gguf_ctx, tensor);

From e209ad2c82e56cd2615de9cf807587c8a56df305 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?=
Date: Sun, 14 Dec 2025 19:42:11 +0100
Subject: [PATCH 3/4] patch_32 exp support attempt

---
Review notes (free text after the three-dash marker; not part of the commit):
 * The img_in_patch kernel/stride is pinned to 16x16; when patch_size is not
   16 the padded image is first resized by 16 / patch_size. patch_size is set
   to 32 when a tensor name contains "__32x32__".
 * NOTE(review): `int ratio = patch_size / 16` is integer division, so ratio
   is 0 for any patch_size below 16 and `W / ratio` would then divide by zero.
   Confirm patch_size is always a multiple of 16 on this path.
 * NOTE(review): `std::pair`, `std::make_shared(` and
   `std::dynamic_pointer_cast(` appear without template arguments in this
   copy (presumably stripped in transit). They are left as received - check
   against the tree.
 * NOTE(review): whitespace in this copy was collapsed. Indentation, alignment
   and blank context lines are reconstructed from the hunk counts - verify
   against the tree or apply with `git apply --ignore-whitespace`.

 flux.hpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/flux.hpp b/flux.hpp
index 1df2874ae..d3d1f740d 100644
--- a/flux.hpp
+++ b/flux.hpp
@@ -781,7 +781,7 @@ namespace Flux {
         Flux(FluxParams params)
             : params(params) {
             if (params.version == VERSION_CHROMA_RADIANCE) {
-                std::pair kernel_size = {(int)params.patch_size, (int)params.patch_size};
+                std::pair kernel_size = {16, 16};
                 std::pair stride = kernel_size;
 
                 blocks["img_in_patch"] = std::make_shared(params.in_channels,
@@ -1068,6 +1068,11 @@ namespace Flux {
             auto img = pad_to_patch_size(ctx->ggml_ctx, x);
             auto orig_img = img;
 
+            if (patch_size != 16) {
+                int ratio = patch_size / 16;
+                img = ggml_interpolate(ctx->ggml_ctx, img, W / ratio, H / ratio, C, x->ne[3], GGML_SCALE_MODE_NEAREST);
+            }
+
             auto img_in_patch = std::dynamic_pointer_cast(blocks["img_in_patch"]);
             img = img_in_patch->forward(ctx, img);  // [N, hidden_size, H/patch_size, W/patch_size]
 
@@ -1290,6 +1295,9 @@ namespace Flux {
                     // not schnell
                     flux_params.guidance_embed = true;
                 }
+                if (tensor_name.find("__32x32__") != std::string::npos) {
+                    flux_params.patch_size = 32;
+                }
                 if (tensor_name.find("distilled_guidance_layer.in_proj.weight") != std::string::npos) {
                     // Chroma
                     flux_params.is_chroma = true;

From 0cd491b049d44f5e836f5a3238825df81ecd476f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?=
Date: Mon, 15 Dec 2025 00:18:41 +0100
Subject: [PATCH 4/4] improve support for patch_32

---
Review notes (free text after the three-dash marker; not part of the commit):
 * Switches the pre-patchify resize from GGML_SCALE_MODE_NEAREST to
   GGML_SCALE_MODE_BILINEAR; the in-code comment records that the reference
   uses nearest-neighbour and that this is a deliberate deviation.
 * NOTE(review): whitespace in this copy was collapsed. Indentation and blank
   context lines are reconstructed from the hunk counts - verify against the
   tree or apply with `git apply --ignore-whitespace`.
   `std::dynamic_pointer_cast(` is as received (template argument presumably
   stripped).

 flux.hpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/flux.hpp b/flux.hpp
index d3d1f740d..6e56bd648 100644
--- a/flux.hpp
+++ b/flux.hpp
@@ -1070,7 +1070,10 @@ namespace Flux {
 
             if (patch_size != 16) {
                 int ratio = patch_size / 16;
-                img = ggml_interpolate(ctx->ggml_ctx, img, W / ratio, H / ratio, C, x->ne[3], GGML_SCALE_MODE_NEAREST);
+                // It's supposed to be using GGML_SCALE_MODE_NEAREST, but this seems more stable
+                // Maybe the implementation of nearest-neighbor interpolation in ggml behaves differently than the one in PyTorch?
+                // img = F.interpolate(img, size=(H//2, W//2), mode="nearest")
+                img = ggml_interpolate(ctx->ggml_ctx, img, W / ratio, H / ratio, C, x->ne[3], GGML_SCALE_MODE_BILINEAR);
             }
 
             auto img_in_patch = std::dynamic_pointer_cast(blocks["img_in_patch"]);