llvm · fhahn · Dec 1, 2025 · Dec 1, 2025 · Dec 16, 2025 · ayalz
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8506,8 +8506,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
 
   // Apply mandatory transformation to handle reductions with multiple in-loop
   // uses if possible, bail out otherwise.
-  if (!VPlanTransforms::runPass(VPlanTransforms::handleMultiUseReductions,
-                                *Plan))
+  if (!VPlanTransforms::handleMultiUseReductions(*Plan))
     return nullptr;
   // Apply mandatory transformation to handle FP maxnum/minnum reduction with
   // NaNs if possible, bail out otherwise.

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2158,7 +2158,9 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
   /// incoming value, its start value.
   unsigned getNumIncoming() const override { return 1; }
 
-  PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }
+  PHINode *getPHINode() const {
+    return cast_if_present<PHINode>(getUnderlyingValue());
+  }
 
   /// Returns the induction descriptor for the recipe.
   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -13,10 +13,12 @@
 
 #include "LoopVectorizationPlanner.h"
 #include "VPlan.h"
+#include "VPlanAnalysis.h"
 #include "VPlanCFG.h"
 #include "VPlanDominatorTree.h"
 #include "VPlanPatternMatch.h"
 #include "VPlanTransforms.h"
+#include "VPlanUtils.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -1120,6 +1122,133 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
   return true;
 }
 
+/// For argmin/argmax reductions with strict predicates, convert the existing
+/// FindLastIV reduction to a new UMin reduction of a wide canonical IV. If the
-/// FindLastIV reduction to a new UMin reduction of a wide canonical IV. If the
+/// FindFirstIV reduction to a new UMin reduction of a wide canonical IV. If the
-/// FindLastIV reduction to a new UMin reduction of a wide canonical IV. If the
+/// FindFirstIV reduction to a new UMin reduction of a wide canonical IV. If the
+/// original IV was not canonical, a new canonical wide IV is added, and the
+/// final result is scaled back to the original IV.
+static bool handleStrictArgMinArgMax(VPlan &Plan,
-static bool handleStrictArgMinArgMax(VPlan &Plan,
+static bool handleFirstArgMinArgMax(VPlan &Plan,
-static bool handleStrictArgMinArgMax(VPlan &Plan,
+static bool handleFirstArgMinArgMax(VPlan &Plan,
+                                     VPReductionPHIRecipe *MinMaxPhiR,
+                                     VPReductionPHIRecipe *FindIVPhiR,
+                                     VPWidenIntOrFpInductionRecipe *WideIV,
+                                     VPInstruction *MinMaxResult) {
+  Type *Ty = Plan.getVectorLoopRegion()->getCanonicalIVType();
+  if (Ty != VPTypeAnalysis(Plan).inferScalarType(FindIVPhiR))
+    return false;
+
+  // If the original wide IV is not canonical, create a new one. The wide IV is
+  // guaranteed to not wrap for all lanes that are active in the vector loop.
+  if (!WideIV->isCanonical()) {
+    VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Ty, 0));
-    VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Ty, 0));
+    VPValue *Zero = Plan.getConstantInt(Ty, 0);
-    VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Ty, 0));
+    VPValue *Zero = Plan.getConstantInt(Ty, 0);
+    VPValue *One = Plan.getOrAddLiveIn(ConstantInt::get(Ty, 1));
-    VPValue *One = Plan.getOrAddLiveIn(ConstantInt::get(Ty, 1));
+    VPValue *One = Plan.getConstantInt(Ty, 1);
-    VPValue *One = Plan.getOrAddLiveIn(ConstantInt::get(Ty, 1));
+    VPValue *One = Plan.getConstantInt(Ty, 1);
+    auto *WidenCanIV = new VPWidenIntOrFpInductionRecipe(
+        nullptr, Zero, One, WideIV->getVFValue(),
+        WideIV->getInductionDescriptor(), VPIRFlags(), WideIV->getDebugLoc());
+    WidenCanIV->insertBefore(WideIV);
+
+    // Update the select to use the wide canonical IV.
+    auto *SelectRecipe = cast<VPSingleDefRecipe>(
+        FindIVPhiR->getBackedgeValue()->getDefiningRecipe());
+    if (SelectRecipe->getOperand(1) == WideIV)
+      SelectRecipe->setOperand(1, WidenCanIV);
+    else if (SelectRecipe->getOperand(2) == WideIV)
+      SelectRecipe->setOperand(2, WidenCanIV);
+  }
+
+  // Create the new UMin reduction recipe to track the minimum index.
+  assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
+         "inloop and ordered reductions not supported");
+  VPValue *MaxInt =
+      Plan.getConstantInt(APInt::getMaxValue(Ty->getIntegerBitWidth()));
+  ReductionStyle Style = RdxUnordered{FindIVPhiR->getVFScaleFactor()};
+  auto *MinIdxPhiR = new VPReductionPHIRecipe(
-  auto *MinIdxPhiR = new VPReductionPHIRecipe(
+  auto *FirstIdxPhiR = new VPReductionPHIRecipe(
-  auto *MinIdxPhiR = new VPReductionPHIRecipe(
+  auto *FirstIdxPhiR = new VPReductionPHIRecipe(
+      dyn_cast_or_null<PHINode>(FindIVPhiR->getUnderlyingValue()),
+      RecurKind::UMin, *MaxInt, *FindIVPhiR->getBackedgeValue(), Style,
+      FindIVPhiR->hasUsesOutsideReductionChain());
+  MinIdxPhiR->insertBefore(FindIVPhiR);
+
+  VPInstruction *FindLastIVResult =
-  VPInstruction *FindLastIVResult =
+  VPInstruction *FindFirstIVResult =
-  VPInstruction *FindLastIVResult =
+  VPInstruction *FindFirstIVResult =
+      findUserOf<VPInstruction::ComputeFindIVResult>(FindIVPhiR);
+  MinMaxResult->moveBefore(*FindLastIVResult->getParent(),
+                           FindLastIVResult->getIterator());
+
+  // The reduction using MinMaxPhiR needs adjusting to compute the correct
+  // result:
+  //  1. We need to find the first canonical IV for which the condition based
+  //     on the min/max recurrence is true,
-  //  1. We need to find the first canonical IV for which the condition based
-  //     on the min/max recurrence is true,
+  //  1. Find the first canonical indices corresponding to partial min/max values, using loop reductions.
-  //  1. We need to find the first canonical IV for which the condition based
-  //     on the min/max recurrence is true,
+  //  1. Find the first canonical indices corresponding to partial min/max values, using loop reductions.
+  //  2. Compare the partial min/max reduction result to its final value and,
-  //  2. Compare the partial min/max reduction result to its final value and,
+  //  2. Find which of the partial min/max values are equal to the overall min/max value.
-  //  2. Compare the partial min/max reduction result to its final value and,
+  //  2. Find which of the partial min/max values are equal to the overall min/max value.
+  //  3. Select the lanes of the partial UMin reduction of the canonical wide
+  //     IV which correspond to the lanes matching the min/max reduction result.
-  //  3. Select the lanes of the partial UMin reduction of the canonical wide
-  //     IV which correspond to the lanes matching the min/max reduction result.
+  //  3. Select among the canonical indices those corresponding to the overall min/max value.
-  //  3. Select the lanes of the partial UMin reduction of the canonical wide
-  //     IV which correspond to the lanes matching the min/max reduction result.
+  //  3. Select among the canonical indices those corresponding to the overall min/max value.
+  //  4. Scale the final select canonical IV back to the original IV using
+  //     VPDerivedIVRecipe.
-  //  4. Scale the final select canonical IV back to the original IV using
-  //     VPDerivedIVRecipe.
+  //  4. Find the first canonical index of overall min/max and scale it back to the original IV using
+  //     VPDerivedIVRecipe.
-  //  4. Scale the final select canonical IV back to the original IV using
-  //     VPDerivedIVRecipe.
+  //  4. Find the first canonical index of overall min/max and scale it back to the original IV using
+  //     VPDerivedIVRecipe.
+  //  5. If the minimum value matches the start value, the condition in the
+  //     loop was never true, return the start value in that case.
-  //  5. If the minimum value matches the start value, the condition in the
-  //     loop was never true, return the start value in that case.
+  //  5. If the overall min/max is equal to the start value, the condition in the
+  //     loop was always false, due to being strict; return the start value in that case.
-  //  5. If the minimum value matches the start value, the condition in the
-  //     loop was never true, return the start value in that case.
+  //  5. If the overall min/max is equal to the start value, the condition in the
+  //     loop was always false, due to being strict; return the start value in that case.
+  //
+  // The original reductions need adjusting.
+  // For example, this transforms
+  // vp<%min.result> = compute-reduction-result ir<%min.val>,
+  //                                            ir<%min.val.next>
-  // vp<%min.result> = compute-reduction-result ir<%min.val>,
-  //                                            ir<%min.val.next>
+  // vp<%min.result> = compute-reduction-result ir<%min.val>, ir<%min.val.next>
-  // vp<%min.result> = compute-reduction-result ir<%min.val>,
-  //                                            ir<%min.val.next>
+  // vp<%min.result> = compute-reduction-result ir<%min.val>, ir<%min.val.next>
+  // vp<%find.iv.result> = compute-find-iv-result ir<%min.idx>, ir<0>,
+  //                                             SENTINEL, vp<%min.idx.next>
+  //
+  // into:
+  //  vp<%min.result> = compute-reduction-result ir<%min.val>, ir<%min.val.next>
+  //  vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<%min.result>
+  //  vp<%final.min.iv> = select vp<%final.min.cmp>, ir<%min.idx.next>, ir<-1>
-  //  vp<%final.min.iv> = select vp<%final.min.cmp>, ir<%min.idx.next>, ir<-1>
+  //  vp<%final.min.idx> = select vp<%final.min.cmp>, ir<%min.idx.next>, ir<MaxUInt>
-  //  vp<%final.min.iv> = select vp<%final.min.cmp>, ir<%min.idx.next>, ir<-1>
+  //  vp<%final.min.idx> = select vp<%final.min.cmp>, ir<%min.idx.next>, ir<MaxUInt>
+  //  vp<%13> = compute-reduction-result ir<%min.idx>, vp<%final.min.iv>
+  //  vp<%scaled.result.iv> = DERIVED-IV ir<20> + vp<%13> * ir<1>
+  //  vp<%threshold.cmp> = icmp slt vp<%min.result>, ir<0>
-  //  vp<%threshold.cmp> = icmp slt vp<%min.result>, ir<0>
+  //  vp<%always.false> = icmp eq vp<%min.result>, ir<%original.min.start>
-  //  vp<%threshold.cmp> = icmp slt vp<%min.result>, ir<0>
+  //  vp<%always.false> = icmp eq vp<%min.result>, ir<%original.min.start>
+  //  vp<%final.result> = select vp<%threshold.cmp>, vp<%scaled.result.iv>,
+  //  ir<%original.start>
-  //  vp<%final.result> = select vp<%threshold.cmp>, vp<%scaled.result.iv>,
-  //  ir<%original.start>
+  //  vp<%final.idx.result> = select vp<%always.false>, ir<%original.idx.start>, vp<%scaled.result.iv>
-  //  vp<%final.result> = select vp<%threshold.cmp>, vp<%scaled.result.iv>,
-  //  ir<%original.start>
+  //  vp<%final.idx.result> = select vp<%always.false>, ir<%original.idx.start>, vp<%scaled.result.iv>
+
+  VPBuilder Builder(FindLastIVResult);
+  VPValue *MinMaxExiting = MinMaxResult->getOperand(1);
+  auto *FinalMinMaxCmp =
+      Builder.createICmp(CmpInst::ICMP_EQ, MinMaxExiting, MinMaxResult);
+  VPValue *LastIVExiting = FindLastIVResult->getOperand(3);
+  auto *FinalIVSelect =
+      Builder.createSelect(FinalMinMaxCmp, LastIVExiting, MaxInt);
+  VPSingleDefRecipe *FinalResult = Builder.createNaryOp(
+      VPInstruction::ComputeReductionResult, {MinIdxPhiR, FinalIVSelect}, {},
+      FindLastIVResult->getDebugLoc());
+
+  // If we used a new wide canonical IV, convert the reduction result back to
+  // the original IV scale before the final select.
+  if (!WideIV->isCanonical()) {
+    auto *DerivedIVRecipe =
+        new VPDerivedIVRecipe(InductionDescriptor::IK_IntInduction,
+                              nullptr, // No FPBinOp for integer induction
+                              WideIV->getStartValue(), FinalResult,
+                              WideIV->getStepValue(), "derived.iv.result");
+    DerivedIVRecipe->insertBefore(&*Builder.getInsertPoint());
+    FinalResult = DerivedIVRecipe;
+  }
+
+  auto GetPred = [&MinMaxPhiR]() {
+    switch (MinMaxPhiR->getRecurrenceKind()) {
+    case RecurKind::UMin:
+      return CmpInst::ICMP_ULT;
+    case RecurKind::SMin:
+      return CmpInst::ICMP_SLT;
+    case RecurKind::UMax:
+      return CmpInst::ICMP_UGT;
+    case RecurKind::SMax:
+      return CmpInst::ICMP_SGT;
+    default:
+      llvm_unreachable("must be an integer min/max recurrence kind");
+    }
+  };
+  // If the final min/max value matches the start value, the condition in the
+  // loop was always false, i.e. no induction value has been selected. If that's
+  // the case, use the original start value.
+  VPValue *MinMaxLT =
+      Builder.createICmp(GetPred(), MinMaxResult, MinMaxPhiR->getStartValue());
+  VPValue *Res = Builder.createSelect(MinMaxLT, FinalResult,
+                                      FindLastIVResult->getOperand(1));
+  FindIVPhiR->replaceAllUsesWith(MinIdxPhiR);
+  FindLastIVResult->replaceAllUsesWith(Res);
+  return true;
+}
+
 bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
   for (auto &PhiR : make_early_inc_range(
            Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
@@ -1131,7 +1260,7 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
     // MinMaxPhiR has users outside the reduction cycle in the loop. Check if
     // the only other user is a FindLastIV reduction. MinMaxPhiR must have
     // exactly 3 users: 1) the min/max operation, the compare of a FindLastIV
-    // reduction and ComputeReductionResult. The comparisom must compare
+    // reduction and ComputeReductionResult. The comparison must compare
     // MinMaxPhiR against the min/max operand used for the min/max reduction
     // and only be used by the select of the FindLastIV reduction.
     RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind();
@@ -1203,33 +1332,42 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
                            FindIVPhiR->getRecurrenceKind()))
       return false;
 
+    assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
+           "cannot handle inloop/ordered reductions yet");
+
     // TODO: Support cases where IVOp is the IV increment.
     if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
         !isa<VPWidenIntOrFpInductionRecipe>(IVOp))
       return false;
 
-    CmpInst::Predicate RdxPredicate = [RdxKind]() {
+    // Check if the predicate is compatible with the reduction kind.
+    bool IsValidPredicate = [RdxKind, Pred]() {
       switch (RdxKind) {
       case RecurKind::UMin:
-        return CmpInst::ICMP_UGE;
+        return Pred == CmpInst::ICMP_UGE || Pred == CmpInst::ICMP_UGT;
       case RecurKind::UMax:
-        return CmpInst::ICMP_ULE;
+        return Pred == CmpInst::ICMP_ULE || Pred == CmpInst::ICMP_ULT;
       case RecurKind::SMax:
-        return CmpInst::ICMP_SLE;
+        return Pred == CmpInst::ICMP_SLE || Pred == CmpInst::ICMP_SLT;
       case RecurKind::SMin:
-        return CmpInst::ICMP_SGE;
+        return Pred == CmpInst::ICMP_SGE || Pred == CmpInst::ICMP_SGT;
       default:
         llvm_unreachable("unhandled recurrence kind");
       }
     }();
 
-    // TODO: Strict predicates need to find the first IV value for which the
-    // predicate holds, not the last.
-    if (Pred != RdxPredicate)
+    if (!IsValidPredicate)
       return false;
 
-    assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
-           "cannot handle inloop/ordered reductions yet");
+    // For strict predicates, use a UMin reduction to find the minimum index.
+    // Canonical IVs (0, 1, 2, ...) are guaranteed not to wrap in the vector
+    // loop, so UMin can always be used.
+    bool IsStrictPredicate = ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred);
+    if (IsStrictPredicate) {
+      return handleStrictArgMinArgMax(Plan, MinMaxPhiR, FindIVPhiR,
+                                      cast<VPWidenIntOrFpInductionRecipe>(IVOp),
+                                      MinMaxResult);
+    }
 
     // The reduction using MinMaxPhiR needs adjusting to compute the correct
     // result:

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -163,6 +163,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
     return cast<VPExpressionRecipe>(this)->mayHaveSideEffects();
   case VPDerivedIVSC:
   case VPFirstOrderRecurrencePHISC:
+  case VPReductionPHISC:
   case VPPredInstPHISC:
   case VPVectorEndPointerSC:
     return false;
@@ -1189,6 +1190,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   case VPInstruction::BuildVector:
   case VPInstruction::CalculateTripCountMinusVF:
   case VPInstruction::CanonicalIVIncrementForPart:
+  case VPInstruction::ComputeFindIVResult:
   case VPInstruction::ExtractLane:
   case VPInstruction::ExtractLastLane:
   case VPInstruction::ExtractLastPart:

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -157,8 +157,8 @@ struct VPlanTransforms {
       const TargetLibraryInfo &TLI);
 
   /// Try to legalize reductions with multiple in-loop uses. Currently only
-  /// min/max reductions used by FindLastIV reductions are supported. Otherwise
-  /// return false.
+  /// min/max reductions used by FindLastIV and FindFirstIV reductions are
+  /// supported. Otherwise return false.
   static bool handleMultiUseReductions(VPlan &Plan);
 
   /// Try to have all users of fixed-order recurrences appear after the recipe