2 changes: 1 addition & 1 deletion README.md
@@ -46,4 +46,4 @@ example: ./run_pareto_analysis.sh 3cNWY5 wiki10m

Serve the webui on port 8000:

cd web-ui-new; python3 -m http.server
cd web-ui; python3 -m http.server
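
For reference, python3 -m http.server serves on port 8000 by default, which is what the README line above relies on. A minimal Python equivalent that pins the directory and port explicitly (a sketch for illustration, not part of this PR):

# Serve the web-ui directory on port 8000, equivalent to
# "cd web-ui; python3 -m http.server".
import functools
from http.server import HTTPServer, SimpleHTTPRequestHandler

handler = functools.partial(SimpleHTTPRequestHandler, directory="web-ui")
HTTPServer(("0.0.0.0", 8000), handler).serve_forever()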
107 changes: 50 additions & 57 deletions generate-combinations.py
@@ -59,104 +59,92 @@
else:
algo_variants[param] = value

# Generate all combinations of variants. For each combination, generate a hashed ID and a file with the
# name pattern <sweep>-<algo>-<hash>.json. The file should contain the invariants as-is, and the variants as the current combination.
if algo_variants:
# Separate efSearch from other variants if it exists
efSearch_values = None
efSearchScaleFactor_values = None
other_variant_keys = []
other_variant_values = []

for key, value in algo_variants.items():
if key == 'efSearch':
efSearch_values = value
elif key == 'efSearchScaleFactor':
efSearchScaleFactor_values = value
else:
other_variant_keys.append(key)
other_variant_values.append(value)

# Generate combinations with efSearch at the beginning (innermost loop)
if efSearch_values and other_variant_keys:
# Generate combinations of other parameters first

if (efSearch_values or efSearchScaleFactor_values) and other_variant_keys:
for other_combination in itertools.product(*other_variant_values):
other_variants = dict(zip(other_variant_keys, other_combination))
# Then iterate through efSearch values
for ef_index, ef_value in enumerate(efSearch_values):
search_values = efSearch_values if efSearch_values else efSearchScaleFactor_values
search_key = 'efSearch' if efSearch_values else 'efSearchScaleFactor'
for ef_index, ef_value in enumerate(search_values):
current_variants = other_variants.copy()
current_variants['efSearch'] = ef_value

# Skip if cagraIntermediateDegree < cagraGraphDegree
current_variants[search_key] = ef_value

if 'cagraIntermediateDegree' in current_variants and 'cagraGraphDegree' in current_variants:
if current_variants['cagraIntermediateDegree'] < current_variants['cagraGraphDegree']:
print(f"\t\tSkipping combination: cagraIntermediateDegree ({current_variants['cagraIntermediateDegree']}) < cagraGraphDegree ({current_variants['cagraGraphDegree']})")
continue

# Skip if hnswMaxConn > hnswBeamWidth

if 'hnswMaxConn' in current_variants and 'hnswBeamWidth' in current_variants:
if current_variants['hnswMaxConn'] > current_variants['hnswBeamWidth']:
print(f"\t\tSkipping combination: hnswMaxConn ({current_variants['hnswMaxConn']}) > hnswBeamWidth ({current_variants['hnswBeamWidth']})")
continue

# Generate hash only from other_variants (excluding efSearch)

base_hash = hashlib.md5(json.dumps(other_variants, sort_keys=True).encode()).hexdigest()[:8]
hash_id = f"{base_hash}-ef{ef_value}"
hash_id = f"{base_hash}-ef{ef_value}" if search_key == 'efSearch' else f"{base_hash}-efs{ef_value}"

config = algo_invariants.copy()
config.update(current_variants)

# For multiple efSearch combinations: subsequent ones skip indexing
if len(efSearch_values) > 1 and ef_index > 0:

if len(search_values) > 1 and ef_index > 0:
config['skipIndexing'] = True

# Set cleanIndexDirectory based on position

if ef_index == 0:
config['cleanIndexDirectory'] = False
elif ef_index == len(efSearch_values) - 1:
elif ef_index == len(search_values) - 1:
config['cleanIndexDirectory'] = True
else:
config['cleanIndexDirectory'] = False

# Use base_hash for index directory paths

if 'hnswIndexDirPath' in config:
config['hnswIndexDirPath'] = f"hnswIndex-{base_hash}"
if 'cuvsIndexDirPath' in config:
config['cuvsIndexDirPath'] = f"cuvsIndex-{base_hash}"

filename = f"{algo}-{hash_id}.json"
sweep_dir = f"{args.configs_dir}/{sweep}"
filepath = f"{sweep_dir}/{filename}"
os.makedirs(sweep_dir, exist_ok=True)
with open(filepath, 'w') as f:
json.dump(config, f, indent=2)
print(f"\tGenerated config file: {filepath}")
elif efSearch_values:
# Only efSearch values, no other variants
for ef_index, ef_value in enumerate(efSearch_values):
current_variants = {'efSearch': ef_value}
# Generate hash from empty dict since no other variants exist
elif efSearch_values or efSearchScaleFactor_values:
search_values = efSearch_values if efSearch_values else efSearchScaleFactor_values
search_key = 'efSearch' if efSearch_values else 'efSearchScaleFactor'
for ef_index, ef_value in enumerate(search_values):
current_variants = {search_key: ef_value}
base_hash = hashlib.md5(json.dumps({}, sort_keys=True).encode()).hexdigest()[:8]
hash_id = f"{base_hash}-ef{ef_value}"
hash_id = f"{base_hash}-ef{ef_value}" if search_key == 'efSearch' else f"{base_hash}-efs{ef_value}"

config = algo_invariants.copy()
config.update(current_variants)

# For multiple efSearch combinations: subsequent ones skip indexing
if len(efSearch_values) > 1 and ef_index > 0:

if len(search_values) > 1 and ef_index > 0:
config['skipIndexing'] = True

# Set cleanIndexDirectory based on position

if ef_index == 0:
config['cleanIndexDirectory'] = False
elif ef_index == len(efSearch_values) - 1:
elif ef_index == len(search_values) - 1:
config['cleanIndexDirectory'] = True
else:
config['cleanIndexDirectory'] = False

# Use base_hash for index directory paths

if 'hnswIndexDirPath' in config:
config['hnswIndexDirPath'] = f"hnswIndex-{base_hash}"
if 'cuvsIndexDirPath' in config:
config['cuvsIndexDirPath'] = f"cuvsIndex-{base_hash}"

filename = f"{algo}-{hash_id}.json"
sweep_dir = f"{args.configs_dir}/{sweep}"
filepath = f"{sweep_dir}/{filename}"
@@ -165,26 +153,21 @@
json.dump(config, f, indent=2)
print(f"\tGenerated config file: {filepath}")
else:
# No efSearch, use original logic
variant_keys = list(algo_variants.keys())
variant_values = list(algo_variants.values())
for combination in itertools.product(*variant_values):
current_variants = dict(zip(variant_keys, combination))

# Skip if cagraIntermediateDegree < cagraGraphDegree

if 'cagraIntermediateDegree' in current_variants and 'cagraGraphDegree' in current_variants:
if current_variants['cagraIntermediateDegree'] < current_variants['cagraGraphDegree']:
print(f"\t\tSkipping combination: cagraIntermediateDegree ({current_variants['cagraIntermediateDegree']}) < cagraGraphDegree ({current_variants['cagraGraphDegree']})")
continue

# Skip if hnswMaxConn > hnswBeamWidth

if 'hnswMaxConn' in current_variants and 'hnswBeamWidth' in current_variants:
if current_variants['hnswMaxConn'] > current_variants['hnswBeamWidth']:
print(f"\t\tSkipping combination: hnswMaxConn ({current_variants['hnswMaxConn']}) > hnswBeamWidth ({current_variants['hnswBeamWidth']})")
continue

hash_id = hashlib.md5(json.dumps(current_variants, sort_keys=True).encode()).hexdigest()[:8]

config = algo_invariants.copy()
config.update(current_variants)
filename = f"{algo}-{hash_id}.json"
@@ -194,6 +177,16 @@
with open(filepath, 'w') as f:
json.dump(config, f, indent=2)
print(f"\tGenerated config file: {filepath}")


else:
hash_id = hashlib.md5(json.dumps({}, sort_keys=True).encode()).hexdigest()[:8]
config = algo_invariants.copy()
filename = f"{algo}-{hash_id}.json"
sweep_dir = f"{args.configs_dir}/{sweep}"
filepath = f"{sweep_dir}/{filename}"
os.makedirs(sweep_dir, exist_ok=True)
with open(filepath, 'w') as f:
json.dump(config, f, indent=2)
print(f"\tGenerated config file: {filepath}")


print("----------------------")
23 changes: 15 additions & 8 deletions plot_pareto.py
@@ -126,12 +126,16 @@ def create_plot_search(
# Sorting by mean y-value helps align plots with labels
def mean_y(algo):
points = np.array(all_data[algo], dtype=object)
if len(points) == 0 or points.ndim < 2:
return float('inf')
return -np.log(np.array(points[:, 3], dtype=np.float32)).mean()

# Find range for logit x-scale
min_x, max_x = 1, 0
for algo in sorted(all_data.keys(), key=mean_y):
points = np.array(all_data[algo], dtype=object)
if len(points) == 0 or points.ndim < 2:
continue
xs = points[:, 2]
ys = points[:, 3]
min_x = min([min_x] + [x for x in xs if x > 0])
@@ -226,10 +230,14 @@ def create_plot_build(
# Sorting by mean y-value helps align plots with labels
def mean_y(algo):
points = np.array(search_results[algo], dtype=object)
if len(points) == 0 or points.ndim < 2:
return float('inf')
return -np.log(np.array(points[:, 3], dtype=np.float32)).mean()

for pos, algo in enumerate(sorted(search_results.keys(), key=mean_y)):
points = np.array(search_results[algo], dtype=object)
if len(points) == 0 or points.ndim < 2:
continue
# x is recall, ls is algo_name, idxs is index_name
xs = points[:, 2]
ls = points[:, 0]
@@ -279,33 +287,32 @@ def mean_y(algo):
df = pd.DataFrame(data, index=index)
df.replace(0.0, np.nan, inplace=True)
df = df.dropna(how="all")

if df.empty or df.shape[1] == 0:
print(f"Skipping build plot: no data points in recall buckets >= 80%")
return

plt.figure(figsize=(12, 9))
ax = df.plot.bar(rot=0, color=colors)
fig = ax.get_figure()

# Add speedup annotations
if 'LUCENE_HNSW' in df.columns and 'CAGRA_HNSW' in df.columns:
y_max = ax.get_ylim()[1]

for i, bucket in enumerate(df.index):
lucene_time = df.loc[bucket, 'LUCENE_HNSW']
cagra_time = df.loc[bucket, 'CAGRA_HNSW']

if pd.notna(lucene_time) and pd.notna(cagra_time) and lucene_time > 0 and cagra_time > 0:
speedup = lucene_time / cagra_time
# Position annotations just above the bars, below subtitle
ax.text(i, y_max * 0.98, f'{speedup:.1f}x',
ha='center', va='bottom', fontsize=9, fontweight='bold',
bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.9, edgecolor='gray'))

print(f"writing build output to {fn_out}")
plt.title(
"Average Build Time within Recall Range "
f"for k={k} n_queries={n_queries}"
)
plt.title(f"Average Build Time within Recall Range for k={k} n_queries={n_queries}")
plt.suptitle(f"{dataset}")
plt.ylabel("Build Time (s)")
fig.savefig(fn_out)
plt.close()


def load_lines(results_path, result_files, method, index_key, mode, time_unit):
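
The new ndim guards in mean_y and the plotting loops handle a concrete numpy edge case: converting an empty result list yields a 1-D array, so column indexing such as points[:, 3] raises IndexError. A standalone illustration (not from the PR):

import numpy as np

empty = np.array([], dtype=object)  # an algorithm with no result rows
print(empty.ndim)  # 1 -> empty[:, 3] would raise IndexError

rows = np.array([["algo", "index", 0.95, 12.3]], dtype=object)
print(rows.ndim)  # 2 -> column slicing is safe
print(rows[:, 3])  # [12.3]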
20 changes: 6 additions & 14 deletions pom.xml
@@ -16,14 +16,6 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>

<repositories>
<repository>
<id>searchscale-maven</id>
<name>SearchScale Maven</name>
<url>https://maven.searchscale.com/snapshots</url>
</repository>
</repositories>

<dependencyManagement>
<dependencies>
<dependency>
@@ -37,15 +29,15 @@
</dependencyManagement>

<dependencies>
<dependency>
<groupId>com.nvidia.cuvs.lucene</groupId>
<artifactId>cuvs-lucene</artifactId>
<version>25.10.0-33318-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.nvidia.cuvs.lucene</groupId>
<artifactId>cuvs-lucene</artifactId>
<version>25.10.0</version>
</dependency>
<dependency>
<groupId>com.nvidia.cuvs</groupId>
<artifactId>cuvs-java</artifactId>
<version>25.10.0-55985-SNAPSHOT</version>
<version>25.10.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>