From 4a249f5f47262e57827bfb762aed8d3e0aaef2d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Fri, 10 Oct 2025 14:03:01 -0700 Subject: [PATCH] Allow sorting in the API calls by path and date MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernát Gábor --- apiary.apib | 11 ++- .../opengrok/indexer/search/SearchEngine.java | 81 ++++++++++++++++--- .../indexer/search/SearchEngineTest.java | 79 +++++++++++++++++- .../opengrok/indexer/util/TestRepository.java | 39 +++++++++ .../api/v1/controller/SearchController.java | 11 ++- .../web/api/v1/filter/IncomingFilter.java | 2 +- 6 files changed, 208 insertions(+), 15 deletions(-) diff --git a/apiary.apib b/apiary.apib index 0e9bed9319e..26bafcce3e7 100644 --- a/apiary.apib +++ b/apiary.apib @@ -643,7 +643,7 @@ The repository path is relative to source root. + repository - repository path with native path separators (of the machine running the service) starting with path separator for which to return type -## Search [/search{?full,def,symbol,path,hist,type,projects,maxresults,start}] +## Search [/search{?full,def,symbol,path,hist,type,projects,maxresults,start,sort}] ## return search results [GET] @@ -657,6 +657,15 @@ The repository path is relative to source root. + projects (optional, string) - projects to search in + maxresults (optional, string) - maximum number of documents whose hits will be returned (default 1000) + start (optional, string) - start index from which to return results + + sort: relevancy (optional, enum[string]) + + Enum + + relevancy + + fullpath + + lastmodtime + + Description: Sort order for results. Possible values: + - `relevancy`: Sort by Lucene score (most relevant first). + - `fullpath`: Sort by file path (alphabetical). + - `lastmodtime`: Sort by last modification date (newest first). + Response 200 (application/json) + Body diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java index 2c13165d3d4..ed7076a4d31 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java @@ -48,6 +48,9 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopScoreDocCollectorManager; import org.apache.lucene.util.Version; import org.opengrok.indexer.analysis.AbstractAnalyzer; @@ -66,6 +69,7 @@ import org.opengrok.indexer.util.Statistics; import org.opengrok.indexer.util.TandemPath; import org.opengrok.indexer.web.Prefix; +import org.opengrok.indexer.web.SortOrder; /** * This is an encapsulation of the details on how to search in the index database. @@ -114,6 +118,10 @@ public class SearchEngine { * Holds value of property type. */ private String type; + /** + * Holds value of property sort. + */ + private SortOrder sortOrder; /** * Holds value of property indexDatabase. */ @@ -181,6 +189,10 @@ private void searchSingleDatabase(boolean paging) throws IOException { SuperIndexSearcher superIndexSearcher = RuntimeEnvironment.getInstance().getSuperIndexSearcher(""); searcherList.add(superIndexSearcher); searcher = superIndexSearcher; + // If a field-based sort is requested, collect all hits (disable paging optimization) + if (sortOrder != SortOrder.RELEVANCY) { + paging = false; + } searchIndex(superIndexSearcher, paging); } @@ -205,20 +217,41 @@ private void searchMultiDatabase(List projectList, boolean paging) thro } private void searchIndex(IndexSearcher searcher, boolean paging) throws IOException { - collectorManager = new TopScoreDocCollectorManager(hitsPerPage * cachePages, Short.MAX_VALUE); + Sort luceneSort = null; + if (getSortOrder() == SortOrder.LASTMODIFIED) { + luceneSort = new Sort(new SortField(QueryBuilder.DATE, SortField.Type.STRING, true)); + } else if (getSortOrder() == SortOrder.BY_PATH) { + luceneSort = new Sort(new SortField(QueryBuilder.FULLPATH, SortField.Type.STRING)); + } + + if (luceneSort == null) { + collectorManager = new TopScoreDocCollectorManager(hitsPerPage * cachePages, Integer.MAX_VALUE); + hits = searcher.search(query, collectorManager).scoreDocs; + totalHits = searcher.count(query); + } else { + TopDocs top = searcher.search(query, hitsPerPage * cachePages, luceneSort); + hits = top.scoreDocs; + totalHits = (int) top.totalHits.value; + } + Statistics stat = new Statistics(); - hits = searcher.search(query, collectorManager).scoreDocs; - totalHits = searcher.count(query); stat.report(LOGGER, Level.FINEST, "search via SearchEngine done", "search.latency", new String[]{"category", "engine", "outcome", totalHits > 0 ? "success" : "empty"}); + if (!paging && totalHits > hitsPerPage * cachePages) { - collectorManager = new TopScoreDocCollectorManager(totalHits, Short.MAX_VALUE); - hits = searcher.search(query, collectorManager).scoreDocs; + if (luceneSort == null) { + collectorManager = new TopScoreDocCollectorManager(totalHits, Integer.MAX_VALUE); + hits = searcher.search(query, collectorManager).scoreDocs; + } else { + TopDocs top = searcher.search(query, totalHits, luceneSort); + hits = top.scoreDocs; + } stat.report(LOGGER, Level.FINEST, "FULL search via SearchEngine done", "search.latency", new String[]{"category", "engine", "outcome", totalHits > 0 ? "success" : "empty"}); } + StoredFields storedFields = searcher.storedFields(); for (ScoreDoc hit : hits) { int docId = hit.doc; @@ -337,7 +370,6 @@ private int search(List projects, File root) { } catch (Exception e) { LOGGER.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e); } - if (!docs.isEmpty()) { sourceContext = null; summarizer = null; @@ -413,14 +445,27 @@ public void results(int start, int end, List ret) { // TODO check if below fits for if end=old hits.length, or it should include it if (end > hits.length && !allCollected) { - //do the requery, we want more than 5 pages - collectorManager = new TopScoreDocCollectorManager(totalHits, Short.MAX_VALUE); + // do the requery to collect all hits (beyond cached pages) + Sort luceneSort = null; + if (getSortOrder() == SortOrder.LASTMODIFIED) { + luceneSort = new Sort(new SortField(QueryBuilder.DATE, SortField.Type.STRING, true)); + } else if (getSortOrder() == SortOrder.BY_PATH) { + luceneSort = new Sort(new SortField(QueryBuilder.FULLPATH, SortField.Type.STRING)); + } + try { - hits = searcher.search(query, collectorManager).scoreDocs; + if (luceneSort == null) { + collectorManager = new TopScoreDocCollectorManager(totalHits, Integer.MAX_VALUE); + hits = searcher.search(query, collectorManager).scoreDocs; + } else { + TopDocs top = searcher.search(query, totalHits, luceneSort); + hits = top.scoreDocs; + } } catch (Exception e) { // this exception should never be hit, since search() will hit this before LOGGER.log( Level.WARNING, SEARCH_EXCEPTION_MSG, e); } + StoredFields storedFields = null; try { storedFields = searcher.storedFields(); @@ -646,4 +691,22 @@ public String getType() { public void setType(String fileType) { this.type = fileType; } + + /** + * Getter for property sort. + * + * @return Value of property sortOrder. + */ + public SortOrder getSortOrder() { + return this.sortOrder; + } + + /** + * Setter for property sort. + * + * @param sortOrder New value of property sortOrder. + */ + public void setSortOrder(SortOrder sortOrder) { + this.sortOrder = sortOrder; + } } diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java index d477ef11295..b369d3d0ed8 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java @@ -24,10 +24,15 @@ package org.opengrok.indexer.search; import java.io.File; +import java.net.URL; +import java.nio.file.Path; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.TreeSet; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.opengrok.indexer.configuration.RuntimeEnvironment; @@ -36,7 +41,9 @@ import org.opengrok.indexer.util.TestRepository; import org.opengrok.indexer.history.RepositoryFactory; +import org.opengrok.indexer.web.SortOrder; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; @@ -55,7 +62,10 @@ class SearchEngineTest { @BeforeAll static void setUpClass() throws Exception { repository = new TestRepository(); - repository.create(HistoryGuru.class.getResource("/repositories")); + URL url = HistoryGuru.class.getResource("/repositories"); + repository.createEmpty(); + Assertions.assertNotNull(url); + repository.copyDirectoryWithUniqueModifiedTime(Path.of(url.toURI()), Path.of(repository.getSourceRoot())); RuntimeEnvironment env = RuntimeEnvironment.getInstance(); env.setSourceRoot(repository.getSourceRoot()); @@ -148,6 +158,73 @@ void testGetQuery() throws Exception { instance.getQuery()); } + @Test + void testSortOrderLastModified() { + SearchEngine instance = new SearchEngine(); + instance.setFile("main.c"); + instance.setFreetext("arguments"); + instance.setSortOrder(SortOrder.LASTMODIFIED); + int hitsCount = instance.search(); + List hits = new ArrayList<>(); + instance.results(0, hitsCount, hits); + assertTrue(hits.size() != 6, "Should return at least 2 hits for RELEVANCY sort to check order"); + + String[] results = hits.stream(). + map(hit -> hit.getPath() + "@" + hit.getLineno()). + toArray(String[]::new); + final String[] expectedResults = { + "/teamware/main.c@5", + "/rcs_test/main.c@5", + "/mercurial/main.c@5", + "/git/main.c@5", + "/cvs_test/cvsrepo/main.c@7", + "/bazaar/main.c@5" + }; + + assertArrayEquals(expectedResults, results); + + instance.destroy(); + } + + @Test + void testSortOrderByPath() { + SearchEngine instance = new SearchEngine(); + instance.setFile("main.c OR header.h"); + instance.setFreetext("arguments OR stdio"); + instance.setSortOrder(SortOrder.BY_PATH); + int hitsCount = instance.search(); + List hits = new ArrayList<>(); + instance.results(0, hitsCount, hits); + assertTrue(hits.size() != 11, "Should return at least 2 hits for RELEVANCY sort to check order"); + + String[] results = hits.stream(). + map(hit -> hit.getPath() + "@" + hit.getLineno()). + toArray(String[]::new); + final String[] expectedResults = { + "/bazaar/header.h@2", + "/bazaar/main.c@5", + "/cvs_test/cvsrepo/main.c@7", + "/git/header.h@2", + "/git/main.c@5", + "/mercurial/header.h@2", + "/mercurial/main.c@5", + "/rcs_test/header.h@2", + "/rcs_test/main.c@5", + "/teamware/header.h@2", + "/teamware/main.c@5" + }; + + assertArrayEquals(expectedResults, results); + + instance.destroy(); + } + + @Test + void testDefaultSortOrder() { + SearchEngine instance = new SearchEngine(); + assertNull(instance.getSortOrder(), "Default sort should be relevancy (null implies Lucene score ordering)"); + } + /* see https://github.com/oracle/opengrok/issues/2030 @Test void testSearch() { diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java index 167980d139f..e5e36a8f87d 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java @@ -33,6 +33,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -133,6 +134,44 @@ public void copyDirectory(Path src, Path dest) throws IOException { } } + /** + * Assumes the destination directory exists. + * @param src source directory + * @param dest destination directory + * @throws IOException on error + */ + public void copyDirectoryWithUniqueModifiedTime(Path src, Path dest) throws IOException { + // Create a deterministic order of paths for creation time, so last modified time indexing is stable in tests + // note we cannot use Files.copy(sourceFile, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES) + // as the original creation time is the user checkout and not different accross files + List allPaths; + try (Stream stream = Files.walk(src)) { + allPaths = stream.filter(p -> !p.equals(src)).sorted().toList(); + } + // Set base time to now, and go ahead in time for each subsequent path by 1 minute + java.time.Instant baseTime = java.time.Instant.now(); + for (int i = 0; i < allPaths.size(); i++) { + Path sourcePath = allPaths.get(i); + Path destRelativePath = getDestinationRelativePath(src, sourcePath); + Path destPath = dest.resolve(destRelativePath); + var fileTime = java.nio.file.attribute.FileTime.from(baseTime.plusSeconds(i * 60L)); + if (Files.isDirectory(sourcePath)) { + if (!Files.exists(destPath)) { + Files.createDirectories(destPath); + } + Files.setLastModifiedTime(destPath, fileTime); + } else { + // Ensure parent directory exists before copying file + Path parentDir = destPath.getParent(); + if (parentDir != null && !Files.exists(parentDir)) { + Files.createDirectories(parentDir); + } + Files.copy(sourcePath, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES); + Files.setLastModifiedTime(destPath, fileTime); + } + } + } + private Path getDestinationRelativePath(Path sourceDirectory, Path sourceFile) { // possibly strip zip filesystem for the startsWith method to work var relativePath = Path.of(sourceDirectory.relativize(sourceFile).toString()); diff --git a/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java b/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java index f251c3b8005..08acb2c5089 100644 --- a/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java +++ b/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java @@ -39,6 +39,7 @@ import org.opengrok.indexer.search.Hit; import org.opengrok.indexer.search.SearchEngine; import org.opengrok.indexer.web.QueryParameters; +import org.opengrok.indexer.web.SortOrder; import org.opengrok.web.PageConfig; import org.opengrok.web.api.v1.filter.CorsEnable; import org.opengrok.web.api.v1.suggester.provider.service.SuggesterService; @@ -58,6 +59,7 @@ public class SearchController { public static final String PATH = "search"; private static final int MAX_RESULTS = 1000; + private static final String DEFAULT_SORT_ORDER = "relevancy"; private final SuggesterService suggester; @@ -81,9 +83,10 @@ public SearchResult search( @QueryParam("projects") final List projects, @QueryParam("maxresults") // Akin to QueryParameters.COUNT_PARAM @DefaultValue(MAX_RESULTS + "") final int maxResults, - @QueryParam(QueryParameters.START_PARAM) @DefaultValue(0 + "") final int startDocIndex + @QueryParam(QueryParameters.START_PARAM) @DefaultValue(0 + "") final int startDocIndex, + @QueryParam(QueryParameters.SORT_PARAM) @DefaultValue(DEFAULT_SORT_ORDER) final String sort ) { - try (SearchEngineWrapper engine = new SearchEngineWrapper(full, def, symbol, path, hist, type)) { + try (SearchEngineWrapper engine = new SearchEngineWrapper(full, def, symbol, path, hist, type, SortOrder.get(sort))) { if (!engine.isValid()) { throw new WebApplicationException("Invalid request", Response.Status.BAD_REQUEST); @@ -119,7 +122,8 @@ private SearchEngineWrapper( final String symbol, final String path, final String hist, - final String type + final String type, + final SortOrder sortOrder ) { engine.setFreetext(full); engine.setDefinition(def); @@ -127,6 +131,7 @@ private SearchEngineWrapper( engine.setFile(path); engine.setHistory(hist); engine.setType(type); + engine.setSortOrder(sortOrder); } public List search( diff --git a/opengrok-web/src/main/java/org/opengrok/web/api/v1/filter/IncomingFilter.java b/opengrok-web/src/main/java/org/opengrok/web/api/v1/filter/IncomingFilter.java index 20848148acb..495d59555cd 100644 --- a/opengrok-web/src/main/java/org/opengrok/web/api/v1/filter/IncomingFilter.java +++ b/opengrok-web/src/main/java/org/opengrok/web/api/v1/filter/IncomingFilter.java @@ -66,7 +66,7 @@ public class IncomingFilter implements ContainerRequestFilter, ConfigurationChan /** * Endpoint paths that are exempted from this filter. * @see SearchController#search(HttpServletRequest, String, String, String, String, String, String, - * java.util.List, int, int) + * java.util.List, int, int, String) * @see SuggesterController#getSuggestions(org.opengrok.web.api.v1.suggester.model.SuggesterQueryData) * @see SuggesterController#getConfig() */