biome.json (5 changes: 3 additions & 2 deletions)

@@ -1,7 +1,8 @@
 {
-"$schema": "https://biomejs.dev/schemas/2.0.6/schema.json",
+"$schema": "https://biomejs.dev/schemas/2.2.4/schema.json",
 "files": {
-"ignoreUnknown": true
+"ignoreUnknown": true,
+"includes": ["**", "!**/src/tests/__fixtures__"]
 },
 "vcs": {
 "enabled": true,

src/commands/sessions.ts (6 changes: 4 additions & 2 deletions)

@@ -3,6 +3,8 @@ import type {
 ProcessingSummaryResult,
 } from "@/core/ProcessingPipeline";
 import type { CrawlMetadata, FieldExtractionStats } from "@/core/types";
+import type { ContentFieldName } from "@/crawlers/extractors/ContentPageExtractor";
+import type { ListingFieldName } from "@/crawlers/extractors/ListingPageExtractor";
 import type {
 ContentMetadata,
 CrawlSession,

@@ -206,7 +208,7 @@ function calculateFieldStats(
 ): FieldExtractionStats[] {
 if (contents.length === 0) return [];

-const fields = [
+const fields: Array<{ name: ListingFieldName; isOptional: boolean }> = [
 { name: "title", isOptional: false },
 { name: "url", isOptional: false },
 { name: "author", isOptional: true },

@@ -251,7 +253,7 @@ function calculateContentFieldStats(

 return [
 {
-fieldName: "content",
+fieldName: "content" as ContentFieldName,
 successCount,
 totalAttempts: contents.length,
 isOptional: false,
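
The notable change in calculateFieldStats is that the field list is now annotated, so each name is checked against the ListingFieldName string-literal union instead of widening to plain string. A minimal sketch of the pattern, with a hypothetical union standing in for the real export from ListingPageExtractor (its exact members are an assumption here):

// Hypothetical stand-in for the union exported by ListingPageExtractor.
type ListingFieldName = "title" | "url" | "author";

interface FieldCheck {
  name: ListingFieldName;
  isOptional: boolean;
}

// With the annotation, a typo such as "tittle" is a compile-time error
// rather than a silently accepted string.
const fields: FieldCheck[] = [
  { name: "title", isOptional: false },
  { name: "url", isOptional: false },
  { name: "author", isOptional: true },
];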

src/core/ProcessingPipeline.ts (5 changes: 3 additions & 2 deletions)

@@ -1,4 +1,5 @@
 import type {
+ContentSessionLinker,
 CrawledData,
 CrawlerRegistry,
 CrawlOptions,

@@ -38,7 +39,7 @@ export interface ProcessingPipeline {
 async function handleItemStorage(
 data: CrawledData,
 contentStore: ContentStore,
-metadataTracker?: any,
+metadataTracker?: ContentSessionLinker,
 ): Promise<ProcessedData> {
 try {
 const storageResult = await contentStore.store(data);

@@ -79,7 +80,7 @@ async function handleItemStorage(
 async function processPageItems(
 items: CrawledData[],
 contentStore: ContentStore,
-metadataTracker?: any,
+metadataTracker?: ContentSessionLinker,
 ): Promise<ProcessedData[]> {
 const processedData: ProcessedData[] = [];

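
Both helpers previously accepted metadataTracker?: any; giving it the ContentSessionLinker type means every use of the tracker is now checked by the compiler. A rough sketch of the benefit, using a hypothetical linkContentToSession method (the real interface in core/types may declare different members):

// Hypothetical shape; the real ContentSessionLinker lives in @/core/types.
interface ContentSessionLinker {
  linkContentToSession(contentHash: string): void;
}

// Optional chaining keeps the tracker optional, while a misspelled method
// name is now a compile-time error instead of a runtime TypeError.
function recordStoredItem(contentHash: string, metadataTracker?: ContentSessionLinker): void {
  metadataTracker?.linkContentToSession(contentHash);
}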

src/core/types.ts (6 changes: 1 addition & 5 deletions)

@@ -164,11 +164,7 @@ export interface CrawlMetadata {
 contentFieldStats: FieldExtractionStats[];
 listingErrors: string[];
 contentErrors: string[];
-stoppedReason?:
-| "max_pages"
-| "no_next_button"
-| "all_duplicates"
-| "process_interrupted";
+stoppedReason?: StoppedReason;
 }

 // Interface for junction table linking
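
The inline union on CrawlMetadata is collapsed into a reference to a named StoppedReason alias, which presumably lives elsewhere in types.ts. A sketch of that pattern, reusing the literals removed above (the helper function is purely illustrative):

// Named alias for the stop reasons previously spelled out inline.
type StoppedReason =
  | "max_pages"
  | "no_next_button"
  | "all_duplicates"
  | "process_interrupted";

// The alias can now be shared by anything that produces or inspects a stop reason.
function isUserInitiatedStop(reason: StoppedReason): boolean {
  return reason === "process_interrupted";
}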

src/tests/commands/crawl.test.ts (8 changes: 6 additions & 2 deletions)

@@ -1,6 +1,7 @@
 import { describe, expect, it, vi } from "vitest";
 import { crawlWithOptions } from "@/commands/crawl";
 import { sources } from "@/config/sources/index.js";
+import type { ProcessingPipeline } from "@/core/ProcessingPipeline";
 import { ERROR_MESSAGES } from "@/ui/constants";
 import { displaySources } from "@/ui/formatter";
 import { validatePositiveIntegerOrEmpty } from "@/ui/utils";

@@ -96,8 +97,11 @@ describe("Crawl Command Validation", () => {
 describe("crawlWithOptions", () => {
 it("should display available sources when source not found", async () => {
 // Mock pipeline
-const mockPipeline = {
+const mockPipeline: ProcessingPipeline = {
 processSummary: vi.fn(),
+process: vi.fn(),
+getMetadataStore: vi.fn(),
+getContentStore: vi.fn(),
 };

 // Mock console.log

@@ -106,7 +110,7 @@ describe("crawlWithOptions", () => {
 // Call crawlWithOptions with non-existent source
 const result = await crawlWithOptions(
 { source: "non-existent-source" },
-mockPipeline as any,
+mockPipeline,
 );

 // Verify the error message and available sources are displayed
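
Annotating the mock as ProcessingPipeline is what drives the extra vi.fn() stubs: the object literal must now satisfy every member of the interface, and the `as any` at the crawlWithOptions call site becomes unnecessary. A minimal sketch under an assumed four-method interface (the real one may have richer signatures):

import { vi } from "vitest";

// Assumed shape, mirroring the members stubbed in the test above.
interface ProcessingPipeline {
  process(...args: unknown[]): unknown;
  processSummary(...args: unknown[]): unknown;
  getMetadataStore(): unknown;
  getContentStore(): unknown;
}

// Because the annotation sits on the variable, leaving out a member is a
// compile error, and the mock passes type checking wherever a real
// ProcessingPipeline is expected.
const mockPipeline: ProcessingPipeline = {
  process: vi.fn(),
  processSummary: vi.fn(),
  getMetadataStore: vi.fn(),
  getContentStore: vi.fn(),
};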

src/tests/commands/sessions.test.ts (2 changes: 1 addition & 1 deletion)

@@ -1,7 +1,7 @@
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import { handleSessions } from "@/commands/sessions";
 import type { ProcessingPipeline } from "@/core/ProcessingPipeline";
-import type { CrawlSession, MetadataStore } from "@/storage/index";
+import type { CrawlSession, MetadataStore } from "@/storage/MetadataStore";

 // Mock inquirer
 const mockPrompt = vi.fn();

src/tests/crawlers/extractors/BrowserFieldExtractor.test.ts (28 changes: 14 additions & 14 deletions)

@@ -26,7 +26,7 @@ describe("BrowserFieldExtractor", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".container",

@@ -71,13 +71,13 @@ describe("BrowserFieldExtractor", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 global.window = {
 location: {
 href: "https://example.com/page",
 },
-} as any;
+} as unknown as typeof window;

 const result = extractionFunction({
 container_selector: ".container",

@@ -98,10 +98,10 @@

 // Mock browser context - container not found
 global.document = {
-querySelector: vi.fn().mockImplementation((selector: string) => {
+querySelector: vi.fn().mockImplementation((_selector: string) => {
 return null; // Always return null to simulate missing container
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".nonexistent",

@@ -124,7 +124,7 @@

 // Mock browser context - container exists but field element doesn't
 const mockContainer = {
-querySelector: vi.fn().mockImplementation((selector: string) => {
+querySelector: vi.fn().mockImplementation((_selector: string) => {
 return null; // Always return null for field elements
 }),
 };

@@ -136,7 +136,7 @@
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".container",

@@ -170,7 +170,7 @@ describe("BrowserFieldExtractor", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".container",

@@ -203,7 +203,7 @@ describe("BrowserFieldExtractor", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".container",

@@ -249,13 +249,13 @@ describe("BrowserFieldExtractor", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 global.window = {
 location: {
 href: "https://example.com/page",
 },
-} as any;
+} as unknown as typeof window;

 const result = extractionFunction({
 container_selector: ".container",

@@ -296,7 +296,7 @@ describe("BrowserFieldExtractor", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".container",

@@ -342,7 +342,7 @@ describe("BrowserFieldExtractor", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".container",

@@ -392,7 +392,7 @@ describe("BrowserFieldExtractor", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".container",

(additional test file; path not shown: "BrowserFieldExtractor - Whitespace handling")

@@ -23,7 +23,7 @@ describe("BrowserFieldExtractor - Whitespace handling", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".container",

@@ -74,7 +74,7 @@ describe("BrowserFieldExtractor - Whitespace handling", () => {
 }
 return null;
 }),
-} as any;
+} as unknown as Document;

 const result = extractionFunction({
 container_selector: ".container",
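
Every change in BrowserFieldExtractor.test.ts and in the whitespace-handling suite above replaces an `as any` on a partial browser stub with a double assertion: `as unknown as Document` for the document stub and `as unknown as typeof window` for the window stub. A small sketch of the pattern, assuming a Vitest setup with DOM lib types available:

import { vi } from "vitest";

// Only querySelector is stubbed, so a direct `as Document` would be rejected.
// Going through `unknown` keeps the cast explicit without reaching for `any`.
global.document = {
  querySelector: vi.fn().mockReturnValue(null),
} as unknown as Document;

global.window = {
  location: { href: "https://example.com/page" },
} as unknown as typeof window;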

src/tests/crawlers/extractors/ConcurrentContentExtractor.test.ts (14 changes: 6 additions & 8 deletions)

@@ -1,4 +1,3 @@
-import type { Page } from "puppeteer";
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import type {
 CrawledData,

@@ -14,7 +13,6 @@ describe("ConcurrentContentExtractor", () => {
 let mockBrowserHandler: BrowserHandler;
 let mockExtractContentForSingleItem: ReturnType<typeof vi.fn>;
 let concurrentExtractor: ReturnType<typeof createConcurrentContentExtractor>;
-let mockPage: Page;
 let mockMetadataStore: MetadataStore;

 const mockConfig: SourceConfig = {

@@ -68,19 +66,19 @@ describe("ConcurrentContentExtractor", () => {
 setupNewPage: mockNewPage,
 } as unknown as BrowserHandler;

-mockPage = {
-browser: () => mockBrowserHandler,
-} as unknown as Page;
-
 // Create mock metadata store
 mockMetadataStore = {
 getExistingUrls: vi.fn().mockReturnValue(new Set()),
 } as unknown as MetadataStore;

 // Create concurrent extractor with proper dependencies
-concurrentExtractor = createConcurrentContentExtractor(mockBrowserHandler, {
+const extractorDependencies = {
 extractContentForSingleItem: mockExtractContentForSingleItem,
-});
+};
+concurrentExtractor = createConcurrentContentExtractor(
+mockBrowserHandler,
+extractorDependencies,
+);
 });

 describe("extractConcurrently", () => {

src/tests/crawlers/extractors/ContentPageExtractor.test.ts (2 changes: 1 addition & 1 deletion)

@@ -457,7 +457,7 @@ describe("ContentPageExtractor", () => {

 it("should handle missing URL gracefully", async () => {
 const mockItem: CrawledData = {
-url: undefined as any, // Intentionally missing URL
+url: undefined as unknown as string, // Intentionally missing URL
 title: "Article 1",
 content: "Content 1",
 crawledAt: new Date(),

src/tests/crawlers/extractors/ListingPageExtractor.test.ts (2 changes: 1 addition & 1 deletion)

@@ -386,7 +386,7 @@ describe("ListingPageExtractor", () => {
 ...mockConfig,
 listing: {
 ...mockConfig.listing,
-shouldExcludeItem: vi.fn().mockImplementation((html, values) => {
+shouldExcludeItem: vi.fn().mockImplementation((_html, values) => {
 return values?.title === "Excluded Article";
 }),
 },
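
The rename here, like the _selector renames in BrowserFieldExtractor.test.ts, follows the widespread convention that a leading underscore marks a parameter as intentionally unused, which most linters honor when flagging unused parameters. A tiny illustration (the parameter types are only assumptions for the sketch):

// The callback must accept (html, values) to match the expected signature,
// but this implementation only reads `values`; the underscore on _html
// signals that the first argument is deliberately ignored.
const shouldExcludeItem = (_html: string, values?: { title?: string }): boolean =>
  values?.title === "Excluded Article";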