Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/config/sources/an.ts → src/config/sources/access_now.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { SourceConfig } from "@/core/types";

export const anSource: SourceConfig = {
id: "an",
export const AccessNowSource: SourceConfig = {
id: "access_now",
name: "Access Now",
type: "listing",
listing: {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { SourceConfig } from "@/core/types";

export const dukSource: SourceConfig = {
id: "duk",
export const DeclassifiedUkSource: SourceConfig = {
id: "declassified_uk",
name: "Declassified UK",
type: "listing",
disableJavascript: true,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { SourceConfig } from "@/core/types";

export const effSource: SourceConfig = {
id: "eff",
export const ElectronicFrontierFoundationSource: SourceConfig = {
id: "electronic_frontier_foundation",
name: "Electronic Frontier Foundation",
type: "listing",
listing: {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { SourceConfig } from "@/core/types";

export const fpfSource: SourceConfig = {
id: "fpf",
export const FreedomPressFoundationSource: SourceConfig = {
id: "freedom_press_foundation",
name: "Freedom of the Press Foundation",
type: "listing",
listing: {
Expand Down
28 changes: 14 additions & 14 deletions src/config/sources/index.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import type { SourceConfig } from "@/core/types";
import { anSource } from "./an.js";
import { dukSource } from "./duk.js";
import { effSource } from "./eff.js";
import { fpfSource } from "./fpf.js";
import { lpeSource } from "./lpe.js";
import { p2pSource } from "./p2p.js";
import { tfSource } from "./tf.js";
import { AccessNowSource } from "./access_now.js";
import { DeclassifiedUkSource } from "./declassified_uk.js";
import { ElectronicFrontierFoundationSource } from "./electronic_frontier_foundation.js";
import { FreedomPressFoundationSource } from "./freedom_press_foundation.js";
import { LogosPressEngineSource } from "./logos_press_engine.js";
import { P2pFoundationSource } from "./p2p_foundation.js";
import { TorrentFreakSource } from "./torrent_freak.js";

export const sources: SourceConfig[] = [
effSource,
fpfSource,
lpeSource,
p2pSource,
dukSource,
tfSource,
anSource,
ElectronicFrontierFoundationSource,
FreedomPressFoundationSource,
LogosPressEngineSource,
P2pFoundationSource,
DeclassifiedUkSource,
TorrentFreakSource,
AccessNowSource,
];
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { SourceConfig } from "@/core/types";

export const lpeSource: SourceConfig = {
id: "lpe",
export const LogosPressEngineSource: SourceConfig = {
id: "logos_press_engine",
name: "Logos Press Engine",
type: "listing",
listing: {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { SourceConfig } from "@/core/types";

export const p2pSource: SourceConfig = {
id: "p2p",
export const P2pFoundationSource: SourceConfig = {
id: "p2p_foundation",
name: "P2P Foundation",
type: "listing",
disableJavascript: true,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { SourceConfig } from "@/core/types";

export const tfSource: SourceConfig = {
id: "tf",
export const TorrentFreakSource: SourceConfig = {
id: "torrent_freak",
name: "TorrentFreak",
type: "listing",
listing: {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import { anSource as config } from "@/config/sources/an.js";
import { AccessNowSource as config } from "@/config/sources/access_now.js";
import { createContentPageExtractor } from "@/crawlers/extractors/ContentPageExtractor";
import { createListingPageExtractor } from "@/crawlers/extractors/ListingPageExtractor";
import type { BrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { createBrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { navigateToNextPage } from "@/crawlers/handlers/PaginationHandler";
import fixture4 from "@/tests/__fixtures__/an/biden-digital-rights";
import fixture2 from "@/tests/__fixtures__/an/kenya-sim-card-biometrics";
import fixture1 from "@/tests/__fixtures__/an/russias-record-war-on-connectivity";
import fixture3 from "@/tests/__fixtures__/an/vodafone-challenged-release-transparency-report";
import fixture4 from "@/tests/__fixtures__/access_now/biden-digital-rights";
import fixture2 from "@/tests/__fixtures__/access_now/kenya-sim-card-biometrics";
import fixture1 from "@/tests/__fixtures__/access_now/russias-record-war-on-connectivity";
import fixture3 from "@/tests/__fixtures__/access_now/vodafone-challenged-release-transparency-report";

const ifDescribe = process.env.INT_TEST === "true" ? describe : describe.skip;

Expand All @@ -24,7 +24,7 @@ ifDescribe("Access Now integration tests", () => {
await browser.close();
});

it("should crawl AN listing page", async () => {
it("should crawl Access Now listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
const extractor = createListingPageExtractor();
const result = await extractor.extractItemsFromPage(page, config, [], 0);
Expand All @@ -34,12 +34,12 @@ ifDescribe("Access Now integration tests", () => {
expect(result.items.every((item) => !!item.publishedDate)).toBeTruthy();
});

it("should crawl to next AN listing page", async () => {
it("should crawl to next Access Now listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
expect(await navigateToNextPage(page, config)).toBeTruthy();
});

it("should crawl multiple AN content pages", async () => {
it("should crawl multiple Access Now content pages", async () => {
const testCases = [
{
url: "https://www.accessnow.org/russias-record-war-on-connectivity/",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import { dukSource as config } from "@/config/sources/duk.js";
import { DeclassifiedUkSource as config } from "@/config/sources/declassified_uk.js";
import { createContentPageExtractor } from "@/crawlers/extractors/ContentPageExtractor";
import { createListingPageExtractor } from "@/crawlers/extractors/ListingPageExtractor";
import type { BrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { createBrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { navigateToNextPage } from "@/crawlers/handlers/PaginationHandler";
import fixture4 from "@/tests/__fixtures__/duk/genocide-questions-avoided-as-starmer-meets-israeli-president";
import fixture1 from "@/tests/__fixtures__/duk/how-the-uk-security-services-neutralised-the-countrys-leading-liberal-newspaper";
import fixture3 from "@/tests/__fixtures__/duk/maersk-the-shipping-company-transporting-arms-to-israel";
import fixture2 from "@/tests/__fixtures__/duk/rishi-sunaks-mission-creep-in-yemen";
import fixture4 from "@/tests/__fixtures__/declassified_uk/genocide-questions-avoided-as-starmer-meets-israeli-president";
import fixture1 from "@/tests/__fixtures__/declassified_uk/how-the-uk-security-services-neutralised-the-countrys-leading-liberal-newspaper";
import fixture3 from "@/tests/__fixtures__/declassified_uk/maersk-the-shipping-company-transporting-arms-to-israel";
import fixture2 from "@/tests/__fixtures__/declassified_uk/rishi-sunaks-mission-creep-in-yemen";

const ifDescribe = process.env.INT_TEST === "true" ? describe : describe.skip;

Expand All @@ -24,7 +24,7 @@ ifDescribe("Declassified UK integration tests", () => {
await browser.close();
});

it("should crawl DUK listing page", async () => {
it("should crawl Declassified UK listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
const extractor = createListingPageExtractor();
const result = await extractor.extractItemsFromPage(page, config, [], 0);
Expand All @@ -34,12 +34,12 @@ ifDescribe("Declassified UK integration tests", () => {
expect(result.items.every((item) => !!item.publishedDate)).toBeTruthy();
});

it("should crawl to next DUK listing page", async () => {
it("should crawl to next Declassified UK listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
expect(await navigateToNextPage(page, config)).toBeTruthy();
});

it("should crawl multiple DUK content pages", async () => {
it("should crawl multiple Declassified UK content pages", async () => {
const testCases = [
{
url: "https://www.declassifieduk.org/how-the-uk-security-services-neutralised-the-countrys-leading-liberal-newspaper/",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import { effSource as config } from "@/config/sources/eff.js";
import { ElectronicFrontierFoundationSource as config } from "@/config/sources/electronic_frontier_foundation.js";
import { createContentPageExtractor } from "@/crawlers/extractors/ContentPageExtractor";
import { createListingPageExtractor } from "@/crawlers/extractors/ListingPageExtractor";
import type { BrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { createBrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { navigateToNextPage } from "@/crawlers/handlers/PaginationHandler";
import fixture3 from "@/tests/__fixtures__/eff/21-44";
import fixture1 from "@/tests/__fixtures__/eff/eff-awards-spotlight-software-freedom-law-center-india";
import fixture2 from "@/tests/__fixtures__/eff/eff-commerce-department-we-must-revise-overbroad-export-control-proposal";
import fixture4 from "@/tests/__fixtures__/eff/trailblazing-tech-scholar-danah-boyd-groundbreaking-cyberpunk-author-william-gibson";
import fixture5 from "@/tests/__fixtures__/eff/wiring-big-brother-machine";
import fixture3 from "@/tests/__fixtures__/electronics_frontier_foundation/21-44";
import fixture1 from "@/tests/__fixtures__/electronics_frontier_foundation/eff-awards-spotlight-software-freedom-law-center-india";
import fixture2 from "@/tests/__fixtures__/electronics_frontier_foundation/eff-commerce-department-we-must-revise-overbroad-export-control-proposal";
import fixture4 from "@/tests/__fixtures__/electronics_frontier_foundation/trailblazing-tech-scholar-danah-boyd-groundbreaking-cyberpunk-author-william-gibson";
import fixture5 from "@/tests/__fixtures__/electronics_frontier_foundation/wiring-big-brother-machine";

const ifDescribe = process.env.INT_TEST === "true" ? describe : describe.skip;

ifDescribe("Electronics Foundation integration tests", () => {
ifDescribe("Electronic Frontier Foundation integration tests", () => {
let browser: BrowserHandler;
vi.setConfig({ testTimeout: 60000 });

Expand All @@ -25,7 +25,7 @@ ifDescribe("Electronics Foundation integration tests", () => {
await browser.close();
});

it("should crawl EFF listing page", async () => {
it("should crawl Electronic Frontier Foundation listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
const extractor = createListingPageExtractor();
const result = await extractor.extractItemsFromPage(page, config, [], 0);
Expand All @@ -35,12 +35,12 @@ ifDescribe("Electronics Foundation integration tests", () => {
expect(result.items.every((item) => !!item.publishedDate)).toBeTruthy();
});

it("should crawl to next EFF listing page", async () => {
it("should crawl to next Electronic Frontier Foundation listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
expect(await navigateToNextPage(page, config)).toBeTruthy();
});

it("should crawl multiple EFF content pages", async () => {
it("should crawl multiple Electronic Frontier Foundation content pages", async () => {
const testCases = [
{
url: "https://www.eff.org/deeplinks/2025/08/eff-awards-spotlight-software-freedom-law-center-india",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import { fpfSource as config } from "@/config/sources/fpf.js";
import { FreedomPressFoundationSource as config } from "@/config/sources/freedom_press_foundation.js";
import { createContentPageExtractor } from "@/crawlers/extractors/ContentPageExtractor";
import { createListingPageExtractor } from "@/crawlers/extractors/ListingPageExtractor";
import type { BrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { createBrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { navigateToNextPage } from "@/crawlers/handlers/PaginationHandler";
import fixture1 from "@/tests/__fixtures__/fpf/a-massive-failure-in-kansas-two-years-since-the-marion-county-record-raid";
import fixture2 from "@/tests/__fixtures__/fpf/how-aaron-swartz-fought-for-government-transparency";
import fixture3 from "@/tests/__fixtures__/fpf/new-election-blog-catalogs-media-suppression-by-candidates-campaigns";
import fixture4 from "@/tests/__fixtures__/fpf/prosecutor-puts-doge-ahead-of-first-amendment";
import fixture1 from "@/tests/__fixtures__/freedom_press_foundation/a-massive-failure-in-kansas-two-years-since-the-marion-county-record-raid";
import fixture2 from "@/tests/__fixtures__/freedom_press_foundation/how-aaron-swartz-fought-for-government-transparency";
import fixture3 from "@/tests/__fixtures__/freedom_press_foundation/new-election-blog-catalogs-media-suppression-by-candidates-campaigns";
import fixture4 from "@/tests/__fixtures__/freedom_press_foundation/prosecutor-puts-doge-ahead-of-first-amendment";

const ifDescribe = process.env.INT_TEST === "true" ? describe : describe.skip;

ifDescribe("Freedom Press integration tests", () => {
ifDescribe("Freedom Press Foundation integration tests", () => {
let browser: BrowserHandler;
vi.setConfig({ testTimeout: 60000 });

Expand All @@ -24,7 +24,7 @@ ifDescribe("Freedom Press integration tests", () => {
await browser.close();
});

it("should crawl FPF listing page", async () => {
it("should crawl Freedom Press Foundation listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
const extractor = createListingPageExtractor();
const result = await extractor.extractItemsFromPage(page, config, [], 0);
Expand All @@ -34,12 +34,12 @@ ifDescribe("Freedom Press integration tests", () => {
expect(result.items.every((item) => !!item.publishedDate)).toBeTruthy();
});

it("should crawl to next FPF listing page", async () => {
it("should crawl to next Freedom Press Foundation listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
expect(await navigateToNextPage(page, config)).toBeTruthy();
});

it("should crawl multiple FPF content pages", async () => {
it("should crawl multiple Freedom Press Foundation content pages", async () => {
const testCases = [
{
url: "https://freedom.press/issues/a-massive-failure-in-kansas-two-years-since-the-marion-county-record-raid/",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import { lpeSource as config } from "@/config/sources/lpe.js";
import { LogosPressEngineSource as config } from "@/config/sources/logos_press_engine.js";
import { createContentPageExtractor } from "@/crawlers/extractors/ContentPageExtractor";
import { createListingPageExtractor } from "@/crawlers/extractors/ListingPageExtractor";
import type { BrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { createBrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import fixture1 from "@/tests/__fixtures__/lpe/august-2025";
import fixture3 from "@/tests/__fixtures__/lpe/keycard-manifesto";
import fixture2 from "@/tests/__fixtures__/lpe/logos-a-declaration-of-independence-in-cyberspace";
import fixture1 from "@/tests/__fixtures__/logos_press_engine/august-2025";
import fixture3 from "@/tests/__fixtures__/logos_press_engine/keycard-manifesto";
import fixture2 from "@/tests/__fixtures__/logos_press_engine/logos-a-declaration-of-independence-in-cyberspace";

const ifDescribe = process.env.INT_TEST === "true" ? describe : describe.skip;

ifDescribe("Logos integration tests", () => {
ifDescribe("Logos Press Engine integration tests", () => {
let browser: BrowserHandler;
vi.setConfig({ testTimeout: 60000 });

Expand All @@ -22,7 +22,7 @@ ifDescribe("Logos integration tests", () => {
await browser.close();
});

it("should crawl LPE listing page", async () => {
it("should crawl Logos Press Engine listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
const extractor = createListingPageExtractor();
const result = await extractor.extractItemsFromPage(page, config, [], 0);
Expand All @@ -34,12 +34,12 @@ ifDescribe("Logos integration tests", () => {
});

// Logos Press Engine has only 1 page for now
/* it("should crawl to next LPE listing page", async () => {
/* it("should crawl to next Logos Press Engine listing page", async () => {
const page = await setupPage(browser, config.listing.url);
expect(await navigateToNextPage(page, config)).toBeTruthy();
}); */

it("should crawl multiple LPE content pages", async () => {
it("should crawl multiple Logos Press Engine content pages", async () => {
const testCases = [
{
url: "https://press.logos.co/article/august-2025",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import { p2pSource as config } from "@/config/sources/p2p.js";
import { P2pFoundationSource as config } from "@/config/sources/p2p_foundation.js";
import { createContentPageExtractor } from "@/crawlers/extractors/ContentPageExtractor";
import { createListingPageExtractor } from "@/crawlers/extractors/ListingPageExtractor";
import type { BrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { createBrowserHandler } from "@/crawlers/handlers/BrowserHandler";
import { navigateToNextPage } from "@/crawlers/handlers/PaginationHandler";
import fixture2 from "@/tests/__fixtures__/p2p/book-of-the-day-abundance-the-future-is-better-than-you-think";
import fixture3 from "@/tests/__fixtures__/p2p/great-transition-alternative-paths-better-climate-just-future";
import fixture4 from "@/tests/__fixtures__/p2p/take-back-the-app-a-dialogue-on-platform-cooperativism-free-software-and-discos";
import fixture1 from "@/tests/__fixtures__/p2p/trusting-google-or-not";
import fixture2 from "@/tests/__fixtures__/p2p_foundation/book-of-the-day-abundance-the-future-is-better-than-you-think";
import fixture3 from "@/tests/__fixtures__/p2p_foundation/great-transition-alternative-paths-better-climate-just-future";
import fixture4 from "@/tests/__fixtures__/p2p_foundation/take-back-the-app-a-dialogue-on-platform-cooperativism-free-software-and-discos";
import fixture1 from "@/tests/__fixtures__/p2p_foundation/trusting-google-or-not";

const ifDescribe = process.env.INT_TEST === "true" ? describe : describe.skip;

Expand All @@ -24,7 +24,7 @@ ifDescribe("P2P Foundation integration tests", () => {
await browser.close();
});

it("should crawl P2P listing page", async () => {
it("should crawl P2P Foundation listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
const extractor = createListingPageExtractor();
const result = await extractor.extractItemsFromPage(page, config, [], 0);
Expand All @@ -34,12 +34,12 @@ ifDescribe("P2P Foundation integration tests", () => {
expect(result.items.every((item) => !!item.publishedDate)).toBeTruthy();
});

it("should crawl to next P2P listing page", async () => {
it("should crawl to next P2P Foundation listing page", async () => {
const page = await browser.setupNewPage(config.listing.url);
expect(await navigateToNextPage(page, config)).toBeTruthy();
});

it("should crawl multiple P2P content pages", async () => {
it("should crawl multiple P2P Foundation content pages", async () => {
const testCases = [
{
url: "https://blog.p2pfoundation.net/trusting-google-or-not/",
Expand Down
Loading