From ac3585644cf99b5bc58324161f4e3e690144176a Mon Sep 17 00:00:00 2001 From: apoint123 <108002475+apoint123@users.noreply.github.com> Date: Wed, 31 Dec 2025 21:07:09 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E2=9C=A8=20feat:=20=E6=9B=B4=E5=A5=BD?= =?UTF-8?q?=E7=9A=84=E5=85=83=E6=95=B0=E6=8D=AE=E6=B8=85=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/assets/data/exclude.ts | 365 ++++++++++++++++++++++++++------ src/core/player/LyricManager.ts | 95 +++++---- src/utils/lyricStripper.ts | 340 +++++++++++++++++++++++++++++ 3 files changed, 703 insertions(+), 97 deletions(-) create mode 100644 src/utils/lyricStripper.ts diff --git a/src/assets/data/exclude.ts b/src/assets/data/exclude.ts index 185b75683..13d9ceb74 100644 --- a/src/assets/data/exclude.ts +++ b/src/assets/data/exclude.ts @@ -1,78 +1,321 @@ +/** + * @fileoverview 跟元数据清理器有关的配置内容 + */ + +/** + * 默认的关键字列表,不包含冒号 + * + * 修改这里的列表会自动同步到用户的设置里面 + */ export const keywords = [ - "词:", - "曲:", "作曲", "作词", "编曲", - "翻唱", - "制作", - "调教", - "混音", + "演唱", + "歌手", + "歌名", + "专辑", + "发行", + "出品", "监制", - "音响", - "文案", - "音统", - "策划", "录音", - "制作", - "贝斯", - "工程", - "吉他", - "合成器", - "助理", - "编程", - "和声", - "合声", + "混音", "母带", - "人声", + "吉他", + "贝斯", "鼓", - "混音", - "提琴:", - "曲绘", - "视频", - "编写", - "钢琴:", - "出版", - "发行", - "出品", "键盘", "弦乐", - "设计", - "监制", - "原曲", - "演唱", - "声明", + "和声", "版权", - "封面", - "插画", - "统筹", - "企划", - "填词", + "制作人", "原唱", - "后期", - "和音", - "和声", - "琵琶", - "二胡", - "笛子", - "古筝", - "主唱", - "伴唱", - "编排", - "剧务", - "音效", - "录音", - "纯音乐", - "缩混", - "音乐总监", + "翻唱", + "词", + "曲", + "发行人", + "发行公司", + "宣推", + "录音制作", + "制作发行", + "制作团队", "音乐制作", - "Talkbox", - "Producers", + "录音师", + "混音工程师", + "混音师", + "母带工程师", + "母带处理工程师", + "制作统筹", + "艺术指导", + "出品团队", + "发行方", + "和声编写", + "封面设计", + "策划", + "营销推广", + "总策划", + "特别鸣谢", + "出品人", + "出品公司", + "联合出品", + "词曲提供", + "制作公司", + "推广策划", + "乐器演奏", + "钢琴/合成器演奏", + "钢琴演奏", + "合成器演奏", + "弦乐编写", + "弦乐监制", + "第一小提琴", + "第二小提琴", + "中提琴", + "大提琴", + "弦乐录音师", + "弦乐录音室", + "和声演唱", + "录/混音", + "制作助理", + "和音", + "乐队统筹", + "维伴音乐", + "灯光设计", + "配唱制作人", + "文案", + "设计", + "策划统筹", + "企划宣传", + "企划营销", + "录音室", + "混音室", + "母带后期制作人", + "母带后期处理工程师", + "母带后期处理录音室", + "鸣谢", + "工作室", + "特别企划", + "音频编辑", + "词曲协力", + "制作公司/OP", + "企划", + "宣传", + "统筹", + "推广", + "宣传/推广", + "出品/发行", + "出品/发行方", + "封面", + "总企划", + "混缩", + "联合策划", + "联合推广", + "题记", + "项目统筹", + "营销推广/出品/发行", + "出品/发行公司", + "音乐制作发行", + "本歌曲商用授权,前往小程序", + "特别鸣谢/艺人支持", + "出品、发行", + "推广宣传", + "OP、SP", + "和声/和声设计", + "合声", + "合声编写", + "OP", + "SP", + "Artist", + "Songs Title", + "Lyrics by", + "Composed by", + "Produced by", + "Published by", + "Vocals by", + "Background Vocals by", + "Additional Vocal by", + "Mixing Engineer", + "Mastered by", + "Executive Producer", + "Vocal Engineer", + "Vocals Produced by", + "Recorded at", + "Repertoire Owner", + "Co-Producer", + "Mastering Engineer", + "Written by", + "Lyrics", + "Composer", + "Arranged By", + "Record Producer", + "Guitar", + "Music Production", + "Recording Engineer", + "Backing Vocal", + "Art Director", + "Chief Producer", + "Production Team", + "Publisher", + "Lyricist", + "Arranger", "Producer", - "Produced", + "Backing Vocals", + "Backing Vocals Design", + "Cover Design", + "Planner", + "Marketing Promotion", + "Chref Planner", + "Acknowledgement", + "Production Company", + "Jointly Produced by", + "Co-production", + "Presenter", + "Presented by", + "Co-produced by", + "Lyrics and Composition Provided by", + "Music and Lyrics Provided by", + "Lyrics & Composition Provided by", + "Words and Music by", + "Distribution", + "Release", + "Distributed by", + "Released by", + "Produce Company", + "Promotion Planning", + "Marketing Strategy", + "Promotion Strategy", + "Strings", + "First Violin", + "Second Violin", + "Viola", + "Cello", + "Vocal Producer", + "Supervised production", + "Copywriting", + "Design", + "Planner and coordinator", + "Propaganda", + "Arrangement", + "Guitars", + "Bass", + "Drums", + "Backing Vocal Arrangement", + "Strings Arrangement", + "Recording Studio", + "OP/发行", + "混音/母带工程师", + "OP/SP", + "词Lyrics", + "曲Composer", + "编曲Arranged By", + "制作人Record Producer", + "吉他Guitar", + "音乐制作Music Production", + "录音师Recording Engineer", + "混音工程师Mixing Engineer", + "母带工程师Mastering Engineer", + "和声Backing Vocal", + "制作统筹Executive Producer", + "艺术指导Art Director", + "监制Chief Producer", + "出品团队Production Team", + "发行方Publisher", + "词Lyricist", + "编曲Arranger", + "制作人Producer", + "和声Backing Vocals", + "和声编写Backing Vocals Design", + "混音Mixing Engineer", + "封面设计Cover Design", + "策划Planner", + "营销推广Marketing Promotion", + "总策划Chref Planner", + "特别鸣谢Acknowledgement", + "出品人Chief Producer", + "出品公司Production Company", + "联合出品Co-produced by", + "联合出品Jointly Produced by", + "联合出品Co-production", + "出品方Presenter", + "出品方Presented by", + "词曲提供Lyrics and Composition Provided by", + "词曲提供Music and Lyrics Provided by", + "词曲提供Lyrics & Composition Provided by", + "词曲提供Words and Music by", + "发行Distribution", + "发行Release", + "发行Distributed by", + "发行Released by", + "制作公司Produce Company", + "推广策划Promotion Planning", + "推广策划Marketing Strategy", + "推广策划Promotion Strategy", + "弦乐 Strings", + "第一小提琴 First Violin", + "第二小提琴 Second Violin", + "中提琴 Viola", + "大提琴 Cello", + "配唱制作人Vocal Producer", + "监制Supervised production", + "文案Copywriting", + "设计Design", + "策划统筹Planner and coordinator", + "企划宣传Propaganda", + "编曲Arrangement", + "吉他Guitars", + "贝斯Bass", + "鼓Drums", + "和声编写Backing Vocal Arrangement", + "弦乐编写Strings Arrangement", + "录音室Recording Studio", + "混音室Mixing Studio", + "母带后期制作人Mastering Producer", + "母带后期处理工程师Mastering Engineer", + "母带后期处理录音室Mastering Studio", + "制作人Music Producer", + "统筹Planning", + "营销推广Marketing", + "出品人Producer", + "企划Planning", + "监制Executive Director", + "统筹Coordinator", + "出品人Publisher", + "出品Present by", + "词 Lyricist", + "曲 Composer", + "编曲 Arranger", + "制作人 Producer", + "配唱制作人 Vocals Producer", + "吉他 Guitar", + "和声 Backing Vocals", + "混音 Mixing Engineer", + "录音 Recording Engineer", + "录音室 Recording Studio", + "封面设计 Cover Design", + "策划 Planner", + "推广营销 Marketing Promotion", + "总策划 Chief Planner", + "出品人 Publisher", + "制作公司 Production", + "项目统筹 Project Coordinator", + "推广统筹 Promotion Coordinator", + "监制 Deputy Executive Producer", + "总监制 Chief Executive Producer", + "企划制作A&R Planning", + "音乐项目总监Project Executive", + "联合出品Published", + "出品人Presenter", + "封面插画Cover illustration", + "和音 Bvox", + "混音 Mixed by", + "母带 Mastered by", ]; +/** + * 默认的正则表达式 + * + * 修改这里的列表会自动同步到用户的设置里面 + */ export const regexes = [ - /^[Oo][Pp]\s*[::]/, - /^[Ss][Pp]\s*[::]/, -].map((regex) => regex.source); + "(?:【.*?音乐人.*?】|\\(.*?音乐人.*?\\)|「.*?音乐人.*?」|(.*?音乐人.*?)|『.*?音乐人.*?』)", + ".*?未经.*?不得.*?", +]; diff --git a/src/core/player/LyricManager.ts b/src/core/player/LyricManager.ts index 86604644c..68dbcc73e 100644 --- a/src/core/player/LyricManager.ts +++ b/src/core/player/LyricManager.ts @@ -1,10 +1,12 @@ -import { useStatusStore, useMusicStore, useSettingStore } from "@/stores"; import { songLyric, songLyricTTML } from "@/api/song"; +import { keywords as defaultKeywords, regexes as defaultRegexes } from "@/assets/data/exclude"; +import { useCacheManager } from "@/core/resource/CacheManager"; +import { useMusicStore, useSettingStore, useStatusStore } from "@/stores"; import { type SongLyric } from "@/types/lyric"; -import { type LyricLine, parseLrc, parseTTML, parseYrc } from "@applemusic-like-lyrics/lyric"; import { isElectron } from "@/utils/env"; -import { isEmpty } from "lodash-es"; -import { useCacheManager } from "@/core/resource/CacheManager"; +import { stripLyricMetadata } from "@/utils/lyricStripper"; +import { type LyricLine, parseLrc, parseTTML, parseYrc } from "@applemusic-like-lyrics/lyric"; +import { escapeRegExp, isEmpty } from "lodash-es"; class LyricManager { /** @@ -108,7 +110,8 @@ class LyricManager { // 同一时间的两/三行分别作为主句、翻译、音译 const toTime = (line: LyricLine) => Number(line?.startTime ?? line?.words?.[0]?.startTime ?? 0); // 获取结束时间 - const toEndTime = (line: LyricLine) => Number(line?.endTime ?? line?.words?.[line?.words?.length - 1]?.endTime ?? 0); + const toEndTime = (line: LyricLine) => + Number(line?.endTime ?? line?.words?.[line?.words?.length - 1]?.endTime ?? 0); // 取内容 const toText = (line: LyricLine) => String(line?.words?.[0]?.word || "").trim(); const lrc = lyricData.lrcData || []; @@ -318,42 +321,62 @@ class LyricManager { * @returns 处理后的歌词数据 */ private handleLyricExclude(lyricData: SongLyric): SongLyric { - const statusStore = useStatusStore(); const settingStore = useSettingStore(); + const musicStore = useMusicStore(); + const { enableExcludeLyrics, excludeKeywords, excludeRegexes } = settingStore; - // 未开启排除 + if (!enableExcludeLyrics) return lyricData; - // 处理正则表达式 - const regexes = (excludeRegexes || []).map((r: string) => new RegExp(r)); - /** - * 判断歌词是否被排除 - * @param line 歌词行 - * @returns 是否被排除 - */ - const isExcluded = (line: LyricLine) => { - const content = (line?.words || []) - .map((w) => String(w.word || "")) - .join("") - .trim(); - if (!content) return true; - return ( - (excludeKeywords || []).some((k: string) => content.includes(k)) || - regexes.some((re) => re.test(content)) - ); + + // 将设置中和默认的预定义的关键字和正则表达式合并在一起给 stripLyricMetadata,方便之后更新默认的列表 + // TODO: 建议在设置界面加一个默认的规则集以便和用户自己加的关键字分开,也方便更新默认列表 + const userKeywords = excludeKeywords || []; + const userRegexes = excludeRegexes || []; + + const mergedKeywords = [...new Set([...userKeywords, ...defaultKeywords])]; + const mergedRegexes = [...new Set([...userRegexes, ...defaultRegexes])]; + + const { name, artists } = musicStore.playSong; + const songMetadataRegexes: string[] = []; + + // 例如第一行就是 `歌手 - 歌曲名` 这样的格式,或者只有歌曲名 + if (name && name !== "未播放歌曲") { + songMetadataRegexes.push(escapeRegExp(name)); + } + + if (artists) { + if (typeof artists === "string") { + if (artists !== "未知歌手") { + songMetadataRegexes.push(escapeRegExp(artists)); + } + } else if (Array.isArray(artists)) { + artists.forEach((artist) => { + if (artist.name) { + songMetadataRegexes.push(escapeRegExp(artist.name)); + } + }); + } + } + + const options = { + keywords: mergedKeywords, + regexPatterns: mergedRegexes, + softMatchRegexes: songMetadataRegexes, }; - /** - * 过滤排除的歌词行 - * @param lines 歌词行数组 - * @returns 过滤后的歌词行数组 - */ - const filterLines = (lines: LyricLine[]) => (lines || []).filter((l) => !isExcluded(l)); + + const lrcData = stripLyricMetadata(lyricData.lrcData || [], options); + + // FIXME: 这部分逻辑有问题,因为 TTML 歌词 (硬性规定没有元数据行) 和网易云的 YRC 歌词都塞进 yrcData 了,无法区分, + // 不开排除 TTML 就不能清理 YRC 歌词 + // 暂时关掉,因为 stripLyricMetadata 应该足够稳健,不会删掉正常的歌词行 + let yrcData = lyricData.yrcData || []; + // if (statusStore.usingTTMLLyric && enableExcludeTTML) { + yrcData = stripLyricMetadata(yrcData, options); + // } + return { - lrcData: filterLines(lyricData.lrcData || []), - yrcData: - // 若当前为 TTML 且开启排除 - statusStore.usingTTMLLyric && settingStore.enableExcludeTTML - ? filterLines(lyricData.yrcData || []) - : lyricData.yrcData || [], + lrcData, + yrcData, }; } diff --git a/src/utils/lyricStripper.ts b/src/utils/lyricStripper.ts new file mode 100644 index 000000000..d6ebeaffc --- /dev/null +++ b/src/utils/lyricStripper.ts @@ -0,0 +1,340 @@ +/** + * @fileoverview 元数据行清理器 + * + * 用于清理歌词中开头和结尾的元数据行,例如: + * + * (歌曲名) - (歌手名) + * 词:... + * 曲:... + * 编曲:... + * 和声编唱:... + * 人声编辑:... + * 混音:... + * 母带:... + * 监制:... + * 出品:... + * 真正的歌词行 1 + * 真正的歌词行 2 + * + * @see https://github.com/apoint123/Unilyric/blob/afd351c54eca7137cf8ee4ea5652d9ee55c20e32/lyrics_helper_rs/src/converter/processors/metadata_stripper.rs + */ + +import { type LyricLine } from "@applemusic-like-lyrics/lyric"; + +/** + * 扫描限制配置 + */ +export interface ScanLimitConfig { + /** + * 扫描比例 (0.0 - 1.0) + */ + ratio: number; + /** + * 最小扫描行数 + */ + minLines: number; + /** + * 最大扫描行数 + */ + maxLines: number; +} + +/** + * 歌词清理配置 + */ +export interface StripOptions { + /** + * 要匹配的关键词列表 + */ + keywords: string[]; + /** + * 要匹配的正则字符串列表 + */ + regexPatterns: string[]; + /** + * 这里的正则表达式会被视为弱匹配 + */ + softMatchRegexes?: string[]; +} + +const DEFAULT_HEADER_LIMIT: ScanLimitConfig = { + ratio: 0.2, + minLines: 20, + maxLines: 70, +}; + +const DEFAULT_FOOTER_LIMIT: ScanLimitConfig = { + ratio: 0.2, + minLines: 20, + maxLines: 50, +}; + +function calculateScanLimit(config: ScanLimitConfig, totalLines: number): number { + const proportional = Math.ceil(totalLines * config.ratio); + + return Math.min(Math.min(Math.max(proportional, config.minLines), config.maxLines), totalLines); +} + +function getLineText(line: LyricLine): string { + if (!line || !line.words) return ""; + return line.words + .map((w) => w.word) + .join("") + .trim(); +} + +/** + * 清理文本以便检查 + * + * 移除行首尾的括号,例如 (作曲: xxx) -> 作曲: xxx + */ +function cleanTextForCheck(text: string): string { + let processed = text.trim(); + const brackets = [ + ["(", ")"], + ["(", ")"], + ["【", "】"], + ["[", "]"], + ["{", "}"], + ["『", "』"], + ["「", "」"], + ]; + + let changed = true; + let loopCount = 0; + + while (changed && loopCount < 5) { + changed = false; + loopCount++; + + for (const [open, close] of brackets) { + if (processed.startsWith(open)) { + // 类似于这样的格式:(作曲:周杰伦) + if (processed.endsWith(close)) { + processed = processed.slice(open.length, processed.length - close.length).trim(); + changed = true; + break; + } + + // 类似于这样的格式:(Live) 作曲:周杰伦 + const closeIdx = processed.indexOf(close); + if (closeIdx > -1) { + const contentAfter = processed.slice(closeIdx + close.length).trim(); + + if (contentAfter.length > 0) { + processed = contentAfter; + changed = true; + break; + } + } + } + } + } + return processed; +} + +// 强匹配行:匹配关键词加冒号,或者匹配正则表达式的行 +// 弱匹配行:带有冒号,但不匹配关键词或正则表达式的行。如果夹在强匹配行之间,多半是元数据行但是没有对应的规则。但也有可能是演唱者标识,“男:...”这样的 +// 真正的歌词行:既不匹配规则,又没有冒号的行,作为防火墙来阻止对之后行的移除。避免元数据在歌词中间,把中间的歌词也移除了 + +function isStrictMatch(text: string, keywords: string[], regexes: RegExp[]): boolean { + const cleaned = cleanTextForCheck(text); + const lowerCleaned = cleaned.toLowerCase(); + + for (const kw of keywords) { + const lowerKw = kw.toLowerCase(); + if (lowerCleaned.startsWith(lowerKw)) { + const remainder = lowerCleaned.slice(lowerKw.length).trim(); + if (remainder.startsWith(":") || remainder.startsWith(":")) { + return true; + } + } + } + + for (const reg of regexes) { + if (reg.test(text)) { + return true; + } + } + + return false; +} + +function looksLikeMetadata(text: string, softRegexes: RegExp[]): boolean { + const cleaned = cleanTextForCheck(text); + if ( + cleaned.includes(":") || + cleaned.includes(":") || + // 第一行的 歌曲名 - 歌手名 这样的格式 + cleaned.includes("-") + ) { + return true; + } + + for (const reg of softRegexes) { + if (reg.test(text)) { + return true; + } + } + + return false; +} + +/** + * 扫描头部,寻找正文开始的位置 + */ +function findHeaderCutoff( + lines: readonly LyricLine[], + keywords: string[], + regexes: RegExp[], + softRegexes: RegExp[], + limit: number, +): number { + let lastValidMetadataIndex = -1; + + console.groupCollapsed(`[LyricStripper] ⬇️ 开始头部扫描 (Limit: ${limit})`); + + for (let i = 0; i < limit; i++) { + if (i >= lines.length) break; + + const text = getLineText(lines[i]); + + if (!text) { + continue; + } + + const strict = isStrictMatch(text, keywords, regexes); + const weak = looksLikeMetadata(text, softRegexes); + + let status = "❌ NONE"; + if (strict) status = "✅ STRICT"; + else if (weak) status = "⚠️ WEAK"; + + console.log(`Line [${i}]: "${text}" | Result: ${status}`); + + if (!strict && !weak) { + break; + } + + if (strict) { + lastValidMetadataIndex = i; + } + } + console.groupEnd(); + + return lastValidMetadataIndex + 1; +} + +/** + * 扫描尾部,寻找正文结束的位置 + */ +function findFooterCutoff( + lines: readonly LyricLine[], + startIndex: number, + keywords: string[], + regexes: RegExp[], + softRegexes: RegExp[], + limit: number, +): number { + if (startIndex >= lines.length) return startIndex; + + const scanEnd = Math.max(startIndex, lines.length - limit); + let firstValidFooterIndex = lines.length; + + console.groupCollapsed(`[LyricStripper] ⬆️ 开始尾部扫描 (Limit: ${limit})`); + + for (let i = lines.length - 1; i >= scanEnd; i--) { + const text = getLineText(lines[i]); + + if (!text) { + continue; + } + + const strict = isStrictMatch(text, keywords, regexes); + const weak = looksLikeMetadata(text, softRegexes); + + let status = "❌ NONE"; + if (strict) status = "✅ STRICT"; + else if (weak) status = "⚠️ WEAK"; + + console.log(`Line [${i}]: "${text}" | Result: ${status}`); + + if (!strict && !weak) { + break; + } + + if (strict) { + firstValidFooterIndex = i; + } + } + console.groupEnd(); + + return firstValidFooterIndex; +} + +/** + * 剥离歌词中的元数据行 + * @param lines 原始歌词行数组 + * @param options 包含关键词和正则的配置 + * @returns 清理后的新歌词行数组 + */ +export function stripLyricMetadata( + lines: readonly LyricLine[], + options: StripOptions, +): LyricLine[] { + if (!lines || lines.length === 0) return []; + + if ( + (!options.keywords || options.keywords.length === 0) && + (!options.regexPatterns || options.regexPatterns.length === 0) && + (!options.softMatchRegexes || options.softMatchRegexes.length === 0) + ) { + return [...lines]; + } + + const regexes: RegExp[] = []; + if (options.regexPatterns) { + options.regexPatterns.forEach((p) => { + try { + if (p.trim()) { + regexes.push(new RegExp(p, "i")); // 忽略大小写 + } + } catch (e) { + console.warn(`[LyricStripper] 无效的正则表达式: ${p}`, e); + } + }); + } + + const softRegexes: RegExp[] = []; + if (options.softMatchRegexes) { + options.softMatchRegexes.forEach((p) => { + try { + if (p.trim()) softRegexes.push(new RegExp(p, "i")); + } catch (e) { + console.warn(`[LyricStripper] 无效的正则表达式: ${p}`, e); + } + }); + } + + const keywords = options.keywords || []; + const totalLines = lines.length; + + const headerConfig = DEFAULT_HEADER_LIMIT; + const headerLimit = calculateScanLimit(headerConfig, totalLines); + + const startIdx = findHeaderCutoff(lines, keywords, regexes, softRegexes, headerLimit); + + const footerConfig = DEFAULT_FOOTER_LIMIT; + const footerLimit = calculateScanLimit(footerConfig, totalLines); + + const endIdx = findFooterCutoff(lines, startIdx, keywords, regexes, softRegexes, footerLimit); + + if (startIdx === 0 && endIdx === lines.length) { + return [...lines]; + } + + const newLength = endIdx - startIdx; + console.log(`[LyricStripper] 清理完成,总行数从 ${totalLines} 变为 ${newLength}`); + + return lines.slice(startIdx, endIdx); +} From 8e3c2ad6751975d43073060f0c2412eed88fd138 Mon Sep 17 00:00:00 2001 From: apoint123 <108002475+apoint123@users.noreply.github.com> Date: Wed, 31 Dec 2025 21:29:59 +0800 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=93=84=20docs:=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/lyricStripper.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/lyricStripper.ts b/src/utils/lyricStripper.ts index d6ebeaffc..c0f792b43 100644 --- a/src/utils/lyricStripper.ts +++ b/src/utils/lyricStripper.ts @@ -134,7 +134,7 @@ function cleanTextForCheck(text: string): string { } // 强匹配行:匹配关键词加冒号,或者匹配正则表达式的行 -// 弱匹配行:带有冒号,但不匹配关键词或正则表达式的行。如果夹在强匹配行之间,多半是元数据行但是没有对应的规则。但也有可能是演唱者标识,“男:...”这样的 +// 弱匹配行:带有冒号,但不匹配关键词或正则表达式的行。如果夹在强匹配行之间,多半是元数据行但是没有对应的规则。但也有可能是演唱者标识,“男:...”这样的,为了避免误删,如果后面全是弱匹配行,就不删它们 // 真正的歌词行:既不匹配规则,又没有冒号的行,作为防火墙来阻止对之后行的移除。避免元数据在歌词中间,把中间的歌词也移除了 function isStrictMatch(text: string, keywords: string[], regexes: RegExp[]): boolean { From e25dc7a284d83309e854b9da233b83079d8a9062 Mon Sep 17 00:00:00 2001 From: apoint123 <108002475+apoint123@users.noreply.github.com> Date: Wed, 31 Dec 2025 21:44:24 +0800 Subject: [PATCH 3/6] =?UTF-8?q?=F0=9F=90=9E=20fix:=20=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E7=A9=BA=E6=A0=BC=E5=8C=B9=E9=85=8D=E8=BE=B9=E7=95=8C=E6=83=85?= =?UTF-8?q?=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/lyricStripper.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/utils/lyricStripper.ts b/src/utils/lyricStripper.ts index c0f792b43..2a3cef2a0 100644 --- a/src/utils/lyricStripper.ts +++ b/src/utils/lyricStripper.ts @@ -139,12 +139,16 @@ function cleanTextForCheck(text: string): string { function isStrictMatch(text: string, keywords: string[], regexes: RegExp[]): boolean { const cleaned = cleanTextForCheck(text); - const lowerCleaned = cleaned.toLowerCase(); + + // 转小写并移除空格进行匹配 + const normalizedText = cleaned.toLowerCase().replace(/\s+/g, ""); for (const kw of keywords) { - const lowerKw = kw.toLowerCase(); - if (lowerCleaned.startsWith(lowerKw)) { - const remainder = lowerCleaned.slice(lowerKw.length).trim(); + const normalizedKw = kw.toLowerCase().replace(/\s+/g, ""); + + if (normalizedText.startsWith(normalizedKw)) { + const remainder = normalizedText.slice(normalizedKw.length); + if (remainder.startsWith(":") || remainder.startsWith(":")) { return true; } From bcd511728bd9f2f246d4adcd155acf0f4e254674 Mon Sep 17 00:00:00 2001 From: apoint123 <108002475+apoint123@users.noreply.github.com> Date: Wed, 31 Dec 2025 22:29:43 +0800 Subject: [PATCH 4/6] =?UTF-8?q?=E2=9C=A8=20feat:=20=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=85=B3=E9=94=AE=E5=AD=97=E5=88=97=E8=A1=A8=E6=8E=92=E5=BA=8F?= =?UTF-8?q?=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/sort-keywords.ts | 71 +++++ src/assets/data/exclude.ts | 521 ++++++++++++++++++------------------- 2 files changed, 326 insertions(+), 266 deletions(-) create mode 100644 scripts/sort-keywords.ts diff --git a/scripts/sort-keywords.ts b/scripts/sort-keywords.ts new file mode 100644 index 000000000..924d6c9b4 --- /dev/null +++ b/scripts/sort-keywords.ts @@ -0,0 +1,71 @@ +import fs from "node:fs"; +import path from "node:path"; + +const TARGET_FILE = path.join(process.cwd(), "src/assets/data/exclude.ts"); + +if (!fs.existsSync(TARGET_FILE)) { + console.error("❌ 找不到 src/assets/data/exclude.ts,确保你在项目根目录下运行此脚本"); + process.exit(1); +} + +const fileContent = fs.readFileSync(TARGET_FILE, "utf-8"); + +// export const keywords = [ ... ]; +const keywordsMatch = fileContent.match(/(export\s+const\s+keywords\s*=\s*\[)([\s\S]*?)(\];)/); + +if (!keywordsMatch) { + console.error("❌ 找不到 `export const keywords = [...]` 结构"); + process.exit(1); +} + +const [fullMatch, prefix, rawContent, suffix] = keywordsMatch; + +const itemRegex = /(['"`])(.*?)\1/g; +const rawItems: string[] = []; +let match: RegExpExecArray | null; + +while ((match = itemRegex.exec(rawContent)) !== null) { + const content = match[2].trim(); + if (content) { + rawItems.push(content); + } +} + +console.log(`✅ 找到 ${rawItems.length} 个关键词`); + +const uniqueMap = new Map(); +let duplicatesRemoved = 0; + +for (const item of rawItems) { + const fingerprint = item.toLowerCase().replace(/\s+/g, ""); + + if (uniqueMap.has(fingerprint)) { + const existing = uniqueMap.get(fingerprint)!; + + if (item.length > existing.length) { + uniqueMap.set(fingerprint, item); + } + + duplicatesRemoved++; + } else { + uniqueMap.set(fingerprint, item); + } +} + +const uniqueItems = Array.from(uniqueMap.values()); + +console.log(`🧹 去重完毕,关键词有 ${uniqueItems.length},移除了 ${duplicatesRemoved}`); + +const collator = new Intl.Collator("zh-Hans-CN", { sensitivity: "accent" }); + +uniqueItems.sort((a, b) => { + return collator.compare(a, b); +}); + +const newArrayContent = uniqueItems.map((item) => ` "${item}",`).join("\n"); + +const newContentBlock = `\n${newArrayContent}\n`; + +const newFileContent = fileContent.replace(fullMatch, `${prefix}${newContentBlock}${suffix}`); + +fs.writeFileSync(TARGET_FILE, newFileContent, "utf-8"); diff --git a/src/assets/data/exclude.ts b/src/assets/data/exclude.ts index 13d9ceb74..f9bcc98a6 100644 --- a/src/assets/data/exclude.ts +++ b/src/assets/data/exclude.ts @@ -8,306 +8,295 @@ * 修改这里的列表会自动同步到用户的设置里面 */ export const keywords = [ - "作曲", - "作词", + "版权", + "贝斯", + "贝斯Bass", + "本歌曲商用授权,前往小程序", "编曲", - "演唱", - "歌手", - "歌名", - "专辑", - "发行", + "编曲 Arranger", + "编曲Arranged By", + "编曲Arrangement", + "策划", + "策划 Planner", + "策划统筹", + "策划统筹Planner and coordinator", "出品", - "监制", - "录音", - "混音", - "母带", - "吉他", - "贝斯", - "鼓", - "键盘", - "弦乐", - "和声", - "版权", - "制作人", - "原唱", - "翻唱", - "词", - "曲", - "发行人", - "发行公司", - "宣推", - "录音制作", - "制作发行", - "制作团队", - "音乐制作", - "录音师", - "混音工程师", - "混音师", - "母带工程师", - "母带处理工程师", - "制作统筹", - "艺术指导", + "出品、发行", + "出品/发行", + "出品/发行方", + "出品/发行公司", + "出品方Presented by", + "出品方Presenter", + "出品公司", + "出品公司Production Company", + "出品人", + "出品人 Publisher", + "出品人Chief Producer", + "出品人Presenter", + "出品人Producer", "出品团队", + "出品团队Production Team", + "出品Present by", + "词", + "词 Lyricist", + "词曲提供", + "词曲提供Lyrics & Composition Provided by", + "词曲提供Lyrics and Composition Provided by", + "词曲提供Music and Lyrics Provided by", + "词曲提供Words and Music by", + "词曲协力", + "词Lyrics", + "大提琴", + "大提琴 Cello", + "灯光设计", + "第二小提琴", + "第二小提琴 Second Violin", + "第一小提琴", + "第一小提琴 First Violin", + "发行", "发行方", - "和声编写", + "发行方Publisher", + "发行公司", + "发行人", + "发行Distributed by", + "发行Distribution", + "发行Release", + "发行Released by", + "翻唱", + "封面", + "封面插画Cover illustration", "封面设计", - "策划", - "营销推广", - "总策划", - "特别鸣谢", - "出品人", - "出品公司", - "联合出品", - "词曲提供", - "制作公司", - "推广策划", - "乐器演奏", + "封面设计 Cover Design", "钢琴/合成器演奏", "钢琴演奏", + "歌名", + "歌手", + "工作室", + "鼓", + "鼓Drums", "合成器演奏", - "弦乐编写", - "弦乐监制", - "第一小提琴", - "第二小提琴", - "中提琴", - "大提琴", - "弦乐录音师", - "弦乐录音室", + "合声", + "合声编写", + "和声", + "和声 Backing Vocals", + "和声/和声设计", + "和声编写", + "和声编写Backing Vocal Arrangement", + "和声编写Backing Vocals Design", "和声演唱", - "录/混音", - "制作助理", + "和声Backing Vocal", "和音", + "和音 Bvox", + "混缩", + "混音", + "混音 Mixed by", + "混音 Mixing Engineer", + "混音/母带工程师", + "混音工程师", + "混音工程师Mixing Engineer", + "混音师", + "混音室", + "混音室Mixing Studio", + "吉他", + "吉他 Guitar", + "吉他Guitars", + "监制", + "监制 Deputy Executive Producer", + "监制Chief Producer", + "监制Executive Director", + "监制Supervised production", + "键盘", "乐队统筹", - "维伴音乐", - "灯光设计", - "配唱制作人", - "文案", - "设计", - "策划统筹", - "企划宣传", - "企划营销", + "乐器演奏", + "联合策划", + "联合出品", + "联合出品Co-produced by", + "联合出品Co-production", + "联合出品Jointly Produced by", + "联合出品Published", + "联合推广", + "录/混音", + "录音", + "录音 Recording Engineer", + "录音师", + "录音师Recording Engineer", "录音室", - "混音室", - "母带后期制作人", + "录音室 Recording Studio", + "录音制作", + "鸣谢", + "母带", + "母带 Mastered by", + "母带处理工程师", + "母带工程师", + "母带工程师Mastering Engineer", "母带后期处理工程师", + "母带后期处理工程师Mastering Engineer", "母带后期处理录音室", - "鸣谢", - "工作室", - "特别企划", - "音频编辑", - "词曲协力", - "制作公司/OP", + "母带后期处理录音室Mastering Studio", + "母带后期制作人", + "母带后期制作人Mastering Producer", + "配唱制作人", + "配唱制作人 Vocals Producer", + "配唱制作人Vocal Producer", "企划", - "宣传", + "企划宣传", + "企划宣传Propaganda", + "企划营销", + "企划制作A&R Planning", + "企划Planning", + "曲", + "曲 Composer", + "设计", + "设计Design", + "特别鸣谢", + "特别鸣谢/艺人支持", + "特别鸣谢Acknowledgement", + "特别企划", + "题记", "统筹", + "统筹Coordinator", + "统筹Planning", "推广", - "宣传/推广", - "出品/发行", - "出品/发行方", - "封面", - "总企划", - "混缩", - "联合策划", - "联合推广", - "题记", + "推广策划", + "推广策划Marketing Strategy", + "推广策划Promotion Planning", + "推广策划Promotion Strategy", + "推广统筹 Promotion Coordinator", + "推广宣传", + "推广营销 Marketing Promotion", + "维伴音乐", + "文案", + "文案Copywriting", + "弦乐", + "弦乐 Strings", + "弦乐编写", + "弦乐编写Strings Arrangement", + "弦乐监制", + "弦乐录音师", + "弦乐录音室", "项目统筹", - "营销推广/出品/发行", - "出品/发行公司", + "项目统筹 Project Coordinator", + "宣传", + "宣传/推广", + "宣推", + "演唱", + "艺术指导", + "艺术指导Art Director", + "音乐项目总监Project Executive", + "音乐制作", "音乐制作发行", - "本歌曲商用授权,前往小程序", - "特别鸣谢/艺人支持", - "出品、发行", - "推广宣传", - "OP、SP", - "和声/和声设计", - "合声", - "合声编写", - "OP", - "SP", - "Artist", - "Songs Title", - "Lyrics by", - "Composed by", - "Produced by", - "Published by", - "Vocals by", - "Background Vocals by", + "音乐制作Music Production", + "音频编辑", + "营销推广", + "营销推广/出品/发行", + "营销推广Marketing", + "营销推广Marketing Promotion", + "原唱", + "制作发行", + "制作公司", + "制作公司 Production", + "制作公司/OP", + "制作公司Produce Company", + "制作人", + "制作人 Producer", + "制作人Music Producer", + "制作人Record Producer", + "制作统筹", + "制作统筹Executive Producer", + "制作团队", + "制作助理", + "中提琴", + "中提琴 Viola", + "专辑", + "总策划", + "总策划 Chief Planner", + "总策划Chref Planner", + "总监制 Chief Executive Producer", + "总企划", + "作词", + "作曲", + "Acknowledgement", "Additional Vocal by", - "Mixing Engineer", - "Mastered by", - "Executive Producer", - "Vocal Engineer", - "Vocals Produced by", - "Recorded at", - "Repertoire Owner", - "Co-Producer", - "Mastering Engineer", - "Written by", - "Lyrics", - "Composer", "Arranged By", - "Record Producer", - "Guitar", - "Music Production", - "Recording Engineer", - "Backing Vocal", - "Art Director", - "Chief Producer", - "Production Team", - "Publisher", - "Lyricist", + "Arrangement", "Arranger", - "Producer", + "Art Director", + "Artist", + "Background Vocals by", + "Backing Vocal", + "Backing Vocal Arrangement", "Backing Vocals", "Backing Vocals Design", - "Cover Design", - "Planner", - "Marketing Promotion", + "Bass", + "Cello", + "Chief Producer", "Chref Planner", - "Acknowledgement", - "Production Company", - "Jointly Produced by", - "Co-production", - "Presenter", - "Presented by", "Co-produced by", + "Co-Producer", + "Co-production", + "Composed by", + "Composer", + "Copywriting", + "Cover Design", + "Design", + "Distributed by", + "Distribution", + "Drums", + "Executive Producer", + "First Violin", + "Guitar", + "Guitars", + "Jointly Produced by", + "Lyricist", + "Lyrics", + "Lyrics & Composition Provided by", "Lyrics and Composition Provided by", + "Lyrics by", + "Marketing Promotion", + "Marketing Strategy", + "Mastered by", + "Mastering Engineer", + "Mixing Engineer", "Music and Lyrics Provided by", - "Lyrics & Composition Provided by", - "Words and Music by", - "Distribution", - "Release", - "Distributed by", - "Released by", + "Music Production", + "OP", + "OP、SP", + "OP/发行", + "OP/SP", + "Planner", + "Planner and coordinator", + "Presented by", + "Presenter", "Produce Company", + "Produced by", + "Producer", + "Production Company", + "Production Team", "Promotion Planning", - "Marketing Strategy", "Promotion Strategy", - "Strings", - "First Violin", + "Propaganda", + "Published by", + "Publisher", + "Record Producer", + "Recorded at", + "Recording Engineer", + "Recording Studio", + "Release", + "Released by", + "Repertoire Owner", "Second Violin", + "Songs Title", + "SP", + "Strings", + "Strings Arrangement", + "Supervised production", "Viola", - "Cello", + "Vocal Engineer", "Vocal Producer", - "Supervised production", - "Copywriting", - "Design", - "Planner and coordinator", - "Propaganda", - "Arrangement", - "Guitars", - "Bass", - "Drums", - "Backing Vocal Arrangement", - "Strings Arrangement", - "Recording Studio", - "OP/发行", - "混音/母带工程师", - "OP/SP", - "词Lyrics", - "曲Composer", - "编曲Arranged By", - "制作人Record Producer", - "吉他Guitar", - "音乐制作Music Production", - "录音师Recording Engineer", - "混音工程师Mixing Engineer", - "母带工程师Mastering Engineer", - "和声Backing Vocal", - "制作统筹Executive Producer", - "艺术指导Art Director", - "监制Chief Producer", - "出品团队Production Team", - "发行方Publisher", - "词Lyricist", - "编曲Arranger", - "制作人Producer", - "和声Backing Vocals", - "和声编写Backing Vocals Design", - "混音Mixing Engineer", - "封面设计Cover Design", - "策划Planner", - "营销推广Marketing Promotion", - "总策划Chref Planner", - "特别鸣谢Acknowledgement", - "出品人Chief Producer", - "出品公司Production Company", - "联合出品Co-produced by", - "联合出品Jointly Produced by", - "联合出品Co-production", - "出品方Presenter", - "出品方Presented by", - "词曲提供Lyrics and Composition Provided by", - "词曲提供Music and Lyrics Provided by", - "词曲提供Lyrics & Composition Provided by", - "词曲提供Words and Music by", - "发行Distribution", - "发行Release", - "发行Distributed by", - "发行Released by", - "制作公司Produce Company", - "推广策划Promotion Planning", - "推广策划Marketing Strategy", - "推广策划Promotion Strategy", - "弦乐 Strings", - "第一小提琴 First Violin", - "第二小提琴 Second Violin", - "中提琴 Viola", - "大提琴 Cello", - "配唱制作人Vocal Producer", - "监制Supervised production", - "文案Copywriting", - "设计Design", - "策划统筹Planner and coordinator", - "企划宣传Propaganda", - "编曲Arrangement", - "吉他Guitars", - "贝斯Bass", - "鼓Drums", - "和声编写Backing Vocal Arrangement", - "弦乐编写Strings Arrangement", - "录音室Recording Studio", - "混音室Mixing Studio", - "母带后期制作人Mastering Producer", - "母带后期处理工程师Mastering Engineer", - "母带后期处理录音室Mastering Studio", - "制作人Music Producer", - "统筹Planning", - "营销推广Marketing", - "出品人Producer", - "企划Planning", - "监制Executive Director", - "统筹Coordinator", - "出品人Publisher", - "出品Present by", - "词 Lyricist", - "曲 Composer", - "编曲 Arranger", - "制作人 Producer", - "配唱制作人 Vocals Producer", - "吉他 Guitar", - "和声 Backing Vocals", - "混音 Mixing Engineer", - "录音 Recording Engineer", - "录音室 Recording Studio", - "封面设计 Cover Design", - "策划 Planner", - "推广营销 Marketing Promotion", - "总策划 Chief Planner", - "出品人 Publisher", - "制作公司 Production", - "项目统筹 Project Coordinator", - "推广统筹 Promotion Coordinator", - "监制 Deputy Executive Producer", - "总监制 Chief Executive Producer", - "企划制作A&R Planning", - "音乐项目总监Project Executive", - "联合出品Published", - "出品人Presenter", - "封面插画Cover illustration", - "和音 Bvox", - "混音 Mixed by", - "母带 Mastered by", + "Vocals by", + "Vocals Produced by", + "Words and Music by", + "Written by", ]; /** From ed3686bf59ada74eaa758172806f8cbb95b522f2 Mon Sep 17 00:00:00 2001 From: apoint123 <108002475+apoint123@users.noreply.github.com> Date: Wed, 31 Dec 2025 22:35:48 +0800 Subject: [PATCH 5/6] =?UTF-8?q?=F0=9F=93=84=20docs:=20=E4=BF=AE=E6=AD=A3?= =?UTF-8?q?=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/assets/data/exclude.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/assets/data/exclude.ts b/src/assets/data/exclude.ts index f9bcc98a6..55aedbfe4 100644 --- a/src/assets/data/exclude.ts +++ b/src/assets/data/exclude.ts @@ -4,8 +4,6 @@ /** * 默认的关键字列表,不包含冒号 - * - * 修改这里的列表会自动同步到用户的设置里面 */ export const keywords = [ "版权", @@ -301,8 +299,6 @@ export const keywords = [ /** * 默认的正则表达式 - * - * 修改这里的列表会自动同步到用户的设置里面 */ export const regexes = [ "(?:【.*?音乐人.*?】|\\(.*?音乐人.*?\\)|「.*?音乐人.*?」|(.*?音乐人.*?)|『.*?音乐人.*?』)", From 23ed96e18502e7ce70ac11ae0bfe0b558bff5cd6 Mon Sep 17 00:00:00 2001 From: apoint123 <108002475+apoint123@users.noreply.github.com> Date: Wed, 31 Dec 2025 23:39:57 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=F0=9F=93=84=20docs:=20=E8=A1=A5=E5=85=85?= =?UTF-8?q?=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/assets/data/exclude.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/assets/data/exclude.ts b/src/assets/data/exclude.ts index 55aedbfe4..b2635ad7e 100644 --- a/src/assets/data/exclude.ts +++ b/src/assets/data/exclude.ts @@ -4,6 +4,8 @@ /** * 默认的关键字列表,不包含冒号 + * + * 建议更新这里的列表后运行 scripts\sort-keywords.ts 来排序并去重列表 */ export const keywords = [ "版权",