From 096058c61b4bf91f7356bc5eb896b64a9a83f056 Mon Sep 17 00:00:00 2001 From: Leon Banik <57633748+wallbloggerbeing@users.noreply.github.com> Date: Thu, 15 Jan 2026 14:36:21 +0100 Subject: [PATCH 1/2] Enhance audio extraction from GoodNotes files Refactor GoodNotes audio extraction to improve functionality and error handling. Added support for ffmpeg and ffprobe, enhanced file detection, and implemented concurrent processing for better performance. --- GoodNotes_Audio_Extractor.py | 253 ++++++++++++++++++++++++++++------- 1 file changed, 202 insertions(+), 51 deletions(-) diff --git a/GoodNotes_Audio_Extractor.py b/GoodNotes_Audio_Extractor.py index 2aacde9..25e50af 100755 --- a/GoodNotes_Audio_Extractor.py +++ b/GoodNotes_Audio_Extractor.py @@ -1,9 +1,18 @@ #!/usr/bin/env python3 - import os import zipfile import shutil +import subprocess from datetime import datetime +from concurrent.futures import ProcessPoolExecutor, as_completed +import uuid + +MAGIC_READ_BYTES = 65536 +MIN_SIZE_BYTES_DEFAULT = 10 * 1024 +DEFAULT_WORKERS = os.cpu_count() or 2 +DEFAULT_TARGET_FORMAT = "mp3" +DEFAULT_BITRATE = "192k" +DEFAULT_SAMPLE_RATE = None def parse_prefix(line, fmt): ''' @@ -22,6 +31,135 @@ def parse_prefix(line, fmt): raise return t.strftime('%m-%d_%H-%M') +def safe_filename(prefix, unique_id, ext): + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + return f"{prefix}_{ts}_{unique_id}{ext}" + +def detect_extension_by_magic(path): + try: + with open(path, "rb") as f: + data = f.read(MAGIC_READ_BYTES) + except Exception: + return None + if len(data) >= 12 and data[4:8] == b"ftyp": + up = data.upper() + if b"M4A" in up or b"M4A " in up: + return ".m4a" + return ".mp4" + if data.startswith(b"RIFF") and b"WAVE" in data[8:12]: + return ".wav" + if data.startswith(b"OggS"): + return ".ogg" + if data.startswith(b"caff"): + return ".caf" + if data.startswith(b"ID3"): + return ".mp3" + if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0: + return ".mp3" + if len(data) 
>= 2 and data[0] == 0xFF and (data[1] & 0xF6) == 0xF0: + return ".aac" + if data.startswith(b'\xff\xd8'): + return ".jpg" + return None + +def ffprobe_has_audio(path): + try: + proc = subprocess.run( + ["ffprobe", "-v", "error", "-select_streams", "a", "-show_entries", "stream=index", "-of", "csv=p=0", str(path)], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + except FileNotFoundError: + return False + return bool(proc.stdout.strip()) + +def convert_with_ffmpeg(src, dst, target_format, bitrate="192k", sample_rate=None): + cmd = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-nostdin", "-i", str(src), "-vn"] + if sample_rate: + cmd += ["-ar", str(sample_rate)] + if target_format == "mp3": + cmd += ["-ac", "2", "-b:a", bitrate, "-f", "mp3", str(dst)] + elif target_format == "wav": + cmd += ["-ac", "2", "-c:a", "pcm_s16le", "-f", "wav", str(dst)] + else: + raise ValueError("Unsupported format: " + str(target_format)) + res = subprocess.run(cmd) + return res.returncode == 0 + +def _process_single_file(args): + (src, relpath, output_dir, target_format, bitrate, sample_rate, min_size, ffmpeg_ok, ffprobe_ok) = args + res = {"src": src, "relpath": relpath, "status": None, "out": None, "err": None} + try: + size = os.path.getsize(src) + if size < min_size: + try: + os.remove(src) + res["status"] = "deleted_small" + except Exception as e: + res["status"] = "delete_failed" + res["err"] = str(e) + return res + guessed_ext = detect_extension_by_magic(src) + is_audio = False + if guessed_ext and guessed_ext.lower() in (".m4a", ".mp3", ".wav", ".aac", ".ogg", ".caf"): + is_audio = True + if guessed_ext and guessed_ext.lower() == ".mp4": + if ffprobe_ok: + is_audio = ffprobe_has_audio(src) + else: + is_audio = not ffmpeg_ok or False + if guessed_ext is None and ffprobe_ok: + is_audio = ffprobe_has_audio(src) + if is_audio: + unique_id = uuid.uuid4().hex[:8] + if ffmpeg_ok and target_format in ("mp3", "wav"): + out_ext = ".mp3" if 
target_format == "mp3" else ".wav" + out_name = safe_filename("Audio", unique_id, out_ext) + out_path = os.path.join(output_dir, out_name) + ok = convert_with_ffmpeg(src, out_path, target_format, bitrate=bitrate, sample_rate=sample_rate) + if ok: + try: + os.remove(src) + except Exception: + pass + res["status"] = "converted" + res["out"] = out_path + else: + fallback_ext = guessed_ext if guessed_ext else ".mp4" + fallback_name = safe_filename("Attachment", unique_id, fallback_ext) + fallback_path = os.path.join(output_dir, fallback_name) + try: + shutil.move(src, fallback_path) + res["status"] = "kept_original" + res["out"] = fallback_path + except Exception as e: + res["status"] = "keep_failed" + res["err"] = str(e) + else: + fallback_ext = guessed_ext if guessed_ext else ".mp4" + fallback_name = safe_filename("Attachment", unique_id, fallback_ext) + fallback_path = os.path.join(output_dir, fallback_name) + try: + shutil.move(src, fallback_path) + res["status"] = "kept_original_no_ffmpeg" + res["out"] = fallback_path + except Exception as e: + res["status"] = "keep_failed" + res["err"] = str(e) + else: + try: + os.remove(src) + res["status"] = "deleted_non_audio" + except Exception as e: + res["status"] = "delete_failed" + res["err"] = str(e) + except Exception as e: + res["status"] = "error" + res["err"] = str(e) + return res + def extract_voice_files(goodnotes_file, output_dir): ''' Extracts audio files from a GoodNotes file and renames them. @@ -29,65 +167,78 @@ def extract_voice_files(goodnotes_file, output_dir): Returns: output_dir (str): The directory to save the extracted audio files. 
''' - # Create output directory if it doesn't exist os.makedirs(output_dir, exist_ok=True) - + temp_dir = os.path.join(output_dir, "temp") + if os.path.exists(temp_dir): + shutil.rmtree(temp_dir, ignore_errors=True) + ffmpeg_ok = shutil.which("ffmpeg") is not None + ffprobe_ok = shutil.which("ffprobe") is not None + target_format = DEFAULT_TARGET_FORMAT if ffmpeg_ok else "keep" + min_size_bytes = MIN_SIZE_BYTES_DEFAULT + bitrate = DEFAULT_BITRATE + sample_rate = DEFAULT_SAMPLE_RATE + workers = DEFAULT_WORKERS try: - # Open the GoodNotes file as a ZIP archive with zipfile.ZipFile(goodnotes_file, 'r') as zip_ref: - # Extract all files from the ZIP archive to a temporary directory - temp_dir = os.path.join(output_dir, "temp") zip_ref.extractall(temp_dir) - - # Check if "attachments" folder exists in the extracted directory - attachments_dir = os.path.join(temp_dir, "attachments") - if os.path.exists(attachments_dir): - # List audio files in the attachments directory - audio_files = [file for file in os.listdir(attachments_dir) if os.path.isfile(os.path.join(attachments_dir, file))] - - # Filter audio files that are in MB size - audio_files = [file for file in audio_files if os.path.getsize(os.path.join(attachments_dir, file)) > 1024 * 1024] - - # Sort audio files based on creation time - audio_files.sort(key=lambda x: os.path.getctime(os.path.join(attachments_dir, x))) - - # Initialize audio count for renaming - audio_count = 0 - - # Rename and move the audio files to the output directory - for file in audio_files: - audio_count += 1 - # Get file creation date for renaming - creation_time = os.path.getctime(os.path.join(attachments_dir, file)) - # Format creation time as a string - creation_time_str = datetime.fromtimestamp(creation_time).strftime('%Y%m%d%H%M%S') - # Construct new file name - new_filename = f"Audio_{audio_count}.mp4" - # Move the audio file to the output directory - shutil.move(os.path.join(attachments_dir, file), os.path.join(output_dir, 
new_filename)) - print(f"Renamed and moved audio file: {new_filename}") - - # Print the total number of audio files extracted - print(f"Total {audio_count} audio file(s) extracted successfully.") - - # Remove the temporary directory - shutil.rmtree(temp_dir) - + attachments_dir = None + for root, dirs, files in os.walk(temp_dir): + for d in dirs: + if d.lower() == "attachments": + attachments_dir = os.path.join(root, d) + break + if attachments_dir: + break + if not attachments_dir: + print(f"No attachments found in {goodnotes_file}") + shutil.rmtree(temp_dir, ignore_errors=True) + return output_dir + tasks = [] + for root, _, files in os.walk(attachments_dir): + for f in files: + full = os.path.join(root, f) + rel = os.path.relpath(full, temp_dir) + tasks.append((full, rel, output_dir, target_format, bitrate, sample_rate, min_size_bytes, ffmpeg_ok, ffprobe_ok)) + if not tasks: + print(f"No files in attachments for {goodnotes_file}") + shutil.rmtree(temp_dir, ignore_errors=True) + return output_dir + converted = kept = deleted = errors = 0 + workers = max(1, int(workers)) + with ProcessPoolExecutor(max_workers=workers) as exe: + futures = {exe.submit(_process_single_file, t): t[1] for t in tasks} + for fut in as_completed(futures): + relpath = futures[fut] + try: + r = fut.result() + except Exception as e: + print(f"ERROR processing {relpath}: {e}") + errors += 1 + continue + status = r.get("status") + if status == "converted": + converted += 1 + print(f"Converted: {relpath} -> {os.path.basename(r.get('out',''))}") + elif status in ("kept_original", "kept_original_no_ffmpeg"): + kept += 1 + print(f"Kept (no conversion): {relpath} -> {os.path.basename(r.get('out',''))}") + elif status in ("deleted_non_audio", "deleted_small"): + deleted += 1 + else: + errors += 1 + print(f"{status} for {relpath}. 
Err: {r.get('err')}") + print(f"Total for {goodnotes_file}: converted={converted}, kept={kept}, deleted={deleted}, errors={errors}") except zipfile.BadZipFile: - print("Error: Not a valid GoodNotes file.") + print(f"Error: Not a valid GoodNotes file: {goodnotes_file}") except Exception as e: - print(f"An error occurred: {e}") + print(f"An error occurred while processing {goodnotes_file}: {e}") + finally: + shutil.rmtree(temp_dir, ignore_errors=True) + return output_dir if __name__ == "__main__": - # Get the list of all files in the current directory files_in_dir = os.listdir() - - # Iterate over each file in the directory for file in files_in_dir: - # Check if the file is a GoodNotes file (ends with .goodnotes) - if file.endswith('.goodnotes'): - # Create output directory for each GoodNotes file + if file.endswith('.goodnotes') and os.path.isfile(file): output_dir = os.path.splitext(file)[0] + "_Extracted_Audio_Files" - - # Extract voice files from GoodNotes file extract_voice_files(file, output_dir) From 99390769b22193c9da3d01379db6c2e91da6a120 Mon Sep 17 00:00:00 2001 From: Leon Banik <57633748+wallbloggerbeing@users.noreply.github.com> Date: Thu, 15 Jan 2026 14:42:31 +0100 Subject: [PATCH 2/2] Refactor GoodNotes audio extraction script Refactor GoodNotes audio extraction code for improved readability and efficiency by consolidating imports, simplifying function definitions, and shortening function, constant, and variable names. Note that this commit also changes behavior: extract_voice_files now accepts fmt, minsz, bitrate, sr and workers keyword arguments; the zipfile.BadZipFile and generic exception handlers in extract_voice_files are removed, so those errors now propagate to the caller; per-file delete and move failures are reported with the generic 'error' status instead of 'delete_failed' or 'keep_failed'; and magic_ext additionally tests the 0xFF 0xD9 and 0xFF 0xE0 byte prefixes when detecting JPEG. 
--- GoodNotes_Audio_Extractor.py | 302 ++++++++++------------------------- 1 file changed, 84 insertions(+), 218 deletions(-) diff --git a/GoodNotes_Audio_Extractor.py b/GoodNotes_Audio_Extractor.py index 25e50af..90e8552 100755 --- a/GoodNotes_Audio_Extractor.py +++ b/GoodNotes_Audio_Extractor.py @@ -1,244 +1,110 @@ #!/usr/bin/env python3 -import os -import zipfile -import shutil -import subprocess +import os,zipfile,shutil,subprocess,uuid from datetime import datetime -from concurrent.futures import ProcessPoolExecutor, as_completed -import uuid +from concurrent.futures import ProcessPoolExecutor,as_completed -MAGIC_READ_BYTES = 65536 -MIN_SIZE_BYTES_DEFAULT = 10 * 1024 -DEFAULT_WORKERS = os.cpu_count() or 2 -DEFAULT_TARGET_FORMAT = "mp3" -DEFAULT_BITRATE = "192k" -DEFAULT_SAMPLE_RATE = None +MAGIC=65536;MIN_SIZE=10*1024;WORKERS=os.cpu_count() or 2;DFMT="mp3";DBIT="192k" -def parse_prefix(line, fmt): - ''' - Parses the prefix from a line with the specified format. - - Returns: - str: The parsed prefix. 
- ''' - try: - t = datetime.strptime(line, fmt) +def parse_prefix(line,fmt): + try: t=datetime.strptime(line,fmt) except ValueError as v: - if len(v.args) > 0 and v.args[0].startswith('unconverted data remains: '): - line = line[:-(len(v.args[0]) - 26)] - t = datetime.strptime(line, fmt) - else: - raise + if v.args and v.args[0].startswith('unconverted data remains: '): + line=line[:-(len(v.args[0])-26)]; t=datetime.strptime(line,fmt) + else: raise return t.strftime('%m-%d_%H-%M') -def safe_filename(prefix, unique_id, ext): - ts = datetime.now().strftime("%Y%m%d_%H%M%S") - return f"{prefix}_{ts}_{unique_id}{ext}" +def safe_name(p,uid,ext): return f"{p}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uid}{ext}" -def detect_extension_by_magic(path): +def magic_ext(p): try: - with open(path, "rb") as f: - data = f.read(MAGIC_READ_BYTES) - except Exception: - return None - if len(data) >= 12 and data[4:8] == b"ftyp": - up = data.upper() - if b"M4A" in up or b"M4A " in up: - return ".m4a" - return ".mp4" - if data.startswith(b"RIFF") and b"WAVE" in data[8:12]: - return ".wav" - if data.startswith(b"OggS"): - return ".ogg" - if data.startswith(b"caff"): - return ".caf" - if data.startswith(b"ID3"): - return ".mp3" - if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0: - return ".mp3" - if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xF6) == 0xF0: - return ".aac" - if data.startswith(b'\xff\xd8'): - return ".jpg" + with open(p,'rb') as f: d=f.read(MAGIC) + except: return None + if len(d)>=12 and d[4:8]==b'ftyp': u=d.upper(); return '.m4a' if b'M4A' in u else '.mp4' + if d.startswith(b'RIFF') and b'WAVE' in d[8:12]: return '.wav' + if d.startswith(b'OggS'): return '.ogg' + if d.startswith(b'caff'): return '.caf' + if d.startswith(b'ID3'): return '.mp3' + if len(d)>=2 and d[0]==0xFF and (d[1]&0xE0)==0xE0: return '.mp3' + if len(d)>=2 and d[0]==0xFF and (d[1]&0xF6)==0xF0: return '.aac' + if d.startswith(b'\xff\xd8') or d.startswith(b'\xff\xd9') or 
d.startswith(b'\xff\xe0'): + return '.jpg' return None -def ffprobe_has_audio(path): +def ffprobe_has_audio(p): try: - proc = subprocess.run( - ["ffprobe", "-v", "error", "-select_streams", "a", "-show_entries", "stream=index", "-of", "csv=p=0", str(path)], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - check=False, - ) + r=subprocess.run(["ffprobe","-v","error","-select_streams","a","-show_entries","stream=index","-of","csv=p=0",p],stdout=subprocess.PIPE,stderr=subprocess.PIPE,text=True) + return bool(r.stdout.strip()) except FileNotFoundError: return False - return bool(proc.stdout.strip()) -def convert_with_ffmpeg(src, dst, target_format, bitrate="192k", sample_rate=None): - cmd = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-nostdin", "-i", str(src), "-vn"] - if sample_rate: - cmd += ["-ar", str(sample_rate)] - if target_format == "mp3": - cmd += ["-ac", "2", "-b:a", bitrate, "-f", "mp3", str(dst)] - elif target_format == "wav": - cmd += ["-ac", "2", "-c:a", "pcm_s16le", "-f", "wav", str(dst)] - else: - raise ValueError("Unsupported format: " + str(target_format)) - res = subprocess.run(cmd) - return res.returncode == 0 +def convert_ffmpeg(src,dst,fmt,bitrate=DBIT,sr=None): + cmd=["ffmpeg","-y","-hide_banner","-loglevel","error","-nostdin","-i",src,"-vn"] + if sr: cmd += ["-ar",str(sr)] + if fmt=="mp3": cmd += ["-ac","2","-b:a",bitrate,"-f","mp3",dst] + elif fmt=="wav": cmd += ["-ac","2","-c:a","pcm_s16le","-f","wav",dst] + else: raise ValueError(fmt) + return subprocess.run(cmd).returncode==0 -def _process_single_file(args): - (src, relpath, output_dir, target_format, bitrate, sample_rate, min_size, ffmpeg_ok, ffprobe_ok) = args - res = {"src": src, "relpath": relpath, "status": None, "out": None, "err": None} +def _proc(args): + src,rel,outd,fmt,bitrate,sr,minsz,ffmpeg_ok,ffprobe_ok = args + r={"src":src,"rel":rel,"status":None,"out":None,"err":None} try: - size = os.path.getsize(src) - if size < min_size: - try: - 
os.remove(src) - res["status"] = "deleted_small" - except Exception as e: - res["status"] = "delete_failed" - res["err"] = str(e) - return res - guessed_ext = detect_extension_by_magic(src) - is_audio = False - if guessed_ext and guessed_ext.lower() in (".m4a", ".mp3", ".wav", ".aac", ".ogg", ".caf"): - is_audio = True - if guessed_ext and guessed_ext.lower() == ".mp4": - if ffprobe_ok: - is_audio = ffprobe_has_audio(src) + if os.path.getsize(src) < minsz: + os.remove(src); r['status']='deleted_small'; return r + g=magic_ext(src); is_audio=False + if g and g.lower() in ('.m4a','.mp3','.wav','.aac','.ogg','.caf'): is_audio=True + if g and g.lower()=='.mp4': is_audio = ffprobe_has_audio(src) if ffprobe_ok else (not ffmpeg_ok or False) + if g is None and ffprobe_ok: is_audio = ffprobe_has_audio(src) + if not is_audio: + os.remove(src); r['status']='deleted_non_audio'; return r + uid=uuid.uuid4().hex[:8] + if ffmpeg_ok and fmt in ('mp3','wav'): + out_ext = '.mp3' if fmt=='mp3' else '.wav' + out = os.path.join(outd, safe_name('Audio',uid,out_ext)) + if convert_ffmpeg(src,out,fmt,bitrate,sr): + try: os.remove(src) + except: pass + r['status']='converted'; r['out']=out else: - is_audio = not ffmpeg_ok or False - if guessed_ext is None and ffprobe_ok: - is_audio = ffprobe_has_audio(src) - if is_audio: - unique_id = uuid.uuid4().hex[:8] - if ffmpeg_ok and target_format in ("mp3", "wav"): - out_ext = ".mp3" if target_format == "mp3" else ".wav" - out_name = safe_filename("Audio", unique_id, out_ext) - out_path = os.path.join(output_dir, out_name) - ok = convert_with_ffmpeg(src, out_path, target_format, bitrate=bitrate, sample_rate=sample_rate) - if ok: - try: - os.remove(src) - except Exception: - pass - res["status"] = "converted" - res["out"] = out_path - else: - fallback_ext = guessed_ext if guessed_ext else ".mp4" - fallback_name = safe_filename("Attachment", unique_id, fallback_ext) - fallback_path = os.path.join(output_dir, fallback_name) - try: - shutil.move(src, 
fallback_path) - res["status"] = "kept_original" - res["out"] = fallback_path - except Exception as e: - res["status"] = "keep_failed" - res["err"] = str(e) - else: - fallback_ext = guessed_ext if guessed_ext else ".mp4" - fallback_name = safe_filename("Attachment", unique_id, fallback_ext) - fallback_path = os.path.join(output_dir, fallback_name) - try: - shutil.move(src, fallback_path) - res["status"] = "kept_original_no_ffmpeg" - res["out"] = fallback_path - except Exception as e: - res["status"] = "keep_failed" - res["err"] = str(e) + fe = g or '.mp4'; fn = os.path.join(outd, safe_name('Attachment',uid,fe)); shutil.move(src,fn); r['status']='kept_original'; r['out']=fn else: - try: - os.remove(src) - res["status"] = "deleted_non_audio" - except Exception as e: - res["status"] = "delete_failed" - res["err"] = str(e) + fe = g or '.mp4'; fn = os.path.join(outd, safe_name('Attachment',uid,fe)); shutil.move(src,fn); r['status']='kept_original_no_ffmpeg'; r['out']=fn except Exception as e: - res["status"] = "error" - res["err"] = str(e) - return res + r['status']='error'; r['err']=str(e) + return r -def extract_voice_files(goodnotes_file, output_dir): - ''' - Extracts audio files from a GoodNotes file and renames them. - - Returns: - output_dir (str): The directory to save the extracted audio files. 
- ''' - os.makedirs(output_dir, exist_ok=True) - temp_dir = os.path.join(output_dir, "temp") - if os.path.exists(temp_dir): - shutil.rmtree(temp_dir, ignore_errors=True) - ffmpeg_ok = shutil.which("ffmpeg") is not None - ffprobe_ok = shutil.which("ffprobe") is not None - target_format = DEFAULT_TARGET_FORMAT if ffmpeg_ok else "keep" - min_size_bytes = MIN_SIZE_BYTES_DEFAULT - bitrate = DEFAULT_BITRATE - sample_rate = DEFAULT_SAMPLE_RATE - workers = DEFAULT_WORKERS +def extract_voice_files(gf,outd,fmt=DFMT,minsz=MIN_SIZE,bitrate=DBIT,sr=None,workers=WORKERS): + os.makedirs(outd,exist_ok=True); td=os.path.join(outd,'temp'); shutil.rmtree(td,ignore_errors=True) + ffmpeg_ok=shutil.which('ffmpeg') is not None; ffprobe_ok=shutil.which('ffprobe') is not None try: - with zipfile.ZipFile(goodnotes_file, 'r') as zip_ref: - zip_ref.extractall(temp_dir) - attachments_dir = None - for root, dirs, files in os.walk(temp_dir): + with zipfile.ZipFile(gf) as z: z.extractall(td) + ads=None + for r,dirs,files in os.walk(td): for d in dirs: - if d.lower() == "attachments": - attachments_dir = os.path.join(root, d) - break - if attachments_dir: - break - if not attachments_dir: - print(f"No attachments found in {goodnotes_file}") - shutil.rmtree(temp_dir, ignore_errors=True) - return output_dir - tasks = [] - for root, _, files in os.walk(attachments_dir): - for f in files: - full = os.path.join(root, f) - rel = os.path.relpath(full, temp_dir) - tasks.append((full, rel, output_dir, target_format, bitrate, sample_rate, min_size_bytes, ffmpeg_ok, ffprobe_ok)) - if not tasks: - print(f"No files in attachments for {goodnotes_file}") - shutil.rmtree(temp_dir, ignore_errors=True) - return output_dir - converted = kept = deleted = errors = 0 - workers = max(1, int(workers)) - with ProcessPoolExecutor(max_workers=workers) as exe: - futures = {exe.submit(_process_single_file, t): t[1] for t in tasks} + if d.lower()=='attachments': ads=os.path.join(r,d); break + if ads: break + if not ads: 
print(f"No attachments found in {gf}"); return outd + tasks=[] + for r,_,files in os.walk(ads): + for f in files: tasks.append((os.path.join(r,f), os.path.relpath(os.path.join(r,f),td), outd, fmt, bitrate, sr, minsz, ffmpeg_ok, ffprobe_ok)) + if not tasks: print(f"No files in attachments for {gf}"); return outd + c=k=d=e=0 + with ProcessPoolExecutor(max_workers=max(1,int(workers))) as ex: + futures={ex.submit(_proc,t):t[1] for t in tasks} for fut in as_completed(futures): - relpath = futures[fut] - try: - r = fut.result() - except Exception as e: - print(f"ERROR processing {relpath}: {e}") - errors += 1 - continue - status = r.get("status") - if status == "converted": - converted += 1 - print(f"Converted: {relpath} -> {os.path.basename(r.get('out',''))}") - elif status in ("kept_original", "kept_original_no_ffmpeg"): - kept += 1 - print(f"Kept (no conversion): {relpath} -> {os.path.basename(r.get('out',''))}") - elif status in ("deleted_non_audio", "deleted_small"): - deleted += 1 - else: - errors += 1 - print(f"{status} for {relpath}. 
Err: {r.get('err')}") - print(f"Total for {goodnotes_file}: converted={converted}, kept={kept}, deleted={deleted}, errors={errors}") - except zipfile.BadZipFile: - print(f"Error: Not a valid GoodNotes file: {goodnotes_file}") - except Exception as e: - print(f"An error occurred while processing {goodnotes_file}: {e}") + rel=futures[fut] + try: res=fut.result() + except Exception as exv: print('ERROR',rel,exv); e+=1; continue + s=res.get('status') + if s=='converted': c+=1; print(f"Converted: {rel} -> {os.path.basename(res.get('out',''))}") + elif s in ('kept_original','kept_original_no_ffmpeg'): k+=1; print(f"Kept: {rel} -> {os.path.basename(res.get('out',''))}") + elif s in ('deleted_non_audio','deleted_small'): d+=1 + else: e+=1; print(s,rel,res.get('err')) + print(f"Total for {gf}: converted={c}, kept={k}, deleted={d}, errors={e}") finally: - shutil.rmtree(temp_dir, ignore_errors=True) - return output_dir + shutil.rmtree(td,ignore_errors=True) + return outd -if __name__ == "__main__": - files_in_dir = os.listdir() - for file in files_in_dir: - if file.endswith('.goodnotes') and os.path.isfile(file): - output_dir = os.path.splitext(file)[0] + "_Extracted_Audio_Files" - extract_voice_files(file, output_dir) +if __name__=='__main__': + for f in [x for x in os.listdir() if x.endswith('.goodnotes') and os.path.isfile(x)]: + extract_voice_files(f, os.path.splitext(f)[0] + '_Extracted_Audio_Files')