Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 96 additions & 79 deletions GoodNotes_Audio_Extractor.py
Original file line number Diff line number Diff line change
@@ -1,93 +1,110 @@
#!/usr/bin/env python3

import os
import zipfile
import shutil
import os,zipfile,shutil,subprocess,uuid
from datetime import datetime
from concurrent.futures import ProcessPoolExecutor,as_completed

def parse_prefix(line, fmt):
'''
Parses the prefix from a line with the specified format.

Returns:
str: The parsed prefix.
'''
try:
t = datetime.strptime(line, fmt)
# Number of leading bytes read from a file when sniffing its signature.
MAGIC = 65536
# Default size threshold in bytes; attachments smaller than this are deleted.
MIN_SIZE = 10 * 1024
# Default worker count for the process pool (falls back to 2 when unknown).
WORKERS = os.cpu_count() or 2
# Default output format handed to ffmpeg.
DFMT = "mp3"
# Default audio bitrate handed to ffmpeg.
DBIT = "192k"

def parse_prefix(line, fmt):
    """Parse the timestamp at the start of *line* and format it as ``MM-DD_HH-MM``.

    If *line* carries extra text after the part matched by *fmt*, that
    trailing text is cut off and the remainder is parsed again.

    Args:
        line: Text beginning with a timestamp.
        fmt: ``datetime.strptime`` format describing that timestamp.

    Returns:
        The parsed time rendered with ``'%m-%d_%H-%M'``.

    Raises:
        ValueError: If *line* does not match *fmt* even after trimming.
    """
    marker = 'unconverted data remains: '
    try:
        t = datetime.strptime(line, fmt)
    except ValueError as v:
        msg = v.args[0] if v.args else ''
        if not msg.startswith(marker):
            raise
        # strptime reports the unparsed tail in its message; drop exactly
        # that many characters from the end and parse what is left.
        leftover = len(msg) - len(marker)
        t = datetime.strptime(line[:-leftover], fmt)
    return t.strftime('%m-%d_%H-%M')

def extract_voice_files(goodnotes_file, output_dir):
'''
Extracts audio files from a GoodNotes file and renames them.

Returns:
output_dir (str): The directory to save the extracted audio files.
'''
# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
def safe_name(p, uid, ext):
    """Build an output file name: ``<p>_<YYYYmmdd_HHMMSS>_<uid><ext>``.

    The timestamp is the current local time at the moment of the call.
    """
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    return f"{p}_{stamp}_{uid}{ext}"

def magic_ext(p, nbytes=None):
    """Guess a file extension for *p* from its leading bytes.

    Args:
        p: Path of the file to inspect.
        nbytes: How many leading bytes to read. ``None`` (the default)
            uses the module-level ``MAGIC`` size.

    Returns:
        One of ``'.m4a'``, ``'.mp4'``, ``'.wav'``, ``'.ogg'``, ``'.caf'``,
        ``'.mp3'``, ``'.aac'``, ``'.jpg'``, or ``None`` when the file cannot
        be read or no known signature matches.
    """
    limit = MAGIC if nbytes is None else nbytes
    try:
        with open(p, 'rb') as f:
            d = f.read(limit)
    except OSError:
        return None

    # ISO base media container: 'ftyp' follows the 4-byte box size.
    if len(d) >= 12 and d[4:8] == b'ftyp':
        # NOTE(review): the brand is searched in the whole buffer that was
        # read, not just the ftyp box — confirm this breadth is intended.
        return '.m4a' if b'M4A' in d.upper() else '.mp4'
    if d.startswith(b'RIFF') and b'WAVE' in d[8:12]:
        return '.wav'
    if d.startswith(b'OggS'):
        return '.ogg'
    if d.startswith(b'caff'):
        return '.caf'
    if d.startswith(b'ID3'):
        return '.mp3'
    if len(d) >= 2 and d[0] == 0xFF:
        # ADTS AAC (12-bit sync, layer bits 00) must be tested before the
        # broader 11-bit MPEG audio frame sync, which would otherwise
        # swallow it and label AAC streams as MP3.
        if (d[1] & 0xF6) == 0xF0:
            return '.aac'
        if (d[1] & 0xE0) == 0xE0:
            return '.mp3'
    # A leading FF E0 never reaches this point: it already matched the
    # MPEG frame-sync test above, so only the remaining markers are listed.
    if d.startswith((b'\xff\xd8', b'\xff\xd9')):
        return '.jpg'
    return None

# Initialize audio count for renaming
audio_count = 0

# Rename and move the audio files to the output directory
for file in audio_files:
audio_count += 1
# Get file creation date for renaming
creation_time = os.path.getctime(os.path.join(attachments_dir, file))
# Format creation time as a string
creation_time_str = datetime.fromtimestamp(creation_time).strftime('%Y%m%d%H%M%S')
# Construct new file name
new_filename = f"Audio_{audio_count}.mp4"
# Move the audio file to the output directory
shutil.move(os.path.join(attachments_dir, file), os.path.join(output_dir, new_filename))
print(f"Renamed and moved audio file: {new_filename}")

# Print the total number of audio files extracted
print(f"Total {audio_count} audio file(s) extracted successfully.")
def ffprobe_has_audio(p):
    """Return True when ``ffprobe`` lists at least one audio stream in *p*.

    Returns False when ffprobe prints no audio stream index (including
    when it rejects the file) or when the ffprobe executable cannot be
    launched at all.
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "a",
        "-show_entries", "stream=index",
        "-of", "csv=p=0",
        p,
    ]
    try:
        # stderr is never inspected, so it is discarded rather than piped.
        r = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
        )
    except OSError:
        # Covers a missing binary as well as one that is not executable.
        return False
    return bool(r.stdout.strip())

# Remove the temporary directory
shutil.rmtree(temp_dir)
def convert_ffmpeg(src, dst, fmt, bitrate=DBIT, sr=None):
    """Transcode the audio of *src* into *dst* by running ``ffmpeg``.

    Args:
        src: Input media path.
        dst: Output path; overwritten if it exists (``-y``).
        fmt: ``"mp3"`` or ``"wav"``.
        bitrate: Audio bitrate passed as ``-b:a`` (used for mp3 only).
        sr: Optional sample rate passed as ``-ar``; skipped when falsy.

    Returns:
        True when ffmpeg exits with status 0, False when it fails or
        cannot be launched.

    Raises:
        ValueError: If *fmt* is neither ``"mp3"`` nor ``"wav"``.
    """
    cmd = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
           "-nostdin", "-i", src, "-vn"]
    if sr:
        cmd += ["-ar", str(sr)]
    if fmt == "mp3":
        cmd += ["-ac", "2", "-b:a", bitrate, "-f", "mp3", dst]
    elif fmt == "wav":
        cmd += ["-ac", "2", "-c:a", "pcm_s16le", "-f", "wav", dst]
    else:
        raise ValueError(fmt)

    existed_before = os.path.exists(dst)
    try:
        ok = subprocess.run(cmd).returncode == 0
    except OSError:
        # ffmpeg missing or not executable: report failure so the caller
        # can fall back to keeping the original file.
        ok = False
    if not ok and not existed_before:
        # Do not leave a truncated output file behind after a failed run.
        try:
            os.remove(dst)
        except OSError:
            pass
    return ok

except zipfile.BadZipFile:
print("Error: Not a valid GoodNotes file.")
def _proc(args):
    """Process one extracted attachment; intended to run in a worker process.

    Args:
        args: Tuple ``(src, rel, outd, fmt, bitrate, sr, minsz, ffmpeg_ok,
            ffprobe_ok)`` — the file path, its path relative to the
            extraction root, the output directory, conversion settings, the
            minimum size in bytes, and whether ffmpeg / ffprobe were found.

    Returns:
        A dict with keys ``src``, ``rel``, ``status``, ``out`` and ``err``.
        ``status`` is one of ``'deleted_small'``, ``'deleted_non_audio'``,
        ``'converted'``, ``'kept_original'``, ``'kept_original_no_ffmpeg'``
        or ``'error'`` (with the message in ``err``). Exceptions are
        captured into the result instead of being raised.
    """
    src, rel, outd, fmt, bitrate, sr, minsz, ffmpeg_ok, ffprobe_ok = args
    r = {"src": src, "rel": rel, "status": None, "out": None, "err": None}
    try:
        if os.path.getsize(src) < minsz:
            os.remove(src)
            r['status'] = 'deleted_small'
            return r

        g = magic_ext(src)
        kind = g.lower() if g else None
        if kind in ('.m4a', '.mp3', '.wav', '.aac', '.ogg', '.caf'):
            is_audio = True
        elif kind == '.mp4':
            if ffprobe_ok:
                is_audio = ffprobe_has_audio(src)
            else:
                # NOTE(review): without ffprobe an .mp4 counts as audio only
                # when ffmpeg is missing too; with ffmpeg present it is
                # discarded. Behaviour kept as-is — confirm it is intended.
                is_audio = not ffmpeg_ok
        elif kind is None:
            # Unknown signature: only ffprobe can vouch for it.
            is_audio = bool(ffprobe_ok) and ffprobe_has_audio(src)
        else:
            # Recognised, but not an audio container (e.g. '.jpg').
            is_audio = False

        if not is_audio:
            os.remove(src)
            r['status'] = 'deleted_non_audio'
            return r

        uid = uuid.uuid4().hex[:8]
        kept_status = 'kept_original_no_ffmpeg'
        if ffmpeg_ok and fmt in ('mp3', 'wav'):
            out_ext = '.mp3' if fmt == 'mp3' else '.wav'
            out = os.path.join(outd, safe_name('Audio', uid, out_ext))
            if convert_ffmpeg(src, out, fmt, bitrate, sr):
                try:
                    os.remove(src)
                except OSError:
                    # Best effort: the source sits in a temp tree that the
                    # caller removes afterwards anyway.
                    pass
                r['status'] = 'converted'
                r['out'] = out
                return r
            kept_status = 'kept_original'

        # Conversion unavailable or failed: keep the untouched original.
        fn = os.path.join(outd, safe_name('Attachment', uid, g or '.mp4'))
        shutil.move(src, fn)
        r['status'] = kept_status
        r['out'] = fn
    except Exception as e:
        r['status'] = 'error'
        r['err'] = str(e)
    return r

if __name__ == "__main__":
# Get the list of all files in the current directory
files_in_dir = os.listdir()
def extract_voice_files(gf, outd, fmt=DFMT, minsz=MIN_SIZE, bitrate=DBIT, sr=None, workers=WORKERS):
    """Extract the audio attachments of a GoodNotes archive into *outd*.

    The archive is unzipped into ``<outd>/temp``, the first directory named
    ``attachments`` (case-insensitive) is located, and every file below it
    is handed to ``_proc`` in a process pool. The temp tree is removed on
    exit, whatever happens.

    Args:
        gf: Path of the ``.goodnotes`` (zip) file.
        outd: Output directory; created if missing.
        fmt: Target format passed to the workers.
        minsz: Minimum file size in bytes passed to the workers.
        bitrate: Audio bitrate passed to the workers.
        sr: Optional sample rate passed to the workers.
        workers: Requested pool size (at least one process is used).

    Returns:
        *outd*, in every case — including an invalid archive, which is
        reported on stdout instead of raising.
    """
    os.makedirs(outd, exist_ok=True)
    td = os.path.join(outd, 'temp')
    shutil.rmtree(td, ignore_errors=True)
    ffmpeg_ok = shutil.which('ffmpeg') is not None
    ffprobe_ok = shutil.which('ffprobe') is not None
    try:
        try:
            with zipfile.ZipFile(gf) as z:
                z.extractall(td)
        except zipfile.BadZipFile:
            # One corrupt file must not abort a batch run over many files.
            print(f"Error: {gf} is not a valid GoodNotes file.")
            return outd

        # First directory called 'attachments', in os.walk order.
        ads = next(
            (
                os.path.join(root, name)
                for root, dirs, _ in os.walk(td)
                for name in dirs
                if name.lower() == 'attachments'
            ),
            None,
        )
        if not ads:
            print(f"No attachments found in {gf}")
            return outd

        tasks = []
        for root, _, files in os.walk(ads):
            for fname in files:
                path = os.path.join(root, fname)
                tasks.append((path, os.path.relpath(path, td), outd, fmt,
                              bitrate, sr, minsz, ffmpeg_ok, ffprobe_ok))
        if not tasks:
            print(f"No files in attachments for {gf}")
            return outd

        converted = kept = deleted = errors = 0
        # No point in starting more processes than there are files.
        pool_size = min(len(tasks), max(1, int(workers)))
        with ProcessPoolExecutor(max_workers=pool_size) as ex:
            futures = {ex.submit(_proc, t): t[1] for t in tasks}
            for fut in as_completed(futures):
                rel = futures[fut]
                try:
                    res = fut.result()
                except Exception as exv:
                    print('ERROR', rel, exv)
                    errors += 1
                    continue
                s = res.get('status')
                out_name = os.path.basename(res.get('out') or '')
                if s == 'converted':
                    converted += 1
                    print(f"Converted: {rel} -> {out_name}")
                elif s in ('kept_original', 'kept_original_no_ffmpeg'):
                    kept += 1
                    print(f"Kept: {rel} -> {out_name}")
                elif s in ('deleted_non_audio', 'deleted_small'):
                    deleted += 1
                else:
                    errors += 1
                    print(s, rel, res.get('err'))
        print(f"Total for {gf}: converted={converted}, kept={kept}, deleted={deleted}, errors={errors}")
    finally:
        shutil.rmtree(td, ignore_errors=True)
    return outd

# Iterate over each file in the directory
for file in files_in_dir:
# Check if the file is a GoodNotes file (ends with .goodnotes)
if file.endswith('.goodnotes'):
# Create output directory for each GoodNotes file
output_dir = os.path.splitext(file)[0] + "_Extracted_Audio_Files"

# Extract voice files from GoodNotes file
extract_voice_files(file, output_dir)
if __name__ == '__main__':
    # Process every GoodNotes archive in the current working directory, in
    # a stable (sorted) order; the extension match ignores letter case.
    for f in sorted(os.listdir()):
        if f.lower().endswith('.goodnotes') and os.path.isfile(f):
            extract_voice_files(f, os.path.splitext(f)[0] + '_Extracted_Audio_Files')