From 26935370d8e719d2e2fe49bd6b46463efd22c573 Mon Sep 17 00:00:00 2001 From: Gavin Date: Tue, 27 Jul 2021 11:46:30 -0400 Subject: [PATCH] add option to use aria2c --- downloader.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/downloader.py b/downloader.py index 8162b04..15b3b39 100644 --- a/downloader.py +++ b/downloader.py @@ -12,12 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# Edited by Gavin Gray to optionally use aria2c """Download AIST++ videos from AIST Dance Video Database website.""" import argparse import multiprocessing import os import sys import urllib.request +import shutil from functools import partial SOURCE_URL = 'https://aistdancedb.ongaaccel.jp/v1.0.0/video/10M/' @@ -26,7 +29,7 @@ def _download(video_url, download_folder): save_path = os.path.join(download_folder, os.path.basename(video_url)) urllib.request.urlretrieve(video_url, save_path) - + if __name__ == '__main__': parser = argparse.ArgumentParser( description='Scripts for downloading AIST++ videos.') @@ -40,7 +43,14 @@ def _download(video_url, download_folder): type=int, default=1, help='number of threads for multiprocessing.') + parser.add_argument('--aria2c', + action='store_true', + help='use aria2c to download the videos') + aria2c_exists = shutil.which("aria2c") is not None + args = parser.parse_args() + if args.aria2c: + assert aria2c_exists, "aria2c does not appear to be installed" os.makedirs(args.download_folder, exist_ok=True) seq_names = urllib.request.urlopen(LIST_URL) @@ -48,8 +58,21 @@ def _download(video_url, download_folder): video_urls = [ os.path.join(SOURCE_URL, seq_name + '.mp4') for seq_name in seq_names] - download_func = partial(_download, download_folder=args.download_folder) - pool = multiprocessing.Pool(processes=args.num_processes) - for i, _ in 
enumerate(pool.imap_unordered(download_func, video_urls)): - sys.stderr.write('\rdownloading %d / %d' % (i + 1, len(video_urls))) - sys.stderr.write('\ndone.\n') + if args.aria2c: + import subprocess + import tempfile + with tempfile.TemporaryDirectory() as tmpdirname: + urlsfile = os.path.join(tmpdirname, "aria-urls.txt") + with open(urlsfile, "w") as f: + f.write("\n".join(video_urls)) + subprocess.run(["aria2c", + "-c", + "--dir="+args.download_folder, + "--input-file="+urlsfile, + "--max-concurrent-downloads=%i"%args.num_processes]) + else: + download_func = partial(_download, download_folder=args.download_folder) + pool = multiprocessing.Pool(processes=args.num_processes) + for i, _ in enumerate(pool.imap_unordered(download_func, video_urls)): + sys.stderr.write('\rdownloading %d / %d' % (i + 1, len(video_urls))) + sys.stderr.write('\ndone.\n')