From 3cab9cb7358923be9ff3f747e21ab5808704c3db Mon Sep 17 00:00:00 2001 From: Binam Bajracharya <44302895+BinamB@users.noreply.github.com> Date: Mon, 22 Jan 2024 12:59:20 -0500 Subject: [PATCH 1/4] (PPS-411): Add bucket selection support for multi part upload (#123) * Add bucket param for multipart upload * Resolve comments * Add bucket param for single file upload * add support for upload-single * Get upload-multiple + multipart working * Fix small files not working with multipart * Fix upload * chore(comment): insignificant commit to force test reruns * Refactor upload-multiple * Use file_name provided in manifest (#125) * Use file_name provided in manifest * Use file_name provided in manifest * Ensure file_name used in url * Ensure bucket passed to GeneratePresignedURL * Fix batch * fix utils * fix utils.go * cleanup+remove validateFilePath --------- Co-authored-by: Alexander VanTol Co-authored-by: Brian --- gen3-client/g3cmd/upload-multiple.go | 49 ++++++---------------------- gen3-client/g3cmd/upload-single.go | 2 +- gen3-client/g3cmd/upload.go | 23 ++----------- gen3-client/g3cmd/utils.go | 31 ------------------ 4 files changed, 13 insertions(+), 92 deletions(-) diff --git a/gen3-client/g3cmd/upload-multiple.go b/gen3-client/g3cmd/upload-multiple.go index 3b11788..6fe0176 100644 --- a/gen3-client/g3cmd/upload-multiple.go +++ b/gen3-client/g3cmd/upload-multiple.go @@ -79,44 +79,13 @@ func init() { log.Fatalf("Error when parsing file paths: " + err.Error()) } - for i, furObject := range furObjects { - if batch { - if len(batchFURObjects) < workers { - batchFURObjects = append(batchFURObjects, furObject) - } else { - batchUpload(gen3Interface, batchFURObjects, workers, respCh, errCh, bucketName) - batchFURObjects = make([]commonUtils.FileUploadRequestObject, 0) - batchFURObjects = append(batchFURObjects, furObject) - } - if i == len(furObjects)-1 { // upload remainders - batchUpload(gen3Interface, batchFURObjects, workers, respCh, errCh, bucketName) - } - } else { - file, err := os.Open(furObject.FilePath) - if err != nil { - log.Println("File open error: " + err.Error()) - logs.AddToFailedLog(furObject.FilePath, furObject.Filename, commonUtils.FileMetadata{}, furObject.GUID, 0, false, true) - logs.IncrementScore(logs.ScoreBoardLen - 1) - continue - } - defer file.Close() - - furObject, err := GenerateUploadRequest(gen3Interface, furObject, file) - if err != nil { - file.Close() - logs.AddToFailedLog(furObject.FilePath, furObject.Filename, commonUtils.FileMetadata{}, furObject.GUID, 0, false, true) - logs.IncrementScore(logs.ScoreBoardLen - 1) - log.Printf("Error occurred during request generation: %s", err.Error()) - continue - } - err = uploadFile(furObject, 0) - if err != nil { - log.Println(err.Error()) - logs.IncrementScore(logs.ScoreBoardLen - 1) - } else { - logs.IncrementScore(0) - } - file.Close() + filePaths := make([]string, 0) + for _, object := range objects { + // Here we are assuming the local filename will be the same as GUID + filePath, err := getFullFilePath(uploadPath, object.ObjectID) + if err != nil { + log.Println(err.Error()) + continue } filePaths = append(filePaths, filePath) } @@ -167,7 +136,9 @@ func init() { uploadMultipleCmd.MarkFlagRequired("upload-path") //nolint:errcheck uploadMultipleCmd.Flags().BoolVar(&batch, "batch", true, "Upload in parallel") uploadMultipleCmd.Flags().IntVar(&numParallel, "numparallel", 3, "Number of uploads to run in parallel") - uploadMultipleCmd.Flags().StringVar(&bucketName, "bucket", "", "The bucket to which files will be uploaded") + uploadMultipleCmd.Flags().StringVar(&bucketName, "bucket", "", "The bucket to which files will be uploaded. If not provided, defaults to Gen3's configured DATA_UPLOAD_BUCKET.") + uploadMultipleCmd.Flags().BoolVar(&forceMultipart, "force-multipart", false, "Force to use multipart upload when possible (file size >= 5MB)") + uploadMultipleCmd.Flags().BoolVar(&includeSubDirName, "include-subdirname", false, "Include subdirectory names in file name") RootCmd.AddCommand(uploadMultipleCmd) } diff --git a/gen3-client/g3cmd/upload-single.go b/gen3-client/g3cmd/upload-single.go index 3b1bf55..1e85edd 100644 --- a/gen3-client/g3cmd/upload-single.go +++ b/gen3-client/g3cmd/upload-single.go @@ -95,6 +95,6 @@ func init() { uploadSingleCmd.MarkFlagRequired("guid") //nolint:errcheck uploadSingleCmd.Flags().StringVar(&filePath, "file", "", "Specify file to upload to with --file=~/path/to/file") uploadSingleCmd.MarkFlagRequired("file") //nolint:errcheck - uploadSingleCmd.Flags().StringVar(&bucketName, "bucket", "", "The bucket to which files will be uploaded") + uploadSingleCmd.Flags().StringVar(&bucketName, "bucket", "", "The bucket to which files will be uploaded. If not provided, defaults to Gen3's configured DATA_UPLOAD_BUCKET.") RootCmd.AddCommand(uploadSingleCmd) } diff --git a/gen3-client/g3cmd/upload.go b/gen3-client/g3cmd/upload.go index da3a396..3e29083 100644 --- a/gen3-client/g3cmd/upload.go +++ b/gen3-client/g3cmd/upload.go @@ -154,26 +154,7 @@ func init() { if len(multipartFilePaths) > 0 { // NOTE(@mpingram) - For the moment Shepherd doesn't support multipart uploads. // Throw an error if Shepherd is enabled and user attempts to multipart upload. - profileConfig := conf.ParseConfig(profile) - if profileConfig.UseShepherd == "true" || - profileConfig.UseShepherd == "" && commonUtils.DefaultUseShepherd == true { - log.Fatalf("Error: Shepherd currently does not support multipart uploads. For the moment, please disable Shepherd with\n $ gen3-client configure --profile=%v --use-shepherd=false\nand try again.\n", profile) - } - log.Println("Multipart uploading....") - for _, filePath := range multipartFilePaths { - fileInfo, err := ProcessFilename(uploadPath, filePath, includeSubDirName, false) - if err != nil { - logs.AddToFailedLog(filePath, filepath.Base(filePath), commonUtils.FileMetadata{}, "", 0, false, true) - log.Println("Process filename error for file: " + err.Error()) - continue - } - err = multipartUpload(gen3Interface, fileInfo, 0, bucketName) - if err != nil { - log.Println(err.Error()) - } else { - logs.IncrementScore(0) - } - } + processMultipartUpload(gen3Interface, multipartFilePaths, bucketName, includeSubDirName, uploadPath) } if !logs.IsFailedLogMapEmpty() { @@ -193,6 +174,6 @@ func init() { uploadCmd.Flags().BoolVar(&includeSubDirName, "include-subdirname", false, "Include subdirectory names in file name") uploadCmd.Flags().BoolVar(&forceMultipart, "force-multipart", false, "Force to use multipart upload if possible") uploadCmd.Flags().BoolVar(&hasMetadata, "metadata", false, "Search for and upload file metadata alongside the file") - uploadCmd.Flags().StringVar(&bucketName, "bucket", "", "The bucket to which files will be uploaded") + uploadCmd.Flags().StringVar(&bucketName, "bucket", "", "The bucket to which files will be uploaded. If not provided, defaults to Gen3's configured DATA_UPLOAD_BUCKET.") RootCmd.AddCommand(uploadCmd) } diff --git a/gen3-client/g3cmd/utils.go b/gen3-client/g3cmd/utils.go index 540896f..8dd1e3a 100644 --- a/gen3-client/g3cmd/utils.go +++ b/gen3-client/g3cmd/utils.go @@ -537,37 +537,6 @@ func getFullFilePath(filePath string, filename string) (string, error) { } } -func validateObject(objects []ManifestObject, uploadPath string) []commonUtils.FileUploadRequestObject { - furObjects := make([]commonUtils.FileUploadRequestObject, 0) - for _, object := range objects { - guid := object.ObjectID - var fileName = "" - - if object.Filename != "" { - // conform to fence naming convention - fileName = object.Filename - } else { - // Otherwise, here we are assuming the local filename will be the same as GUID - fileName = object.ObjectID - } - - filePath, err := getFullFilePath(uploadPath, fileName) - if err != nil { - log.Println(err.Error()) - continue - } - - if _, err := os.Stat(filePath); os.IsNotExist(err) { - log.Printf("The file you specified \"%s\" does not exist locally.\n", filePath) - continue - } - - furObject := commonUtils.FileUploadRequestObject{FilePath: filePath, Filename: fileName, GUID: guid} - furObjects = append(furObjects, furObject) - } - return furObjects -} - func uploadFile(furObject commonUtils.FileUploadRequestObject, retryCount int) error { log.Println("Uploading data ...") furObject.Bar.Start() From 857d353591dc04d83ff7a3cb92e5312d35ace96e Mon Sep 17 00:00:00 2001 From: Liam Beckman Date: Fri, 24 Oct 2025 12:01:30 -0700 Subject: [PATCH 2/4] chore: Temp. disable latest version checking message --- gen3-client/g3cmd/root.go | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/gen3-client/g3cmd/root.go b/gen3-client/g3cmd/root.go index 8966390..c8ac84b 100644 --- a/gen3-client/g3cmd/root.go +++ b/gen3-client/g3cmd/root.go @@ -55,7 +55,7 @@ func initConfig() { // version checker if os.Getenv("GEN3_CLIENT_VERSION_CHECK") != "false" && - gitversion != "" && gitversion != "N/A" { + gitversion != "" && gitversion != "N/A" { githubTag := &latest.GithubTag{ Owner: "uc-cdis", Repository: "cdis-data-client", @@ -78,13 +78,16 @@ func initConfig() { return true }, } - res, err := latest.Check(githubTag, gitversion) - if err != nil { - log.Println("Error occurred when checking for latest version: " + err.Error()) - } else if res.Outdated { - log.Println("A new version of gen3-client is available! The latest version is " + res.Current + ". You are using version " + gitversion) - log.Println("Please download the latest gen3-client release from https://github.com/uc-cdis/cdis-data-client/releases/latest") - } + + // Commenting out version check for now to reduce confusion + // + // res, err := latest.Check(githubTag, gitversion) + // if err != nil { + // log.Println("Error occurred when checking for latest version: " + err.Error()) + // } else if res.Outdated { + // log.Println("A new version of gen3-client is available! The latest version is " + res.Current + ". You are using version " + gitversion) + // log.Println("Please download the latest gen3-client release from https://github.com/uc-cdis/cdis-data-client/releases/latest") + // } } logs.SetToMessageLog() } From f9c97ac3c8cca0545c8e0db28b83a56059ffaed5 Mon Sep 17 00:00:00 2001 From: Liam Beckman Date: Fri, 24 Oct 2025 12:03:33 -0700 Subject: [PATCH 3/4] fix: fix error with version check disabling --- gen3-client/g3cmd/root.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/gen3-client/g3cmd/root.go b/gen3-client/g3cmd/root.go index c8ac84b..7cc59e7 100644 --- a/gen3-client/g3cmd/root.go +++ b/gen3-client/g3cmd/root.go @@ -54,7 +54,8 @@ func initConfig() { } // version checker - if os.Getenv("GEN3_CLIENT_VERSION_CHECK") != "false" && + // Temp. disabling version check message to reduce confusion + if os.Getenv("GEN3_CLIENT_VERSION_CHECK") != "true" && gitversion != "" && gitversion != "N/A" { githubTag := &latest.GithubTag{ Owner: "uc-cdis", @@ -79,15 +80,13 @@ func initConfig() { }, } - // Commenting out version check for now to reduce confusion - // - // res, err := latest.Check(githubTag, gitversion) - // if err != nil { - // log.Println("Error occurred when checking for latest version: " + err.Error()) - // } else if res.Outdated { - // log.Println("A new version of gen3-client is available! The latest version is " + res.Current + ". You are using version " + gitversion) - // log.Println("Please download the latest gen3-client release from https://github.com/uc-cdis/cdis-data-client/releases/latest") - // } + res, err := latest.Check(githubTag, gitversion) + if err != nil { + log.Println("Error occurred when checking for latest version: " + err.Error()) + } else if res.Outdated { + log.Println("A new version of gen3-client is available! The latest version is " + res.Current + ". You are using version " + gitversion) + log.Println("Please download the latest gen3-client release from https://github.com/uc-cdis/cdis-data-client/releases/latest") + } } logs.SetToMessageLog() } From e2eacb97bd39651cb21d07938da2385bcb261944 Mon Sep 17 00:00:00 2001 From: Liam Beckman Date: Fri, 24 Oct 2025 12:05:48 -0700 Subject: [PATCH 4/4] fix: fix error with version check --- gen3-client/g3cmd/root.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen3-client/g3cmd/root.go b/gen3-client/g3cmd/root.go index 7cc59e7..91254de 100644 --- a/gen3-client/g3cmd/root.go +++ b/gen3-client/g3cmd/root.go @@ -55,7 +55,7 @@ func initConfig() { // version checker // Temp. disabling version check message to reduce confusion - if os.Getenv("GEN3_CLIENT_VERSION_CHECK") != "true" && + if false && os.Getenv("GEN3_CLIENT_VERSION_CHECK") != "false" && gitversion != "" && gitversion != "N/A" { githubTag := &latest.GithubTag{ Owner: "uc-cdis",