diff --git a/charon/cmd/cmd_index.py b/charon/cmd/cmd_index.py index e5dd11a5..7d4c07a6 100644 --- a/charon/cmd/cmd_index.py +++ b/charon/cmd/cmd_index.py @@ -42,6 +42,13 @@ """, required=True ) +@option( + "--recursive", + "-r", + help="If do indexing recursively under $path", + is_flag=True, + default=False +) @option( "--config", "-c", @@ -69,6 +76,7 @@ def index( path: str, target: str, + recursive: bool = False, config: str = None, debug: bool = False, quiet: bool = False, @@ -120,7 +128,15 @@ def index( if not aws_bucket: logger.error("No bucket specified for target %s!", target) else: - re_index(b, path, package_type, aws_profile, dryrun) + args = { + "target": b, + "path": path, + "package_type": package_type, + "aws_profile": aws_profile, + "recursive": recursive, + "dry_run": dryrun + } + re_index(**args) # type: ignore except Exception: print(traceback.format_exc()) diff --git a/charon/pkgs/checksum_http.py b/charon/pkgs/checksum_http.py index e57dab34..e30a373e 100644 --- a/charon/pkgs/checksum_http.py +++ b/charon/pkgs/checksum_http.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from charon.utils.files import digest, HashType +from charon.utils.files import digest, HashType, overwrite_file from charon.storage import S3Client from typing import Tuple, List, Dict, Optional from html.parser import HTMLParser @@ -169,9 +169,10 @@ def _check_and_remove_file(file_name: str): def _write_one_col_file(items: List[str], file_name: str): if items and len(items) > 0: _check_and_remove_file(file_name) - with open(file_name, "w") as f: - for i in items: - f.write(i + "\n") + content = "" + for i in items: + content = content + i + "\n" + overwrite_file(file_name, content) logger.info("The report file %s is generated.", file_name) _write_one_col_file(content[0], os.path.join(work_dir, "mismatched_files.csv")) @@ -180,10 +181,9 @@ def _write_one_col_file(items: List[str], file_name: str): if content[2] and len(content[2]) > 0: error_file = os.path.join(work_dir, "error_files.csv") _check_and_remove_file(error_file) - with open(error_file, "w") as f: - f.write("path,error\n") - for d in content[2]: - f.write("{path},{error}\n".format(path=d["path"], error=d["error"])) + f_content_lines: List[str] = [] + f_content = "path,error\n" + "\n".join(f_content_lines) + overwrite_file(error_file, f_content) logger.info("The report file %s is generated.", error_file) diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py index 4710cdab..6794a478 100644 --- a/charon/pkgs/indexing.py +++ b/charon/pkgs/indexing.py @@ -19,7 +19,7 @@ # from charon.pkgs.pkg_utils import invalidate_cf_paths from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE, PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX) -from charon.utils.files import digest_content +from charon.utils.files import digest_content, overwrite_file from jinja2 import Template import os import logging @@ -155,8 +155,7 @@ def __to_html(package_type: str, contents: List[str], folder: str, top_level: st if folder == "/": html_path = os.path.join(top_level, "index.html") os.makedirs(os.path.dirname(html_path), exist_ok=True) - with open(html_path, 'w', encoding='utf-8') as html: - html.write(html_content) + overwrite_file(html_path, html_content) return html_path @@ -267,7 +266,7 @@ def re_index( path: str, package_type: str, aws_profile: str = None, - # cf_enable: bool = False, + recursive: bool = False, dry_run: bool = False ): """Refresh the index.html for the specified folder in the bucket. @@ -307,6 +306,7 @@ def re_index( logger.debug("The re-indexed page content: %s", index_content) if not dry_run: index_path = os.path.join(path, "index.html") + logger.info("Start re-indexing %s in bucket %s", index_path, bucket_name) if path == "/": index_path = "index.html" s3_client.simple_delete_file(index_path, (bucket_name, real_prefix)) @@ -314,10 +314,23 @@ def re_index( index_path, index_content, (bucket_name, real_prefix), "text/html", digest_content(index_content) ) - # We will not invalidate index.html per cost consideration - # if cf_enable: - # cf_client = CFClient(aws_profile=aws_profile) - # invalidate_cf_paths(cf_client, bucket, [index_path]) + logger.info("%s re-indexing finished", index_path) + if recursive: + for c in contents: + if c.endswith("/"): + sub_path = c.removeprefix(real_prefix).strip() + if sub_path.startswith("/"): + sub_path = sub_path.removeprefix("/") + logger.debug("subpath: %s", sub_path) + args = { + "target": target, + "path": sub_path, + "package_type": package_type, + "aws_profile": aws_profile, + "recursive": recursive, + "dry_run": dry_run + } + re_index(**args) # type: ignore else: logger.warning( "The path %s does not contain any contents in bucket %s. " diff --git a/charon/utils/files.py b/charon/utils/files.py index d811200b..ccad3e23 100644 --- a/charon/utils/files.py +++ b/charon/utils/files.py @@ -125,6 +125,5 @@ def write_manifest(paths: List[str], root: str, product_key: str) -> Tuple[str, if not os.path.isfile(manifest_path): with open(manifest_path, mode="a", encoding="utf-8"): pass - with open(manifest_path, mode="w", encoding="utf-8") as f: - f.write('\n'.join(artifacts)) + overwrite_file(manifest_path, '\n'.join(artifacts)) return manifest_name, manifest_path