diff --git a/example_usage.py b/example_usage.py
new file mode 100644
index 0000000..5cb41b4
--- /dev/null
+++ b/example_usage.py
@@ -0,0 +1,36 @@
+from pathlib import Path
+from kernels.kernel_card_utils import (
+    _update_kernel_card_available_funcs,
+    _update_kernel_card_license,
+    _load_or_create_kernel_card,
+    _update_kernel_card_usage,
+    _update_kernel_card_backends,
+)
+import argparse
+
+
+def main(args):
+    kernel_dir = Path(args.kernels_dir)
+
+    kernel_card = _load_or_create_kernel_card(kernel_description=args.description, license="apache-2.0")
+
+    # Thread the card through every update step: each helper returns the card
+    # it produced, and the next step must receive that result (not the
+    # original `kernel_card`) or earlier updates would be dropped if a helper
+    # ever returned a fresh object.
+    updated_card = _update_kernel_card_usage(kernel_card=kernel_card, local_path=kernel_dir)
+    updated_card = _update_kernel_card_available_funcs(kernel_card=updated_card, local_path=kernel_dir)
+    updated_card = _update_kernel_card_backends(kernel_card=updated_card, local_path=kernel_dir)
+    updated_card = _update_kernel_card_license(updated_card, kernel_dir)
+
+    card_path = args.card_path or "README.md"
+    updated_card.save(card_path)
+    print("Kernel card updated successfully!")
+    print("\nUpdated content preview:")
+    print(updated_card.content[:500] + "...")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--kernels_dir", type=str, required=True, help="Path to the kernels source.")
+    parser.add_argument("--card_path", type=str, default=None, help="Path to save the card to.")
+    parser.add_argument("--description", type=str, default=None)
+    args = parser.parse_args()
+
+    main(args)
diff --git a/src/kernels/card_template.md b/src/kernels/card_template.md
new file mode 100644
index 0000000..975cab9
--- /dev/null
+++ b/src/kernels/card_template.md
@@ -0,0 +1,34 @@
+---
+{{ card_data }}
+---
+
+<!-- This kernel card has been generated automatically. You
+should probably proofread and complete it, then remove this comment. -->
+
+{{ model_description }}
+
+## How to use
+
+```python
+# TODO: add an example code snippet for running this kernel
+```
+
+## Available functions
+
+[TODO: add the functions available through this kernel]
+
+## Supported backends
+
+[TODO: add the backends this kernel supports]
+
+## Benchmarks
+
+[TODO: provide benchmarks if available]
+
+## Code source
+
+[TODO: provide original code source and other relevant citations if available]
+
+## Notes
+
+[TODO: provide additional notes about this kernel if needed]
diff --git a/src/kernels/kernel_card_utils.py b/src/kernels/kernel_card_utils.py
new file mode 100644
index 0000000..c94684c
--- /dev/null
+++ b/src/kernels/kernel_card_utils.py
@@ -0,0 +1,229 @@
+import ast
+import re
+from pathlib import Path
+
+from .compat import tomllib
+from huggingface_hub import ModelCard, ModelCardData
+from huggingface_hub.errors import EntryNotFoundError, RepositoryNotFoundError
+
+KERNEL_CARD_TEMPLATE_PATH = Path(__file__).parent / "card_template.md"
+DESCRIPTION = """
+This is the repository card of {repo_id} that has been pushed on the Hub. It was built to be used with the [`kernels` library](https://github.com/huggingface/kernels). This card was automatically generated.
+"""
+EXAMPLE_CODE = """```python
+# make sure `kernels` is installed: `pip install -U kernels`
+from kernels import get_kernel
+
+kernel_module = get_kernel("{repo_id}") # <- change the ID if needed
+{func_name} = kernel_module.{func_name}
+
+{func_name}(...)
+```"""
+LIBRARY_NAME = "kernels"
+
+is_jinja_available = False
+try:
+    import jinja2  # noqa
+
+    is_jinja_available = True
+except ImportError:
+    pass
+
+
+def _load_or_create_kernel_card(
+    repo_id_or_path: str = "REPO_ID",
+    token: str | None = None,
+    kernel_description: str | None = None,
+    license: str | None = None,
+    force_update_content: bool = False,
+) -> ModelCard:
+    if not is_jinja_available:
+        raise ValueError(
+            "Modelcard rendering is based on Jinja templates."
+            " Please make sure to have `jinja` installed before using `load_or_create_model_card`."
+            " To install it, please run `pip install Jinja2`."
+        )
+
+    kernel_card = None
+
+    if not force_update_content:
+        try:
+            kernel_card = ModelCard.load(repo_id_or_path, token=token)
+        except (EntryNotFoundError, RepositoryNotFoundError):
+            pass  # Will create from template below
+
+    if kernel_card is None:
+        kernel_description = kernel_description or DESCRIPTION
+        kernel_card = ModelCard.from_template(
+            # Card metadata object that will be converted to YAML block
+            card_data=ModelCardData(license=license, library_name=LIBRARY_NAME),
+            template_path=str(KERNEL_CARD_TEMPLATE_PATH),
+            model_description=kernel_description,
+        )
+
+    return kernel_card
+
+
+def _parse_build_toml(local_path: str | Path) -> dict | None:
+    local_path = Path(local_path)
+    build_toml_path = local_path / "build.toml"
+
+    if not build_toml_path.exists():
+        return None
+
+    try:
+        with open(build_toml_path, "rb") as f:
+            return tomllib.load(f)
+    except Exception:
+        return None
+
+
+def _find_torch_ext_init(local_path: str | Path) -> Path | None:
+    local_path = Path(local_path)
+
+    config = _parse_build_toml(local_path)
+    if not config:
+        return None
+
+    try:
+        # Get kernel name from general.name
+        kernel_name = config.get("general", {}).get("name")
+        if not kernel_name:
+            return None
+
+        module_name = kernel_name.replace("-", "_")
+        init_file = local_path / "torch-ext" / module_name / "__init__.py"
+
+        if init_file.exists():
+            return init_file
+
+        return None
+    except Exception:
+        return None
+
+
+def _extract_functions_from_all(init_file_path: Path) -> list[str] | None:
+    try:
+        content = init_file_path.read_text()
+
+        # Parse the file as an AST
+        tree = ast.parse(content)
+
+        # Find the __all__ assignment
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Assign):
+                for target in node.targets:
+                    if isinstance(target, ast.Name) and target.id == "__all__":
+                        # Extract all list values
+                        if isinstance(node.value, ast.List):
+                            functions = []
+                            for elt in node.value.elts:
+                                if isinstance(elt, ast.Constant):
+                                    func_name = str(elt.value)
+                                    functions.append(func_name)
+                            return functions if functions else None
+        return None
+    except Exception:
+        return None
+
+
+def _update_kernel_card_usage(
+    kernel_card: ModelCard,
+    local_path: str | Path,
+    repo_id: str = "REPO_ID",
+) -> ModelCard:
+    init_file = _find_torch_ext_init(local_path)
+
+    if not init_file:
+        return kernel_card
+
+    func_names = _extract_functions_from_all(init_file)
+
+    if not func_names:
+        return kernel_card
+
+    # Use the first function as an example
+    func_name = func_names[0]
+    example_code = EXAMPLE_CODE.format(repo_id=repo_id, func_name=func_name)
+
+    # Update the model card content
+    card_content = str(kernel_card.content)
+    pattern = r"(## How to use\s*\n\n)```python\n# TODO: add an example code snippet for running this kernel\n```"
+
+    if re.search(pattern, card_content):
+        updated_content = re.sub(pattern, r"\1" + example_code, card_content)
+        kernel_card.content = updated_content
+
+    return kernel_card
+
+
+def _update_kernel_card_available_funcs(
+    kernel_card: ModelCard, local_path: str | Path
+) -> ModelCard:
+    init_file = _find_torch_ext_init(local_path)
+
+    if not init_file:
+        return kernel_card
+
+    func_names = _extract_functions_from_all(init_file)
+
+    if not func_names:
+        return kernel_card
+
+    # Format functions as a bulleted list
+    functions_list = "\n".join(f"- `{func}`" for func in func_names)
+
+    # Update the model card content
+    card_content = str(kernel_card.content)
+    pattern = r"(## Available functions\s*\n\n)\[TODO: add the functions available through this kernel\]"
+
+    if re.search(pattern, card_content):
+        updated_content = re.sub(pattern, r"\1" + functions_list, card_content)
+        kernel_card.content = updated_content
+
+    return kernel_card
+
+
+def _update_kernel_card_backends(
+    kernel_card: ModelCard, local_path: str | Path
+) -> ModelCard:
+    config = _parse_build_toml(local_path)
+    if not config:
+        return kernel_card
+
+    config = config.get("general", {})
+
+    card_content = str(kernel_card.content)
+
+    backends = config.get("backends")
+    if backends:
+        backends_list = "\n".join(f"- {backend}" for backend in backends)
+        pattern = r"(## Supported backends\s*\n\n)\[TODO: add the backends this kernel supports\]"
+        if re.search(pattern, card_content):
+            card_content = re.sub(pattern, r"\1" + backends_list, card_content)
+
+    # TODO: should we consider making it a separate utility?
+    cuda_capabilities = config.get("cuda-capabilities")
+    if cuda_capabilities:
+        cuda_list = "\n".join(f"- {cap}" for cap in cuda_capabilities)
+        cuda_section = f"## CUDA Capabilities\n\n{cuda_list}\n\n"
+        pattern = r"(## Benchmarks)"
+        if re.search(pattern, card_content):
+            card_content = re.sub(pattern, cuda_section + r"\1", card_content)
+
+    kernel_card.content = card_content
+    return kernel_card
+
+
+def _update_kernel_card_license(
+    kernel_card: ModelCard, local_path: str | Path
+) -> ModelCard:
+    config = _parse_build_toml(local_path)
+    if not config:
+        return kernel_card
+
+    existing_license = kernel_card.data.get("license", None)
+    license_from_config = config.get("license", None)
+    final_license = license_from_config or existing_license
+    kernel_card.data["license"] = final_license
+    return kernel_card