5 changes: 5 additions & 0 deletions bitsandbytes/__init__.py
@@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.


import logging
import importlib
import sys

@@ -20,6 +21,10 @@
from .nn import modules
from .optim import adam

# Library logging should be opt-in for downstream users.
# (No handlers are configured by default; CLI entrypoints may configure logging.)
logging.getLogger(__name__).addHandler(logging.NullHandler())

# This is a signal for integrations with transformers/diffusers.
# Eventually we may remove this but it is currently required for compatibility.
features = {"multi_backend"}
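Note: because of the NullHandler added above, bitsandbytes emits no log output unless the embedding application opts in. A minimal sketch of how a downstream user might surface these logs (the handler and level choices are illustrative, not part of this change):

    import logging

    # Simplest opt-in: configure the root logger; bitsandbytes records propagate to it.
    logging.basicConfig(level=logging.INFO)

    # Or configure only the library's logger, leaving the root logger untouched.
    bnb_logger = logging.getLogger("bitsandbytes")
    bnb_logger.setLevel(logging.DEBUG)
    bnb_logger.addHandler(logging.StreamHandler())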
17 changes: 9 additions & 8 deletions bitsandbytes/diagnostics/cuda.py
@@ -108,9 +108,10 @@ def find_cudart_libraries() -> Iterator[Path]:


def _print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
print(
f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, "
f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.",
logger.info(
"PyTorch settings found: CUDA_VERSION=%s, Highest Compute Capability: %s.",
cuda_specs.cuda_version_string,
cuda_specs.highest_compute_capability,
)

binary_path = get_cuda_bnb_library_path(cuda_specs)
@@ -133,7 +134,7 @@ def _print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:


def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None:
print(f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}")
logger.info("PyTorch settings found: ROCM_VERSION=%s", cuda_specs.cuda_version_string)

binary_path = get_cuda_bnb_library_path(cuda_specs)
if not binary_path.exists():
@@ -165,7 +166,7 @@ def print_diagnostics(cuda_specs: CUDASpecs) -> None:
def _print_cuda_runtime_diagnostics() -> None:
cudart_paths = list(find_cudart_libraries())
if not cudart_paths:
print("CUDA SETUP: WARNING! CUDA runtime files not found in any environmental path.")
logger.warning("CUDA SETUP: WARNING! CUDA runtime files not found in any environmental path.")
elif len(cudart_paths) > 1:
print_dedented(
f"""
@@ -186,13 +187,13 @@ def _print_cuda_runtime_diagnostics() -> None:
""",
)
for pth in cudart_paths:
print(f"* Found CUDA runtime at: {pth}")
logger.info("* Found CUDA runtime at: %s", pth)


def _print_hip_runtime_diagnostics() -> None:
cudart_paths = list(find_cudart_libraries())
if not cudart_paths:
print("WARNING! ROCm runtime files not found in any environmental path.")
logger.warning("WARNING! ROCm runtime files not found in any environmental path.")
elif len(cudart_paths) > 1:
print_dedented(
f"""
@@ -209,7 +210,7 @@ def _print_hip_runtime_diagnostics() -> None:
)

for pth in cudart_paths:
print(f"* Found ROCm runtime at: {pth}")
logger.info("* Found ROCm runtime at: %s", pth)


def print_runtime_diagnostics() -> None:
49 changes: 30 additions & 19 deletions bitsandbytes/diagnostics/main.py
@@ -1,4 +1,6 @@
import importlib
import logging
import os
import platform
import sys
import traceback
@@ -26,6 +28,8 @@
"trl",
]

logger = logging.getLogger(__name__)


def sanity_check():
from bitsandbytes.optim import Adam
@@ -53,24 +57,30 @@ def get_package_version(name: str) -> str:
def show_environment():
"""Simple utility to print out environment information."""

print(f"Platform: {platform.platform()}")
logger.info("Platform: %s", platform.platform())
if platform.system() == "Linux":
print(f" libc: {'-'.join(platform.libc_ver())}")
logger.info(" libc: %s", "-".join(platform.libc_ver()))

print(f"Python: {platform.python_version()}")
logger.info("Python: %s", platform.python_version())

print(f"PyTorch: {torch.__version__}")
print(f" CUDA: {torch.version.cuda or 'N/A'}")
print(f" HIP: {torch.version.hip or 'N/A'}")
print(f" XPU: {getattr(torch.version, 'xpu', 'N/A') or 'N/A'}")
logger.info("PyTorch: %s", torch.__version__)
logger.info(" CUDA: %s", torch.version.cuda or "N/A")
logger.info(" HIP: %s", torch.version.hip or "N/A")
logger.info(" XPU: %s", getattr(torch.version, "xpu", "N/A") or "N/A")

print("Related packages:")
logger.info("Related packages:")
for pkg in _RELATED_PACKAGES:
version = get_package_version(pkg)
print(f" {pkg}: {version}")
logger.info(" %s: %s", pkg, version)


def main():
# bitsandbytes' CLI entrypoint: configure logging for human-readable output.
# Library imports do not configure logging; downstream apps should decide.
level_name = os.environ.get("BNB_LOG_LEVEL", "INFO").upper()
level = getattr(logging, level_name, logging.INFO)
logging.basicConfig(level=level, format="%(message)s")

print_header(f"bitsandbytes v{bnb_version}")
show_environment()
print_header("")
@@ -84,29 +94,30 @@ def main():
# print_cuda_runtime_diagnostics()

if not torch.cuda.is_available():
print(f"PyTorch says {BNB_BACKEND} is not available. Possible reasons:")
print(f"1. {BNB_BACKEND} driver not installed")
print("2. Using a CPU-only PyTorch build")
print("3. No GPU detected")
logger.warning("PyTorch says %s is not available. Possible reasons:", BNB_BACKEND)
logger.warning("1. %s driver not installed", BNB_BACKEND)
logger.warning("2. Using a CPU-only PyTorch build")
logger.warning("3. No GPU detected")

else:
print(f"Checking that the library is importable and {BNB_BACKEND} is callable...")
logger.info("Checking that the library is importable and %s is callable...", BNB_BACKEND)

try:
sanity_check()
print("SUCCESS!")
logger.info("SUCCESS!")
return
except RuntimeError as e:
if "not available in CPU-only" in str(e):
print(
f"WARNING: {__package__} is currently running as CPU-only!\n"
logger.warning(
"WARNING: %s is currently running as CPU-only!\n"
"Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
f"If you think that this is so erroneously,\nplease report an issue!",
"If you think that this is so erroneously,\nplease report an issue!",
__package__,
)
else:
raise e
except Exception:
traceback.print_exc()
logger.exception("Diagnostics sanity check failed:")

print_dedented(
f"""
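Note: with the BNB_LOG_LEVEL handling above, the verbosity of the diagnostics output is controlled from the environment, and an unrecognized level name silently falls back to INFO. A usage sketch, assuming the diagnostics entrypoint is exposed as python -m bitsandbytes (not shown in this diff):

    import os
    import subprocess
    import sys

    # Roughly equivalent to running:  BNB_LOG_LEVEL=DEBUG python -m bitsandbytes
    env = dict(os.environ, BNB_LOG_LEVEL="DEBUG")
    subprocess.run([sys.executable, "-m", "bitsandbytes"], env=env, check=False)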
7 changes: 5 additions & 2 deletions bitsandbytes/diagnostics/utils.py
@@ -1,12 +1,15 @@
import logging
import textwrap

HEADER_WIDTH = 60

logger = logging.getLogger(__name__)


def print_header(txt: str, width: int = HEADER_WIDTH, filler: str = "=") -> None:
txt = f" {txt} " if txt else ""
print(txt.center(width, filler))
logger.info(txt.center(width, filler))


def print_dedented(text):
print("\n".join(textwrap.dedent(text).strip().split("\n")))
logger.info("\n".join(textwrap.dedent(text).strip().split("\n")))
8 changes: 6 additions & 2 deletions bitsandbytes/nn/modules.py
@@ -3,6 +3,7 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import copy
import logging
from typing import Any, Optional, TypeVar, Union, overload
import warnings

@@ -23,6 +24,8 @@

T = TypeVar("T", bound="torch.nn.Module")

logger = logging.getLogger(__name__)


class StableEmbedding(torch.nn.Embedding):
"""
@@ -1115,9 +1118,10 @@ def forward(self, x):
if self.outlier_dim is None:
tracer = OutlierTracer.get_instance()
if not tracer.is_initialized():
print("Please use OutlierTracer.initialize(model) before using the OutlierAwareLinear layer")
logger.warning(
"Please use OutlierTracer.initialize(model) before using the OutlierAwareLinear layer",
)
outlier_idx = tracer.get_outliers(self.weight)
# print(outlier_idx, tracer.get_hvalue(self.weight))
self.outlier_dim = outlier_idx

if not self.is_quantized:
9 changes: 6 additions & 3 deletions bitsandbytes/nn/triton_based_modules.py
@@ -1,4 +1,5 @@
from functools import partial
import logging

import torch
import torch.nn as nn
@@ -20,6 +21,8 @@
from bitsandbytes.triton.quantize_rowwise import quantize_rowwise
from bitsandbytes.triton.triton_utils import is_triton_available

logger = logging.getLogger(__name__)


class _switchback_global(torch.autograd.Function):
@staticmethod
@@ -173,8 +176,8 @@ def __init__(
if self.vector_wise_quantization:
self._fn = _switchback_vectorrize
if mem_efficient:
print("mem efficient is not supported for vector-wise quantization.")
exit(1)
logger.error("mem efficient is not supported for vector-wise quantization.")
raise ValueError("mem_efficient is not supported for vector-wise quantization.")
else:
if mem_efficient:
self._fn = _switchback_global_mem_efficient
@@ -189,7 +192,7 @@ def prepare_for_eval(self):
# if hasattr(m, "prepare_for_eval"):
# m.prepare_for_eval()
# model.apply(cond_prepare)
print("=> preparing for eval.")
logger.info("Preparing SwitchBackLinear for eval.")
if self.vector_wise_quantization:
W_int8, state_W = quantize_rowwise(self.weight)
else:
3 changes: 0 additions & 3 deletions bitsandbytes/research/autograd/_functions.py
@@ -234,7 +234,6 @@ def forward(ctx, A, B, out=None, bias=None, state: Optional[MatmulLtState] = Non

# 2. Quantize B
if state.has_fp16_weights:
# print('B shape', B.shape)
has_grad = getattr(B, "grad", None) is not None
is_transposed = not B.is_contiguous() and B.shape[0] == B.stride(1)
if is_transposed:
@@ -323,8 +322,6 @@ def backward(ctx, grad_output):
_Cgrad, _Cgradt, _SCgrad, _SCgradt, _outlier_cols = F.int8_double_quant(grad_output.to(torch.float16))

if req_gradB:
# print('back A shape', A.shape)
# print('grad output t shape', grad_output.t().shape)
grad_B = torch.matmul(grad_output.t(), A)

if req_gradA:
14 changes: 10 additions & 4 deletions bitsandbytes/triton/matmul_perf_model.py
@@ -3,6 +3,7 @@

import functools
import heapq
import logging

import torch

@@ -15,6 +16,8 @@
nvsmi,
)

logger = logging.getLogger(__name__)


@functools.lru_cache
def get_clock_rate_in_khz():
@@ -125,10 +128,13 @@ def estimate_matmul_time(

total_time_ms = max(compute_ms, load_ms) + store_ms
if debug:
print(
f"Total time: {total_time_ms}ms, compute time: {compute_ms}ms, "
f"loading time: {load_ms}ms, store time: {store_ms}ms, "
f"Activate CTAs: {active_cta_ratio * 100}%"
logger.debug(
"Total time: %sms, compute time: %sms, loading time: %sms, store time: %sms, Activate CTAs: %s%%",
total_time_ms,
compute_ms,
load_ms,
store_ms,
active_cta_ratio * 100,
)
return total_time_ms

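Note: the conversions in this file, like the rest of the PR, pass %-style arguments instead of pre-formatted f-strings, so the message is only interpolated when a handler actually emits the record and disabled DEBUG output costs little. A small illustration using only the standard library (logger name and values are placeholders):

    import logging

    logger = logging.getLogger("bitsandbytes.triton.matmul_perf_model")
    logging.basicConfig(level=logging.INFO)  # DEBUG records are dropped at this level

    # Deferred formatting: the message string is never built because the record is not emitted.
    logger.debug("Total time: %sms, compute time: %sms", 1.23, 0.45)

    # Guard explicitly if computing the arguments is itself costly.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Total time: %sms", 1.23)  # stand-in for an expensive computation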
5 changes: 4 additions & 1 deletion bitsandbytes/utils.py
@@ -1,9 +1,12 @@
import json
import logging
import shlex
import subprocess

import torch

logger = logging.getLogger(__name__)


def outlier_hook(module, input):
assert isinstance(module, torch.nn.Linear)
@@ -65,7 +68,7 @@ def get_hvalue(self, weight):

def get_outliers(self, weight):
if not self.is_initialized():
print("Outlier tracer is not initialized...")
logger.warning("Outlier tracer is not initialized...")
return None
hvalue = self.get_hvalue(weight)
if hvalue in self.hvalue2outlier_idx: