JasonAHendry · danieljbridges · Jan 28, 2026 · Jan 28, 2026 · Jan 28, 2026 · Jan 28, 2026
diff --git a/src/nomadic/start/data/NOMADS_Library_Worksheet.xlsx b/src/nomadic/start/data/NOMADS_Library_Worksheet.xlsx
diff --git a/src/nomadic/util/metadata.py b/src/nomadic/util/metadata.py
@@ -4,10 +4,10 @@
 from typing import List, Optional
 
 import pandas as pd
+from openpyxl import load_workbook
 
 from .exceptions import MetadataFormatError
 
-
 STANDARD_METADATA_FILENAME = "samples.csv"
 
 
@@ -49,6 +49,9 @@ def correct_barcode_format(barcode: str, try_to_fix: bool = True) -> str:
     EXPECTED = "barcode[0-9]{2}$"
     EXAMPLE = "barcode01"
 
+    if isinstance(barcode, float):
+        barcode = int(barcode)
+
     if not isinstance(barcode, str):
         barcode = str(barcode)
 
@@ -58,11 +61,6 @@ def correct_barcode_format(barcode: str, try_to_fix: bool = True) -> str:
                 f"Barcode '{barcode}' has bad format: must conform to '{EXAMPLE}'."
             )
 
-        # Raise a warning
-        warnings.warn(
-            f"Barcode '{barcode}' has bad format: must conform to '{EXAMPLE}'. Trying to fix..."
-        )
-
         nums = re.findall("[0-9]+", barcode)
 
         if not nums:
@@ -96,7 +94,7 @@ class MetadataTableParser:
 
     # If the required columns are not found, try these alternative names, case insensitive
     ALTERNATIVE_NAMES = {
-        "barcode": ["barcodes"],
+        "barcode": ["barcodes", "barcode#"],
         "sample_id": [
             "sample",
             "sampleid",
@@ -133,17 +131,45 @@ def _load_metadata(self, path: str):
         _, ext = os.path.splitext(path)
         ext = ext.lower()
         if ext == ".xlsx":
-            xlsx = pd.ExcelFile(path, engine="openpyxl")
+            warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")
+            xlsx = load_workbook(path, data_only=True)
             # name in nomadic excel template, and in the (legacy) warehouse template
-            target_sheets = ["nomadic", "rxn_metadata"]
+            target_sheets = ["Library", "rxn_metadata"]
             # Find first matching sheetname or use first sheet
             sheet_names = [
-                sheetname
-                for sheetname in target_sheets
-                if sheetname in xlsx.sheet_names
-            ] + [xlsx.sheet_names[0]]
-            data = pd.read_excel(path, sheet_name=sheet_names[0], engine="openpyxl")
+                sheetname for sheetname in target_sheets if sheetname in xlsx.sheetnames
+            ] + [xlsx.sheetnames[0]]
+            # Get the sheet and table
+            ws = xlsx[sheet_names[0]]
+            tbl_name = "tbl_SeqLib"
+            tbl = ws.tables[tbl_name]
+            cells = ws[tbl.ref]
+            start_col = cells[0][0].column  # 1-based worksheet column index
+
+            # Collect ALL hidden column ranges (including grouped ones) to identify only
+            # visible columns
+            hidden_ranges = [
+                (dim.min, dim.max)
+                for dim in ws.column_dimensions.values()
+                if dim.hidden is True
+            ]
+            visible_cols = []
+            for i in range(len(cells[0])):
+                col_idx = start_col + i
+                hidden = any(lo <= col_idx <= hi for lo, hi in hidden_ranges)
+
+                if not hidden:
+                    visible_cols.append(i)
+
+            # Extract data from visible columns ONLY
+            cells = ws[tbl.ref]
+            rows = [[cell.value for cell in row] for row in cells]
+            rows_filt = [[row[i] for i in visible_cols] for row in rows]
+            data = pd.DataFrame(rows_filt[1:], columns=rows_filt[0])
+
+            # Drop fully empty rows and rows with no value in the "Sample ID" column
             data.dropna(how="all", inplace=True)
+            data = data.dropna(subset=["Sample ID"])
             self.df = data
         else:
             self.df = pd.read_csv(path, delimiter=get_csv_delimiter(path))