From 7ce80ca5574c44470383b5182b1510296d6ea02b Mon Sep 17 00:00:00 2001
From: rowanwalker96
Date: Wed, 17 Dec 2025 09:06:24 +0000
Subject: [PATCH 1/3] Init

---
 setup.py                             |  1 +
 src/dlstbx/services/trigger_xchem.py | 34 +++++++++++++++-------------
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/setup.py b/setup.py
index b948ad46b..6dfdd39db 100644
--- a/setup.py
+++ b/setup.py
@@ -80,6 +80,7 @@
             "pandda_xchem = dlstbx.wrapper.pandda_xchem:PanDDAWrapper",
             "pandda_post = dlstbx.wrapper.pandda_post:PanDDApostWrapper",
             "pandda_rhofit = dlstbx.wrapper.pandda_rhofit:PanDDARhofitWrapper",
+            "pipedream = dlstbx.wrapper.pipedream_xchem:PipedreamWrapper",
             "phaser_ellg = dlstbx.wrapper.phaser_ellg:PhasereLLGWrapper",
             "rlv = dlstbx.wrapper.rlv:RLVWrapper",
             "scaleit = dlstbx.wrapper.scaleit:ScaleitWrapper",
diff --git a/src/dlstbx/services/trigger_xchem.py b/src/dlstbx/services/trigger_xchem.py
index 323e612e6..3c60d76c4 100644
--- a/src/dlstbx/services/trigger_xchem.py
+++ b/src/dlstbx/services/trigger_xchem.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import ast
+import re
 import json
 import pathlib
 import shutil
@@ -260,7 +262,7 @@ def trigger_pandda_xchem(
                         subdir / "processing/database" / "soakDBDataFile.sqlite"
                     )
                     con = sqlite3.connect(
-                        f"file:{db_path}?mode=ro", uri=True, timeout=20
+                        f"file:{db_path}?mode=ro", uri=True, timeout=10
                    )
                     cur = con.cursor()
                     cur.execute("SELECT Protein FROM soakDB")
@@ -271,12 +273,6 @@
                     # visit = dir.parts[-1]
                     expt_yaml = {}
                     expt_yaml["data"] = {"acronym": name}
-                    # expt_yaml["autoprocessing"] = {}
-                    # expt_yaml["autoprocessing"]["pandda"] = {
-                    #     "prerun-threshold": 300,
-                    #     "heuristic": "default",
-                    # }
-
                     with open(subdir / ".user.yaml", "w") as f:
                         yaml.dump(expt_yaml, f)
 
@@ -306,10 +302,11 @@
             return {"success": True}
 
         processing_dir = xchem_visit_dir / "processing"
-        db = xchem_visit_dir / "processing/database" / "soakDBDataFile.sqlite"
+        processed_dir = xchem_visit_dir / "processed"
+        db = processing_dir / 'database' / "soakDBDataFile.sqlite"
 
-        # Make a copy of the most recent sqlite for reading
-        # db_copy = xchem_visit_dir / "processing/database" / "auto_soakDBDataFile.sqlite"
+        # Make a copy of the most recent sqlite for reading?
+        # db_copy = xchem_visit_dir / "processed/database" / "auto_soakDBDataFile.sqlite"
         # if not db_copy.exists() or (db.stat().st_mtime != db_copy.stat().st_mtime):
         #     shutil.copy2(str(db), str(db_copy))
         #     self.log.info(f"Made a copy of {db}, auto_soakDBDataFile.sqlite")
@@ -495,7 +492,7 @@
                 * df["nTotalUniqueObservations"].astype(float)
             )  # I/sigI*completeness*# unique reflections
 
-            df = df[["autoProcScalingId", "heuristic"]].copy()
+            df = df[["processingPrograms","autoProcScalingId", "heuristic"]].copy()
             scaling_ids = df["autoProcScalingId"].tolist()
 
             # find associated dimple jobs
@@ -562,6 +559,10 @@
             f"Chosen dataset to take forward: {chosen_dataset_path} for dcid {dcid}"
         )
         scaling_id = int(df3["autoProcScalingId"][0])
+        environment = df3['processingEnvironment'][0]
+        upstream_mtz = ast.literal_eval(re.search(r"data=(\[[^\]]*\])", environment).group(1))[0]
+        upstream_proc = df[df['autoProcScalingId']==scaling_id]['processingPrograms'].item()
+
         pdb = chosen_dataset_path + "/final.pdb"
         mtz = chosen_dataset_path + "/final.mtz"
 
@@ -583,7 +584,7 @@
         # Read XChem SQLite for ligand info
         try:
-            conn = sqlite3.connect(f"file:{db}?mode=ro", uri=True, timeout=20)
+            conn = sqlite3.connect(f"file:{db}?mode=ro", uri=True, timeout=10)
             df = pd.read_sql_query(
                 f"SELECT * from mainTable WHERE Puck = '{code}' AND PuckPosition = {location} AND CrystalName = '{dtag}'",
                 conn,
             )
@@ -619,10 +620,10 @@
             self.log.info(
                 f"Puck {code}, puck position {location} has no corresponding CompoundSMILES, considering as an apo dataset"
             )
+            return {"success": True}
 
         # 3. Create the dataset directory
-        tmp_dir = pathlib.Path("/dls/tmp/xchem_diff2ir")  # TEMPORARY RESULTS DIR
-        processing_dir = tmp_dir / xchem_visit_dir.parts[-1]
+        processing_dir = xchem_visit_dir / 'processed'
         model_dir = processing_dir / "analysis" / "auto_model_building"
         dataset_dir = model_dir / dtag
         compound_dir = dataset_dir / "compound"
@@ -632,15 +633,16 @@
         dataset_count = sum(1 for p in model_dir.iterdir() if p.is_dir())
         self.log.info(f"Dataset count is: {dataset_count}")
 
-        # Copy the dimple files of the selected dataset
+        # Copy the dimple & upstream files of the selected dataset
         shutil.copy(pdb, str(dataset_dir / "dimple.pdb"))
         shutil.copy(mtz, str(dataset_dir / "dimple.mtz"))
+        shutil.copy(upstream_mtz, str(dataset_dir / pathlib.Path(upstream_mtz).parts[-1]))
 
         with open(compound_dir / f"{CompoundCode}.smiles", "w") as smi_file:
             smi_file.write(CompoundSMILES)
 
         # 4. Job launch logic
-
+        comparator_threshold = parameters.comparator_threshold
 
         if dataset_count < comparator_threshold:

From 9dd002d589eb18271ff20a6599d19e7d278a337f Mon Sep 17 00:00:00 2001
From: rowanwalker96
Date: Thu, 18 Dec 2025 16:45:34 +0000
Subject: [PATCH 2/3] Combine PanDDA2 & Pipedream trigger

---
 src/dlstbx/services/trigger_xchem.py | 146 ++++++++++++++------------
 src/dlstbx/wrapper/pandda_xchem.py   |   4 +-
 src/dlstbx/wrapper/pipedream.py      |  93 +++++++++++++++++
 3 files changed, 172 insertions(+), 71 deletions(-)
 create mode 100644 src/dlstbx/wrapper/pipedream.py

diff --git a/src/dlstbx/services/trigger_xchem.py b/src/dlstbx/services/trigger_xchem.py
index 3c60d76c4..84df6b007 100644
--- a/src/dlstbx/services/trigger_xchem.py
+++ b/src/dlstbx/services/trigger_xchem.py
@@ -1,9 +1,9 @@
 from __future__ import annotations
 
 import ast
-import re
 import json
 import pathlib
+import re
 import shutil
 import sqlite3
 from datetime import datetime, timedelta
@@ -159,6 +159,34 @@ def trigger(self, rw, header, message):
 
         return rw.transport.transaction_commit(txn)
 
+    def upsert_proc(self, rw, dcid, procname, recipe_parameters):
+        jp = self.ispyb.mx_processing.get_job_params()
+        jp["automatic"] = True
+        # jp["comments"] = parameters.comment
+        jp["datacollectionid"] = dcid
+        jp["display_name"] = procname
+        jp["recipe"] = f"postprocessing-{procname.lower()}"
+        self.log.info(jp)
+        jobid = self.ispyb.mx_processing.upsert_job(list(jp.values()))
+        self.log.debug(f"{procname} trigger: generated JobID {jobid}")
+
+        for key, value in recipe_parameters.items():
+            jpp = self.ispyb.mx_processing.get_job_parameter_params()
+            jpp["job_id"] = jobid
+            jpp["parameter_key"] = key
+            jpp["parameter_value"] = value
+            jppid = self.ispyb.mx_processing.upsert_job_parameter(list(jpp.values()))
+            self.log.debug(
+                f"{procname} trigger: generated JobParameterID {jppid} with {key}={value}"
+            )
+
+        self.log.debug(f"{procname}_id trigger: Processing job {jobid} created")
+
+        message = {"recipes": [], "parameters": {"ispyb_process": jobid}}
+        rw.transport.send("processing_recipe", message)
+
+        self.log.info(f"{procname}_id trigger: Processing job {jobid} triggered")
+
     @pydantic.validate_call(config={"arbitrary_types_allowed": True})
     def trigger_pandda_xchem(
         self,
@@ -197,6 +225,7 @@
 
         dcid = parameters.dcid
         scaling_id = parameters.scaling_id[0]
+        comparator_threshold = parameters.comparator_threshold
         protein_info = get_protein_for_dcid(parameters.dcid, session)
 
         # protein_id = getattr(protein_info, "proteinId")
@@ -206,21 +235,21 @@
         query = (session.query(Proposal)).filter(Proposal.proposalId == proposal_id)
         proposal = query.first()
 
-        # 0. Check that this is an XChem expt, find .sqlite database
+        # 0. Check that this is an XChem expt & locate .SQLite database
         if proposal.proposalCode not in {"lb"}:  # need to handle industrial 'sw' also
             self.log.debug(
                 f"Not triggering PanDDA2 pipeline for dcid={dcid} with proposal_code={proposal.proposalCode}"
             )
             return {"success": True}
 
-        # TEMPORARY, OPENBIND TEST VISIT
+        # TEMPORARY, FILTER BY OPENBIND VISIT
         if proposal.proposalNumber not in {"42888"}:
             self.log.debug(
                 f"Not triggering PanDDA2 pipeline for dcid={dcid}, only accepting data collections from lb42888 during test phase"
             )
             return {"success": True}
 
-        # Find corresponding xchem visit directory and database
+        # Find corresponding XChem visit directory and database
         xchem_dir = pathlib.Path(
             f"/dls/labxchem/data/{proposal.proposalCode}{proposal.proposalNumber}"
         )
@@ -262,7 +291,7 @@
                         subdir / "processing/database" / "soakDBDataFile.sqlite"
                     )
                     con = sqlite3.connect(
-                        f"file:{db_path}?mode=ro", uri=True, timeout=10
+                        f"file:{db_path}?mode=ro", uri=True, timeout=20
                     )
                     cur = con.cursor()
                     cur.execute("SELECT Protein FROM soakDB")
@@ -303,10 +332,10 @@
 
         processing_dir = xchem_visit_dir / "processing"
         processed_dir = xchem_visit_dir / "processed"
-        db = processing_dir / 'database' / "soakDBDataFile.sqlite"
+        db = xchem_visit_dir / "processing/database" / "soakDBDataFile.sqlite"
 
-        # Make a copy of the most recent sqlite for reading?
-        # db_copy = xchem_visit_dir / "processed/database" / "auto_soakDBDataFile.sqlite"
+        # Make a copy of the most recent sqlite for reading
+        # db_copy = xchem_visit_dir / "processing/database" / "auto_soakDBDataFile.sqlite"
         # if not db_copy.exists() or (db.stat().st_mtime != db_copy.stat().st_mtime):
         #     shutil.copy2(str(db), str(db_copy))
         #     self.log.info(f"Made a copy of {db}, auto_soakDBDataFile.sqlite")
@@ -492,7 +521,7 @@
                 * df["nTotalUniqueObservations"].astype(float)
             )  # I/sigI*completeness*# unique reflections
 
-            df = df[["processingPrograms","autoProcScalingId", "heuristic"]].copy()
+            df = df[["autoProcScalingId", "heuristic"]].copy()
             scaling_ids = df["autoProcScalingId"].tolist()
 
             # find associated dimple jobs
@@ -559,14 +588,16 @@
             f"Chosen dataset to take forward: {chosen_dataset_path} for dcid {dcid}"
         )
         scaling_id = int(df3["autoProcScalingId"][0])
-        environment = df3['processingEnvironment'][0]
-        upstream_mtz = ast.literal_eval(re.search(r"data=(\[[^\]]*\])", environment).group(1))[0]
-        upstream_proc = df[df['autoProcScalingId']==scaling_id]['processingPrograms'].item()
-
+        environment = df3["processingEnvironment"][0]
+        upstream_mtz = ast.literal_eval(
+            re.search(r"data=(\[[^\]]*\])", environment).group(1)
+        )[0]
+        self.log.info(f"Chosen mtz for dcid {dcid} is {upstream_mtz}")
+        # upstream_proc = df[df['autoProcScalingId']==scaling_id]['processingPrograms'].item()  # fails
         pdb = chosen_dataset_path + "/final.pdb"
         mtz = chosen_dataset_path + "/final.mtz"
 
-        self.log.debug("PanDDA2 trigger: Starting")
+        self.log.debug("PanDDA2/Pipedream trigger: Starting")
 
         # 2. Get ligand information, location & container code
 
@@ -602,7 +633,7 @@
 
         if len(df) != 1:
             self.log.info(
-                f"Unique row in .sqlite for dcid {dcid}, puck {code}, puck position {location} cannot be found in database {db}, can't continue."
+                f"Unique row in .sqlite for dtag {dtag}, puck {code}, puck position {location} cannot be found in database {db}, can't continue."
) return {"success": True} @@ -613,18 +644,15 @@ def trigger_pandda_xchem( if LibraryName == "DMSO": # exclude DMSO screen from PanDDA analysis self.log.info( - f"Puck {code}, puck position {location} is from DMSO solvent screen, excluding from PanDDA analysis" + f"{dtag} is DMSO solvent screen, excluding from PanDDA analysis" ) return {"success": True} elif not CompoundSMILES: - self.log.info( - f"Puck {code}, puck position {location} has no corresponding CompoundSMILES, considering as an apo dataset" - ) + self.log.info(f"{dtag} has no corresponding CompoundSMILES, skipping...") return {"success": True} # 3. Create the dataset directory - processing_dir = xchem_visit_dir / 'processed' - model_dir = processing_dir / "analysis" / "auto_model_building" + model_dir = processed_dir / "analysis" / "auto_model_building" dataset_dir = model_dir / dtag compound_dir = dataset_dir / "compound" self.log.info(f"Creating directory {dataset_dir}") @@ -633,74 +661,54 @@ def trigger_pandda_xchem( dataset_count = sum(1 for p in model_dir.iterdir() if p.is_dir()) self.log.info(f"Dataset count is: {dataset_count}") - # Copy the dimple & upstream files of the selected dataset + # Copy the dimple files of the selected dataset shutil.copy(pdb, str(dataset_dir / "dimple.pdb")) shutil.copy(mtz, str(dataset_dir / "dimple.mtz")) - shutil.copy(upstream_mtz, str(dataset_dir / pathlib.Path(upstream_mtz).parts[-1])) + shutil.copy( + upstream_mtz, str(dataset_dir / pathlib.Path(upstream_mtz).parts[-1]) + ) with open(compound_dir / f"{CompoundCode}.smiles", "w") as smi_file: smi_file.write(CompoundSMILES) # 4. Job launch logic - - comparator_threshold = parameters.comparator_threshold - - if dataset_count < comparator_threshold: - self.log.info( - f"Dataset dataset_count {dataset_count} < PanDDA2 comparator dataset threshold of {comparator_threshold}, skipping for now..." 
- ) - return {"success": True} - elif dataset_count == comparator_threshold: - n_datasets = len(dataset_list) - with open(model_dir / ".batch.json", "w") as f: - json.dump(dataset_list, f) - self.log.info( - f"Dataset dataset_count {dataset_count} = comparator_threshold of {comparator_threshold} datasets, launching PanDDA2 array job" - ) - elif dataset_count > comparator_threshold: - n_datasets = 1 - self.log.info(f"Launching single PanDDA2 job for dtag {dtag}") - - self.log.debug("PanDDA2 trigger: Starting") - pandda_parameters = { - "dcid": dcid, # - "processing_directory": str(processing_dir), + recipe_parameters = { + "dcid": dcid, + "processed_directory": str(processed_dir), "model_directory": str(model_dir), "dataset_directory": str(dataset_dir), "dtag": dtag, - "n_datasets": n_datasets, + "n_datasets": 1, "scaling_id": scaling_id, "comparator_threshold": comparator_threshold, "database_path": str(db), + "upstream_mtz": upstream_mtz, } - jp = self.ispyb.mx_processing.get_job_params() - jp["automatic"] = parameters.automatic - # jp["comments"] = parameters.comment - jp["datacollectionid"] = dcid - jp["display_name"] = "PanDDA2" - jp["recipe"] = "postprocessing-pandda2" - self.log.info(jp) - jobid = self.ispyb.mx_processing.upsert_job(list(jp.values())) - self.log.debug(f"PanDDA2 trigger: generated JobID {jobid}") - - for key, value in pandda_parameters.items(): - jpp = self.ispyb.mx_processing.get_job_parameter_params() - jpp["job_id"] = jobid - jpp["parameter_key"] = key - jpp["parameter_value"] = value - jppid = self.ispyb.mx_processing.upsert_job_parameter(list(jpp.values())) - self.log.debug( - f"PanDDA2 trigger: generated JobParameterID {jppid} with {key}={value}" + if dataset_count < comparator_threshold: + self.log.info( + f"Dataset dataset_count {dataset_count} < comparator dataset threshold of {comparator_threshold}, skipping PanDDA2 for now..." 
             )
+            self.upsert_proc(rw, dcid, "Pipedream", recipe_parameters)
+            return {"success": True}
 
-        self.log.debug(f"PanDDA2_id trigger: Processing job {jobid} created")
+        elif dataset_count == comparator_threshold:
+            recipe_parameters["n_datasets"] = len(dataset_list)
 
-        message = {"recipes": [], "parameters": {"ispyb_process": jobid}}
-        rw.transport.send("processing_recipe", message)
+            with open(model_dir / ".batch.json", "w") as f:
+                json.dump(dataset_list, f)
+
+            self.log.info(
+                f"Dataset count {dataset_count} = comparator dataset threshold of {comparator_threshold}, launching PanDDA2 array job"
+            )
+            self.upsert_proc(rw, dcid, "Pipedream", recipe_parameters)
+            self.upsert_proc(rw, dcid, "PanDDA2", recipe_parameters)
 
-        self.log.info(f"PanDDA2_id trigger: Processing job {jobid} triggered")
+        elif dataset_count > comparator_threshold:
+            self.log.info(f"Launching single PanDDA2 job for dtag {dtag}")
+            self.upsert_proc(rw, dcid, "Pipedream", recipe_parameters)
+            self.upsert_proc(rw, dcid, "PanDDA2", recipe_parameters)
 
         return {"success": True}
 
diff --git a/src/dlstbx/wrapper/pandda_xchem.py b/src/dlstbx/wrapper/pandda_xchem.py
index 5e216d2d0..72a8b1616 100644
--- a/src/dlstbx/wrapper/pandda_xchem.py
+++ b/src/dlstbx/wrapper/pandda_xchem.py
@@ -28,8 +28,8 @@ def run(self):
         PANDDA_2_DIR = "/dls_sw/i04-1/software/PanDDA2"
 
         # database_path = Path(params.get("database_path"))
-        processing_dir = Path(params.get("processing_directory"))
-        analysis_dir = Path(processing_dir / "analysis")
+        processed_dir = Path(params.get("processed_directory"))
+        analysis_dir = Path(processed_dir / "analysis")
         model_dir = Path(params.get("model_directory"))
         auto_panddas_dir = Path(analysis_dir / "auto_pandda2")
         Path(auto_panddas_dir).mkdir(exist_ok=True)
diff --git a/src/dlstbx/wrapper/pipedream.py b/src/dlstbx/wrapper/pipedream.py
new file mode 100644
index 000000000..ceb2cfa5e
--- /dev/null
+++ b/src/dlstbx/wrapper/pipedream.py
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+import sqlite3
+import subprocess
+from pathlib import Path
+
+from dlstbx.wrapper import Wrapper
+
+
+class PipedreamWrapper(Wrapper):
+    _logger_name = "dlstbx.wrap.pipedream"
+
+    def run(self):
+        assert hasattr(self, "recwrap"), "No recipewrapper object found"
+        self.log.info(
+            f"Running recipewrap file {self.recwrap.recipe_step['parameters']['recipewrapper']}"
+        )
+
+        params = self.recwrap.recipe_step["job_parameters"]
+
+        # database_path = Path(params.get("database_path"))
+        processed_dir = Path(params.get("processed_directory"))
+        analysis_dir = Path(processed_dir / "analysis")
+        upstream_mtz = params.get("upstream_mtz")
+        dimple_pdb = ""
+        dimple_mtz = ""
+        out_dir = analysis_dir / "Pipedream" / dtag
+
+        dtag = params.get("dtag")
+
+        self.log.info(f"Processing dtag: {dtag}")
+
+        pipedream_command = f"module load buster; module load graphviz; \
+            export BDG_TOOL_MOGUL=/dls_sw/apps/CSDS/2024.1.0/ccdc-software/mogul/bin/mogul; \
+            /dls_sw/apps/GPhL/BUSTER/20250717/scripts/pipedream \
+            -nolmr \
+            -hklin {upstream_mtz} \
+            -xyzin {dimple_pdb} \
+            -hklref {dimple_mtz} \
+            -d {out_dir} \
+            -mrefine TLSbasic,WaterUpdatePkmaps \
+            -keepwater \
+            -remediate \
+            -sidechainrebuild \
+            -runpepflip \
+            -rhocommands \
+            -xclusters \
+            -nochirals "
+        # -rhofit ligand.cif
+
+        try:
+            result = subprocess.run(
+                pipedream_command,
+                shell=True,
+                capture_output=True,
+                text=True,
+                cwd=analysis_dir,
+                check=True,
+                timeout=params.get("timeout-minutes") * 60,
+            )
+
+        except subprocess.CalledProcessError as e:
'{pipedream_command}' failed") + self.log.info(e.stdout) + self.log.error(e.stderr) + return False + + self.log.info(f"Pipedream finished successfully for dtag {dtag}") + return True + + def update_data_source(self, db_dict, dtag, database_path): + sql = ( + "UPDATE mainTable SET " + + ", ".join([f"{k} = :{k}" for k in db_dict]) + + f" WHERE CrystalName = '{dtag}'" + ) + conn = sqlite3.connect(database_path) + # conn.execute("PRAGMA journal_mode=WAL;") + cursor = conn.cursor() + cursor.execute(sql, db_dict) + conn.commit() + + # Integrate back with XCE via datasource + # db_dict = {} + # db_dict["DimplePANDDAwasRun"] = True + # # db_dict["DimplePANDDAreject"] = False + # db_dict["DimplePANDDApath"] = str(auto_panddas_dir / "processed_datasets") + + # try: + # self.update_data_source(db_dict, dtag, database_path) + # self.log.info(f"Updated sqlite database for dataset {dtag}") + # except Exception as e: + # self.log.info(f"Could not update sqlite database for dataset {dtag}: {e}") From 2102c73a7088a09ef0b379039043d18397a6dc15 Mon Sep 17 00:00:00 2001 From: rowanwalker96 Date: Fri, 19 Dec 2025 11:49:31 +0000 Subject: [PATCH 3/3] Fix pipedream wrapper --- setup.py | 2 +- src/dlstbx/services/trigger_xchem.py | 37 +++---------------- .../{pipedream.py => pipedream_xchem.py} | 18 +++++---- 3 files changed, 18 insertions(+), 39 deletions(-) rename src/dlstbx/wrapper/{pipedream.py => pipedream_xchem.py} (85%) diff --git a/setup.py b/setup.py index 6dfdd39db..97d80d313 100644 --- a/setup.py +++ b/setup.py @@ -80,7 +80,7 @@ "pandda_xchem = dlstbx.wrapper.pandda_xchem:PanDDAWrapper", "pandda_post = dlstbx.wrapper.pandda_post:PanDDApostWrapper", "pandda_rhofit = dlstbx.wrapper.pandda_rhofit:PanDDARhofitWrapper", - "pipedream = dlstbx.wrapper.pipedream_xchem:PipedreamWrapper", + "pipedream_xchem = dlstbx.wrapper.pipedream_xchem:PipedreamWrapper", "phaser_ellg = dlstbx.wrapper.phaser_ellg:PhasereLLGWrapper", "rlv = dlstbx.wrapper.rlv:RLVWrapper", "scaleit = dlstbx.wrapper.scaleit:ScaleitWrapper", diff --git a/src/dlstbx/services/trigger_xchem.py b/src/dlstbx/services/trigger_xchem.py index 84df6b007..2db01951c 100644 --- a/src/dlstbx/services/trigger_xchem.py +++ b/src/dlstbx/services/trigger_xchem.py @@ -66,7 +66,7 @@ class PanDDA_PostParameters(pydantic.BaseModel): automatic: Optional[bool] = False comment: Optional[str] = None scaling_id: list[int] - processing_directory: str + processed_directory: str timeout: float = pydantic.Field(default=60, alias="timeout-minutes") @@ -744,7 +744,7 @@ def trigger_pandda_xchem_post( dcid = parameters.dcid scaling_id = parameters.scaling_id[0] - processing_directory = pathlib.Path(parameters.processing_directory) + processed_directory = pathlib.Path(parameters.processed_directory) _, ispyb_info = dlstbx.ispybtbx.ispyb_filter({}, {"ispyb_dcid": dcid}, session) visit = ispyb_info.get("ispyb_visit", "") @@ -776,7 +776,7 @@ def trigger_pandda_xchem_post( ) .filter(ProcessingJob.dataCollectionId.in_(dcids)) .filter(ProcessingJob.automatic == True) # noqa E711 - .filter(AutoProcProgram.processingPrograms == "PanDDA2_post") + .filter(AutoProcProgram.processingPrograms == "PanDDA2-post") .filter(AutoProcProgram.recordTimeStamp > min_start_time) .filter( or_( @@ -794,37 +794,12 @@ def trigger_pandda_xchem_post( self.log.debug("PanDDA2 postrun trigger: Starting") - pandda_parameters = { + recipe_parameters = { "dcid": dcid, # - "processing_directory": str(processing_directory), + "processed_directory": str(processed_directory), "scaling_id": scaling_id, } - jp = 
-        jp = self.ispyb.mx_processing.get_job_params()
-        jp["automatic"] = parameters.automatic
-        # jp["comments"] = parameters.comment
-        jp["datacollectionid"] = dcid
-        jp["display_name"] = "PanDDA2_post"
-        jp["recipe"] = "postprocessing-pandda2-post"
-        self.log.info(jp)
-        jobid = self.ispyb.mx_processing.upsert_job(list(jp.values()))
-        self.log.debug(f"PanDDA2 postrun trigger: generated JobID {jobid}")
-
-        for key, value in pandda_parameters.items():
-            jpp = self.ispyb.mx_processing.get_job_parameter_params()
-            jpp["job_id"] = jobid
-            jpp["parameter_key"] = key
-            jpp["parameter_value"] = value
-            jppid = self.ispyb.mx_processing.upsert_job_parameter(list(jpp.values()))
-            self.log.debug(
-                f"PanDDA2 trigger: generated JobParameterID {jppid} with {key}={value}"
-            )
-
-        self.log.debug(f"PanDDA2_post trigger: Processing job {jobid} created")
-
-        message = {"recipes": [], "parameters": {"ispyb_process": jobid}}
-        rw.transport.send("processing_recipe", message)
-
-        self.log.info(f"PanDDA2_post trigger: Processing job {jobid} triggered")
+        self.upsert_proc(rw, dcid, "PanDDA2-post", recipe_parameters)
 
         return {"success": True}
 
diff --git a/src/dlstbx/wrapper/pipedream.py b/src/dlstbx/wrapper/pipedream_xchem.py
similarity index 85%
rename from src/dlstbx/wrapper/pipedream.py
rename to src/dlstbx/wrapper/pipedream_xchem.py
index ceb2cfa5e..af8d1706f 100644
--- a/src/dlstbx/wrapper/pipedream.py
+++ b/src/dlstbx/wrapper/pipedream_xchem.py
@@ -8,7 +8,7 @@
 
 
 class PipedreamWrapper(Wrapper):
-    _logger_name = "dlstbx.wrap.pipedream"
+    _logger_name = "dlstbx.wrap.pipedream_xchem"
 
     def run(self):
         assert hasattr(self, "recwrap"), "No recipewrapper object found"
@@ -21,13 +21,17 @@ def run(self):
         # database_path = Path(params.get("database_path"))
         processed_dir = Path(params.get("processed_directory"))
         analysis_dir = Path(processed_dir / "analysis")
-        upstream_mtz = params.get("upstream_mtz")
-        dimple_pdb = ""
-        dimple_mtz = ""
-        out_dir = analysis_dir / "Pipedream" / dtag
-
+        model_dir = Path(params.get("model_directory"))
         dtag = params.get("dtag")
 
+        dataset_dir = model_dir / dtag
+        pipedream_dir = analysis_dir / "pipedream"
+        Path(pipedream_dir).mkdir(parents=True, exist_ok=True)
+        out_dir = pipedream_dir / dtag
+
+        upstream_mtz = params.get("upstream_mtz")
+        dimple_pdb = dataset_dir / "dimple.pdb"
+        dimple_mtz = dataset_dir / "dimple.mtz"
+
         self.log.info(f"Processing dtag: {dtag}")
 
         pipedream_command = f"module load buster; module load graphviz; \
@@ -60,7 +64,7 @@ def run(self):
             )
 
         except subprocess.CalledProcessError as e:
-            self.log.error(f"PanDDA2 command: '{pipedream_command}' failed")
+            self.log.error(f"Pipedream command: '{pipedream_command}' failed")
             self.log.info(e.stdout)
             self.log.error(e.stderr)
             return False