From 5c4ff7bc38ff4262c7daac81f9f5628b994c044b Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Wed, 10 Dec 2025 12:04:48 +1100
Subject: [PATCH 1/6] modified REDE and questions inside to be better suited
 for entire company.

---
 .github/ETHICS_QUESTIONNAIRE.MD    |  64 +++++++++--------
 .github/workflows/ethics-gate.yaml | 109 +----------------------------
 .github/workflows/redeengine.py    |  38 ----------
 3 files changed, 37 insertions(+), 174 deletions(-)
 delete mode 100644 .github/workflows/redeengine.py

diff --git a/.github/ETHICS_QUESTIONNAIRE.MD b/.github/ETHICS_QUESTIONNAIRE.MD
index 57223a3..53337ba 100644
--- a/.github/ETHICS_QUESTIONNAIRE.MD
+++ b/.github/ETHICS_QUESTIONNAIRE.MD
@@ -1,36 +1,40 @@
-**Ethics & Regulatory Questionnaire**  
-*This PR cannot be merged until this form is completed.*
-
-Please reply to this comment and answer all questions below (you can copy-paste and fill it).
-
-1. Does this change involve any of the following? (check all that apply)  
-   - [ ] Training or fine-tuning of AI/ML models  
-   - [ ] Inference/serving of AI/ML models in production  
-   - [ ] Processing of personal data (PII, health, biometric, financial, children’s data, etc.)  
-   - [ ] Dual-use or military-applicable technology  
-   - [ ] Safety-critical systems (medical device, aviation, automotive, etc.)  
-   - [ ] High-impact algorithmic decision-making (credit, hiring, criminal justice, etc.)  
-   - [ ] None of the above (pure docs, tests, CI, formatting, etc.)
-
-2. Estimated risk level (your honest assessment)  
-   - [ ] Low – no ethical or regulatory impact  
-   - [ ] Medium – possible fairness/privacy concerns  
-   - [ ] High – potential for serious harm or legal non-compliance
-
-3. Brief description of any ethical/regulatory impact (or write “None”)
-
+**Pull Request Ethics, Security & Compliance Checklist**  
+*(Mandatory for all pull requests. This helps you develop professional habits required in industry and academia.)*
+
+1. Nature of the contribution (select all that apply)  
+   - [ ] Purely non-functional changes (documentation, formatting, tests, CI/CD, refactoring without behavioral change)  
+   - [ ] Introduction or modification of cryptographic functions or security-critical code  
+   - [ ] Collection, storage, transmission, or processing of personal data (PII, health data, biometric data, location, etc.)  
+   - [ ] Use or processing of data belonging to children under 13 (or reasonably likely to include such data)  
+   - [ ] Implementation or modification of AI/ML models (training, fine-tuning, inference, prompt engineering)  
+   - [ ] Code interacting with external networks, APIs, or third-party services  
+
+2. Security & responsible practice assessment  
+   - [ ] No security, privacy, or ethical implications identified  
+   - [ ] Potential security or privacy implications present (e.g., input validation, error handling, data exposure)  
+   - [ ] High-risk implications (potential for unauthorized access, data leakage, bias, or legal non-compliance)
+
+3. Relevant laws, regulations, and frameworks considered (list all that may apply; if you cannot identify any, refer to the D2L Ethics module)  
+   Examples:  
+   - Australian Privacy Act 1988 (Cth) & Privacy Amendment (Notifiable Data Breaches) Act 2017  
+   - GDPR (EU), CCPA/CPRA (California), COPPA (US children’s privacy)  
+   - University Human Research Ethics requirements (HREC/NHMRC)  
+   - NIST Cybersecurity Framework / OWASP Top 10 / ASVS  
+   - ISO/IEC 27001, NIST AI Risk Management Framework (AI RMF)  
+   - Export controls (ITAR, EAR, Australian Defence Export Controls)  
    > 
 
-4. Relevant regulations / standards considered (e.g., EU AI Act, GDPR, HIPAA, NIST AI RMF, export controls, etc.)  
-   List them or write “N/A”
-
+4. Security and responsible engineering measures implemented (select and describe)  
+   - [ ] Yes — specify below (e.g., input sanitization, prepared statements, least-privilege access, data minimization, anonymization, secure defaults, dependency scanning, bias audit, consent mechanism, etc.)  
+   - [ ] Partially implemented — further review recommended  
+   - [ ] No — measures appear necessary  
+   - [ ] Not applicable  
    > 
 
-5. Have mitigation measures been implemented (bias testing, data minimization, consent flows, etc.)?  
-   - [ ] Yes → describe below  
-   - [ ] No  
-   - [ ] Not applicable
-
+5. Additional notes (optional)  
+   Any references to secure coding guidelines followed (e.g., ISO/IEC 27001, OWASP Secure Coding Practices, university security standards), vulnerability scans performed, or ethical review status.  
    > 
 
-Thank you! The ethics gate will evaluate your answers automatically.
\ No newline at end of file
+By submitting this pull request, I confirm I have considered the security, privacy, and ethical implications of my contribution in accordance with university policy and applicable legal and professional standards.
+
+Thank you for helping maintain a secure and responsible codebase.
\ No newline at end of file
diff --git a/.github/workflows/ethics-gate.yaml b/.github/workflows/ethics-gate.yaml
index 28aee42..51a4bda 100644
--- a/.github/workflows/ethics-gate.yaml
+++ b/.github/workflows/ethics-gate.yaml
@@ -1,18 +1,14 @@
 on:
   pull_request_target:
     types: [opened, reopened, synchronize]
-  issue_comment:
-    types: [created]
 
 permissions:
   contents: read          # needed for checkout
-  pull-requests: write    # needed for commenting & reviews (gh) when running in pull_request_target
-  checks: write           # needed to create check runs
+  pull-requests: write    # needed for posting comments
 
 jobs:
-  # Job that posts the questionnaire (runs in the trusted pull_request_target context).
   post-questionnaire:
-    if: github.event_name == 'pull_request_target' && github.event.pull_request.draft == false
+    if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
     steps:
       - name: Checkout base repo (safe; do NOT checkout PR head here)
@@ -46,103 +42,4 @@ jobs:
             exit 1
           fi
           gh pr comment ${{ github.event.pull_request.number }} --body-file .github/ETHICS_QUESTIONNAIRE.MD
-          echo "Posted ethics questionnaire to PR #${{ github.event.pull_request.number }}."
-
-  # Ethics engine: collects comments, runs evaluation, posts a check, and requests changes for HIGH risk.
-  # This job runs in the trusted context for pull_request_target and also on issue_comment (untrusted).
-  # For untrusted issue_comment runs, write actions (requesting changes) may be skipped if permissions are restricted.
-  ethics-engine:
-    runs-on: ubuntu-latest
-    needs: post-questionnaire
-    steps:
-      - name: Checkout base repo (we run parser from base repo)
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.pull_request.base.sha || github.ref }}
-          fetch-depth: 0
-
-      - name: Authenticate gh CLI with GITHUB_TOKEN
-        run: |
-          echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token
-
-      - name: Determine PR number
-        id: prnumber
-        run: |
-          # Determine PR number whether triggered by pull_request_target or issue_comment
-          PR_NUMBER=$(jq -r 'if .pull_request then .pull_request.number elif .issue then .issue.number else empty end' "$GITHUB_EVENT_PATH")
-          if [[ -z "$PR_NUMBER" ]]; then
-            echo "No PR number found in event payload; exiting."
-            echo "risk=UNKNOWN" >> $GITHUB_OUTPUT
-            exit 0
-          fi
-          echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT
-
-      - name: Collect comments
-        id: collect
-        run: |
-          PR=${{ steps.prnumber.outputs.pr_number }}
-          # Gather all PR comments into a single string (robust to empty)
-          ANSWERS=$(gh pr view "$PR" --json comments --jq '[.comments[].body] | join("\n\n")' 2>/dev/null || true)
-          echo "$ANSWERS" > answers.txt
-          # Expose the answers (trim to avoid huge output)
-          echo "answers=$(echo "$ANSWERS" | head -c 32768 | sed -e 's/"/'"'"'"/g')" >> $GITHUB_OUTPUT
-
-      - name: Run ethics parser & evaluator (safe runs code from base repo)
-        id: run_engine
-        env:
-          PR_NUMBER: ${{ steps.prnumber.outputs.pr_number }}
-        run: |
-          # Ensure parser exists
-          if [[ ! -f .github/workflows/parse_and_evaluate.py ]]; then
-            echo "Parser .github/workflows/parse_and_evaluate.py not found in base repo; aborting."
-            echo "RISK_LEVEL=UNKNOWN" > result.txt
-          else
-            python3 .github/workflows/parse_and_evaluate.py "$(cat answers.txt)" > result.txt || true
-          fi
-          cat result.txt
-          # Extract RISK_LEVEL=XYZ from result.txt if present
-          RISK=$(grep -m1 '^RISK_LEVEL=' result.txt | cut -d= -f2 || echo "LOW")
-          echo "risk=$RISK" >> $GITHUB_OUTPUT
-
-      - name: Create/update "Ethics Review" check run
-        uses: actions/github-script@v7
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            const risk = "${{ steps.run_engine.outputs.risk }}".trim();
-            const conclusions = {
-              "LOW":    "success",
-              "MEDIUM": "action_required",
-              "HIGH":   "failure"
-            };
-            const conclusion = conclusions[risk] || "failure";
-            const head_sha = (context.payload.pull_request && context.payload.pull_request.head && context.payload.pull_request.head.sha) || (context.payload.issue && context.payload.issue.pull_request && context.payload.issue.number ? undefined : undefined) || github.event.pull_request?.head?.sha;
-            await github.rest.checks.create({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              name: "Ethics Review",
-              head_sha: head_sha || context.sha,
-              status: "completed",
-              conclusion,
-              output: {
-                title: risk === "LOW" ? "Ethics cleared" : `Ethics review: ${risk}`,
-                summary: risk === "LOW" ? "Low risk – automatically approved" : `Risk level ${risk} – review required`
-              }
-            });
-
-      - name: Request changes on HIGH risk (trusted-only; skip on untrusted events)
-        if: steps.run_engine.outputs.risk == 'HIGH'
-        run: |
-          PR=${{ steps.prnumber.outputs.pr_number }}
-          # Only attempt to request changes when running in pull_request_target context (trusted).
-          if [[ "${GITHUB_EVENT_NAME}" != "pull_request_target" ]]; then
-            echo "Not in pull_request_target context; skipping request-changes (insufficient permissions for fork PRs)."
-            exit 0
-          fi
-          # Request changes using gh (GITHUB_TOKEN from pull_request_target has write rights)
-          gh pr review "$PR" --request-changes -b "@ethics-team Required manual review for high-risk change"
-          echo "Requested changes on PR #$PR due to HIGH risk."
-
-      - name: Final status message
-        run: |
-          echo "Ethics engine completed. Risk level: ${{ steps.run_engine.outputs.risk }}"
\ No newline at end of file
+          echo "Posted ethics questionnaire to PR #${{ github.event.pull_request.number }}."
\ No newline at end of file
diff --git a/.github/workflows/redeengine.py b/.github/workflows/redeengine.py
deleted file mode 100644
index c9f5be2..0000000
--- a/.github/workflows/redeengine.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import os
-import json
-import sys
-
-def evaluate_risk(answers):
-    risk_score = 0
-    flags = []
-
-    if answers.get("involves_ai", False):
-        risk_score += 3
-        flags.append("AI/ML component")
-    if answers.get("processes_pii", False):
-        risk_score += 5
-        flags.append("Personal data")
-    if answers.get("dual_use", False):
-        risk_score += 10
-        flags.append("🚨 Dual-use technology")
-    if answers.get("safety_critical", False):
-        risk_score += 8
-        flags.append("Safety-critical")
-
-    if "purely documentation" in answers.get("safe_changes", []):
-        return "LOW", "No ethical concerns detected."
-
-    if risk_score >= 10:
-        return "HIGH", " | ".join(flags)
-    elif risk_score >= 5:
-        return "MEDIUM", " | ".join(flags)
-    else:
-        return "LOW", "Minor changes"
-
-# Parse comment or form submission here (simplified)
-# In real use, you'd parse the actual comment body
-answers = json.loads(sys.argv[1])  # passed from workflow
-level, reason = evaluate_risk(answers)
-
-print(f"RISK_LEVEL={level}")
-print(f"REASON={reason}")
\ No newline at end of file

From be5d15a0b829b8ca828e62de8b658106167a6473 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 11 Dec 2025 22:09:55 +1100
Subject: [PATCH 2/6] added new features such as multiple file selection,
 scanning folders and subfolders, extension filtering, current directory
 scanning.

---
 asset-scanner/file_handler.py  |  20 ++-
 asset-scanner/scan_report.json | 231 +++++++++++++++++++++++++++++++++
 asset-scanner/scanner.py       |  98 +++++++++-----
 3 files changed, 312 insertions(+), 37 deletions(-)

diff --git a/asset-scanner/file_handler.py b/asset-scanner/file_handler.py
index abd571c..7b05934 100644
--- a/asset-scanner/file_handler.py
+++ b/asset-scanner/file_handler.py
@@ -4,14 +4,24 @@
 from scan_media import extract_text_from_file
 
 def find_files(directory, exts=None):
-    exts = exts or []
+    """Find all matching files in a directory, optionally filtered by extensions."""
+    suffixes = tuple(e.lower() for e in (exts or []))  # str.endswith() accepts a tuple
     matches = []
-    for dirpath, _, filenames in os.walk(directory):
-        for fn in filenames:
-            if not exts or any(fn.lower().endswith(e) for e in exts):
-                matches.append(os.path.join(dirpath, fn))
+
+    def _report(err):
+        # os.walk() silently drops unreadable directories unless onerror is supplied.
+        print(f"[!] Cannot read directory: {err.filename} ({err.strerror}). Skipping...")
+
+    # Traverse recursively; unreadable sub-directories are reported and skipped.
+    for dirpath, _, filenames in os.walk(directory, onerror=_report):
+        for fn in filenames:
+            # Keep the file when no filter is set or its suffix matches (case-insensitive).
+            if not suffixes or fn.lower().endswith(suffixes):
+                matches.append(os.path.join(dirpath, fn))
+
     return matches
 
+
 def read_file(path):
     lower_path = path.lower()
     if lower_path.endswith('.docx'):
diff --git a/asset-scanner/scan_report.json b/asset-scanner/scan_report.json
index e69de29..8d4a78a 100644
--- a/asset-scanner/scan_report.json
+++ b/asset-scanner/scan_report.json
@@ -0,0 +1,231 @@
+[
+  {
+    "pattern": "email",
+    "description": "Detected EMAIL_ADDRESS",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 2,
+    "risk": "Low",
+    "tip": "Mask or obfuscate emails in logs/code unless strictly required; avoid storing in repos.",
+    "law": "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
+    "compliance": [
+      "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
+      "GDPR Art. 5(1)(c) \u2014 Data minimisation",
+      "GDPR Art. 32 \u2014 Security of processing",
+      "GDPR Recital 30 \u2014 Online identifiers"
+    ],
+    "raw": "ramonricgarcia@gmail.com"
+  },
+  {
+    "pattern": "full_name",
+    "description": "Detected PERSON",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 1,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "Ramon Garcia"
+  },
+  {
+    "pattern": "url",
+    "description": "Detected URL",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 2,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "www.linkedin.com/in/ramon-garcia-081a471a6"
+  },
+  {
+    "pattern": "location",
+    "description": "Detected LOCATION",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 5,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "Burwood"
+  },
+  {
+    "pattern": "date_time",
+    "description": "Detected DATE_TIME",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 6,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "March 2022"
+  },
+  {
+    "pattern": "date_time",
+    "description": "Detected DATE_TIME",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 6,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "October 2026"
+  },
+  {
+    "pattern": "date_time",
+    "description": "Detected DATE_TIME",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 8,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "December 2021"
+  },
+  {
+    "pattern": "date_time",
+    "description": "Detected DATE_TIME",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 10,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "April 2022"
+  },
+  {
+    "pattern": "full_name",
+    "description": "Detected PERSON",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 15,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "Connor Clothing"
+  },
+  {
+    "pattern": "date_time",
+    "description": "Detected DATE_TIME",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 15,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "2024"
+  },
+  {
+    "pattern": "full_name",
+    "description": "Detected PERSON",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 15,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "Connor"
+  },
+  {
+    "pattern": "date_time",
+    "description": "Detected DATE_TIME",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 17,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "March 2022"
+  },
+  {
+    "pattern": "date_time",
+    "description": "Detected DATE_TIME",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 21,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "January 2021"
+  },
+  {
+    "pattern": "location",
+    "description": "Detected LOCATION",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 29,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "Johntheripper"
+  },
+  {
+    "pattern": "url",
+    "description": "Detected URL",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 2,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "gmail.com"
+  },
+  {
+    "pattern": "phone",
+    "description": "Detected PHONE_NUMBER",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
+    "line": 2,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "0412-145-123"
+  },
+  {
+    "pattern": "date_time",
+    "description": "Detected DATE_TIME",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx",
+    "line": 3,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "04/09/2003"
+  },
+  {
+    "pattern": "full_name",
+    "description": "Detected PERSON",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx",
+    "line": 2,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "Prospector Drive Cairnlea"
+  },
+  {
+    "pattern": "phone",
+    "description": "Detected PHONE_NUMBER",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx",
+    "line": 1,
+    "risk": "Low",
+    "tip": "Follow secure handling and removal procedures.",
+    "law": "General Best Practice",
+    "compliance": [],
+    "raw": "0412145123"
+  },
+  {
+    "pattern": "medicare_number",
+    "description": "Australian Medicare number",
+    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx",
+    "line": 1,
+    "risk": "High",
+    "tip": "Treat Medicare numbers as sensitive; minimise collection; avoid code/logs; secure storage and transmission.",
+    "law": "Privacy Act 1988 (Cth) \u2014 APP 9 (Government related identifiers)",
+    "compliance": [
+      "Privacy Act 1988 (Cth) \u2014 APP 9 (Government related identifiers)",
+      "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
+      "Privacy Act 1988 (Cth) \u2014 Notifiable Data Breaches (Part IIIC)"
+    ],
+    "raw": "0412145123"
+  }
+]
\ No newline at end of file
diff --git a/asset-scanner/scanner.py b/asset-scanner/scanner.py
index 04103cd..4fecac8 100644
--- a/asset-scanner/scanner.py
+++ b/asset-scanner/scanner.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python3
-
 #!/usr/bin/env python3
 """
 scanner.py — Redback Ethics PII & Secrets Scanner (Presidio-powered)
@@ -140,34 +138,55 @@ def scan_text(text: str, file_path: str, analyzer: AnalyzerEngine, patterns_meta
 
     return findings
 
+def scan_folder_or_file(file=None, root=None, extensions=None):
+    """Scan either a single file or all relevant files in the folder."""
+    if file:
+        # Single file scan
+        return [file]  # Return as a single-element list for compatibility
+    else:
+        # Scan entire folder
+        return find_files(root, exts=extensions)
+
 # file scanner
-def scan_paths(paths: Iterable[str], analyzer: AnalyzerEngine, patterns_meta: Dict) -> List[Dict[str, Any]]:
+def scan_paths(paths, analyzer, patterns_meta):
+    """Read each path, decode it to text if needed, and collect scan_text() findings."""
     all_findings = []
-    for path in paths:
-        print(f"\n[i] Reading: {path}")
-        content = read_file(path)
-        if isinstance(content, bytes):
-            try:
-                content = content.decode("utf-8")
-            except:
-                content = content.decode("latin-1", errors="ignore")
-        if isinstance(content, str) and content.strip():
-            print(f"    → Extracted {len(content):,} characters")
-            all_findings.extend(scan_text(content, path, analyzer, patterns_meta))
-        else:
-            print("    → No text extracted (image-only PDF?)")
+    for path in paths:
+        print(f"[i] Scanning file: {path}")
+        try:
+            content = read_file(path)
+            if isinstance(content, bytes):
+                # Raw bytes must be decoded before scanning; latin-1 is the lossless fallback.
+                try:
+                    content = content.decode("utf-8")
+                except UnicodeDecodeError:
+                    content = content.decode("latin-1", errors="ignore")
+            if not isinstance(content, str) or not content.strip():
+                print(f"[i] Skipping unsupported or empty file: {path}")
+                continue
+            all_findings.extend(scan_text(content, path, analyzer, patterns_meta))
+        except Exception as e:  # one unreadable file must not abort the whole scan
+            print(f"[!] Error processing file {path}: {e}")
     return all_findings
 
 # CLI & main
 def parse_args(argv=None):
-    # parse_args function
     ap = argparse.ArgumentParser(description="Sensitive data scanner")
-    ap.add_argument("--file", help="Single file to scan")
-    ap.add_argument("--root", default=".", help="Root directory")
-    ap.add_argument("--patterns", default=DEFAULT_PATTERNS_FILE)
-    ap.add_argument("--out", default=DEFAULT_OUT)
-    ap.add_argument("--ext", nargs="*", default=DEFAULT_TARGET_EXTS)
-    ap.add_argument("--no-console", action="store_true")
+    ap.add_argument(
+        "--file", nargs="*", help="One or more specific files to scan (space-separated list)"
+    )  # `nargs="*"` allows multiple files
+    ap.add_argument(
+        "--root", nargs="*", help="One or more directories for recursive scanning"
+    )
+    ap.add_argument(
+        "--patterns", default=DEFAULT_PATTERNS_FILE, help="Path to patterns.json"
+    )
+    ap.add_argument(
+        "--ext", nargs="*", default=DEFAULT_TARGET_EXTS, help="File extensions to include (e.g., .txt .pdf)"
+    )
+    ap.add_argument(
+        "--out", default=DEFAULT_OUT, help="Output file for results"
+    )
     return ap.parse_args(argv or sys.argv[1:])
 
 def get_valid_path():
@@ -180,24 +199,39 @@ def get_valid_path():
         print("Invalid path, try again.")
 
 def main():
-    ns = parse_args()
+    # Parse arguments from the CLI
+    ns = parse_args()  # Contains file, root, patterns, ext, and out args
+
+    # Load patterns and initialize the analyzer
     patterns_meta = load_patterns(ns.patterns)
     analyzer = get_analyzer()
 
-    if ns.file:
-        paths = [ns.file]
-        print(f"[i] Scanning single file: {ns.file}")
-    else:
-        directory = get_valid_path()
-        paths = list(find_files(directory, ns.ext))
-        print(f"[i] Found {len(paths)} files to scan in {directory}")
+    # Determine files to scan (using --file and --root)
+    paths = []  # Initialize an empty list to store all files
+    if ns.file:  # Add files passed using the --file argument
+        paths.extend(ns.file)  # ns.file is already a list of files
+
+    if ns.root:  # Add files from folders passed using --root
+        for folder in ns.root:
+            folder_files = find_files(folder, exts=ns.ext)  # keyword must match find_files(directory, exts=None)
+            paths.extend(folder_files)
+
+    # Validate if any files were found
+    if not paths:
+        print("[!] No files found to scan. Please check your input.")
+        return 0
+
+    print(f"[i] Found {len(paths)} files to scan.")
 
+    # Scan the files
     findings = scan_paths(paths, analyzer, patterns_meta)
 
+    # Write the scan results to an output report file
     enriched = write_report(findings, out_path=ns.out)
     print(f"\n[i] Full report (with paths & raw PII) saved locally → {ns.out}")
     print("    This file is git-ignored and must NEVER be committed.")
 
+    # Handle scan results and risk evaluation
     if any(f.get("risk") == "High" for f in enriched):
         print("\n[!] HIGH-RISK PII DETECTED → SCAN FAILED")
         return 1
@@ -205,7 +239,7 @@ def main():
         print(f"\n[i] {len(findings)} findings → check {ns.out}")
     else:
         print("\n[Success] NO PII FOUND!")
-    return 0
+    return 0  # explicit success code; main() returns 1 on high-risk findings
 
 if __name__ == "__main__":
     raise SystemExit(main())
\ No newline at end of file

From ecdf043e820da8b37f2d3b7517bc523847123079 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 11 Dec 2025 22:17:19 +1100
Subject: [PATCH 3/6] update file_handler.py

---
 asset-scanner/file_handler.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/asset-scanner/file_handler.py b/asset-scanner/file_handler.py
index 7b05934..4fec9c5 100644
--- a/asset-scanner/file_handler.py
+++ b/asset-scanner/file_handler.py
@@ -21,7 +21,6 @@ def find_files(directory, exts=None):
 
     return matches
 
-
 def read_file(path):
     lower_path = path.lower()
     if lower_path.endswith('.docx'):

From c67c9f18fb675b28ef138926a730657dc7bc10fc Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 11 Dec 2025 22:18:24 +1100
Subject: [PATCH 4/6] minor update to readme.md

---
 README.md | Bin 1615 -> 1568 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/README.md b/README.md
index 22fc73fb002f08a42b7dd51a2f54cb35ef29ad15..f099a5a04789749efe0f14cb42d29eb336d730d2 100644
GIT binary patch
delta 247
zcmX@lvw%lbS67#dOIcZ2K}*ZOEVZaCGqqexYoml5qYzBkEj6*Ev?#S$OKY+>W8CCb
zjM<aLm>L+lCeLKj7vs`Z2uZCdQAo?oNi9~;NYE>(C`r(q{D4VP94Hl@UzCE7O36>I
z0Llq5D^E6KR-YWlY{n0>!#zJS2V}uAX7|bOm}4e;v)BPG=wXrI09oMV!o@Xt1B(_D
zkh;$zD#`_NG|<h;3ZA|J{y`y*ej$pAlX+NsC(mNl7J<nEedLpwoSIjh3N-i*YZ5yb
HScVG#8T>#7

delta 305
zcmZ3$bDl>+S67#pi<e7TSy@3#%fBqOs4O$JTuY00qo^FCC_=_9HL;|$D76?UGuf3f
zj*)lrV#aJB$;Z^d3{*9_pGhCc+s7m(&C8{$5a8mbkdmKVnwy$eQmmknpjVKRmY~Ty
z`7M(;P!$WaGLTefRtJ)P%w|FeTio*#bHG+iW_AZk-eQgcl4dM+>PSjbOG+~H(iMVB
z5{pVwQ-JOP+85&F0#d;{c`Az*D};5BMN|SP1@axxZ^{auz5)J0A&!0_ii*6GU$gWA
k4X$9-7Dp%ndfz8AIW@01739b5tVx0(gP_5~z{>?h0QomVCjbBd


From b862d30fbba8d5995e12de2ed283f66196c64b45 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 11 Dec 2025 22:28:31 +1100
Subject: [PATCH 5/6] removing tracked files

---
 asset-scanner/scan_report.json | 231 ---------------------------------
 1 file changed, 231 deletions(-)
 delete mode 100644 asset-scanner/scan_report.json

diff --git a/asset-scanner/scan_report.json b/asset-scanner/scan_report.json
deleted file mode 100644
index 8d4a78a..0000000
--- a/asset-scanner/scan_report.json
+++ /dev/null
@@ -1,231 +0,0 @@
-[
-  {
-    "pattern": "email",
-    "description": "Detected EMAIL_ADDRESS",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 2,
-    "risk": "Low",
-    "tip": "Mask or obfuscate emails in logs/code unless strictly required; avoid storing in repos.",
-    "law": "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
-    "compliance": [
-      "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
-      "GDPR Art. 5(1)(c) \u2014 Data minimisation",
-      "GDPR Art. 32 \u2014 Security of processing",
-      "GDPR Recital 30 \u2014 Online identifiers"
-    ],
-    "raw": "ramonricgarcia@gmail.com"
-  },
-  {
-    "pattern": "full_name",
-    "description": "Detected PERSON",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 1,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "Ramon Garcia"
-  },
-  {
-    "pattern": "url",
-    "description": "Detected URL",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 2,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "www.linkedin.com/in/ramon-garcia-081a471a6"
-  },
-  {
-    "pattern": "location",
-    "description": "Detected LOCATION",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 5,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "Burwood"
-  },
-  {
-    "pattern": "date_time",
-    "description": "Detected DATE_TIME",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 6,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "March 2022"
-  },
-  {
-    "pattern": "date_time",
-    "description": "Detected DATE_TIME",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 6,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "October 2026"
-  },
-  {
-    "pattern": "date_time",
-    "description": "Detected DATE_TIME",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 8,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "December 2021"
-  },
-  {
-    "pattern": "date_time",
-    "description": "Detected DATE_TIME",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 10,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "April 2022"
-  },
-  {
-    "pattern": "full_name",
-    "description": "Detected PERSON",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 15,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "Connor Clothing"
-  },
-  {
-    "pattern": "date_time",
-    "description": "Detected DATE_TIME",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 15,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "2024"
-  },
-  {
-    "pattern": "full_name",
-    "description": "Detected PERSON",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 15,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "Connor"
-  },
-  {
-    "pattern": "date_time",
-    "description": "Detected DATE_TIME",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 17,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "March 2022"
-  },
-  {
-    "pattern": "date_time",
-    "description": "Detected DATE_TIME",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 21,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "January 2021"
-  },
-  {
-    "pattern": "location",
-    "description": "Detected LOCATION",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 29,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "Johntheripper"
-  },
-  {
-    "pattern": "url",
-    "description": "Detected URL",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 2,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "gmail.com"
-  },
-  {
-    "pattern": "phone",
-    "description": "Detected PHONE_NUMBER",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx",
-    "line": 2,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "0412-145-123"
-  },
-  {
-    "pattern": "date_time",
-    "description": "Detected DATE_TIME",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx",
-    "line": 3,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "04/09/2003"
-  },
-  {
-    "pattern": "full_name",
-    "description": "Detected PERSON",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx",
-    "line": 2,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "Prospector Drive Cairnlea"
-  },
-  {
-    "pattern": "phone",
-    "description": "Detected PHONE_NUMBER",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx",
-    "line": 1,
-    "risk": "Low",
-    "tip": "Follow secure handling and removal procedures.",
-    "law": "General Best Practice",
-    "compliance": [],
-    "raw": "0412145123"
-  },
-  {
-    "pattern": "medicare_number",
-    "description": "Australian Medicare number",
-    "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx",
-    "line": 1,
-    "risk": "High",
-    "tip": "Treat Medicare numbers as sensitive; minimise collection; avoid code/logs; secure storage and transmission.",
-    "law": "Privacy Act 1988 (Cth) \u2014 APP 9 (Government related identifiers)",
-    "compliance": [
-      "Privacy Act 1988 (Cth) \u2014 APP 9 (Government related identifiers)",
-      "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
-      "Privacy Act 1988 (Cth) \u2014 Notifiable Data Breaches (Part IIIC)"
-    ],
-    "raw": "0412145123"
-  }
-]
\ No newline at end of file

From fd832f1a92790799ac8ffd28f4eb78b46e80718e Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 11 Dec 2025 22:29:06 +1100
Subject: [PATCH 6/6] fixing code in scanner.py

---
 asset-scanner/scanner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asset-scanner/scanner.py b/asset-scanner/scanner.py
index 4fecac8..93cc4fc 100644
--- a/asset-scanner/scanner.py
+++ b/asset-scanner/scanner.py
@@ -185,7 +185,7 @@ def parse_args(argv=None):
         "--ext", nargs="*", default=[".txt", ".json"], help="File extensions to include (e.g., .txt .pdf)"
     )
     ap.add_argument(
-        "--out", default="scan_report.json", help="Output file for results"
+        "--out", default="scan_report.local.json", help="Output file for results"
     )
     return ap.parse_args(argv or sys.argv[1:])