From 5c4ff7bc38ff4262c7daac81f9f5628b994c044b Mon Sep 17 00:00:00 2001 From: RamGcia Date: Wed, 10 Dec 2025 12:04:48 +1100 Subject: [PATCH 1/6] modified REDE and questions inside to be better suited for entire company. --- .github/ETHICS_QUESTIONNAIRE.MD | 64 +++++++++-------- .github/workflows/ethics-gate.yaml | 109 +---------------------------- .github/workflows/redeengine.py | 38 ---------- 3 files changed, 37 insertions(+), 174 deletions(-) delete mode 100644 .github/workflows/redeengine.py diff --git a/.github/ETHICS_QUESTIONNAIRE.MD b/.github/ETHICS_QUESTIONNAIRE.MD index 57223a3..53337ba 100644 --- a/.github/ETHICS_QUESTIONNAIRE.MD +++ b/.github/ETHICS_QUESTIONNAIRE.MD @@ -1,36 +1,40 @@ -**Ethics & Regulatory Questionnaire** -*This PR cannot be merged until this form is completed.* - -Please reply to this comment and answer all questions below (you can copy-paste and fill it). - -1. Does this change involve any of the following? (check all that apply) - - [ ] Training or fine-tuning of AI/ML models - - [ ] Inference/serving of AI/ML models in production - - [ ] Processing of personal data (PII, health, biometric, financial, children’s data, etc.) - - [ ] Dual-use or military-applicable technology - - [ ] Safety-critical systems (medical device, aviation, automotive, etc.) - - [ ] High-impact algorithmic decision-making (credit, hiring, criminal justice, etc.) - - [ ] None of the above (pure docs, tests, CI, formatting, etc.) - -2. Estimated risk level (your honest assessment) - - [ ] Low – no ethical or regulatory impact - - [ ] Medium – possible fairness/privacy concerns - - [ ] High – potential for serious harm or legal non-compliance - -3. Brief description of any ethical/regulatory impact (or write “None”) - +**Pull Request Ethics, Security & Compliance Checklist** +*(Mandatory for all pull requests. This helps you develop professional habits required in industry and academia.)* + +1. 
Nature of the contribution (select all that apply) + - [ ] Purely non-functional changes (documentation, formatting, tests, CI/CD, refactoring without behavioral change) + - [ ] Introduction or modification of cryptographic functions or security-critical code + - [ ] Collection, storage, transmission, or processing of personal data (PII, health data, biometric data, location, etc.) + - [ ] Use or processing of data belonging to children under 13 (or reasonably likely to include such data) + - [ ] Implementation or modification of AI/ML models (training, fine-tuning, inference, prompt engineering) + - [ ] Code interacting with external networks, APIs, or third-party services + +2. Security & responsible practice assessment + - [ ] No security, privacy, or ethical implications identified + - [ ] Potential security or privacy implications present (e.g., input validation, error handling, data exposure) + - [ ] High-risk implications (potential for unauthorized access, data leakage, bias, or legal non-compliance) + +3. Relevant laws, regulations, and frameworks considered (list all that may apply, or if none, refer to content in D2L Ethics module) + Examples: + - Australian Privacy Act 1988 (Cth) & Privacy Amendment (Notifiable Data Breaches) Act 2017 + - GDPR (EU), CCPA/CPRA (California), COPPA (US children’s privacy) + - University Human Research Ethics requirements (HREC/NHMRC) + - NIST Cybersecurity Framework / OWASP Top 10 / ASVS + - ISO/IEC 27001, NIST AI Risk Management Framework (AI RMF) + - Export controls (ITAR, EAR, Australian Defence Export Controls) > -4. Relevant regulations / standards considered (e.g., EU AI Act, GDPR, HIPAA, NIST AI RMF, export controls, etc.) - List them or write “N/A” - +4. 
Security and responsible engineering measures implemented (select and describe) + - [ ] Yes — specify below (e.g., input sanitization, prepared statements, least-privilege access, data minimization, anonymization, secure defaults, dependency scanning, bias audit, consent mechanism, etc.) + - [ ] Partially implemented — further review recommended + - [ ] No — measures appear necessary + - [ ] Not applicable > -5. Have mitigation measures been implemented (bias testing, data minimization, consent flows, etc.)? - - [ ] Yes → describe below - - [ ] No - - [ ] Not applicable - +5. Additional notes (optional) + Any references to secure coding guidelines followed (e.g., ISO27001, OWASP Secure Coding Practices, university security standards), vulnerability scans performed, or ethical review status. > -Thank you! The ethics gate will evaluate your answers automatically. \ No newline at end of file +By submitting this pull request, I confirm I have considered the security, privacy, and ethical implications of my contribution in accordance with university policy and applicable legal and professional standards. + +Thank you for helping maintain a secure and responsible codebase. \ No newline at end of file diff --git a/.github/workflows/ethics-gate.yaml b/.github/workflows/ethics-gate.yaml index 28aee42..51a4bda 100644 --- a/.github/workflows/ethics-gate.yaml +++ b/.github/workflows/ethics-gate.yaml @@ -1,18 +1,14 @@ on: pull_request_target: types: [opened, reopened, synchronize] - issue_comment: - types: [created] permissions: contents: read # needed for checkout - pull-requests: write # needed for commenting & reviews (gh) when running in pull_request_target - checks: write # needed to create check runs + pull-requests: write # needed for posting comments jobs: - # Job that posts the questionnaire (runs in the trusted pull_request_target context). 
post-questionnaire: - if: github.event_name == 'pull_request_target' && github.event.pull_request.draft == false + if: github.event.pull_request.draft == false runs-on: ubuntu-latest steps: - name: Checkout base repo (safe; do NOT checkout PR head here) @@ -46,103 +42,4 @@ jobs: exit 1 fi gh pr comment ${{ github.event.pull_request.number }} --body-file .github/ETHICS_QUESTIONNAIRE.MD - echo "Posted ethics questionnaire to PR #${{ github.event.pull_request.number }}." - - # Ethics engine: collects comments, runs evaluation, posts a check, and requests changes for HIGH risk. - # This job runs in the trusted context for pull_request_target and also on issue_comment (untrusted). - # For untrusted issue_comment runs, write actions (requesting changes) may be skipped if permissions are restricted. - ethics-engine: - runs-on: ubuntu-latest - needs: post-questionnaire - steps: - - name: Checkout base repo (we run parser from base repo) - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.base.sha || github.ref }} - fetch-depth: 0 - - - name: Authenticate gh CLI with GITHUB_TOKEN - run: | - echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token - - - name: Determine PR number - id: prnumber - run: | - # Determine PR number whether triggered by pull_request_target or issue_comment - PR_NUMBER=$(jq -r 'if .pull_request then .pull_request.number elif .issue then .issue.number else empty end' "$GITHUB_EVENT_PATH") - if [[ -z "$PR_NUMBER" ]]; then - echo "No PR number found in event payload; exiting." 
- echo "risk=UNKNOWN" >> $GITHUB_OUTPUT - exit 0 - fi - echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT - - - name: Collect comments - id: collect - run: | - PR=${{ steps.prnumber.outputs.pr_number }} - # Gather all PR comments into a single string (robust to empty) - ANSWERS=$(gh pr view "$PR" --json comments --jq '[.comments[].body] | join("\n\n")' 2>/dev/null || true) - echo "$ANSWERS" > answers.txt - # Expose the answers (trim to avoid huge output) - echo "answers=$(echo "$ANSWERS" | head -c 32768 | sed -e 's/"/'"'"'"/g')" >> $GITHUB_OUTPUT - - - name: Run ethics parser & evaluator (safe runs code from base repo) - id: run_engine - env: - PR_NUMBER: ${{ steps.prnumber.outputs.pr_number }} - run: | - # Ensure parser exists - if [[ ! -f .github/workflows/parse_and_evaluate.py ]]; then - echo "Parser .github/workflows/parse_and_evaluate.py not found in base repo; aborting." - echo "RISK_LEVEL=UNKNOWN" > result.txt - else - python3 .github/workflows/parse_and_evaluate.py "$(cat answers.txt)" > result.txt || true - fi - cat result.txt - # Extract RISK_LEVEL=XYZ from result.txt if present - RISK=$(grep -m1 '^RISK_LEVEL=' result.txt | cut -d= -f2 || echo "LOW") - echo "risk=$RISK" >> $GITHUB_OUTPUT - - - name: Create/update "Ethics Review" check run - uses: actions/github-script@v7 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const risk = "${{ steps.run_engine.outputs.risk }}".trim(); - const conclusions = { - "LOW": "success", - "MEDIUM": "action_required", - "HIGH": "failure" - }; - const conclusion = conclusions[risk] || "failure"; - const head_sha = (context.payload.pull_request && context.payload.pull_request.head && context.payload.pull_request.head.sha) || (context.payload.issue && context.payload.issue.pull_request && context.payload.issue.number ? 
undefined : undefined) || github.event.pull_request?.head?.sha; - await github.rest.checks.create({ - owner: context.repo.owner, - repo: context.repo.repo, - name: "Ethics Review", - head_sha: head_sha || context.sha, - status: "completed", - conclusion, - output: { - title: risk === "LOW" ? "Ethics cleared" : `Ethics review: ${risk}`, - summary: risk === "LOW" ? "Low risk – automatically approved" : `Risk level ${risk} – review required` - } - }); - - - name: Request changes on HIGH risk (trusted-only; skip on untrusted events) - if: steps.run_engine.outputs.risk == 'HIGH' - run: | - PR=${{ steps.prnumber.outputs.pr_number }} - # Only attempt to request changes when running in pull_request_target context (trusted). - if [[ "${GITHUB_EVENT_NAME}" != "pull_request_target" ]]; then - echo "Not in pull_request_target context; skipping request-changes (insufficient permissions for fork PRs)." - exit 0 - fi - # Request changes using gh (GITHUB_TOKEN from pull_request_target has write rights) - gh pr review "$PR" --request-changes -b "@ethics-team Required manual review for high-risk change" - echo "Requested changes on PR #$PR due to HIGH risk." - - - name: Final status message - run: | - echo "Ethics engine completed. Risk level: ${{ steps.run_engine.outputs.risk }}" \ No newline at end of file + echo "Posted ethics questionnaire to PR #${{ github.event.pull_request.number }}." 
\ No newline at end of file diff --git a/.github/workflows/redeengine.py b/.github/workflows/redeengine.py deleted file mode 100644 index c9f5be2..0000000 --- a/.github/workflows/redeengine.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import json -import sys - -def evaluate_risk(answers): - risk_score = 0 - flags = [] - - if answers.get("involves_ai", False): - risk_score += 3 - flags.append("AI/ML component") - if answers.get("processes_pii", False): - risk_score += 5 - flags.append("Personal data") - if answers.get("dual_use", False): - risk_score += 10 - flags.append("🚨 Dual-use technology") - if answers.get("safety_critical", False): - risk_score += 8 - flags.append("Safety-critical") - - if "purely documentation" in answers.get("safe_changes", []): - return "LOW", "No ethical concerns detected." - - if risk_score >= 10: - return "HIGH", " | ".join(flags) - elif risk_score >= 5: - return "MEDIUM", " | ".join(flags) - else: - return "LOW", "Minor changes" - -# Parse comment or form submission here (simplified) -# In real use, you'd parse the actual comment body -answers = json.loads(sys.argv[1]) # passed from workflow -level, reason = evaluate_risk(answers) - -print(f"RISK_LEVEL={level}") -print(f"REASON={reason}") \ No newline at end of file From be5d15a0b829b8ca828e62de8b658106167a6473 Mon Sep 17 00:00:00 2001 From: RamGcia Date: Thu, 11 Dec 2025 22:09:55 +1100 Subject: [PATCH 2/6] added new features such as multiple file selection, scanning folders and subfolders, extension filtering, current directory scanning. 
--- asset-scanner/file_handler.py | 20 ++- asset-scanner/scan_report.json | 231 +++++++++++++++++++++++++++++++++ asset-scanner/scanner.py | 98 +++++++++----- 3 files changed, 312 insertions(+), 37 deletions(-) diff --git a/asset-scanner/file_handler.py b/asset-scanner/file_handler.py index abd571c..7b05934 100644 --- a/asset-scanner/file_handler.py +++ b/asset-scanner/file_handler.py @@ -4,14 +4,24 @@ from scan_media import extract_text_from_file def find_files(directory, exts=None): - exts = exts or [] + """Find all matching files in a directory, optionally filtered by extensions.""" + exts = exts or [] # List of extensions to filter files matches = [] - for dirpath, _, filenames in os.walk(directory): - for fn in filenames: - if not exts or any(fn.lower().endswith(e) for e in exts): - matches.append(os.path.join(dirpath, fn)) + + try: + for dirpath, _, filenames in os.walk(directory): # Traverse directory recursively + for fn in filenames: + # Skip files not matching the desired extensions + if not exts or any(fn.lower().endswith(e.lower()) for e in exts): + matches.append(os.path.join(dirpath, fn)) + except PermissionError: + print(f"[!] Permission denied for directory: {directory}. 
Skipping...") + # Continue to the next folder + pass + return matches + def read_file(path): lower_path = path.lower() if lower_path.endswith('.docx'): diff --git a/asset-scanner/scan_report.json b/asset-scanner/scan_report.json index e69de29..8d4a78a 100644 --- a/asset-scanner/scan_report.json +++ b/asset-scanner/scan_report.json @@ -0,0 +1,231 @@ +[ + { + "pattern": "email", + "description": "Detected EMAIL_ADDRESS", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 2, + "risk": "Low", + "tip": "Mask or obfuscate emails in logs/code unless strictly required; avoid storing in repos.", + "law": "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)", + "compliance": [ + "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)", + "GDPR Art. 5(1)(c) \u2014 Data minimisation", + "GDPR Art. 32 \u2014 Security of processing", + "GDPR Recital 30 \u2014 Online identifiers" + ], + "raw": "ramonricgarcia@gmail.com" + }, + { + "pattern": "full_name", + "description": "Detected PERSON", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 1, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "Ramon Garcia" + }, + { + "pattern": "url", + "description": "Detected URL", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 2, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "www.linkedin.com/in/ramon-garcia-081a471a6" + }, + { + "pattern": "location", + "description": "Detected LOCATION", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 5, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "Burwood" + }, + { + "pattern": "date_time", + 
"description": "Detected DATE_TIME", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 6, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "March 2022" + }, + { + "pattern": "date_time", + "description": "Detected DATE_TIME", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 6, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "October 2026" + }, + { + "pattern": "date_time", + "description": "Detected DATE_TIME", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 8, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "December 2021" + }, + { + "pattern": "date_time", + "description": "Detected DATE_TIME", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 10, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "April 2022" + }, + { + "pattern": "full_name", + "description": "Detected PERSON", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 15, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "Connor Clothing" + }, + { + "pattern": "date_time", + "description": "Detected DATE_TIME", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 15, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "2024" + }, + { + "pattern": "full_name", + "description": "Detected PERSON", + "file": 
"C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 15, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "Connor" + }, + { + "pattern": "date_time", + "description": "Detected DATE_TIME", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 17, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "March 2022" + }, + { + "pattern": "date_time", + "description": "Detected DATE_TIME", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 21, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "January 2021" + }, + { + "pattern": "location", + "description": "Detected LOCATION", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 29, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "Johntheripper" + }, + { + "pattern": "url", + "description": "Detected URL", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 2, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "gmail.com" + }, + { + "pattern": "phone", + "description": "Detected PHONE_NUMBER", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", + "line": 2, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "0412-145-123" + }, + { + "pattern": "date_time", + "description": "Detected DATE_TIME", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx", + "line": 3, + "risk": "Low", + "tip": "Follow secure 
handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "04/09/2003" + }, + { + "pattern": "full_name", + "description": "Detected PERSON", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx", + "line": 2, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "Prospector Drive Cairnlea" + }, + { + "pattern": "phone", + "description": "Detected PHONE_NUMBER", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx", + "line": 1, + "risk": "Low", + "tip": "Follow secure handling and removal procedures.", + "law": "General Best Practice", + "compliance": [], + "raw": "0412145123" + }, + { + "pattern": "medicare_number", + "description": "Australian Medicare number", + "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx", + "line": 1, + "risk": "High", + "tip": "Treat Medicare numbers as sensitive; minimise collection; avoid code/logs; secure storage and transmission.", + "law": "Privacy Act 1988 (Cth) \u2014 APP 9 (Government related identifiers)", + "compliance": [ + "Privacy Act 1988 (Cth) \u2014 APP 9 (Government related identifiers)", + "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)", + "Privacy Act 1988 (Cth) \u2014 Notifiable Data Breaches (Part IIIC)" + ], + "raw": "0412145123" + } +] \ No newline at end of file diff --git a/asset-scanner/scanner.py b/asset-scanner/scanner.py index 04103cd..4fecac8 100644 --- a/asset-scanner/scanner.py +++ b/asset-scanner/scanner.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - #!/usr/bin/env python3 """ scanner.py — Redback Ethics PII & Secrets Scanner (Presidio-powered) @@ -140,34 +138,55 @@ def scan_text(text: str, file_path: str, analyzer: AnalyzerEngine, patterns_meta return findings +def scan_folder_or_file(file=None, root=None, extensions=None): + """Scan either a single file or all relevant files in the folder.""" + if 
file: + # Single file scan + return [file] # Return as a single-element list for compatibility + else: + # Scan entire folder + return find_files(root, exts=extensions) + # file scanner -def scan_paths(paths: Iterable[str], analyzer: AnalyzerEngine, patterns_meta: Dict) -> List[Dict[str, Any]]: +def scan_paths(paths, analyzer, patterns_meta): + """Process and scan all provided files.""" all_findings = [] - for path in paths: - print(f"\n[i] Reading: {path}") - content = read_file(path) - if isinstance(content, bytes): - try: - content = content.decode("utf-8") - except: - content = content.decode("latin-1", errors="ignore") - if isinstance(content, str) and content.strip(): - print(f" → Extracted {len(content):,} characters") - all_findings.extend(scan_text(content, path, analyzer, patterns_meta)) - else: - print(" → No text extracted (image-only PDF?)") + + for path in paths: # Loop through `paths`, one file at a time + print(f"[i] Scanning file: {path}") # Log the file being scanned + try: + content = read_file(path) # Pass a single file to `read_file()` + if not content.strip(): # Skip empty or unsupported files + print(f"[i] Skipping unsupported or empty file: {path}") + continue + + # Scan file content + findings = scan_text(content, path, analyzer, patterns_meta) + all_findings.extend(findings) # Collect results + except Exception as e: + print(f"[!] 
Error processing file {path}: {e}") + continue # Skip to the next file on error + return all_findings # CLI & main def parse_args(argv=None): - # parse_args function ap = argparse.ArgumentParser(description="Sensitive data scanner") - ap.add_argument("--file", help="Single file to scan") - ap.add_argument("--root", default=".", help="Root directory") - ap.add_argument("--patterns", default=DEFAULT_PATTERNS_FILE) - ap.add_argument("--out", default=DEFAULT_OUT) - ap.add_argument("--ext", nargs="*", default=DEFAULT_TARGET_EXTS) - ap.add_argument("--no-console", action="store_true") + ap.add_argument( + "--file", nargs="*", help="One or more specific files to scan (space-separated list)" + ) # `nargs="*"` allows multiple files + ap.add_argument( + "--root", nargs="*", help="One or more directories for recursive scanning" + ) + ap.add_argument( + "--patterns", default="patterns.json", help="Path to patterns.json" + ) + ap.add_argument( + "--ext", nargs="*", default=[".txt", ".json"], help="File extensions to include (e.g., .txt .pdf)" + ) + ap.add_argument( + "--out", default="scan_report.json", help="Output file for results" + ) return ap.parse_args(argv or sys.argv[1:]) def get_valid_path(): @@ -180,24 +199,39 @@ def get_valid_path(): print("Invalid path, try again.") def main(): - ns = parse_args() + # Parse arguments from the CLI + ns = parse_args() # Contains file, root, patterns, ext, and out args + + # Load patterns and initialize the analyzer patterns_meta = load_patterns(ns.patterns) analyzer = get_analyzer() - if ns.file: - paths = [ns.file] - print(f"[i] Scanning single file: {ns.file}") - else: - directory = get_valid_path() - paths = list(find_files(directory, ns.ext)) - print(f"[i] Found {len(paths)} files to scan in {directory}") + # Determine files to scan (using --file and --root) + paths = [] # Initialize an empty list to store all files + if ns.file: # Add files passed using the --file argument + paths.extend(ns.file) # ns.file is already a list of 
files + + if ns.root: # Add files from folders passed using --root + for folder in ns.root: + folder_files = find_files(folder, exts=ns.ext) # Recursively find files; keyword must be `exts` to match find_files(directory, exts=None) + paths.extend(folder_files) + + # Validate if any files were found + if not paths: + print("[!] No files found to scan. Please check your input.") + return 0 + + print(f"[i] Found {len(paths)} files to scan.") + # Scan the files findings = scan_paths(paths, analyzer, patterns_meta) + # Write the scan results to an output report file enriched = write_report(findings, out_path=ns.out) print(f"\n[i] Full report (with paths & raw PII) saved locally → {ns.out}") print(" This file is git-ignored and must NEVER be committed.") + # Handle scan results and risk evaluation if any(f.get("risk") == "High" for f in enriched): print("\n[!] HIGH-RISK PII DETECTED → SCAN FAILED") return 1 @@ -205,7 +239,7 @@ def main(): print(f"\n[i] {len(findings)} findings → check {ns.out}") else: print("\n[Success] NO PII FOUND!") - return 0 + return if __name__ == "__main__": raise SystemExit(main()) \ No newline at end of file From ecdf043e820da8b37f2d3b7517bc523847123079 Mon Sep 17 00:00:00 2001 From: RamGcia Date: Thu, 11 Dec 2025 22:17:19 +1100 Subject: [PATCH 3/6] update file_handler.py --- asset-scanner/file_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/asset-scanner/file_handler.py b/asset-scanner/file_handler.py index 7b05934..4fec9c5 100644 --- a/asset-scanner/file_handler.py +++ b/asset-scanner/file_handler.py @@ -21,7 +21,6 @@ def find_files(directory, exts=None): return matches - def read_file(path): lower_path = path.lower() if lower_path.endswith('.docx'): From c67c9f18fb675b28ef138926a730657dc7bc10fc Mon Sep 17 00:00:00 2001 From: RamGcia Date: Thu, 11 Dec 2025 22:18:24 +1100 Subject: [PATCH 4/6] minor update to readme.md --- README.md | Bin 1615 -> 1568 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/README.md b/README.md index 
22fc73fb002f08a42b7dd51a2f54cb35ef29ad15..f099a5a04789749efe0f14cb42d29eb336d730d2 100644 GIT binary patch delta 247 zcmX@lvw%lbS67#dOIcZ2K}*ZOEVZaCGqqexYoml5qYzBkEj6*Ev?#S$OKY+>W8CCb zjML+lCeLKj7vs`Z2uZCdQAo?oNi9~;NYE>(C`r(q{D4VP94Hl@UzCE7O36>I z0Llq5D^E6KR-YWlY{n0>!#zJS2V}uAX7|bOm}4e;v)BPG=wXrI09oMV!o@Xt1B(_D zkh;$zD#`_NG|#7 delta 305 zcmZ3$bDl>+S67#piPSjbOG+~H(iMVB z5{pVwQ-JOP+85&F0#d;{c`Az*D};5BMN|SP1@axxZ^{auz5)J0A&!0_ii*6GU$gWA k4X$9-7Dp%ndfz8AIW@01739b5tVx0(gP_5~z{>?h0QomVCjbBd From b862d30fbba8d5995e12de2ed283f66196c64b45 Mon Sep 17 00:00:00 2001 From: RamGcia Date: Thu, 11 Dec 2025 22:28:31 +1100 Subject: [PATCH 5/6] removing tracked files --- asset-scanner/scan_report.json | 231 --------------------------------- 1 file changed, 231 deletions(-) delete mode 100644 asset-scanner/scan_report.json diff --git a/asset-scanner/scan_report.json b/asset-scanner/scan_report.json deleted file mode 100644 index 8d4a78a..0000000 --- a/asset-scanner/scan_report.json +++ /dev/null @@ -1,231 +0,0 @@ -[ - { - "pattern": "email", - "description": "Detected EMAIL_ADDRESS", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 2, - "risk": "Low", - "tip": "Mask or obfuscate emails in logs/code unless strictly required; avoid storing in repos.", - "law": "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)", - "compliance": [ - "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)", - "GDPR Art. 5(1)(c) \u2014 Data minimisation", - "GDPR Art. 
32 \u2014 Security of processing", - "GDPR Recital 30 \u2014 Online identifiers" - ], - "raw": "ramonricgarcia@gmail.com" - }, - { - "pattern": "full_name", - "description": "Detected PERSON", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 1, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "Ramon Garcia" - }, - { - "pattern": "url", - "description": "Detected URL", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 2, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "www.linkedin.com/in/ramon-garcia-081a471a6" - }, - { - "pattern": "location", - "description": "Detected LOCATION", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 5, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "Burwood" - }, - { - "pattern": "date_time", - "description": "Detected DATE_TIME", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 6, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "March 2022" - }, - { - "pattern": "date_time", - "description": "Detected DATE_TIME", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 6, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "October 2026" - }, - { - "pattern": "date_time", - "description": "Detected DATE_TIME", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 8, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - 
"compliance": [], - "raw": "December 2021" - }, - { - "pattern": "date_time", - "description": "Detected DATE_TIME", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 10, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "April 2022" - }, - { - "pattern": "full_name", - "description": "Detected PERSON", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 15, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "Connor Clothing" - }, - { - "pattern": "date_time", - "description": "Detected DATE_TIME", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 15, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "2024" - }, - { - "pattern": "full_name", - "description": "Detected PERSON", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 15, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "Connor" - }, - { - "pattern": "date_time", - "description": "Detected DATE_TIME", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 17, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "March 2022" - }, - { - "pattern": "date_time", - "description": "Detected DATE_TIME", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 21, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "January 2021" - }, - { - "pattern": "location", - "description": "Detected 
LOCATION", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 29, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "Johntheripper" - }, - { - "pattern": "url", - "description": "Detected URL", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 2, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "gmail.com" - }, - { - "pattern": "phone", - "description": "Detected PHONE_NUMBER", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\RamonGarcia2025.docx", - "line": 2, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "0412-145-123" - }, - { - "pattern": "date_time", - "description": "Detected DATE_TIME", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx", - "line": 3, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "04/09/2003" - }, - { - "pattern": "full_name", - "description": "Detected PERSON", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx", - "line": 2, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "Prospector Drive Cairnlea" - }, - { - "pattern": "phone", - "description": "Detected PHONE_NUMBER", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx", - "line": 1, - "risk": "Low", - "tip": "Follow secure handling and removal procedures.", - "law": "General Best Practice", - "compliance": [], - "raw": "0412145123" - }, - { - "pattern": "medicare_number", - "description": "Australian Medicare number", - "file": "C:\\Users\\ramon\\Desktop\\testforethics\\testdocument.docx", - "line": 1, - 
"risk": "High", - "tip": "Treat Medicare numbers as sensitive; minimise collection; avoid code/logs; secure storage and transmission.", - "law": "Privacy Act 1988 (Cth) \u2014 APP 9 (Government related identifiers)", - "compliance": [ - "Privacy Act 1988 (Cth) \u2014 APP 9 (Government related identifiers)", - "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)", - "Privacy Act 1988 (Cth) \u2014 Notifiable Data Breaches (Part IIIC)" - ], - "raw": "0412145123" - } -] \ No newline at end of file From fd832f1a92790799ac8ffd28f4eb78b46e80718e Mon Sep 17 00:00:00 2001 From: RamGcia Date: Thu, 11 Dec 2025 22:29:06 +1100 Subject: [PATCH 6/6] fixing code in scanner.py --- asset-scanner/scanner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asset-scanner/scanner.py b/asset-scanner/scanner.py index 4fecac8..93cc4fc 100644 --- a/asset-scanner/scanner.py +++ b/asset-scanner/scanner.py @@ -185,7 +185,7 @@ def parse_args(argv=None): "--ext", nargs="*", default=[".txt", ".json"], help="File extensions to include (e.g., .txt .pdf)" ) ap.add_argument( - "--out", default="scan_report.json", help="Output file for results" + "--out", default="scan_report.local.json", help="Output file for results" ) return ap.parse_args(argv or sys.argv[1:])