diff --git a/.github/workflows/scan.yml b/.github/workflows/scan.yml new file mode 100644 index 0000000..bb7e688 --- /dev/null +++ b/.github/workflows/scan.yml @@ -0,0 +1,12 @@ +name: OWASP PR Scanner +on: [pull_request] +jobs: + scan: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - run: pip install -r requirements.txt + - run: python scanner/main.py tests/test_positive.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b7faf40 --- /dev/null +++ b/.gitignore @@ -0,0 +1,207 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. 
Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ diff --git a/README.md b/README.md index 70a0b50..b42465f 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,104 @@ -# Cyber-Security +# OWASP PR Scanner -This repository is the main repository for the Cyber Security Team. Whilst general files should go here, full projects within the Cyber Security team should be split-off into their own repository within the Redback Operations company (under the Cyber team, ensure Tutors have **admin** and cyber team **write** permissions) to avoid bloat in this central repository. +This tool scans Python files for security vulnerabilities based on the **OWASP Top 10**. +It is designed for lightweight static analysis of pull requests, helping developers catch common issues early and enforce secure coding practices. +--- -- Research folder contains generic research not relevant to a particular trimester. +## ✅ Current Functionality -- Otherwise, each trimester folder contains small projects / trials conducted. +The scanner detects vulnerabilities using static analysis (regex + simple heuristics). +It groups results by OWASP Top 10 category and highlights severity with colour-coded output. -- Documentation links for associated docs are scattered were relevant documentation exists. +Implemented rules: -- Some 2022 files yet to be moved over. 
+- **A01:2021 – Broken Access Control** + - Detects Flask routes without authentication decorators -- If you are creating documentation or a research piece, please create a .md equivalent and add to the [documentation repo](https://github.com/Redback-Operations/redback-documentation) +- **A02:2021 – Cryptographic Failures** + - Detects weak hashing algorithms (MD5, SHA1) + - Flags hardcoded secrets, API keys, and default passwords + - Warns about unsafe fallback values -- [General doc site here](https://redback-operations.github.io/redback-documentation/docs/category/cyber-security-team). \ No newline at end of file +- **A03:2021 – Injection** + - Detects unparameterized SQL queries + - Flags SQL built with string concatenation or f-strings + +- **A04:2021 – Insecure Design** + - Flags insecure “TODO” markers, temporary overrides, or auth bypass notes + +- **A05:2021 – Security Misconfiguration** + - Detects `debug=True` in Flask apps + - Flags permissive host settings (`ALLOWED_HOSTS = ['*']`) + - Insecure cookie/CSRF flags + - Hardcoded Flask secrets + +- **A06:2021 – Vulnerable and Outdated Components** + - Detects dependency pins like `flask==0.12` or `django==1.11` + - Helps identify outdated or risky components + +- **A07:2021 – Identification and Authentication Failures** + - Detects default credentials (`admin`, `password`) + - Flags login routes without auth checks + - Warns about disabled TLS verification (`verify=False`) + +- **A08:2021 – Software and Data Integrity Failures** + - Detects dangerous use of `eval()` + - Warns about unsafe deserialization (`pickle.load`) + - Flags subprocess calls with `shell=True` + +- **A09:2021 – Security Logging and Monitoring Failures** + - Detects print statements in auth flows + - Flags bare `except:` blocks with no logging + - Warns when secrets are printed to stdout + +- **A10:2021 – Server-Side Request Forgery (SSRF)** + - Detects unvalidated user input passed into `requests.get/post` + +--- + + +## 📂 Test Cases + 
+- **`test_positive.py`** + A deliberately vulnerable file that triggers all implemented OWASP rules (A01–A10). + +- **`test_negative.py`** + A safe baseline file with secure practices — should pass with **no findings**. + Used for regression testing and validation. + +--- + +## 🎨 Output Example + +- Findings are grouped by OWASP category (A01–A10) +- Severity levels are **colour-coded**: + - 🔴 High + - 🟠 Medium + - 🟢 Low + +Example: +=== A03: Injection (2 findings) === +Summary: High: 2 + +• Line 60 | Severity HIGH | Confidence MEDIUM +→ SQL query created via string concatenation: ... + +--- + +## Running the Script +### 1. Navigate to your project root +cd path/to/owasp-scanner + +### 2. Set PYTHONPATH so Python recognizes `scanner/` as a package +set PYTHONPATH=. + +### 3. Run the script with the file to scan as an argument +python scanner/main.py tests/test_positive.py + +## 👤 Author +Developed by Liana Perry (2025) +Cybersecurity SecDevOps Sub-team | Redback Operations + +## 🙌 Acknowledgements +This project is inspired by the original vulnerability scanning logic created by Amir Zandieh, and extends it into a modular and OWASP-aligned security scanning tool for pull requests. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6c924a7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +# This file lists all dependencies needed to run the scanner. 
+# To install the requirements: +# pip install -r requirements.txt \ No newline at end of file diff --git a/scanner/__init__.py b/scanner/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scanner/core.py b/scanner/core.py new file mode 100644 index 0000000..f48deb8 --- /dev/null +++ b/scanner/core.py @@ -0,0 +1,146 @@ +# Responsibilities: +# - Reads target file, stores code lines +# - Manages vulnerability list +# - Runs all rule checks (auto-discovers rules in scanner/rules) +# - Provides add_vulnerability callback +# - Prints a grouped, colourised report + +import os +import importlib +import pkgutil +import scanner.rules as rules_pkg + + +# -------- Rule auto-discovery -------- +def _load_rule_modules(): + modules = [] + for _, modname, _ in pkgutil.iter_modules(rules_pkg.__path__): + if modname.startswith("_"): + continue # skip __init__, _template, etc. + mod = importlib.import_module(f"{rules_pkg.__name__}.{modname}") + if hasattr(mod, "check"): + modules.append(mod) + + # Stable order: by CATEGORY "A01: ..." 
class VulnerabilityScanner:
    """Read one source file, run the rule modules over it, and print a report.

    Attributes:
        file_path: Path of the file to scan, exactly as supplied by the caller.
        code_lines: Lines of the file (line endings kept) once ``parse_file``
            has succeeded; empty before that.
        vulnerabilities: List of finding dicts appended by ``add_vulnerability``.
        parse_failed: True after ``parse_file`` could not read the file, so
            that ``report`` does not present an unread file as a clean scan.
    """

    def __init__(self, file_path):
        self.file_path = file_path
        self.code_lines = []
        self.vulnerabilities = []
        self.parse_failed = False

    def add_vulnerability(self, category, description, line, severity, confidence):
        """Record one finding; handed to every rule as its reporting callback.

        Args:
            category: Report group label, e.g. ``"A05: Security Misconfiguration"``.
            description: Human-readable explanation printed under the finding.
            line: 1-based line number the finding refers to.
            severity: Severity label; ``report`` colours and tallies
                ``CRITICAL``/``HIGH``/``MEDIUM``/``LOW``.
            confidence: Confidence label, printed verbatim.
        """
        self.vulnerabilities.append(
            {
                "category": category,
                "description": description,
                "line": line,
                "severity": severity,
                "confidence": confidence,
            }
        )

    def parse_file(self):
        """Load ``file_path`` into ``code_lines``.

        Returns:
            True when the file was read, False when it is missing or cannot be
            opened (a message is printed in both failure cases).
        """
        try:
            # errors="replace" keeps the scan going on files that are not valid
            # UTF-8 instead of aborting with UnicodeDecodeError.
            with open(self.file_path, "r", encoding="utf-8", errors="replace") as f:
                self.code_lines = f.readlines()
        except FileNotFoundError:
            print(f"File {self.file_path} does not exist.")
            self.parse_failed = True
            return False
        except OSError as exc:
            # Directories, permission problems, etc.
            print(f"File {self.file_path} could not be read: {exc}")
            self.parse_failed = True
            return False
        self.parse_failed = False
        return True

    def run_checks(self):
        """Call ``check(code_lines, add_vulnerability)`` on every rule module."""
        for rule in RULE_MODULES:
            # each rule exposes: check(code_lines, add_vulnerability)
            rule.check(self.code_lines, self.add_vulnerability)

    def run(self):
        """Parse the file and, only if that succeeded, run all rule checks."""
        if not self.parse_file():
            return
        self.run_checks()

    def report(self):
        """Print the findings grouped by category, with ANSI colour codes.

        Categories whose label starts with ``A<number>`` are ordered by that
        number; any other label sorts after them alphabetically. Findings in a
        category are ordered by line number. If the file could not be read, a
        failure notice is printed instead of a result.
        """
        # 24-bit colour is used only when the terminal advertises it.
        truecolor = os.environ.get("COLORTERM", "").lower() in ("truecolor", "24bit")

        def paint(rgb_triplet, fallback):
            """Return the true-colour escape for rgb_triplet, else the fallback code."""
            if truecolor:
                r, g, b = rgb_triplet
                return f"\033[38;2;{r};{g};{b}m"
            return fallback

        reset = "\033[0m"
        bold = "\033[1m"
        red, green, yellow = "\033[91m", "\033[92m", "\033[93m"
        blue, magenta, cyan = "\033[94m", "\033[95m", "\033[96m"

        sev_color = {
            "CRITICAL": paint((220, 20, 60), red + bold),
            "HIGH": paint((255, 0, 0), red),
            "MEDIUM": paint((255, 165, 0), yellow),
            "LOW": paint((0, 200, 0), green),
        }
        header_color = paint((180, 130, 255), magenta)
        title_color = paint((120, 220, 200), cyan)
        summary_color = paint((255, 215, 0), yellow)

        print(f"\n{bold}{title_color}Scan Results for {self.file_path}:{reset}")

        # An unread file must never be reported as "no vulnerabilities".
        if getattr(self, "parse_failed", False):
            print(
                f"{sev_color['HIGH']}Scan incomplete: the file could not be read, "
                f"so no rules were run.{reset}"
            )
            return

        if not self.vulnerabilities:
            print(f"{sev_color['LOW']}✅ No vulnerabilities found.{reset}")
            return

        # Group by category
        groups = {}
        for finding in self.vulnerabilities:
            groups.setdefault(finding["category"], []).append(finding)

        def cat_key(cat: str):
            head = cat.split(":", 1)[0].strip()
            if head.startswith("A") and head[1:].isdigit():
                return (0, int(head[1:]))
            return (1, cat.lower())

        severity_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW"]
        for cat in sorted(groups, key=cat_key):
            items = sorted(groups[cat], key=lambda x: x["line"])

            sev_counts = dict.fromkeys(severity_order, 0)
            for finding in items:
                sev_counts[finding["severity"]] = sev_counts.get(finding["severity"], 0) + 1

            total = len(items)
            print(f"\n{bold}{header_color}=== {cat} ({total} finding{'s' if total != 1 else ''}) ==={reset}")

            chips = [
                f"{sev_color[k]}{k.title()}{reset}: {sev_counts[k]}"
                for k in severity_order
                if sev_counts[k]
            ]
            if chips:
                print(f"{summary_color}Summary:{reset} " + ", ".join(chips))

            for finding in items:
                # Severities outside the four known labels are shown in blue.
                sc = sev_color.get(finding["severity"], blue)
                print(f"\n {bold}• Line {finding['line']} |{reset} "
                      f"Severity {sc}{finding['severity']}{reset} | "
                      f"Confidence {finding['confidence']}")
                print(f" → {finding['description']}")
# A07:2021 – Identification and Authentication Failures
# Patterns are compiled once at import time instead of being looked up per line.
_AUTH_ROUTE_RE = re.compile(r"@app\.route\([\"'](/login|/auth|/signin)[\"']")
# Accept optional whitespace around "=" so `verify = False` is caught as well.
_TLS_VERIFY_OFF_RE = re.compile(r"\bverify\s*=\s*False\b")
_DEFAULT_USER_RE = re.compile(r"(user(name)?\s*=\s*['\"](admin|root)['\"])", re.IGNORECASE)
_DEFAULT_PASSWORD_RE = re.compile(r"(password\s*=\s*['\"](admin|1234|password)['\"])", re.IGNORECASE)

_CATEGORY = "A07: Identification and Authentication Failures"


def check(code_lines, add_vulnerability):
    """Report authentication-related weaknesses found line by line.

    Four independent checks run on every line (comment lines included):
    ``@app.route`` decorators for ``/login``, ``/auth`` or ``/signin``;
    ``requests.`` calls with TLS verification switched off; assignments of
    ``admin``/``root`` to a ``user``/``username`` name; and assignments of
    ``admin``/``1234``/``password`` to a ``password`` name.

    Args:
        code_lines: Sequence of source lines.
        add_vulnerability: Callback invoked as
            ``add_vulnerability(category, description, line_no, severity, confidence)``
            with a 1-based line number.
    """
    for line_no, line in enumerate(code_lines, start=1):
        text = line.strip()

        # Flask/Django style routes that should require auth
        if _AUTH_ROUTE_RE.search(line):
            add_vulnerability(
                _CATEGORY,
                f"Authentication-related route without explicit auth checks: {text}",
                line_no,
                "HIGH",
                "MEDIUM",
            )

        # Python requests with TLS verify disabled
        if "requests." in line and _TLS_VERIFY_OFF_RE.search(line):
            add_vulnerability(
                _CATEGORY,
                f"Insecure TLS verification disabled: {text}",
                line_no,
                "HIGH",
                "HIGH",
            )

        # Hardcoded default creds
        if _DEFAULT_USER_RE.search(line):
            add_vulnerability(
                _CATEGORY,
                f"Hardcoded default username detected: {text}",
                line_no,
                "HIGH",
                "HIGH",
            )
        if _DEFAULT_PASSWORD_RE.search(line):
            add_vulnerability(
                _CATEGORY,
                f"Hardcoded default password detected: {text}",
                line_no,
                "HIGH",
                "HIGH",
            )
AUTH_DECORATOR_RE = re.compile(
    r'@(login_required|jwt_required|roles_required|requires_auth|auth_required|permission_required)',
    re.IGNORECASE,
)
FLASK_ROUTE_RE = re.compile(r'@(?:\w+\.)?route\s*\(', re.IGNORECASE)
# `async def` handlers must end the decorator scan too; otherwise the scan runs
# on into the next function and borrows that function's decorators.
DEF_RE = re.compile(r'^\s*(?:async\s+)?def\s+\w+\s*\(', re.IGNORECASE)

DRF_ALLOWANY_RE = re.compile(r'permission_classes\s*=\s*\[\s*AllowAny\s*\]')
DRF_IMPORT_ALLOWANY_RE = re.compile(r'from\s+rest_framework\.permissions\s+import\s+.*AllowAny', re.IGNORECASE)

# app.get('/path', handler) or router.post("/path", handler)
# If there is a direct callback right after the path, there is probably no middleware.
EXPRESS_ROUTE_RE = re.compile(
    r'\b(?:app|router)\.(get|post|put|patch|delete|options|head)\s*\(\s*[\'"][^\'"]+[\'"]\s*,\s*(?:function|\()',
    re.IGNORECASE,
)

_ROUTE_PATH_RE = re.compile(r'route\s*\(\s*[\'"]([^\'"]+)', re.IGNORECASE)
_SENSITIVE_PATH_RE = re.compile(r'/?(admin|settings|manage|delete|update|user|account)', re.IGNORECASE)

# Broken Access Control is A01 in the OWASP Top 10 2021 numbering.
_CATEGORY = "A01: Broken Access Control"


def _check_flask_routes(code_lines, add_vulnerability):
    """Flag route decorators whose function has no auth decorator below the route."""
    total = len(code_lines)
    i = 0
    while i < total:
        line = code_lines[i]
        if not FLASK_ROUTE_RE.search(line):
            i += 1
            continue

        # Walk forward to the line just before the next function definition,
        # collecting every decorator line that sits under the route decorator.
        decorators = []
        j = i
        while j + 1 < total and not DEF_RE.search(code_lines[j + 1]):
            j += 1
            if code_lines[j].lstrip().startswith('@'):
                decorators.append(code_lines[j].strip())

        if j + 1 >= total:
            # No function definition follows this route line; nothing to judge.
            i += 1
            continue

        if not any(AUTH_DECORATOR_RE.search(d) for d in decorators):
            m = _ROUTE_PATH_RE.search(line)
            path_hint = m.group(1) if m else ""
            # Heuristic: sensitive-looking paths are reported with higher severity.
            severity = "HIGH" if _SENSITIVE_PATH_RE.search(path_hint) else "MEDIUM"
            add_vulnerability(
                _CATEGORY,
                f"Flask route appears without an auth decorator: {line.strip()}",
                i + 1,
                severity,
                "HIGH",
            )
        # Resume at the function definition line.
        i = j + 1


def check(code_lines, add_vulnerability):
    """Report likely missing authorization in Flask, DRF and Express code.

    Three passes run over ``code_lines``:
      1. Flask ``@...route(`` decorators with none of the known auth decorators
         between the route line and the function definition.
      2. ``permission_classes = [AllowAny]`` assignments; severity is HIGH when
         an ``AllowAny`` import from ``rest_framework.permissions`` also
         appears anywhere in the input, MEDIUM otherwise.
      3. Express ``app``/``router`` route calls whose second argument is
         directly a callback.

    Args:
        code_lines: Sequence of source lines.
        add_vulnerability: Callback invoked as
            ``add_vulnerability(category, description, line_no, severity, confidence)``
            with a 1-based line number.
    """
    drf_allowany_seen = any(DRF_IMPORT_ALLOWANY_RE.search(line) for line in code_lines)

    # -------- Flask route without auth decorator ----------
    _check_flask_routes(code_lines, add_vulnerability)

    # -------- DRF AllowAny on views / viewsets ----------
    for idx, line in enumerate(code_lines):
        if DRF_ALLOWANY_RE.search(line):
            add_vulnerability(
                _CATEGORY,
                f"DRF endpoint allows unauthenticated access with AllowAny: {line.strip()}",
                idx + 1,
                "HIGH" if drf_allowany_seen else "MEDIUM",
                "HIGH",
            )

    # -------- Express routes without middleware ----------
    for idx, line in enumerate(code_lines):
        if EXPRESS_ROUTE_RE.search(line):
            add_vulnerability(
                _CATEGORY,
                f"Express route handler attached without visible auth middleware: {line.strip()}",
                idx + 1,
                "MEDIUM",
                "HIGH",
            )
UNSAFE_YAML_RE = re.compile(r'\byaml\.load\s*\(')  # safe form is yaml.safe_load
# Builtin eval()/exec() only: the look-behind rejects attribute calls such as
# model.eval() and longer names such as literal_eval().
_DYNAMIC_EVAL_RE = re.compile(r'(?<![\w.])(?:eval|exec)\s*\(')
# yaml.load() given an explicit SafeLoader/CSafeLoader is not reported.
_SAFE_YAML_LOADER_RE = re.compile(r'\bLoader\s*=\s*(?:yaml\.)?C?SafeLoader\b')
_SHELL_TRUE_RE = re.compile(r'\bshell\s*=\s*True\b')

_CATEGORY = "A08: Software and Data Integrity Failures"


def check(code_lines, add_vulnerability):
    """Report dynamic evaluation, unsafe deserialization and shell=True usage.

    Lines whose first non-blank character is ``#`` are skipped. Every other
    line is tested for builtin ``eval(``/``exec(`` calls, ``pickle.load(`` /
    ``pickle.loads(``, ``yaml.load(`` without a safe loader, and
    ``subprocess.`` calls passing ``shell=True``.

    Args:
        code_lines: Sequence of source lines.
        add_vulnerability: Callback invoked as
            ``add_vulnerability(category, description, line_no, severity, confidence)``
            with a 1-based line number.
    """
    for line_no, line in enumerate(code_lines, start=1):
        stripped = line.strip()
        if stripped.startswith("#"):
            continue

        # Dangerous dynamic evaluation
        if _DYNAMIC_EVAL_RE.search(stripped):
            add_vulnerability(
                _CATEGORY,
                f"Use of dangerous dynamic evaluation: {stripped}",
                line_no,
                "HIGH",
                "HIGH",
            )

        # Unsafe deserialization (pickle)
        if "pickle.load(" in stripped or "pickle.loads(" in stripped:
            add_vulnerability(
                _CATEGORY,
                f"Potential unsafe deserialization via pickle: {stripped}",
                line_no,
                "HIGH",
                "HIGH",
            )

        # Unsafe YAML load (must be yaml.safe_load or use a safe Loader)
        if (
            UNSAFE_YAML_RE.search(stripped)
            and "safe_load" not in stripped
            and not _SAFE_YAML_LOADER_RE.search(stripped)
        ):
            add_vulnerability(
                _CATEGORY,
                f"Unsafe YAML load detected; use yaml.safe_load(): {stripped}",
                line_no,
                "HIGH",
                "MEDIUM",
            )

        # shell=True in subprocess calls (whitespace around "=" tolerated)
        if "subprocess." in stripped and _SHELL_TRUE_RE.search(stripped):
            add_vulnerability(
                _CATEGORY,
                f"subprocess call with shell=True detected: {stripped}",
                line_no,
                "HIGH",
                "MEDIUM",
            )
i + 1, + "MEDIUM", + "LOW", + ) diff --git a/scanner/rules/security_misconfig.py b/scanner/rules/security_misconfig.py new file mode 100644 index 0000000..17885d2 --- /dev/null +++ b/scanner/rules/security_misconfig.py @@ -0,0 +1,86 @@ +# A05:2021 – Security Misconfiguration +# +# It flags risky configuration patterns commonly seen in Python, JS, and YAML: +# 1) Debug modes enabled (Django DEBUG=True, Flask app.run(debug=True)). +# 2) Overly permissive hosts or CORS settings (ALLOWED_HOSTS=['*'], Access-Control-Allow-Origin: *). +# 3) Insecure cookie or transport flags (SECURE_... = False, SESSION_COOKIE_SECURE=False). +# 4) Hardcoded or default-like secrets in config contexts (SECRET_KEY='...', password='admin'). +# +# Function: +# - `check(code_lines, add_vulnerability)`: Scans lines and reports findings with context. + +import re + +DJANGO_DEBUG_RE = re.compile(r'\bDEBUG\s*=\s*True\b') +FLASK_DEBUG_RE = re.compile(r'\bapp\.run\s*\(\s*.*\bdebug\s*=\s*True\b', re.IGNORECASE) +DJANGO_ALLOWED_HOSTS_ANY_RE = re.compile(r'\bALLOWED_HOSTS\s*=\s*\[\s*[\'"]\*\s*[\'"]\s*\]', re.IGNORECASE) + +CORS_WILDCARD_RE = re.compile(r'(Access-Control-Allow-Origin\s*[:=]\s*[\'"]\*\s*[\'"])|("allowAllOrigins"\s*:\s*true)', re.IGNORECASE) +SECURE_FLAG_FALSE_RE = re.compile(r'\b(SECURE_[A-Z_]+|SESSION_COOKIE_SECURE|CSRF_COOKIE_SECURE)\s*=\s*False\b') +INSECURE_COOKIE_RE = re.compile(r'cookie\s*(secure|httpOnly)\s*[:=]\s*false', re.IGNORECASE) + +DEFAULTY_SECRET_RE = re.compile( + r'\b(SECRET_KEY|APP_SECRET|JWT_SECRET|API_KEY|TOKEN|PASSWORD)\s*[:=]\s*[\'"]([^\'"]+)[\'"]', re.IGNORECASE +) +OBVIOUS_DEFAULTS = {'admin', 'password', 'changeme', 'change_me', 'default', 'test', 'secret'} + +def check(code_lines, add_vulnerability): + for i, line in enumerate(code_lines): + # Debug modes + if DJANGO_DEBUG_RE.search(line): + add_vulnerability( + "A05: Security Misconfiguration", + f"Django DEBUG is enabled: {line.strip()}", + i + 1, + "HIGH", + "MEDIUM", + ) + if FLASK_DEBUG_RE.search(line): + 
# A02:2021 – Cryptographic Failures
# Matches `<target> = "<literal>"`. The target may not contain "=" and may not
# end in "!", "<" or ">", so comparisons (==, !=, <=, >=) are never treated as
# assignments. Optional r/b/u string prefixes are accepted.
_LITERAL_ASSIGN_RE = re.compile(
    r"""^(?P<target>[^=]*[^=!<>\s])\s*=(?!=)\s*[rRbBuU]{0,2}["']"""
)

_CATEGORY = "A02: Cryptographic Failures"


def check(code_lines, add_vulnerability):
    """Report weak hash algorithms and secrets assigned from string literals.

    Lines whose first non-blank character is ``#`` are skipped.

    * Weak hashing: any line containing ``md5`` or ``sha1`` (case-insensitive).
    * Hardcoded secret: a string literal assigned to a target whose name
      contains one of the sensitive keywords. Lines mentioning ``os.environ``
      or ``hashlib.sha256`` are skipped for this check.

    Args:
        code_lines: Sequence of source lines.
        add_vulnerability: Callback invoked as
            ``add_vulnerability(category, description, line_no, severity, confidence)``
            with a 1-based line number.
    """
    weak_hashes = ("md5", "sha1")
    sensitive_keywords = ("password", "passwd", "secret", "apikey", "api_key", "token")

    for line_no, line in enumerate(code_lines, start=1):
        stripped = line.strip()

        # Skip comments
        if stripped.startswith("#"):
            continue

        lowered = stripped.lower()

        # Weak crypto usage
        if any(h in lowered for h in weak_hashes):
            add_vulnerability(
                _CATEGORY,
                f"Weak hashing algorithm detected: {stripped}",
                line_no,
                "HIGH",
                "HIGH",
            )

        # Hardcoded secrets (but ignore env lookups and hashes)
        if "os.environ" in stripped or "hashlib.sha256" in stripped:
            continue  # safe usage, skip

        assignment = _LITERAL_ASSIGN_RE.match(stripped)
        if assignment is None:
            # Not a literal assignment: comparisons, calls, lookups, etc.
            continue

        target = assignment.group("target").lower()
        if any(kw in target for kw in sensitive_keywords):
            add_vulnerability(
                _CATEGORY,
                f"Potential hardcoded sensitive data: {stripped}",
                line_no,
                "HIGH",
                "MEDIUM",
            )
# A03:2021 – Injection
# A string literal (optionally prefixed, single- or triple-quoted) that starts
# with an SQL verb and is the right-hand side of "=". The trailing \b keeps
# ordinary words such as "Deleted" or "Selected" from matching.
_SQL_ASSIGN_RE = re.compile(
    r"=\s*(?P<prefix>[rRbBfFuU]{0,2})['\"]{1,3}\s*(?:SELECT|INSERT|UPDATE|DELETE)\b",
    re.IGNORECASE,
)
_ASSIGN_TARGET_RE = re.compile(r"\s*(\w+)\s*=")

_CATEGORY = "A03: Injection"


def check(code_lines, add_vulnerability):
    """Report SQL statements built from dynamic strings and their execution.

    A line is reported when it assigns an SQL string literal that is either
    joined with ``+`` or is an f-string containing ``{``. If the assignment
    target is a simple name, that name is remembered; any later (or same)
    line calling ``execute(<name>`` with that name as the first argument is
    reported as well, at most once per line.

    Args:
        code_lines: Sequence of source lines.
        add_vulnerability: Callback invoked as
            ``add_vulnerability(category, description, line_no, severity, confidence)``
            with a 1-based line number.
    """
    # variable name -> compiled pattern matching execute(<name> [, ...])
    suspicious_vars = {}

    for line_no, line in enumerate(code_lines, start=1):
        literal = _SQL_ASSIGN_RE.search(line)
        if literal:
            how = None
            if "f" in literal.group("prefix").lower() and "{" in line:
                how = "f-string interpolation"
            elif "+" in line:
                how = "string concatenation"

            if how is not None:
                target = _ASSIGN_TARGET_RE.match(line)
                if target:
                    name = target.group(1)
                    suspicious_vars[name] = re.compile(
                        rf"execute\(\s*{re.escape(name)}\s*[,)]"
                    )

                add_vulnerability(
                    _CATEGORY,
                    f"SQL query created via {how}: {line.strip()}",
                    line_no,
                    "HIGH",
                    "MEDIUM",
                )

        # Detect execution of those suspicious queries, with or without a
        # parameter tuple after the query variable.
        if any(pattern.search(line) for pattern in suspicious_vars.values()):
            add_vulnerability(
                _CATEGORY,
                f"Suspicious query passed to execute(): {line.strip()}",
                line_no,
                "HIGH",
                "HIGH",
            )
# A06:2021 – Vulnerable and Outdated Components
# Placeholder rule: reports every exact `==` pin of a package listed in
# SUSPECT_PACKAGES so that it can be reviewed by hand. The pinned version
# itself is not evaluated.

import re

# e.g., flask==2.0.1, Django==1.11.29, requests==2.25.1
PIN_RE = re.compile(r'^\s*([A-Za-z0-9][A-Za-z0-9_\-]*)\s*==\s*([A-Za-z0-9\.\-\+]+)\s*$')

# Trailing requirements-file comment: whitespace, then "#", to end of line.
_INLINE_COMMENT_RE = re.compile(r'\s+#.*$')
# Extras list following the package name, e.g. "[async]".
_EXTRAS_RE = re.compile(r'\s*\[[^\]]*\]')

SUSPECT_PACKAGES = {"flask", "django"}  # expand as needed


def _bare_pin(requirement):
    """Reduce a requirement line to its ``name==version`` core."""
    text = _INLINE_COMMENT_RE.sub("", requirement)
    text = text.partition(";")[0]  # drop an environment marker, if any
    text = _EXTRAS_RE.sub("", text, count=1)
    return text.strip()


def check(code_lines, add_vulnerability):
    """Flag exact version pins of packages listed in SUSPECT_PACKAGES.

    A line counts as a pin when, after removing a trailing ``#`` comment, an
    environment marker (``; ...``) and an extras list (``[...]``), it is
    exactly ``name==version``. Package names are compared case-insensitively.

    Args:
        code_lines: Lines of the scanned file, in order.
        add_vulnerability: Callback invoked positionally with
            ``(label, message, line_number, "MEDIUM", "LOW")``; line numbers
            are 1-based.

    Returns:
        None. Findings are delivered only through ``add_vulnerability``.
    """
    for line_no, line in enumerate(code_lines, start=1):
        stripped = line.strip()

        # Skip comments entirely
        if stripped.startswith("#"):
            continue

        match = PIN_RE.match(_bare_pin(stripped))
        if match is None:
            continue

        pkg = match.group(1).lower()
        ver = match.group(2)

        if pkg in SUSPECT_PACKAGES:
            add_vulnerability(
                "A06: Vulnerable and Outdated Components",
                f"Dependency pin detected (manual review required): {pkg}=={ver}",
                line_no,
                "MEDIUM",
                "LOW",
            )

conn = sqlite3.connect("example.db")
cursor = conn.cursor()
cursor.execute(query, (username,))

# --- Secure cryptography usage ---
hashed_password = hashlib.sha256(username.encode()).hexdigest()

# --- Secure HTTP request (TLS verification enabled) ---
resp = requests.get("https://example.com", verify=True)
print(resp.status_code)


# --- Safe YAML load ---
# NOTE(review): `yaml` is used here but is not in this file's import list.
data = yaml.safe_load("key: value")

# --- Safe subprocess usage (no shell=True) ---
# NOTE(review): `subprocess` is likewise not imported by this file.
subprocess.run(["echo", "hello"], check=True)

# ---- patch boundary: tests/test_negative.py ends above, tests/test_positive.py begins below ----

# Intentionally vulnerable scanner fixture. The CI workflow
# (.github/workflows/scan.yml) runs `python scanner/main.py tests/test_positive.py`,
# so this file is the scanner's input; the list below names the categories it
# is meant to trip.
# NOTE(review): the numbering below follows the labels the rules emit (for
# example "A01: Injection"), while the rule-file headers cite OWASP 2021
# numbers (Injection is A03:2021 there) - confirm which scheme is intended.
# Triggers:
# A01 Injection
# A02 Broken Access Control
# A03 Sensitive Data Exposure (Cryptographic Failures)
# A04 Insecure Design
# A05 Security Misconfiguration
# A06 Vulnerable and Outdated Components
# A07 Identification and Authentication Failures
# A08 Software and Data Integrity Failures
# A09 Security Logging and Monitoring Failures
# A10 Server-Side Request Forgery (SSRF)

import sqlite3
import hashlib
import requests
import yaml
import pickle
import subprocess
from flask import Flask, Response

# ---------- A05: Security Misconfiguration ----------
SECRET_KEY = "changeme"  # hardcoded secret
ALLOWED_HOSTS = ['*']  # permissive hosts
SESSION_COOKIE_SECURE = False  # insecure cookie flag
CSRF_COOKIE_SECURE = False  # insecure CSRF flag

# ---------- A03: Sensitive Data Exposure ----------
password = "SuperSecret123"  # potential hardcoded password
api_key = "sk_test_123456"  # potential hardcoded API key
hashlib.md5(b"weak")  # weak hashing algorithm

# ---------- A07: Identification and Authentication Failures ----------
username = "admin"  # default username
default_password = "password"  # default password
requests.get("https://example.com", verify=False)  # TLS verification disabled

app = Flask(__name__)

# ---------- A02: Broken Access Control ----------
# Sensitive route without auth decorator
@app.route("/admin")
def admin_panel():
    # Wildcard CORS header (also A05)
    resp = Response("admin panel")
    resp.headers["Access-Control-Allow-Origin"] = "*"
    return resp

# Login route that should be protected or checked (A07 heuristic)
@app.route("/login")
def login_page():
    print("login attempt for user")  # A09: print in auth flow
    return "login page"

# ---------- A04: Insecure Design ----------
# TODO insecure: temporary admin override without proper checks

# ---------- A06: Vulnerable and Outdated Components ----------
# Simulated vulnerable pins inside code string (still scanned by our rule)
requirements_block = """
flask==0.12
django==1.11
"""

# ---------- A08: Software and Data Integrity Failures ----------
user_code = "1 + 2"
result = eval(user_code)  # dangerous dynamic evaluation
data = yaml.load("key: value")  # unsafe YAML load (should be yaml.safe_load)
with open("tmp.bin", "wb") as fh:
    pickle.dump({"x": 1}, fh)  # create a pickle to then load (unsafe)
with open("tmp.bin", "rb") as fh:
    obj = pickle.load(fh)  # unsafe deserialization
subprocess.run("echo hi", shell=True)  # shell=True

# ---------- A09: Security Logging and Monitoring Failures ----------
try:
    raise ValueError("x")
except:
    print("error:", default_password)  # prints secret-ish value and uses bare-except

# ---------- A10: SSRF ----------
url = input("Enter URL: ")
requests.get(url)  # user-controlled URL

# ---------- A01: Injection ----------
user_input = input("Enter your username: ")
# Unparameterized, concatenated query assignment beginning with SELECT
query = "SELECT * FROM users WHERE username = '" + user_input + "'"

conn = sqlite3.connect("example.db")
cursor = conn.cursor()
cursor.execute(query)  # executes suspicious query var (A01)

# Explicit Flask debug enable (A05)
if __name__ == "__main__":
    app.run(debug=True)