From 92d2d44a7acaebdef65353191b4504bd42cfb595 Mon Sep 17 00:00:00 2001 From: Saqib Rokadia Date: Sat, 29 Mar 2025 09:41:17 -0700 Subject: [PATCH] Fixing up ocr alignment and details for running the model. --- Docker/docker-compose.yml | 2 + Dockerfile | 65 +-------- Scripts/setup.sh | 2 +- docker-base/Dockerfile | 11 +- requirements-py-3-11.txt | 167 ++++++++++++++++++++++++ src/Alignment/DocTrAlignmentProvider.py | 124 ++++++++++-------- 6 files changed, 247 insertions(+), 124 deletions(-) mode change 100644 => 100755 Scripts/setup.sh create mode 100644 requirements-py-3-11.txt diff --git a/Docker/docker-compose.yml b/Docker/docker-compose.yml index 0a4b776..aa90405 100644 --- a/Docker/docker-compose.yml +++ b/Docker/docker-compose.yml @@ -5,6 +5,8 @@ services: hostname: ${HOSTNAME} # Uses the current hostname of the system privileged: true # Equivalent to --privileged flag network_mode: "host" # Expose the container to the network + environment: + USE_TORCH: "1" devices: - "/dev/dri/renderD129:/dev/dri/renderD129" # rknpu device volumes: diff --git a/Dockerfile b/Dockerfile index 72fc417..d9f412d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,68 +1,7 @@ -ARG TARGETPLATFORM -FROM johnylamw/488-alt-python-3.10 +FROM rokadias/488-alt-python-3.11 WORKDIR /xbot/Alt/src RUN pip install --upgrade tensorflow RUN pip install --upgrade XTablesClient RUN pip install --upgrade ultralytics - -# ARG TARGETPLATFORM -# FROM johnylamw/alt-docker-base-python-3.10 -# WORKDIR /xbot/Alt/src - -# RUN apt-get update && apt-get install -y --no-install-recommends build-essential python3-dev -# # rm -rf /var/lib/apt/lists/* && apt-get clean - -# # this dependency allows cmake to install -# RUN apt-get install -y python3-launchpadlib - -# # Install cmake from the official repository -# RUN apt-get install -y software-properties-common && \ -# add-apt-repository ppa:george-edison55/cmake-3.x && \ -# apt-get update && \ -# apt-get install -y cmake - - -# RUN apt-get update && \ -# apt-get 
install -y python3-pip - -# # installing robotpy__apriltag (currenntly this installs all of robotpy) -# WORKDIR /xbot/Alt -# RUN git clone https://github.com/robotpy/mostrobotpy.git -# # into repo -# WORKDIR /xbot/Alt/mostrobotpy -# # keep deterministic for mostrobotpy -# RUN git fetch origin -# RUN git checkout f16ab492127e01f8db152ecfd0de47acbce5674a - -# RUN pip install pybind11 -# RUN pip install --upgrade pip -# RUN pip install -r rdev_requirements.txt # Install project-specific dependencies -# RUN pip install numpy # Install numpy separately, as instructed -# RUN pip install devtools -# # Step 5: Make the rdev.sh script executable -# RUN chmod +x rdev.sh - -# # Step 6: Run the build command to generate the wheels -# RUN ./rdev.sh ci run - -# # Step 7: Install the resulting wheels -# RUN pip install dist/*.whl -# # to make this only install apriltag we can find that whl only, but it might break it - - -# # go back to regular workdir -# WORKDIR /xbot/Alt -# COPY non-base-requirements.txt /xbot/Alt/non-base-requirements.txt -# RUN pip install --no-cache-dir --prefer-binary -r non-base-requirements.txt - -# COPY ./src/assets/librknnrt.so /usr/lib/librknnrt.so - -# WORKDIR /xbot/Alt/src -# WORKDIR /xbot/Alt -# COPY non-base-requirements.txt /xbot/Alt/non-base-requirements.txt -# RUN pip install --no-cache-dir --prefer-binary -r non-base-requirements.txt - -# COPY ./src/assets/librknnrt.so /usr/lib/librknnrt.so - -# WORKDIR /xbot/Alt/src +RUN pip install --upgrade python-doctr[torch,viz,html,contrib]==0.11.0 diff --git a/Scripts/setup.sh b/Scripts/setup.sh old mode 100644 new mode 100755 index 4bc7078..7876b13 --- a/Scripts/setup.sh +++ b/Scripts/setup.sh @@ -35,7 +35,7 @@ fi export PYTHONPATH=$PYTHONPATH:$(realpath $SCRIPT_DIR)/src -pip install -r $SCRIPT_DIR/requirements-py-3-10.txt +pip install -r $SCRIPT_DIR/../requirements-py-3-10.txt pip install -r $SCRIPT_DIR/dev-requirements.txt pip install -r $SCRIPT_DIR/non-base-requirements.txt pip install --upgrade 
tensorflow diff --git a/docker-base/Dockerfile b/docker-base/Dockerfile index 32d1a0c..4a1e4e0 100644 --- a/docker-base/Dockerfile +++ b/docker-base/Dockerfile @@ -1,8 +1,8 @@ -FROM python:3.10.16-slim-bookworm +FROM python:3.11.11-slim-bookworm # Install basic dependencies, Python-related packages, and other libraries RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - ca-certificates curl wget \ + ca-certificates cmake curl wget \ openssl git ffmpeg tar lsb-release \ procps manpages-dev unzip zip xauth swig \ python3-numpy python3-distutils python3-setuptools python3-pyqt5 python3-opencv \ @@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ libxext6 libxine2-dev libxrender-dev libxvidcore-dev libx264-dev \ libgtkglext1 libgtkglext1-dev libvtk9-dev libdc1394-dev \ libgstreamer-plugins-base1.0-dev libgstreamer1.0-dev libopenexr-dev \ - openexr qv4l2 v4l-utils zlib1g-dev && \ + openexr qv4l2 v4l-utils zlib1g-dev build-essential && \ rm -rf /var/lib/apt/lists/* && apt-get clean # Set work directory in the container @@ -23,8 +23,11 @@ WORKDIR /app # Copy the local base-requirements.txt to the container COPY /docker-base/base-requirements.txt /app/base-requirements.txt +COPY /requirements-py-3-11.txt /app/requirements-py-3-11.txt # Install Python packages globally from base-requirements.txt RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir --prefer-binary -r /app/base-requirements.txt && \ - pip install --no-cache-dir --prefer-binary h5py + pip install --no-cache-dir --prefer-binary -r /app/requirements-py-3-11.txt + +RUN pip install --no-cache-dir --prefer-binary h5py rknn-toolkit2==2.3.0 diff --git a/requirements-py-3-11.txt b/requirements-py-3-11.txt new file mode 100644 index 0000000..130fe64 --- /dev/null +++ b/requirements-py-3-11.txt @@ -0,0 +1,167 @@ +absl-py==2.1.0 +alt==0.0.1 +anyascii==0.3.2 +asttokens==2.4.1 +astunparse==1.6.3 +backports.tarfile==1.2.0 
+build==1.2.2.post1 +certifi==2024.8.30 +cffi==1.17.1 +cfgv==3.4.0 +charset-normalizer==3.4.0 +click==8.1.7 +cloudpickle==3.0.0 +colorama==0.4.6 +coloredlogs==15.0.1 +contourpy==1.2.1 +cryptography==44.0.0 +cycler==0.12.1 +deep_sort @ git+https://github.com/team488/deep_sort@79af029bb17ce867f88c5f96564c3a9824afbdcb +defusedxml==0.7.1 +depthai==2.30.0.0 +devtools==0.12.2 +distlib==0.3.8 +docutils==0.21.2 +ecdsa==0.19.0 +exceptiongroup==1.2.2 +executing==2.2.0 +fast-histogram==0.14 +filelock==3.16.1 +filetype==1.2.0 +filterpy==1.4.5 +flatbuffers==24.3.25 +fonttools==4.53.1 +fsspec==2024.9.0 +gast==0.6.0 +google-pasta==0.2.0 +gorilla==0.4.0 +grpcio==1.70.0 +grpcio-tools==1.70.0 +h5py==3.13.0 +huggingface-hub==0.29.3 +humanfriendly==10.0 +identify==2.6.1 +idna==3.10 +ifaddr==0.2.0 +importlib_metadata==8.5.0 +importlib_resources==6.4.5 +iniconfig==2.0.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.1.0 +jeepney==0.8.0 +Jinja2==3.1.4 +jwcrypto==1.5.6 +keras==3.5.0 +keyring==25.4.1 +Kivy==2.3.1 +Kivy-Garden==0.1.5 +kiwisolver==1.4.5 +langdetect==1.0.9 +libclang==18.1.1 +llvmlite==0.44.0 +Markdown==3.7 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +matplotlib==3.9.2 +mdurl==0.1.2 +ml_dtypes==0.5.1 +more-itertools==10.5.0 +mpmath==1.3.0 +namex==0.0.8 +networkx==3.2.1 +nh3==0.2.18 +nodeenv==1.9.1 +ntcore==0.1.7 +numba==0.61.0 +numpy==1.26.4 +nvidia-cublas-cu12==12.8.4.1 +nvidia-cuda-cupti-cu12==12.8.90 +nvidia-cuda-nvrtc-cu12==12.8.93 +nvidia-cuda-runtime-cu12==12.8.90 +nvidia-cudnn-cu12==9.8.0.87 +nvidia-cufft-cu12==11.3.3.83 +nvidia-curand-cu12==10.3.9.90 +nvidia-cusolver-cu12==11.7.3.90 +nvidia-cusparse-cu12==12.5.8.93 +nvidia-nccl-cu12==2.26.2 +nvidia-nvjitlink-cu12==12.8.93 +nvidia-nvtx-cu12==12.8.90 +onnx==1.17.0 +onnxruntime==1.21.0 +onnxslim==0.1.48 +opencv-python==4.10.0.84 +opt-einsum==3.3.0 +optree==0.12.1 +packaging==24.1 +pandas==2.2.2 +pillow==10.4.0 +pkginfo==1.10.0 +platformdirs==4.3.6 +pluggy==1.5.0 +pre-commit==3.8.0 +protobuf==5.29.2 
+psutil==6.1.0 +py-cpuinfo==9.0.0 +pyarmor.cli.core==7.6.2 +pyasn1==0.6.0 +pycapnp==2.0.0 +pyclipper==1.3.0.post6 +pycparser==2.22 +pyflame==0.3.2 +Pygments==2.18.0 +pynetworktables==2021.0.0 +pyparsing==3.1.2 +pypdfium2==4.30.1 +pyproject_hooks==1.2.0 +pytesseract==0.3.13 +pytest==8.3.3 +python-dateutil==2.9.0.post0 +python-doctr==0.11.0 +python-jose==3.3.0 +pytz==2024.1 +pywin32-ctypes==0.2.3 +PyYAML==6.0.2 +pyzmq==26.2.0 +RapidFuzz==3.12.2 +readme_renderer==44.0 +requests==2.32.3 +requests-futures==1.0.1 +requests-toolbelt==1.0.0 +rfc3986==2.0.0 +rich==13.9.2 +robotpy-apriltag==2025.3.2.0 +robotpy-wpimath==2025.3.2.0 +robotpy-wpiutil==2025.3.2.0 +rsa==4.9 +ruamel.yaml==0.18.10 +ruamel.yaml.clib==0.2.12 +scikit-fmm==2025.1.29 +scipy==1.13.0 +seaborn==0.13.2 +SecretStorage==3.3.3 +shapely==2.0.7 +singleton==0.1.0 +six==1.16.0 +sympy==1.13.3 +tensorboard==2.19.0 +tensorboard-data-server==0.7.2 +tensorflow==2.19.0 +tensorflow-io-gcs-filesystem==0.37.1 +termcolor==2.4.0 +tomli==2.0.1 +torch==2.6.0 +torchvision==0.21.0 +tqdm==4.66.5 +twine==5.1.1 +typing_extensions==4.12.2 +tzdata==2024.1 +ultralytics==8.3.96 +ultralytics-thop==2.0.8 +urllib3==2.2.3 +virtualenv==20.26.5 +Werkzeug==3.0.4 +wrapt==1.16.0 +XTablesClient==5.7.9 +zeroconf==0.145.1 +zipp==3.20.2 diff --git a/src/Alignment/DocTrAlignmentProvider.py b/src/Alignment/DocTrAlignmentProvider.py index fbb297b..130eeb9 100644 --- a/src/Alignment/DocTrAlignmentProvider.py +++ b/src/Alignment/DocTrAlignmentProvider.py @@ -1,11 +1,13 @@ from collections import defaultdict import cv2 import io +import re +import time from PIL import Image import numpy as np from abstract.AlignmentProvider import AlignmentProvider from Core import PropertyOperator, getLogger -from doctr.models import detection_predictor +from doctr.models import ocr_predictor from doctr.io import DocumentFile from doctr.utils.geometry import detach_scores @@ -18,84 +20,94 @@ def __init__(self): self.initalizerDetector() def initalizerDetector(self): - 
self.det_predictor = detection_predictor( - arch="fast_small", + self.ocr_predictor = ocr_predictor( + "fast_tiny", pretrained=True, - assume_straight_pages=True, - symmetric_pad=True, + assume_straight_pages=False, preserve_aspect_ratio=True, - batch_size=1, + resolve_blocks=True, ) # .cuda().half() # Uncomment this line if you have a GPU # Define the postprocessing parameters (optional) - self.det_predictor.model.postprocessor.bin_thresh = 0.3 - self.det_predictor.model.postprocessor.box_thresh = 0.1 + self.ocr_predictor.det_predictor.model.postprocessor.bin_thresh = 0.3 + self.ocr_predictor.det_predictor.model.postprocessor.box_thresh = 0.1 def isColorBased(self): return False # uses april tags so b/w frame def align(self, inputFrame, draw): + received_frame = time.time() + Sentinel.info(f"Received Time: {received_frame}") frame = inputFrame # move og ref of input frame to draw on original if not self.checkFrame(frame): # we assume if its not a b/w frame (eg checkframe false), that it means its a cv2 bgr and to change to b/w frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - pil_image = Image.fromarray(frame) - img_byte_arr = io.BytesIO() - pil_image.save(img_byte_arr, format="PNG") + frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) - docs = DocumentFile.from_images([img_byte_arr.getvalue()]) - results = self.det_predictor(docs) + results = self.ocr_predictor([frame]) left = None right = None - - for doc, res in zip(docs, results): - img_shape = (doc.shape[0], doc.shape[1]) - # Detach the probability scores from the results - detached_coords, prob_scores = detach_scores([res.get("words")]) - - for i, coords in enumerate(detached_coords[0]): - coords = ( - coords.reshape(2, 2).tolist() - if coords.shape == (4,) - else coords.tolist() + if results is not None: + Sentinel.info(f"Found results from doctr: {results}") + Sentinel.info(f"results export: {results.export()}") + + processed_frame = time.time() + Sentinel.info(f"Processed Time: {processed_frame}") + if 
len(results.pages) < 1: + return None, None + + for res in results.pages[0].blocks: + img_shape = results.pages[0].dimensions + matched_words = list( + filter( + lambda word: word.value is not None + and (word.value.startswith("ID") or re.search(r"\d+$", word.value)), + [word for line in res.lines for word in line.words], ) + ) + if matched_words is None or len(matched_words) == 0: + continue - # Convert relative to absolute pixel coordinates - points = np.array( - self._to_absolute(coords, img_shape), dtype=np.int32 - ).reshape((-1, 1, 2)) - - if draw: - cv2.polylines( - inputFrame, - [points], - isClosed=True, - color=(255, 0, 0), - thickness=2, - ) + for word in matched_words: + for coords in word.geometry: + Sentinel.info(f"coords: {coords}") - if len(points) > 0: - left = left if left is not None else points[0][0] - right = right if right is not None else points[0][0] - - vals = [point[0] for point in points] - vals.append(left) - vals.append(right) - left = min(vals) - right = max(vals) + # Convert relative to absolute pixel coordinates + points = np.array( + self._to_absolute([coords], img_shape), dtype=np.int32 + ) + Sentinel.info(f"points: {points}") + Sentinel.info(f"draw: {draw}") + + if draw: + cv2.polylines( + inputFrame, + [points], + isClosed=True, + color=(255, 0, 0), + thickness=2, + ) + + point_list = points.tolist() + if len(point_list) > 0: + left = left if left is not None else point_list[0][0] + right = right if right is not None else point_list[0][0] + + vals = [point[0] for point in point_list] + Sentinel.info(f"vals: {vals}") + vals.append(left) + vals.append(right) + left = min(vals) + right = max(vals) + + Sentinel.info("Left: " + str(left) + " Right: " + str(right)) return left, right # Helper function to convert relative coordinates to absolute pixel values - def _to_absolute(self, geom, img_shape: tuple[int, int]) -> list[list[int]]: + def _to_absolute( + self, coords: list[tuple[float, float]], img_shape: tuple[int, 
int] + ) -> list[list[int]]: h, w = img_shape - if ( - len(geom) == 2 - ): # Assume straight pages = True -> [[xmin, ymin], [xmax, ymax]] - (xmin, ymin), (xmax, ymax) = geom - xmin, xmax = int(round(w * xmin)), int(round(w * xmax)) - ymin, ymax = int(round(h * ymin)), int(round(h * ymax)) - return [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]] - else: # For polygons, convert each point to absolute coordinates - return [[int(point[0] * w), int(point[1] * h)] for point in geom] + return [[int(point[0] * w), int(point[1] * h)] for point in coords]