Add ArUco fiducial tag detection to mask Convey UI in screencasts

+142

observe/aruco.py

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""
ArUco marker detection for Convey UI masking.

Detects the 4 corner fiducial tags (ArUco DICT_4X4_50, IDs 2,4,6,7) used in the
Convey web interface to identify and mask self-referential UI regions in
screencast frames before vision processing.
"""

from __future__ import annotations

from typing import Optional

import cv2
import numpy as np
from PIL import Image, ImageDraw

# Corner tag IDs from convey/static/tags/
# Tag positions: 6=TL, 7=TR, 4=BL, 2=BR
CORNER_TAG_IDS = {6, 7, 4, 2}

# (tag ID, index of that marker's outermost corner), listed in the polygon
# order [TL, TR, BR, BL]. ArUco reports each marker's own corners in the order
# [TL, TR, BR, BL], so the TL tag contributes corner 0, the TR tag corner 1,
# the BR tag corner 2 and the BL tag corner 3.
_OUTER_CORNERS = ((6, 0), (7, 1), (2, 2), (4, 3))

# Singleton detector instance (created on first use)
_detector: Optional[cv2.aruco.ArucoDetector] = None


def _get_detector() -> cv2.aruco.ArucoDetector:
    """Get or create the ArUco detector singleton."""
    global _detector
    if _detector is None:
        dictionary = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
        params = cv2.aruco.DetectorParameters()
        # Tuned parameters for small markers
        params.minMarkerPerimeterRate = 0.002
        params.maxMarkerPerimeterRate = 8.0
        params.adaptiveThreshWinSizeMin = 3
        params.adaptiveThreshWinSizeMax = 23
        params.cornerRefinementMethod = cv2.aruco.CORNER_REFINE_SUBPIX
        _detector = cv2.aruco.ArucoDetector(dictionary, params)
    return _detector


def detect_convey_region(image: Image.Image) -> Optional[list[tuple[float, float]]]:
    """
    Detect Convey UI region by finding all 4 corner fiducial tags.

    Parameters
    ----------
    image : Image.Image
        PIL Image to scan for ArUco markers. Three-dimensional pixel arrays
        are converted from RGB to grayscale; two-dimensional arrays are used
        as-is.

    Returns
    -------
    Optional[list[tuple[float, float]]]
        Polygon coordinates [(x,y), ...] in order [TL, TR, BR, BL] if all 4
        corner tags are detected, None otherwise. Coordinates are built-in
        Python floats.
    """
    # Convert PIL to numpy array
    img_array = np.array(image)

    # Convert to grayscale for detection
    if img_array.ndim == 3:
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
    else:
        gray = img_array

    # Detect markers
    corners, ids, _ = _get_detector().detectMarkers(gray)
    if ids is None:
        return None

    # Map detected tag ID -> corner points (a repeated ID keeps the last hit)
    id_to_corners = dict(zip(ids.flatten().tolist(), corners))

    # All 4 corner tags must be present; a partial set gives no usable region
    if not CORNER_TAG_IDS.issubset(id_to_corners):
        return None

    # Take the outermost corner of each tag to form the bounding polygon.
    polygon = []
    for tag_id, corner_index in _OUTER_CORNERS:
        x, y = id_to_corners[tag_id].reshape(4, 2)[corner_index]
        # OpenCV hands back NumPy scalars; convert to built-in floats so the
        # result matches the annotated return type and downstream arithmetic
        # (area, logging) runs on plain Python numbers.
        polygon.append((float(x), float(y)))
    return polygon


def mask_convey_region(image: Image.Image, polygon: list[tuple[float, float]]) -> None:
    """
    Mask Convey UI region by filling polygon with black.

    Mutates the image in place.

    Parameters
    ----------
    image : Image.Image
        PIL Image to mask (modified in place)
    polygon : list[tuple[float, float]]
        Polygon coordinates from detect_convey_region()
    """
    draw = ImageDraw.Draw(image)
    # A colour name is resolved by Pillow for the image's own mode, so the
    # same call blacks out RGB frames as well as single-band (grayscale) ones,
    # which an RGB tuple would not fit.
    draw.polygon(polygon, fill="black")


def polygon_area(polygon: list[tuple[float, float]]) -> float:
    """
    Calculate area of a polygon using the shoelace formula.

    Parameters
    ----------
    polygon : list[tuple[float, float]]
        List of (x, y) coordinates

    Returns
    -------
    float
        Area in square pixels; 0.0 when fewer than 3 points are given
    """
    n = len(polygon)
    if n < 3:
        return 0.0

    area = 0.0
    for i in range(n):
        j = (i + 1) % n  # wrap around so the last vertex pairs with the first
        area += polygon[i][0] * polygon[j][1]
        area -= polygon[j][0] * polygon[i][1]
    # The signed sum depends on vertex winding; abs() makes the area unsigned
    return abs(area) / 2.0


__all__ = [
    "CORNER_TAG_IDS",
    "detect_convey_region",
    "mask_convey_region",
    "polygon_area",
]

+20

observe/describe.py

··· 28 28 import av 29 29 from PIL import Image, ImageChops, ImageStat 30 30 31 + from observe.aruco import detect_convey_region, mask_convey_region, polygon_area 31 32 from observe.utils import get_segment_key 32 33 from think.callosum import callosum_send 33 34 from think.utils import setup_cli ··· 161 162 RMS_THRESHOLD = 0.05 162 163 # Downsample size for RMS comparison 163 164 COMPARE_SIZE = (160, 90) 165 + # Skip frame if Convey UI covers more than this fraction of the frame 166 + MASK_SKIP_THRESHOLD = 0.8 164 167 165 168 def __init__(self, video_path: Path): 166 169 self.video_path = video_path ··· 207 210 arr_rgb = frame.to_ndarray(format="rgb24") 208 211 pil_img = Image.fromarray(arr_rgb) 209 212 del arr_rgb 213 + 214 + # Detect and mask Convey UI region (fiducial corner tags) 215 + convey_polygon = detect_convey_region(pil_img) 216 + if convey_polygon is not None: 217 + # Check if Convey covers most of the frame 218 + mask_area = polygon_area(convey_polygon) 219 + frame_area = pil_img.width * pil_img.height 220 + if mask_area / frame_area > self.MASK_SKIP_THRESHOLD: 221 + # Skip frame entirely - Convey UI dominates 222 + pil_img.close() 223 + logger.debug( 224 + f"Skipping frame at {timestamp:.2f}s " 225 + f"(Convey UI covers {mask_area/frame_area:.0%})" 226 + ) 227 + continue 228 + # Mask the Convey region with black 229 + mask_convey_region(pil_img, convey_polygon) 210 230 211 231 # Downsample for comparison 212 232 current_small = self._downsample(pil_img)

+164

tests/test_aruco.py

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Tests for observe.aruco ArUco marker detection and masking."""

import cv2
import numpy as np
from PIL import Image

from observe.aruco import (
    CORNER_TAG_IDS,
    detect_convey_region,
    mask_convey_region,
    polygon_area,
)

# Geometry shared by the synthetic-marker tests
FRAME_WIDTH = 640
FRAME_HEIGHT = 480
MARKER_SIZE = 50
PAD = 20

WHITE = (255, 255, 255)
BLACK = (0, 0, 0)


def _frame_with_markers(placements):
    """Build a white RGB frame with ArUco markers pasted into it.

    ``placements`` is an iterable of ``(tag_id, (top, left))`` pairs giving
    the pixel row/column of each marker's top-left corner. Returns the frame
    as a PIL image.
    """
    frame = np.ones((FRAME_HEIGHT, FRAME_WIDTH, 3), dtype=np.uint8) * 255
    dictionary = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
    for tag_id, (top, left) in placements:
        marker = cv2.aruco.generateImageMarker(dictionary, tag_id, MARKER_SIZE)
        # Markers are generated single-channel; expand to match the RGB frame
        marker_rgb = cv2.cvtColor(marker, cv2.COLOR_GRAY2RGB)
        frame[top : top + MARKER_SIZE, left : left + MARKER_SIZE] = marker_rgb
    return Image.fromarray(frame)


def test_corner_tag_ids():
    """The corner tag ID set is exactly the four expected IDs."""
    assert CORNER_TAG_IDS == {2, 4, 6, 7}


def test_polygon_area_square():
    """A 100x100 square has an area of 10000."""
    square = [(0, 0), (100, 0), (100, 100), (0, 100)]
    assert polygon_area(square) == 10000.0


def test_polygon_area_triangle():
    """A right triangle with legs 10 and 20 has an area of 100."""
    triangle = [(0, 0), (10, 0), (0, 20)]
    assert polygon_area(triangle) == 100.0  # (10 * 20) / 2


def test_polygon_area_empty():
    """Fewer than three points yields zero area."""
    assert polygon_area([]) == 0.0
    assert polygon_area([(0, 0)]) == 0.0
    assert polygon_area([(0, 0), (1, 1)]) == 0.0


def test_detect_convey_region_no_markers():
    """A plain white frame contains no markers, so detection returns None."""
    blank = Image.new("RGB", (640, 480), color="white")
    assert detect_convey_region(blank) is None


def test_detect_convey_region_grayscale():
    """Grayscale input is accepted and, with no markers, returns None."""
    gray = Image.new("L", (640, 480), color=128)
    # No markers, but shouldn't crash
    assert detect_convey_region(gray) is None


def test_mask_convey_region():
    """Masking paints the polygon black and leaves the rest untouched."""
    img = Image.new("RGB", (100, 100), color="white")

    # Centered square
    mask_convey_region(img, [(25, 25), (75, 25), (75, 75), (25, 75)])

    # Image corners lie outside the square and stay white
    assert img.getpixel((0, 0)) == WHITE
    assert img.getpixel((99, 99)) == WHITE

    # The center lies inside the square and is black
    assert img.getpixel((50, 50)) == BLACK


def test_mask_convey_region_triangle():
    """Masking also works for a non-rectangular polygon."""
    img = Image.new("RGB", (100, 100), color="white")

    mask_convey_region(img, [(50, 10), (90, 90), (10, 90)])

    # Inside the triangle
    assert img.getpixel((50, 60)) == BLACK

    # Top corners of the image are outside the triangle
    assert img.getpixel((5, 5)) == WHITE
    assert img.getpixel((95, 5)) == WHITE


def test_detect_convey_region_with_real_markers():
    """All four rendered corner markers produce a four-point polygon."""
    right = FRAME_WIDTH - PAD - MARKER_SIZE
    bottom = FRAME_HEIGHT - PAD - MARKER_SIZE
    pil_img = _frame_with_markers(
        [
            (6, (PAD, PAD)),  # TL
            (7, (PAD, right)),  # TR
            (4, (bottom, PAD)),  # BL
            (2, (bottom, right)),  # BR
        ]
    )

    result = detect_convey_region(pil_img)

    # Should find all 4 markers and return polygon
    assert result is not None
    assert len(result) == 4

    # Every vertex is an (x, y) pair of numeric values
    for point in result:
        assert len(point) == 2
        for coord in point:
            assert np.issubdtype(type(coord), np.number) or isinstance(
                coord, (int, float)
            )


def test_detect_convey_region_partial_markers():
    """Only two of the four corner markers present means no region."""
    # Only the TL and TR markers are rendered
    pil_img = _frame_with_markers(
        [
            (6, (PAD, PAD)),
            (7, (PAD, FRAME_WIDTH - PAD - MARKER_SIZE)),
        ]
    )

    assert detect_convey_region(pil_img) is None

Configure Feed

Configure Feed