fast microservice to accompany n8n workflow
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Create main.py

authored by

Jasper Mayone and committed by
GitHub
2b2c07c1

+115
+115
main.py
# main.py
"""OCR microservice: read a weekly schedule out of an uploaded image.

Exposes a single ``POST /ocr`` endpoint that thresholds the uploaded image,
runs Tesseract on it and parses the recognised text into a per-person,
per-weekday schedule.
"""
import io
import re

import cv2
import numpy as np
import pytesseract
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image

# Weekday keys, in the column order the schedule rows are read in.
DAYS = (
    'monday',
    'tuesday',
    'wednesday',
    'thursday',
    'friday',
    'saturday',
    'sunday',
)

# A token counts as a date when it contains a digit directly followed by '/'
# (e.g. "1/6", "12/30"). This is a superset of the former literal '1/' check,
# which only recognised months/days ending in the digit 1.
_DATE_TOKEN = re.compile(r'\d/')

app = FastAPI()

# Add CORS middleware
# NOTE(review): FastAPI's CORS documentation states that wildcard origins,
# methods and headers must not be combined with allow_credentials=True.
# Behaviour is left unchanged here; confirm whether credentials are needed
# and either list explicit origins or drop the flag.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)


def process_image(image_bytes: bytes) -> str:
    """Run OCR on an encoded image and return the recognised text.

    The image is decoded with OpenCV, converted to grayscale and binarised
    with Otsu thresholding before being handed to Tesseract.

    Args:
        image_bytes: Raw bytes of an encoded image file.

    Returns:
        The text produced by ``pytesseract.image_to_string``.

    Raises:
        ValueError: If ``image_bytes`` is empty or cannot be decoded as an
            image.
    """
    if not image_bytes:
        raise ValueError("Uploaded file is empty")

    # Convert bytes to numpy array
    nparr = np.frombuffer(image_bytes, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    # imdecode signals an undecodable buffer by returning None instead of
    # raising; fail here with a clear message rather than inside cvtColor.
    if img is None:
        raise ValueError("Uploaded file could not be decoded as an image")

    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply thresholding (index 1 of the result is the thresholded image)
    thresh = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]

    # Convert back to PIL Image
    pil_image = Image.fromarray(thresh)

    # Perform OCR
    return pytesseract.image_to_string(pil_image)


def _build_week(dates, shifts=None):
    """Return the per-weekday ``{'date', 'shift'}`` mapping for one person.

    ``shifts`` is read positionally (first seven items, Monday first); when
    omitted every shift is the empty string.
    """
    if shifts is None:
        shifts = [''] * len(DAYS)
    return {
        day: {'date': dates.get(day, ''), 'shift': shift}
        for day, shift in zip(DAYS, shifts)
    }


def parse_schedule(text: str) -> dict:
    """Parse OCR text into ``{person: {weekday: {'date', 'shift'}}}``.

    Lines are classified in this order:

    * A line with more than one date-like token is the date row; its date
      tokens are assigned to Monday..Sunday in the order they appear.
    * A line whose first token is not purely digits/hyphens, is not ``OFF``
      and contains no ``:`` is a person name (the ``CASHIERS`` header is
      skipped). The person starts with empty shifts.
    * Otherwise, if a person is current and the line has at least seven
      tokens, the first seven tokens become that person's shifts.

    Args:
        text: Raw OCR output.

    Returns:
        A dict keyed by person name; each value maps every weekday in
        ``DAYS`` to ``{'date': str, 'shift': str}``.
    """
    schedule = {}
    current_person = None
    dates = {}

    for line in text.split('\n'):
        parts = line.split()
        # Blank / whitespace-only lines yield no tokens.
        if not parts:
            continue

        # Date row: pair the date tokens with weekdays by order of
        # appearance, so a leading label token cannot shift or duplicate
        # dates. Missing trailing dates fall back to '' via dates.get().
        date_tokens = [p for p in parts if _DATE_TOKEN.search(p)]
        if len(date_tokens) > 1:
            dates = dict(zip(DAYS, date_tokens))
            continue

        first = parts[0]
        # Check if this line starts with a name (no numbers or 'OFF')
        looks_like_name = (
            not first.replace('-', '').isdigit()
            and first != 'OFF'
            and ':' not in first
        )
        if looks_like_name:
            # Ignore "CASHIERS" header
            if first == "CASHIERS":
                continue
            current_person = ' '.join(parts)
            schedule[current_person] = _build_week(dates)
        # If we have shifts data and a current person
        elif current_person and len(parts) >= 7:
            schedule[current_person] = _build_week(dates, parts)

    return schedule


@app.post("/ocr")
async def ocr_endpoint(file: UploadFile = File(...)):
    """OCR an uploaded schedule image and return raw text plus parsed data.

    Returns a JSON object with ``raw_text`` (the OCR output) and
    ``schedule`` (the result of ``parse_schedule``).

    Raises:
        HTTPException: 400 when the upload is empty or not a decodable
            image.
    """
    # Read the image file
    image_bytes = await file.read()

    # OCR is blocking, CPU-bound work; run it in the thread pool so the
    # event loop keeps serving other requests.
    try:
        text = await run_in_threadpool(process_image, image_bytes)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    # Parse the schedule
    schedule = parse_schedule(text)

    return {
        "raw_text": text,
        "schedule": schedule,
    }


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)