This commit is contained in:
2024-11-19 20:11:58 +01:00
commit 4d7a52ee99
9 changed files with 670 additions and 0 deletions

114
ocr_server2.py Normal file
View File

@@ -0,0 +1,114 @@
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np
import cv2 # Import von OpenCV
import os # Import für das Speichern von Dateien
import time # Import für Zeitstempel
app = Flask(__name__)
# Initialize PaddleOCR once at module scope (outside the request handler) to
# avoid reloading the model on every request and improve throughput.
ocr = PaddleOCR(use_angle_cls=True, lang='en')  # angle classification on, English model
def _keep_dark_regions(image_np, threshold_value=150):
    """Keep only the dark (near-black) pixels of an RGB image.

    Grayscales the image, inverse-thresholds it so dark pixels become the
    mask foreground, then opens (removes speckle noise) and dilates
    (thickens strokes) the mask before applying it to the original.

    Returns (filtered_image_np, mask) — both uint8 numpy arrays.
    """
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # THRESH_BINARY_INV: pixels darker than threshold_value become 255 in the mask.
    _, mask = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY_INV)
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)
    mask = cv2.morphologyEx(mask, cv2.MORPH_DILATE, kernel, iterations=1)
    filtered = cv2.bitwise_and(image_np, image_np, mask=mask)
    return filtered, mask


def _limit_size(image_np, max_width=1920, max_height=1080):
    """Downscale the image to fit within max_width x max_height, preserving aspect ratio.

    Returns the image unchanged when it already fits.
    """
    height, width = image_np.shape[:2]
    if width <= max_width and height <= max_height:
        return image_np
    aspect_ratio = width / height
    if aspect_ratio > 1:
        new_width = max_width
        new_height = int(max_width / aspect_ratio)
    else:
        new_height = max_height
        new_width = int(max_height * aspect_ratio)
    return np.array(Image.fromarray(image_np).resize((new_width, new_height)))


def _save_debug_images(filtered_image_np, mask, output_dir='processed_images'):
    """Persist the preprocessed image and its mask to disk for visual inspection.

    NOTE(review): runs on every request and grows unboundedly — consider
    gating this behind a debug flag in production.
    """
    os.makedirs(output_dir, exist_ok=True)  # race-free, idempotent
    timestamp = int(time.time() * 1000)  # millisecond timestamp keeps filenames unique
    processed_image_path = os.path.join(output_dir, f'processed_{timestamp}.png')
    Image.fromarray(filtered_image_np).save(processed_image_path)
    print(f'Processed image saved at: {processed_image_path}')
    mask_image_path = os.path.join(output_dir, f'mask_{timestamp}.png')
    Image.fromarray(mask).save(mask_image_path)
    print(f'Mask image saved at: {mask_image_path}')


def _extract_results(result):
    """Flatten PaddleOCR output into JSON-serializable dicts.

    ocr.ocr() returns one entry per input image; each entry is a list of
    [box, (text, confidence)] lines, or None when nothing was detected.
    The original code iterated `result` directly, treating each per-image
    list as a single line — wrong nesting; this walks both levels and
    converts numpy scalars to native floats so jsonify can serialize them.
    """
    extracted = []
    for page in result or []:
        for line in page or []:
            box = [[float(x), float(y)] for x, y in line[0]]
            text, confidence = line[1]
            extracted.append({
                'box': box,
                'text': text,
                'confidence': float(confidence),
            })
    return extracted


@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
    """Run OCR on a base64-encoded image posted as JSON.

    Request body: {"image": "<base64-encoded image bytes>"}.
    Response: JSON list of {"box", "text", "confidence"} detections.
    Returns 400 for malformed input, 500 for unexpected server errors.
    """
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type must be application/json'}), 400

        data = request.get_json()
        if not data or 'image' not in data:
            return jsonify({'error': 'No image provided'}), 400

        image_b64 = data['image']
        if not image_b64:
            return jsonify({'error': 'Empty image data'}), 400

        try:
            image_data = base64.b64decode(image_b64)
        except Exception as decode_err:
            return jsonify({'error': 'Base64 decode error', 'details': str(decode_err)}), 400

        try:
            image = Image.open(BytesIO(image_data)).convert('RGB')
            image_np = np.array(image)  # RGB uint8 array for OpenCV/PaddleOCR
        except Exception:
            return jsonify({'error': 'Invalid image data'}), 400

        # Preprocess: keep only dark regions (text is assumed dark on light),
        # then cap resolution before feeding the detector.
        filtered_image_np, mask = _keep_dark_regions(image_np)
        filtered_image_np = _limit_size(filtered_image_np)

        # Save intermediate artifacts so preprocessing can be inspected.
        _save_debug_images(filtered_image_np, mask)

        # Run detection + recognition (+ angle classification) on the filtered image.
        result = ocr.ocr(filtered_image_np, rec=True, cls=True)
        return jsonify(_extract_results(result))
    except Exception as e:
        # Boundary handler: log the traceback, return a generic 500.
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # threaded=False serves requests one at a time — presumably because the
    # shared module-level PaddleOCR instance is not safe for concurrent use;
    # confirm before enabling threading.
    # SECURITY: debug=True combined with host='0.0.0.0' exposes the Werkzeug
    # interactive debugger (remote code execution) to the whole network, so
    # debug is disabled here; re-enable only when bound to localhost.
    app.run(host='0.0.0.0', port=5000, debug=False, threaded=False)