Anpassungen und Reduzierung auf OCR

This commit is contained in:
2025-01-17 23:11:10 +00:00
parent f3cd175ae6
commit a569b9a1ab
7 changed files with 289 additions and 511 deletions

View File

@@ -1,17 +1,98 @@
from flask import Flask, request, jsonify
from flask import Flask, request, jsonify, send_file
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np # Importieren von numpy
import numpy as np
import cv2
import logging
import os
import uuid
import datetime
from app_factory import app
# from deck_endpoints import deck_bp # Importieren des Blueprints
app = Flask(__name__)
ocr = PaddleOCR(use_angle_cls=True, lang='en') # Passen Sie die Sprache nach Bedarf an
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
@app.route('/ocr', methods=['POST'])
# app = Flask(__name__)
# app.register_blueprint(deck_bp) # Registrieren des Blueprints
def get_dir_name():
    """Build a unique name for one request's debug directory.

    The name consists of the current local time formatted as
    ``YYYYmmdd_HHMMSS``, an underscore, and the first eight characters of a
    freshly generated UUID4.
    """
    now = datetime.datetime.now()
    random_suffix = str(uuid.uuid4())[:8]
    return "_".join((now.strftime('%Y%m%d_%H%M%S'), random_suffix))
def create_debug_directory(dir_name):
    """Create the directory ``debug_images/<dir_name>`` for debug output.

    Args:
        dir_name: Name of the sub-directory to create below ``debug_images``.

    Returns:
        The relative path of the newly created directory.

    Raises:
        FileExistsError: If the directory for ``dir_name`` already exists.
    """
    base_dir = 'debug_images'
    full_path = os.path.join(base_dir, dir_name)

    # exist_ok=True replaces the former "check, then create" sequence, which
    # could fail when two callers both saw the base directory as missing.
    os.makedirs(base_dir, exist_ok=True)

    # No exist_ok here: the directory for a single run must be new, so a
    # name collision is reported instead of silently reusing the directory.
    os.makedirs(full_path)

    return full_path
def _write_debug_image(path, image, params=None):
    """Write ``image`` to ``path`` via OpenCV and return whether it worked.

    ``cv2.imwrite`` reports a failed write through its boolean return value.
    A failure is logged as a warning so that a broken debug dump is visible
    without aborting the request.
    """
    if params is None:
        written = cv2.imwrite(path, image)
    else:
        written = cv2.imwrite(path, image, params)
    if not written:
        logger.warning(f"Debug image could not be written: {path}")
    return written


def preprocess_image(image, debug_dir):
    """Prepare an image for OCR and dump the intermediate stages to disk.

    The image is converted to grayscale, contrast-enhanced with CLAHE and
    denoised. Each stage, a JPEG-compressed copy of the input and a thumbnail
    of the denoised result are written into ``debug_dir``.

    Args:
        image: Input image array; it is converted with ``COLOR_RGB2GRAY`` and
            ``COLOR_RGB2BGR``, so RGB channel order is expected.
        debug_dir: Directory that receives the debug files.

    Returns:
        The denoised grayscale image.

    Raises:
        Exception: Any error raised during processing is logged and re-raised.
    """
    try:
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Apply CLAHE to improve contrast
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Noise reduction
        denoised = cv2.fastNlMeansDenoising(enhanced)

        # Optional: binary threshold (disabled)
        # _, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Store the intermediate results in the run-specific directory
        _write_debug_image(os.path.join(debug_dir, 'gray.png'), gray)
        _write_debug_image(os.path.join(debug_dir, 'enhanced.png'), enhanced)
        _write_debug_image(os.path.join(debug_dir, 'denoised.png'), denoised)
        # _write_debug_image(os.path.join(debug_dir, 'binary.png'), binary)

        # Store a JPEG-compressed copy of the original image (quality 80);
        # OpenCV expects BGR channel order when writing.
        compressed_jpg_path = os.path.join(debug_dir, 'original_compressed.jpg')
        original_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 80]
        # Only report success when the file was actually written.
        if _write_debug_image(compressed_jpg_path, original_bgr, jpeg_params):
            logger.info(f"Komprimiertes Original JPG gespeichert: {compressed_jpg_path}")

        # Create and store a thumbnail bounded to 128x128 pixels
        thumbnail_path = os.path.join(debug_dir, 'thumbnail.jpg')
        image_pil = Image.fromarray(denoised)
        image_pil.thumbnail((128, 128))
        image_pil.save(thumbnail_path, 'JPEG')
        logger.info(f"Thumbnail gespeichert: {thumbnail_path}")

        logger.info(f"Debug images saved in: {debug_dir}")
        return denoised

    except Exception as e:
        logger.error(f"Preprocessing error: {str(e)}")
        raise
@app.route('/api/ocr', methods=['POST'])
def ocr_endpoint():
try:
# Erstelle eindeutiges Debug-Verzeichnis für diesen Request
dir_name = get_dir_name()
debug_dir = create_debug_directory(dir_name)
logger.info(f"Created debug directory: {debug_dir}")
if not request.is_json:
return jsonify({'error': 'Content-Type must be application/json'}), 400
@@ -20,41 +101,167 @@ def ocr_endpoint():
return jsonify({'error': 'No image provided'}), 400
image_b64 = data['image']
if not image_b64:
return jsonify({'error': 'Empty image data'}), 400
# Base64 Dekodierung
try:
image_data = base64.b64decode(image_b64)
except Exception as decode_err:
return jsonify({'error': 'Base64 decode error', 'details': str(decode_err)}), 400
logger.error(f"Base64 decode error: {str(decode_err)}")
return jsonify({'error': 'Base64 decode error'}), 400
# Bildverarbeitung
try:
image = Image.open(BytesIO(image_data)).convert('RGB')
image = np.array(image) # Konvertieren zu numpy.ndarray
image = np.array(image)
logger.info(f"Image loaded successfully. Shape: {image.shape}")
# Originalbild speichern
cv2.imwrite(os.path.join(debug_dir, 'original.png'),
cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
except Exception as img_err:
return jsonify({'error': 'Invalid image data', 'details': str(img_err)}), 400
# Optional: Bildgröße anpassen, falls erforderlich
# PaddleOCR kann große Bilder verarbeiten, aber zur Effizienz können Sie eine maximale Größe setzen
max_width = 1920
max_height = 1080
height, width, _ = image.shape
if width > max_width or height > max_height:
aspect_ratio = width / height
if aspect_ratio > 1:
new_width = max_width
new_height = int(max_width / aspect_ratio)
else:
new_height = max_height
new_width = int(max_height * aspect_ratio)
image = np.array(Image.fromarray(image).resize((new_width, new_height)))
result = ocr.ocr(image, rec=True, cls=True)
return jsonify(result)
logger.error(f"Image processing error: {str(img_err)}")
return jsonify({'error': 'Invalid image data'}), 400
# Bildvorverarbeitung
processed_image = preprocess_image(image, debug_dir)
logger.info("Preprocessing completed")
# PaddleOCR Konfiguration
ocr = PaddleOCR(
use_angle_cls=True,
lang='en',
det_db_thresh=0.3,
det_db_box_thresh=0.3,
det_db_unclip_ratio=2.0,
rec_char_type='en',
det_limit_side_len=960,
det_limit_type='max',
use_dilation=True,
det_db_score_mode='fast',
show_log=True
)
# OCR durchführen
try:
result = ocr.ocr(processed_image, rec=True, cls=True)
# Debug-Informationen in Datei speichern
with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w') as f:
f.write(f"Raw OCR result:\n{result}\n\n")
if not result:
logger.warning("No results returned from OCR")
return jsonify({
'warning': 'No text detected',
'debug_dir': debug_dir
}), 200
if not result[0]:
logger.warning("Empty results list from OCR")
return jsonify({
'warning': 'Empty results list',
'debug_dir': debug_dir
}), 200
# Ergebnisse verarbeiten
extracted_results = []
for idx, item in enumerate(result[0]):
try:
box = item[0]
text = item[1][0] if item[1] else ''
confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0
extracted_results.append({
'box': box,
'text': text,
'confidence': confidence,
'name': dir_name
})
except Exception as proc_err:
logger.error(f"Error processing result {idx}: {str(proc_err)}")
# Statistiken in Debug-Datei speichern
with open(os.path.join(debug_dir, 'statistics.txt'), 'w') as f:
f.write(f"Total results: {len(extracted_results)}\n")
if extracted_results:
avg_confidence = np.mean([r['confidence'] for r in extracted_results])
f.write(f"Average confidence: {avg_confidence}\n")
f.write("\nDetailed results:\n")
for idx, result in enumerate(extracted_results):
f.write(f"Result {idx+1}:\n")
f.write(f"Text: {result['text']}\n")
f.write(f"Confidence: {result['confidence']}\n")
f.write(f"Name: {dir_name}\n")
f.write(f"Box coordinates: {result['box']}\n\n")
return jsonify({
'status': 'success',
'results': extracted_results,
})
except Exception as ocr_err:
logger.error(f"OCR processing error: {str(ocr_err)}")
logger.error(traceback.format_exc())
return jsonify({
'error': 'OCR processing failed',
'details': str(ocr_err),
'debug_dir': debug_dir
}), 500
except Exception as e:
traceback.print_exc()
return jsonify({'error': str(e)}), 500
logger.error(f"Unexpected error: {str(e)}")
logger.error(traceback.format_exc())
return jsonify({
'error': 'Internal server error',
'debug_dir': debug_dir if 'debug_dir' in locals() else None
}), 500
@app.route('/api/debug_image/<name>/<filename>', methods=['GET'])
def get_debug_image(name, filename):
    """Return the file stored at ``debug_images/<name>/<filename>``.

    Args:
        name: Directory name below ``debug_images``; only alphanumeric
            characters, ``_`` and ``-`` are accepted.
        filename: File name inside that directory; only alphanumeric
            characters, ``_``, ``-`` and ``.`` are accepted.

    Returns:
        The file as a response with a MIME type derived from the file
        extension (``image/png`` when the extension is not recognised), or a
        JSON error with status 400 (invalid name), 404 (file not found) or
        500 (unexpected failure).
    """
    try:
        # Safety measure: only allowed characters in the directory name
        if not all(c.isalnum() or c in ('_', '-') for c in name):
            logger.warning(f"Ungültiger Verzeichnisname angefordert: {name}")
            return jsonify({'error': 'Invalid directory name'}), 400

        # Safety measure: only allowed characters in the file name
        if not all(c.isalnum() or c in ('_', '-', '.') for c in filename):
            logger.warning(f"Ungültiger Dateiname angefordert: {filename}")
            return jsonify({'error': 'Invalid file name'}), 400

        # Build the full path to the image
        image_path = os.path.join('debug_images', name, filename)

        # Check that the file exists (relative to the working directory)
        if not os.path.isfile(image_path):
            logger.warning(f"Bild nicht gefunden: {image_path}")
            return jsonify({'error': 'Image not found'}), 404

        # Determine the MIME type from the file extension
        mime_by_suffix = (
            (('.jpg', '.jpeg'), 'image/jpeg'),
            (('.gif',), 'image/gif'),
            (('.bmp',), 'image/bmp'),
            (('.tiff', '.tif'), 'image/tiff'),
        )
        lowered = filename.lower()
        mime_type = 'image/png'  # default MIME type
        for suffixes, candidate in mime_by_suffix:
            if lowered.endswith(suffixes):
                mime_type = candidate
                break

        # Flask resolves a relative path given to send_file against the
        # application's root_path, whereas the isfile() check above is
        # relative to the current working directory. Passing an absolute
        # path makes both refer to the same file.
        return send_file(
            os.path.abspath(image_path),
            mimetype=mime_type,
            as_attachment=False
        )

    except Exception as e:
        logger.error(f"Fehler beim Abrufen des Bildes '{name}/{filename}': {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({'error': 'Failed to retrieve image'}), 500
# Run the app's server on port 5000 when this file is executed directly.
# NOTE(review): debug=True combined with host='0.0.0.0' makes the debugger
# reachable on every network interface — confirm this entry point is only
# used for local development.
# NOTE(review): the app.run line appears twice; presumably the removed and
# the added side of the diff — verify only one call exists in the real file.
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)
app.run(host='0.0.0.0', port=5000, debug=True)