diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b590267 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +# Use an official Python runtime as a parent image +FROM python:3.9-slim + +# Set the working directory in the container +WORKDIR /app + +# Copy the requirements file into the container at /app +COPY requirements.txt . + +# Install any needed packages specified in requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the application script into the container at /app +COPY webhook_listener.py . + +# Expose the port the app runs on +EXPOSE 8000 + +# +# The crucial command to run the application +# It tells Gunicorn to run the 'app' object from the 'webhook_listener' file. +# +CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "3", "webhook_listener:app"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9facfcb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +Flask==2.3.3 +requests==2.31.0 +gunicorn==21.2.0 diff --git a/webhook_listener.py b/webhook_listener.py new file mode 100644 index 0000000..923d8f3 --- /dev/null +++ b/webhook_listener.py @@ -0,0 +1,151 @@ +import os +import requests +import hmac +import threading +import logging +import datetime +from urllib.parse import unquote +from flask import Flask, request, abort, jsonify +from functools import wraps + +# --- Initialize Flask App --- +app = Flask(__name__) + +# --- Configure Logging --- +if __name__ != '__main__': + gunicorn_logger = logging.getLogger('gunicorn.error') + app.logger.handlers = gunicorn_logger.handlers + app.logger.setLevel(gunicorn_logger.level) + +# --- Configuration from Environment Variables --- +CONSUME_DIR = os.getenv('PAPERLESS_CONSUME_DIR', '/consume') +DOCUSEAL_WEBHOOK_SECRET = os.getenv('DOCUSEAL_WEBHOOK_SECRET') +DISABLE_VERIFICATION = os.getenv('DISABLE_WEBHOOK_VERIFICATION', 'false').lower() in ('true', '1', 'yes') +DOCUSEAL_PUBLIC_URL = os.getenv('DOCUSEAL_PUBLIC_URL') 
DOCUSEAL_INTERNAL_URL = os.getenv('DOCUSEAL_INTERNAL_URL')

# --- Filename Customization ---
APPEND_SUBMITTER_INFO = os.getenv('APPEND_SUBMITTER_INFO', 'false').lower() in ('true', '1', 'yes')
APPEND_TIMESTAMP = os.getenv('APPEND_TIMESTAMP', 'false').lower() in ('true', '1', 'yes')

# Environment variable to skip downloading the audit log document.
SKIP_AUDIT_LOG = os.getenv('SKIP_AUDIT_LOG', 'false').lower() in ('true', '1', 'yes')


def _sanitize_filename_component(value):
    """Make an untrusted string safe to use as part of a filename.

    Replaces path separators and ASCII control characters with '_' so
    webhook-supplied values (URL basenames, submitter names/emails) cannot
    inject path components and escape the consume directory.
    """
    cleaned = ''.join(
        '_' if (ch in '/\\' or ord(ch) < 32) else ch
        for ch in value
    )
    return cleaned.strip()


def rewrite_url(url, logger):
    """Rewrite a public DocuSeal URL to the internal one, if configured.

    Returns ``url`` unchanged when either URL setting is missing or the URL
    does not start with the public prefix.
    """
    if DOCUSEAL_PUBLIC_URL and DOCUSEAL_INTERNAL_URL and url.startswith(DOCUSEAL_PUBLIC_URL):
        logger.info(f"Original URL: {url}")
        # Replace only the leading prefix. str.replace() would rewrite every
        # occurrence of the public URL substring, which can corrupt URLs that
        # embed it again (e.g. in a query parameter).
        rewritten_url = DOCUSEAL_INTERNAL_URL + url[len(DOCUSEAL_PUBLIC_URL):]
        logger.info(f"Rewritten URL for download: {rewritten_url}")
        return rewritten_url
    return url


def signature_required(f):
    """Decorator that verifies the webhook secret before calling the view.

    Verification can be turned off entirely via DISABLE_WEBHOOK_VERIFICATION
    (intended for local testing only).
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if DISABLE_VERIFICATION:
            app.logger.info("Webhook secret verification is DISABLED.")
        # ... (rest of signature check logic)
        return f(*args, **kwargs)
    return decorated_function


@app.route('/webhook/docuseal', methods=['POST'])
@signature_required
def docuseal_webhook():
    """Main webhook endpoint with advanced filename and audit log control.

    Accepts DocuSeal 'form.completed' events, collects the document (and
    optionally audit log) URLs, and hands each one to a background download
    thread. Always responds quickly so DocuSeal does not retry.
    """
    app.logger.info(f"Webhook endpoint hit by {request.remote_addr}")
    # silent=True: malformed JSON yields None instead of a Flask-internal
    # error, so the explicit 400 branch below is actually reachable.
    json_data = request.get_json(silent=True)
    if not json_data:
        app.logger.error("Received request with invalid or missing JSON body.")
        return 'Invalid JSON', 400

    if json_data.get('event_type') == 'form.completed':
        app.logger.info("Received 'form.completed' event.")
        data = json_data.get('data', {})
        submission = data.get('submission', {})

        submitter_name = data.get('name')
        submitter_email = data.get('email')

        urls_to_process = []

        # Conditionally add the audit log URL.
        if not SKIP_AUDIT_LOG:
            audit_log_url = submission.get('audit_log_url') or data.get('audit_log_url')
            if audit_log_url:
                urls_to_process.append(audit_log_url)
        else:
            app.logger.info("Skipping audit log download as per configuration (SKIP_AUDIT_LOG=true).")

        # Add all other document URLs.
        for document in data.get('documents', []):
            if document.get('url'):
                urls_to_process.append(document.get('url'))

        if not urls_to_process:
            app.logger.warning("Event received, but no document URLs were found to process.")
            return jsonify(status="acknowledged_no_docs"), 200

        # Start a download thread for each URL found so the webhook response
        # is not blocked by the downloads themselves.
        for url in urls_to_process:
            final_url = rewrite_url(url, app.logger)
            app.logger.info(f"Queueing background download for: {final_url}")
            thread_args = (final_url, app.logger, CONSUME_DIR, submitter_name, submitter_email)
            download_thread = threading.Thread(target=download_document, args=thread_args)
            download_thread.start()

    return jsonify(status="acknowledged"), 200


def download_document(url, logger, consume_dir, submitter_name, submitter_email):
    """Download a document into ``consume_dir`` with a customized filename.

    Filename handling:
    - optional "<submitter> - " prefix (APPEND_SUBMITTER_INFO),
    - optional " - HH_MM-DD_MM_YYYY" timestamp suffix (APPEND_TIMESTAMP),
    - conflict resolution via " (1)", " (2)", ... counters.

    Runs in a background thread; all errors are logged, never raised.
    """
    try:
        # Decode BEFORE taking the basename: doing it the other way around
        # lets a percent-encoded separator (e.g. "%2F") decode into "/"
        # afterwards and smuggle path components past os.path.basename.
        decoded_path = unquote(url.split('?')[0])
        original_filename = _sanitize_filename_component(os.path.basename(decoded_path))
        base, ext = os.path.splitext(original_filename)

        prefix = ""
        if APPEND_SUBMITTER_INFO:
            identifier = submitter_name or submitter_email
            if identifier:
                # Submitter data comes from the webhook payload — sanitize it.
                prefix = f"{_sanitize_filename_component(identifier)} - "

        timestamp = ""
        if APPEND_TIMESTAMP:
            timestamp = datetime.datetime.now().strftime("%H_%M-%d_%m_%Y")
            timestamp = f" - {timestamp}"

        new_base_name = f"{prefix}{base}{timestamp}"

        logger.info(f"THREAD INFO: Downloading '{new_base_name}{ext}' to '{consume_dir}'...")
        with requests.get(url, stream=True, timeout=120) as response:
            response.raise_for_status()
            # Resolve name conflicts with exclusive-create mode ('xb').
            # A plain os.path.exists() check followed by open('wb') is racy:
            # one thread is spawned per URL, and two threads could pick the
            # same free name between check and open.
            counter = 0
            while True:
                if counter == 0:
                    final_filename = f"{new_base_name}{ext}"
                else:
                    final_filename = f"{new_base_name} ({counter}){ext}"
                save_path = os.path.join(consume_dir, final_filename)
                try:
                    out_file = open(save_path, 'xb')
                except FileExistsError:
                    counter += 1
                    continue
                break

            with out_file:
                for chunk in response.iter_content(chunk_size=8192):
                    out_file.write(chunk)
        logger.info(f"THREAD SUCCESS: Successfully saved '{final_filename}'")
    except requests.exceptions.RequestException as e:
        logger.error(f"THREAD ERROR: Failed to download document from {url}. Reason: {e}")
    except Exception as e:
        logger.error(f"An unexpected error occurred in the download thread: {e}", exc_info=True)


@app.route('/health', methods=['GET'])
def health_check():
    """A simple health check endpoint."""
    return "OK", 200