Implement full media crawler workflow with Flask backend and Vue frontend.

Add TMDB search and media detail pages, HDHive resource ingestion flow, unified error handling, Docker single-container runtime, and project docs/config updates for local deployment.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-09 16:16:18 +08:00
parent d3550bf79b
commit 82581d2949
49 changed files with 4959 additions and 0 deletions
--- a/backend/services/orchestrator.py
+++ b/backend/services/orchestrator.py
@@ -0,0 +1,176 @@
+import time
+from datetime import datetime
+
+from adapters.cms_adapter import create_resource
+from adapters.emby_adapter import exists_by_tmdb_id
+from adapters.hdhive_adapter import normalize_resource, search_resource, unlock_link
+from adapters.tmdb_adapter import get_media_detail
+from error_handling import AppServiceError, normalize_exception
+from storage import find_media_item, insert_log, upsert_media_item, upsert_task
+
+
+def now_iso():
+    return datetime.utcnow().isoformat() + "Z"
+
+
+def new_task_id():
+    return f"task_{int(time.time() * 1000)}"
+
+
+def log(task_id, step, level, message, detail=None):
+    insert_log(task_id, step, level, message, detail or {}, now_iso())
+
+
+def run_ingest_task(payload):
+    task_id = new_task_id()
+    trace_id = f"{task_id}_{hex(int(time.time() * 1000))[-6:]}"
+    task = {
+        "taskId": task_id,
+        "traceId": trace_id,
+        "status": "RUNNING",
+        "inputPayload": payload,
+        "startedAt": now_iso(),
+        "finishedAt": None,
+        "summary": None,
+    }
+    upsert_task(task)
+    log(task_id, "START", "INFO", "任务开始", {"payload": payload, "traceId": trace_id})
+
+    try:
+        deduped = find_media_item(payload["tmdbId"])
+        if deduped and deduped.get("ingest_status") == "SUCCESS":
+            task["status"] = "SUCCESS"
+            task["finishedAt"] = now_iso()
+            task["summary"] = {
+                "result": "SKIPPED_ALREADY_EXISTS",
+                "tmdbId": payload["tmdbId"],
+                "cmsId": deduped.get("cms_id"),
+            }
+            upsert_task(task)
+            log(task_id, "DEDUPE", "INFO", "命中本地幂等，跳过", task["summary"])
+            return task
+
+        tmdb_result = get_media_detail(payload["tmdbId"], payload["type"])
+        log(task_id, "TMDB_DETAIL", "INFO", "TMDB 元数据获取成功", {"status": tmdb_result["status"]})
+
+        hdhive_search = search_resource(payload["type"], payload["tmdbId"])
+        hdhive_first = None
+        search_data = hdhive_search.get("data") or {}
+        preferred_slug = str(payload.get("slug") or "").strip()
+        if isinstance(search_data, list) and search_data:
+            if preferred_slug:
+                hdhive_first = next(
+                    (item for item in search_data if (item or {}).get("slug") == preferred_slug),
+                    None,
+                )
+            if not hdhive_first:
+                hdhive_first = search_data[0]
+        elif isinstance(search_data, dict):
+            items = search_data.get("items") or []
+            if preferred_slug:
+                hdhive_first = next(
+                    (item for item in items if (item or {}).get("slug") == preferred_slug),
+                    None,
+                )
+            if items:
+                hdhive_first = hdhive_first or items[0]
+        if not hdhive_first:
+            raise AppServiceError(
+                "HDHIVE 未检索到可用资源",
+                category="not_found",
+                code="HDHIVE_RESOURCE_NOT_FOUND",
+                provider="hdhive",
+            )
+        log(task_id, "HDHIVE_SEARCH", "INFO", "HDHIVE 检索成功")
+
+        slug = hdhive_first.get("slug")
+        if not slug:
+            raise AppServiceError(
+                "HDHIVE 返回资源缺少 slug，无法解锁",
+                category="validation",
+                code="HDHIVE_INVALID_RESOURCE",
+                provider="hdhive",
+            )
+        hdhive_unlock = unlock_link(slug)
+        normalized_resource = normalize_resource(hdhive_first, hdhive_unlock.get("data"))
+        log(task_id, "HDHIVE_UNLOCK", "INFO", "HDHIVE 解锁成功", {"unlockUrl": normalized_resource["unlockUrl"]})
+
+        emby_exists = exists_by_tmdb_id(payload["tmdbId"])
+        log(task_id, "EMBY_EXISTS", "INFO", "Emby 查询完成", {"exists": emby_exists["exists"]})
+        if emby_exists["exists"]:
+            upsert_media_item(
+                {
+                    "tmdbId": payload["tmdbId"],
+                    "type": payload["type"],
+                    "title": tmdb_result["normalized"]["title"],
+                    "year": tmdb_result["normalized"]["year"],
+                    "tmdbRaw": tmdb_result["data"],
+                    "hdhiveRaw": hdhive_search["data"],
+                    "cmsId": None,
+                    "ingestStatus": "SKIPPED_ALREADY_EXISTS",
+                }
+            )
+            task["status"] = "SUCCESS"
+            task["finishedAt"] = now_iso()
+            task["summary"] = {"result": "SKIPPED_ALREADY_EXISTS", "source": "EMBY"}
+            upsert_task(task)
+            return task
+
+        cms_payload = {
+            "tmdbId": payload["tmdbId"],
+            "mediaType": payload["type"],
+            "title": tmdb_result["normalized"]["title"],
+            "originalTitle": tmdb_result["normalized"]["originalTitle"],
+            "year": tmdb_result["normalized"]["year"],
+            "overview": tmdb_result["normalized"]["overview"],
+            "posterPath": tmdb_result["normalized"]["posterPath"],
+            "rating": tmdb_result["normalized"]["rating"],
+            "genres": tmdb_result["normalized"]["genres"],
+            "resource": {
+                "title": normalized_resource["resourceTitle"],
+                "quality": normalized_resource["quality"],
+                "size": normalized_resource["size"],
+                "diskType": normalized_resource["diskType"],
+                "slug": normalized_resource["slug"],
+                "source": normalized_resource["source"],
+                "subtitleLanguage": normalized_resource["subtitleLanguage"],
+                "unlockUrl": normalized_resource["unlockUrl"],
+            },
+            "traceId": trace_id,
+        }
+        cms_result = create_resource(cms_payload)
+        cms_data = cms_result.get("data") or {}
+        cms_id = (
+            cms_data.get("id")
+            or cms_data.get("resourceId")
+            or (cms_data.get("data") or {}).get("id")
+            or (cms_data.get("data") or {}).get("resourceId")
+            or normalized_resource["slug"]
+        )
+        log(task_id, "CMS_CREATE", "INFO", "CMS 入库成功", {"cmsId": cms_id})
+
+        upsert_media_item(
+            {
+                "tmdbId": payload["tmdbId"],
+                "type": payload["type"],
+                "title": tmdb_result["normalized"]["title"],
+                "year": tmdb_result["normalized"]["year"],
+                "tmdbRaw": tmdb_result["data"],
+                "hdhiveRaw": hdhive_search["data"],
+                "cmsId": cms_id,
+                "ingestStatus": "SUCCESS",
+            }
+        )
+        task["status"] = "SUCCESS"
+        task["finishedAt"] = now_iso()
+        task["summary"] = {"result": "CREATED", "cmsId": cms_id}
+        upsert_task(task)
+        return task
+    except Exception as error:
+        normalized_error = normalize_exception(error)
+        log(task_id, "FAILED", "ERROR", str(normalized_error), normalized_error.to_dict())
+        task["status"] = "FAILED"
+        task["finishedAt"] = now_iso()
+        task["summary"] = {"error": normalized_error.to_dict()}
+        upsert_task(task)
+        return task