Implement full media crawler workflow with Flask backend and Vue frontend.

Add TMDB search and media detail pages, HDHive resource ingestion flow, unified error handling, Docker single-container runtime, and project docs/config updates for local deployment. Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-09 16:16:18 +08:00
parent d3550bf79b
commit 82581d2949
49 changed files with 4959 additions and 0 deletions
--- a/backend/services/init.py
+++ b/backend/services/init.py
--- a/backend/services/media_service.py
+++ b/backend/services/media_service.py
@@ -0,0 +1,73 @@
+from adapters.hdhive_adapter import normalize_resource, search_resource, unlock_link
+from adapters.tmdb_adapter import get_media_detail, search_media
+from error_handling import AppServiceError
+
+
+def search_media_by_keyword(query, media_type):
+    """Search media by keyword and reshape each hit for the API response.
+
+    Args:
+        query: Search keyword, forwarded unchanged to ``search_media``.
+        media_type: Media kind, forwarded to ``search_media`` and echoed back
+            as the ``type`` of every returned entry.
+
+    Returns:
+        A dict ``{"items": [...]}`` whose entries carry the keys ``id``,
+        ``type``, ``title``, ``overview``, ``posterPath``, ``releaseDate``
+        and ``voteAverage``.
+    """
+    hits = search_media(query, media_type).get("items") or []
+
+    def to_summary(hit):
+        # "title"/"release_date" win; "name"/"first_air_date" are used only
+        # when the former are missing or empty.
+        return {
+            "id": hit.get("id"),
+            "type": media_type,
+            "title": hit.get("title") or hit.get("name"),
+            "overview": hit.get("overview") or "",
+            "posterPath": hit.get("poster_path") or "",
+            "releaseDate": hit.get("release_date") or hit.get("first_air_date") or "",
+            "voteAverage": hit.get("vote_average"),
+        }
+
+    return {"items": [to_summary(hit) for hit in hits]}
+
+
+def get_media_resources(media_type, tmdb_id):
+    """Return media detail together with the HDHive resources found for it.
+
+    Every resource that carries a slug gets an unlock attempt. A failed
+    attempt does not abort the listing: the error text is stored in the
+    resource's ``unlockError`` field (``None`` when nothing failed).
+
+    Args:
+        media_type: Media kind passed to the TMDB and HDHive adapters.
+        tmdb_id: TMDB identifier of the media item.
+
+    Returns:
+        A dict with ``media`` (the ``normalized`` part of the detail result)
+        and ``resources`` (a list of normalized resource dicts).
+    """
+    detail = get_media_detail(tmdb_id, media_type)
+    found = search_resource(media_type, tmdb_id).get("data") or []
+    # The search payload is either a plain list or a dict wrapping "items".
+    candidates = (found.get("items") or []) if isinstance(found, dict) else found
+
+    def describe(entry):
+        slug = (entry or {}).get("slug")
+        unlocked, failure = {}, None
+        if slug:
+            try:
+                unlocked = unlock_link(slug).get("data") or {}
+            except Exception as error:
+                # Best effort on purpose: report the failure per resource.
+                failure = str(error)
+        record = normalize_resource(entry, unlocked)
+        record["unlockError"] = failure
+        return record
+
+    resources = [describe(entry) for entry in candidates]
+    return {
+        "media": detail.get("normalized"),
+        "resources": resources,
+    }
+
+
+def validate_media_query(query, media_type):
+    """Validate the inputs of a media search request.
+
+    Args:
+        query: Search keyword; any falsy value is rejected.
+        media_type: Media kind, checked by ``validate_media_type``.
+
+    Raises:
+        AppServiceError: With code ``INVALID_INPUT`` and status 400 when
+            ``query`` is falsy, or whatever ``validate_media_type`` raises
+            for ``media_type``.
+    """
+    if query:
+        validate_media_type(media_type)
+        return
+    raise AppServiceError(
+        "query is required",
+        category="validation",
+        code="INVALID_INPUT",
+        status=400,
+        provider="api",
+    )
+
+
+def validate_media_type(media_type):
+    """Ensure ``media_type`` is one of the two supported kinds.
+
+    Args:
+        media_type: Value to check; only ``"movie"`` and ``"tv"`` pass.
+
+    Raises:
+        AppServiceError: With code ``INVALID_INPUT`` and status 400 for any
+            other value.
+    """
+    supported = ("movie", "tv")
+    if media_type in supported:
+        return
+    raise AppServiceError(
+        "type must be movie or tv",
+        category="validation",
+        code="INVALID_INPUT",
+        status=400,
+        provider="api",
+    )
--- a/backend/services/orchestrator.py
+++ b/backend/services/orchestrator.py
@@ -0,0 +1,176 @@
+import time
+from datetime import datetime
+
+from adapters.cms_adapter import create_resource
+from adapters.emby_adapter import exists_by_tmdb_id
+from adapters.hdhive_adapter import normalize_resource, search_resource, unlock_link
+from adapters.tmdb_adapter import get_media_detail
+from error_handling import AppServiceError, normalize_exception
+from storage import find_media_item, insert_log, upsert_media_item, upsert_task
+
+
+def now_iso():
+    """Return the current UTC time as an ISO-8601 string ending in ``Z``.
+
+    ``datetime.utcnow()`` is deprecated since Python 3.12, so the time is
+    taken as an aware UTC value instead. The produced text keeps the same
+    shape as before (naive ISO format followed by ``Z``).
+    """
+    from datetime import timezone  # local: the file imports only ``datetime``
+
+    # Drop tzinfo before formatting, otherwise isoformat() would append
+    # "+00:00" in front of the trailing "Z".
+    current = datetime.now(timezone.utc).replace(tzinfo=None)
+    return current.isoformat() + "Z"
+
+
+def new_task_id():
+    """Return a new task identifier: ``task_``, epoch milliseconds, random hex.
+
+    The millisecond timestamp alone is not unique: two tasks created within
+    the same millisecond would share an id. A short random suffix keeps the
+    ids distinct while they still start with ``task_`` and the timestamp.
+    """
+    import uuid  # local: not among the file-level imports
+
+    millis = int(time.time() * 1000)
+    return f"task_{millis}_{uuid.uuid4().hex[:8]}"
+
+
+def log(task_id, step, level, message, detail=None):
+    """Store one log entry for a task, stamped with the current time.
+
+    Args:
+        task_id: Identifier of the task the entry belongs to.
+        step: Name of the step being reported.
+        level: Severity label.
+        message: Log message text.
+        detail: Optional extra data; any falsy value is stored as ``{}``.
+    """
+    extra = detail if detail else {}
+    insert_log(task_id, step, level, message, extra, now_iso())
+
+
+def _pick_hdhive_resource(search_data, preferred_slug):
+    """Return the entry whose slug equals ``preferred_slug``, else the first entry, else ``None``."""
+    if isinstance(search_data, list):
+        candidates = search_data
+    elif isinstance(search_data, dict):
+        candidates = search_data.get("items") or []
+    else:
+        candidates = []
+    if not candidates:
+        return None
+    if preferred_slug:
+        for candidate in candidates:
+            if (candidate or {}).get("slug") == preferred_slug:
+                return candidate
+    return candidates[0]
+
+
+def _media_item_record(payload, tmdb_result, hdhive_search, cms_id, ingest_status):
+    """Build the record handed to ``upsert_media_item`` for this ingest run."""
+    normalized = tmdb_result["normalized"]
+    return {
+        "tmdbId": payload["tmdbId"],
+        "type": payload["type"],
+        "title": normalized["title"],
+        "year": normalized["year"],
+        "tmdbRaw": tmdb_result["data"],
+        "hdhiveRaw": hdhive_search["data"],
+        "cmsId": cms_id,
+        "ingestStatus": ingest_status,
+    }
+
+
+def _finish_task(task, status, summary):
+    """Stamp the final status, finish time and summary on ``task``, persist it, return it."""
+    task["status"] = status
+    task["finishedAt"] = now_iso()
+    task["summary"] = summary
+    upsert_task(task)
+    return task
+
+
+def run_ingest_task(payload):
+    """Run one ingest task end to end and return the resulting task dict.
+
+    Steps, each reported through ``log``:
+      1. Skip when the local store already holds this TMDB id with
+         ``ingest_status == "SUCCESS"``.
+      2. Fetch TMDB detail.
+      3. Search HDHive and pick the resource matching ``payload["slug"]``
+         when given, otherwise the first result.
+      4. Ask Emby whether the item already exists; if so, record it as
+         ``SKIPPED_ALREADY_EXISTS`` and stop.
+      5. Unlock the chosen HDHive resource and normalize it.
+      6. Create the resource in the CMS and record the media item.
+
+    The Emby check runs before the unlock call. The unlock result is not
+    used on the skip path, so unlocking first was wasted work and could even
+    fail a task that was about to be skipped.
+    NOTE(review): unlocking is presumably a side-effecting remote call;
+    confirm against the HDHive adapter.
+
+    Args:
+        payload: Dict with ``tmdbId`` and ``type``, plus an optional ``slug``
+            naming the preferred HDHive resource.
+
+    Returns:
+        The task dict (``taskId``, ``traceId``, ``status``, ``inputPayload``,
+        ``startedAt``, ``finishedAt``, ``summary``). Any exception raised
+        after the task has been registered is normalized, logged, and
+        reported as ``status == "FAILED"`` instead of propagating.
+    """
+    task_id = new_task_id()
+    trace_id = f"{task_id}_{hex(int(time.time() * 1000))[-6:]}"
+    task = {
+        "taskId": task_id,
+        "traceId": trace_id,
+        "status": "RUNNING",
+        "inputPayload": payload,
+        "startedAt": now_iso(),
+        "finishedAt": None,
+        "summary": None,
+    }
+    upsert_task(task)
+    log(task_id, "START", "INFO", "任务开始", {"payload": payload, "traceId": trace_id})
+
+    try:
+        deduped = find_media_item(payload["tmdbId"])
+        if deduped and deduped.get("ingest_status") == "SUCCESS":
+            _finish_task(
+                task,
+                "SUCCESS",
+                {
+                    "result": "SKIPPED_ALREADY_EXISTS",
+                    "tmdbId": payload["tmdbId"],
+                    "cmsId": deduped.get("cms_id"),
+                },
+            )
+            log(task_id, "DEDUPE", "INFO", "命中本地幂等，跳过", task["summary"])
+            return task
+
+        tmdb_result = get_media_detail(payload["tmdbId"], payload["type"])
+        log(task_id, "TMDB_DETAIL", "INFO", "TMDB 元数据获取成功", {"status": tmdb_result["status"]})
+
+        hdhive_search = search_resource(payload["type"], payload["tmdbId"])
+        preferred_slug = str(payload.get("slug") or "").strip()
+        hdhive_first = _pick_hdhive_resource(hdhive_search.get("data") or {}, preferred_slug)
+        if not hdhive_first:
+            raise AppServiceError(
+                "HDHIVE 未检索到可用资源",
+                category="not_found",
+                code="HDHIVE_RESOURCE_NOT_FOUND",
+                provider="hdhive",
+            )
+        log(task_id, "HDHIVE_SEARCH", "INFO", "HDHIVE 检索成功")
+
+        slug = hdhive_first.get("slug")
+        if not slug:
+            raise AppServiceError(
+                "HDHIVE 返回资源缺少 slug，无法解锁",
+                category="validation",
+                code="HDHIVE_INVALID_RESOURCE",
+                provider="hdhive",
+            )
+
+        # Check Emby before unlocking: nothing is unlocked for a title that
+        # is going to be skipped anyway.
+        emby_exists = exists_by_tmdb_id(payload["tmdbId"])
+        log(task_id, "EMBY_EXISTS", "INFO", "Emby 查询完成", {"exists": emby_exists["exists"]})
+        if emby_exists["exists"]:
+            upsert_media_item(
+                _media_item_record(payload, tmdb_result, hdhive_search, None, "SKIPPED_ALREADY_EXISTS")
+            )
+            return _finish_task(
+                task,
+                "SUCCESS",
+                {"result": "SKIPPED_ALREADY_EXISTS", "source": "EMBY"},
+            )
+
+        hdhive_unlock = unlock_link(slug)
+        # Pass an empty dict when the unlock response carries no data, the
+        # same way get_media_resources does.
+        normalized_resource = normalize_resource(hdhive_first, hdhive_unlock.get("data") or {})
+        log(task_id, "HDHIVE_UNLOCK", "INFO", "HDHIVE 解锁成功", {"unlockUrl": normalized_resource["unlockUrl"]})
+
+        media = tmdb_result["normalized"]
+        cms_payload = {
+            "tmdbId": payload["tmdbId"],
+            "mediaType": payload["type"],
+            "title": media["title"],
+            "originalTitle": media["originalTitle"],
+            "year": media["year"],
+            "overview": media["overview"],
+            "posterPath": media["posterPath"],
+            "rating": media["rating"],
+            "genres": media["genres"],
+            "resource": {
+                "title": normalized_resource["resourceTitle"],
+                "quality": normalized_resource["quality"],
+                "size": normalized_resource["size"],
+                "diskType": normalized_resource["diskType"],
+                "slug": normalized_resource["slug"],
+                "source": normalized_resource["source"],
+                "subtitleLanguage": normalized_resource["subtitleLanguage"],
+                "unlockUrl": normalized_resource["unlockUrl"],
+            },
+            "traceId": trace_id,
+        }
+        cms_result = create_resource(cms_payload)
+        cms_data = cms_result.get("data") or {}
+        nested = cms_data.get("data") or {}
+        # The id may sit at the top level or one level down, under either
+        # name; the resource slug is the last resort.
+        cms_id = (
+            cms_data.get("id")
+            or cms_data.get("resourceId")
+            or nested.get("id")
+            or nested.get("resourceId")
+            or normalized_resource["slug"]
+        )
+        log(task_id, "CMS_CREATE", "INFO", "CMS 入库成功", {"cmsId": cms_id})
+
+        upsert_media_item(
+            _media_item_record(payload, tmdb_result, hdhive_search, cms_id, "SUCCESS")
+        )
+        return _finish_task(task, "SUCCESS", {"result": "CREATED", "cmsId": cms_id})
+    except Exception as error:
+        # Task boundary: every failure becomes a FAILED task record rather
+        # than an exception for the caller.
+        normalized_error = normalize_exception(error)
+        log(task_id, "FAILED", "ERROR", str(normalized_error), normalized_error.to_dict())
+        return _finish_task(task, "FAILED", {"error": normalized_error.to_dict()})