Implement full media crawler workflow with Flask backend and Vue frontend.

Add TMDB search and media detail pages, HDHive resource ingestion flow, unified error handling, Docker single-container runtime, and project docs/config updates for local deployment.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
renjue
2026-05-09 16:16:18 +08:00
parent d3550bf79b
commit 82581d2949
49 changed files with 4959 additions and 0 deletions

View File

@@ -0,0 +1,176 @@
import time
from datetime import datetime
from adapters.cms_adapter import create_resource
from adapters.emby_adapter import exists_by_tmdb_id
from adapters.hdhive_adapter import normalize_resource, search_resource, unlock_link
from adapters.tmdb_adapter import get_media_detail
from error_handling import AppServiceError, normalize_exception
from storage import find_media_item, insert_log, upsert_media_item, upsert_task
def now_iso():
    """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix.

    Returns:
        str: ``datetime.isoformat()`` output of the current UTC wall-clock
        time (no numeric offset) followed by ``"Z"``.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp instead, then drop tzinfo so isoformat() does not emit a
    # "+00:00" offset in front of the "Z" suffix.
    current = datetime.now(timezone.utc).replace(tzinfo=None)
    return current.isoformat() + "Z"
def new_task_id():
    """Build a task identifier of the form ``task_<epoch milliseconds>``."""
    epoch_millis = int(time.time() * 1000)
    return "task_" + str(epoch_millis)
def log(task_id, step, level, message, detail=None):
    """Write one log entry for a task through ``insert_log``.

    Args:
        task_id: Identifier of the task the entry belongs to.
        step: Step label passed through to ``insert_log``.
        level: Level label passed through to ``insert_log``.
        message: Log message text.
        detail: Optional extra data; any falsy value is stored as ``{}``.
    """
    if not detail:
        detail = {}
    timestamp = now_iso()
    insert_log(task_id, step, level, message, detail, timestamp)
def _pick_hdhive_resource(search_data, preferred_slug):
    """Choose which HDHive search hit to ingest.

    ``search_data`` may be a list of hits or a dict carrying the hits under
    ``"items"``. The hit whose ``"slug"`` equals ``preferred_slug`` wins;
    otherwise the first hit is used. Returns ``None`` when there is nothing
    to choose from (or ``search_data`` is neither a list nor a dict).
    """
    if isinstance(search_data, list):
        candidates = search_data
    elif isinstance(search_data, dict):
        candidates = search_data.get("items") or []
    else:
        return None
    if not candidates:
        return None
    if preferred_slug:
        for candidate in candidates:
            # Hits may be None/empty; treat those as having no slug.
            if (candidate or {}).get("slug") == preferred_slug:
                return candidate
    return candidates[0]


def _media_item_record(payload, tmdb_result, hdhive_search, cms_id, ingest_status):
    """Build the record passed to ``upsert_media_item`` for this ingest."""
    meta = tmdb_result["normalized"]
    return {
        "tmdbId": payload["tmdbId"],
        "type": payload["type"],
        "title": meta["title"],
        "year": meta["year"],
        "tmdbRaw": tmdb_result["data"],
        "hdhiveRaw": hdhive_search["data"],
        "cmsId": cms_id,
        "ingestStatus": ingest_status,
    }


def _build_cms_payload(payload, tmdb_result, resource, trace_id):
    """Assemble the body sent to ``create_resource`` from TMDB and HDHive data."""
    meta = tmdb_result["normalized"]
    return {
        "tmdbId": payload["tmdbId"],
        "mediaType": payload["type"],
        "title": meta["title"],
        "originalTitle": meta["originalTitle"],
        "year": meta["year"],
        "overview": meta["overview"],
        "posterPath": meta["posterPath"],
        "rating": meta["rating"],
        "genres": meta["genres"],
        "resource": {
            "title": resource["resourceTitle"],
            "quality": resource["quality"],
            "size": resource["size"],
            "diskType": resource["diskType"],
            "slug": resource["slug"],
            "source": resource["source"],
            "subtitleLanguage": resource["subtitleLanguage"],
            "unlockUrl": resource["unlockUrl"],
        },
        "traceId": trace_id,
    }


def _resolve_cms_id(cms_result, fallback):
    """Extract the CMS identifier from a ``create_resource`` result.

    Looks for ``id`` / ``resourceId`` directly under ``data`` and then under
    a nested ``data``; returns ``fallback`` when none is present or truthy.
    """
    cms_data = cms_result.get("data") or {}
    nested = cms_data.get("data") or {}
    return (
        cms_data.get("id")
        or cms_data.get("resourceId")
        or nested.get("id")
        or nested.get("resourceId")
        or fallback
    )


def _finalize_task(task, status, summary):
    """Stamp the terminal status, finish time and summary on ``task``, persist it, and return it."""
    task["status"] = status
    task["finishedAt"] = now_iso()
    task["summary"] = summary
    upsert_task(task)
    return task


def run_ingest_task(payload):
    """Run one ingest task end to end and return its task record.

    Steps, in order: local dedupe lookup, TMDB detail fetch, HDHive search,
    HDHive unlock, Emby existence check, CMS creation, and local media-item
    upsert. Progress is written through ``log`` and the task record is
    persisted through ``upsert_task``.

    Args:
        payload: Mapping with ``"tmdbId"`` and ``"type"``; an optional
            ``"slug"`` selects a specific HDHive search hit.

    Returns:
        dict: The task record. ``status`` is ``"SUCCESS"`` (with a summary
        result of ``SKIPPED_ALREADY_EXISTS`` or ``CREATED``) or ``"FAILED"``
        (with the normalized error under ``summary["error"]``).

    Raises:
        Exceptions from creating/persisting the initial task record or from
        the failure handler itself propagate; any exception raised by the
        ingest steps is caught and recorded on the task instead.
    """
    task_id = new_task_id()
    trace_id = f"{task_id}_{hex(int(time.time() * 1000))[-6:]}"
    task = {
        "taskId": task_id,
        "traceId": trace_id,
        "status": "RUNNING",
        "inputPayload": payload,
        "startedAt": now_iso(),
        "finishedAt": None,
        "summary": None,
    }
    upsert_task(task)
    log(task_id, "START", "INFO", "任务开始", {"payload": payload, "traceId": trace_id})
    try:
        tmdb_id = payload["tmdbId"]

        # Local idempotency: a previously successful ingest short-circuits.
        deduped = find_media_item(tmdb_id)
        if deduped and deduped.get("ingest_status") == "SUCCESS":
            _finalize_task(
                task,
                "SUCCESS",
                {
                    "result": "SKIPPED_ALREADY_EXISTS",
                    "tmdbId": tmdb_id,
                    "cmsId": deduped.get("cms_id"),
                },
            )
            log(task_id, "DEDUPE", "INFO", "命中本地幂等,跳过", task["summary"])
            return task

        media_type = payload["type"]
        tmdb_result = get_media_detail(tmdb_id, media_type)
        log(task_id, "TMDB_DETAIL", "INFO", "TMDB 元数据获取成功", {"status": tmdb_result["status"]})

        hdhive_search = search_resource(media_type, tmdb_id)
        search_data = hdhive_search.get("data") or {}
        preferred_slug = str(payload.get("slug") or "").strip()
        hdhive_first = _pick_hdhive_resource(search_data, preferred_slug)
        if not hdhive_first:
            raise AppServiceError(
                "HDHIVE 未检索到可用资源",
                category="not_found",
                code="HDHIVE_RESOURCE_NOT_FOUND",
                provider="hdhive",
            )
        log(task_id, "HDHIVE_SEARCH", "INFO", "HDHIVE 检索成功")

        slug = hdhive_first.get("slug")
        if not slug:
            raise AppServiceError(
                "HDHIVE 返回资源缺少 slug无法解锁",
                category="validation",
                code="HDHIVE_INVALID_RESOURCE",
                provider="hdhive",
            )
        # NOTE(review): the unlock runs before the Emby existence check below;
        # if unlocking has a cost, consider checking Emby first — confirm.
        hdhive_unlock = unlock_link(slug)
        normalized_resource = normalize_resource(hdhive_first, hdhive_unlock.get("data"))
        log(task_id, "HDHIVE_UNLOCK", "INFO", "HDHIVE 解锁成功", {"unlockUrl": normalized_resource["unlockUrl"]})

        emby_exists = exists_by_tmdb_id(tmdb_id)
        log(task_id, "EMBY_EXISTS", "INFO", "Emby 查询完成", {"exists": emby_exists["exists"]})
        if emby_exists["exists"]:
            # Already in Emby: record the item locally and skip CMS creation.
            upsert_media_item(
                _media_item_record(
                    payload, tmdb_result, hdhive_search, None, "SKIPPED_ALREADY_EXISTS"
                )
            )
            return _finalize_task(
                task,
                "SUCCESS",
                {"result": "SKIPPED_ALREADY_EXISTS", "source": "EMBY"},
            )

        cms_payload = _build_cms_payload(payload, tmdb_result, normalized_resource, trace_id)
        cms_result = create_resource(cms_payload)
        # Fall back to the resource slug when the CMS response carries no id.
        cms_id = _resolve_cms_id(cms_result, normalized_resource["slug"])
        log(task_id, "CMS_CREATE", "INFO", "CMS 入库成功", {"cmsId": cms_id})

        upsert_media_item(
            _media_item_record(payload, tmdb_result, hdhive_search, cms_id, "SUCCESS")
        )
        return _finalize_task(task, "SUCCESS", {"result": "CREATED", "cmsId": cms_id})
    except Exception as error:
        # Task boundary: every step failure is normalized, logged and stored
        # on the task record rather than raised to the caller.
        normalized_error = normalize_exception(error)
        log(task_id, "FAILED", "ERROR", str(normalized_error), normalized_error.to_dict())
        return _finalize_task(task, "FAILED", {"error": normalized_error.to_dict()})