Implement full media crawler workflow with Flask backend and Vue frontend.

Add TMDB search and media detail pages, HDHive resource ingestion flow, unified error handling, Docker single-container runtime, and project docs/config updates for local deployment.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
renjue
2026-05-09 16:16:18 +08:00
parent d3550bf79b
commit 82581d2949
49 changed files with 4959 additions and 0 deletions

View File

View File

@@ -0,0 +1,73 @@
from adapters.hdhive_adapter import normalize_resource, search_resource, unlock_link
from adapters.tmdb_adapter import get_media_detail, search_media
from error_handling import AppServiceError
def search_media_by_keyword(query, media_type):
    """Search media through the TMDB adapter and reshape the hits.

    Args:
        query: Search keyword forwarded unchanged to ``search_media``.
        media_type: Media type forwarded to ``search_media`` and echoed back
            as the ``type`` field of every returned item.

    Returns:
        ``{"items": [...]}`` where each entry carries the keys ``id``,
        ``type``, ``title``, ``overview``, ``posterPath``, ``releaseDate``
        and ``voteAverage``.
    """

    def _to_view(hit):
        # Titles and dates are read from either of two keys
        # (title/name, release_date/first_air_date); the first truthy wins.
        return {
            "id": hit.get("id"),
            "type": media_type,
            "title": hit.get("title") or hit.get("name"),
            "overview": hit.get("overview") or "",
            "posterPath": hit.get("poster_path") or "",
            "releaseDate": hit.get("release_date") or hit.get("first_air_date") or "",
            "voteAverage": hit.get("vote_average"),
        }

    response = search_media(query, media_type)
    hits = response.get("items") or []
    return {"items": [_to_view(hit) for hit in hits]}
def get_media_resources(media_type, tmdb_id):
    """Collect media detail plus every HDHive resource found for a TMDB id.

    Each search hit that has a ``slug`` is unlocked on a best-effort basis:
    an unlock failure does not abort the request, it is reported on that
    resource through its ``unlockError`` field instead.

    Args:
        media_type: Media type passed to the TMDB and HDHive adapters.
        tmdb_id: TMDB identifier of the media.

    Returns:
        ``{"media": <normalized detail>, "resources": [<normalized>, ...]}``.
    """

    def _try_unlock(slug):
        # Returns (unlock_data, error_text); never raises.
        if not slug:
            return {}, None
        try:
            return unlock_link(slug).get("data") or {}, None
        except Exception as exc:
            return {}, str(exc)

    media_detail = get_media_detail(tmdb_id, media_type)
    search_response = search_resource(media_type, tmdb_id)

    # "data" is handled both as a plain list and as a dict wrapping "items".
    candidates = search_response.get("data") or []
    if isinstance(candidates, dict):
        candidates = candidates.get("items") or []

    resources = []
    for entry in candidates:
        unlock_data, unlock_error = _try_unlock((entry or {}).get("slug"))
        resource = normalize_resource(entry, unlock_data)
        resource["unlockError"] = unlock_error
        resources.append(resource)

    return {"media": media_detail.get("normalized"), "resources": resources}
def validate_media_query(query, media_type):
    """Validate the inputs of a media search request.

    Args:
        query: Search keyword; any falsy value is rejected.
        media_type: Checked by ``validate_media_type`` once ``query`` passes.

    Raises:
        AppServiceError: With code ``INVALID_INPUT`` and status 400 when
            ``query`` is falsy, or as raised by ``validate_media_type``.
    """
    if query:
        validate_media_type(media_type)
        return
    raise AppServiceError(
        "query is required",
        category="validation",
        code="INVALID_INPUT",
        status=400,
        provider="api",
    )
def validate_media_type(media_type):
    """Ensure ``media_type`` is one of the two supported values.

    Args:
        media_type: Expected to equal ``"movie"`` or ``"tv"``.

    Raises:
        AppServiceError: With code ``INVALID_INPUT`` and status 400 for any
            other value.
    """
    if media_type == "movie" or media_type == "tv":
        return
    raise AppServiceError(
        "type must be movie or tv",
        category="validation",
        code="INVALID_INPUT",
        status=400,
        provider="api",
    )

View File

@@ -0,0 +1,176 @@
import time
from datetime import datetime
from adapters.cms_adapter import create_resource
from adapters.emby_adapter import exists_by_tmdb_id
from adapters.hdhive_adapter import normalize_resource, search_resource, unlock_link
from adapters.tmdb_adapter import get_media_detail
from error_handling import AppServiceError, normalize_exception
from storage import find_media_item, insert_log, upsert_media_item, upsert_task
def now_iso():
    """Return the current UTC time as an ISO-8601 string ending in ``Z``.

    Returns:
        A string such as ``2026-05-09T08:16:18.123456Z``.
    """
    # Imported locally because the module header only imports ``datetime``.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so isoformat() emits no "+00:00" offset,
    # keeping the output identical in shape to what utcnow() produced.
    current = datetime.now(timezone.utc).replace(tzinfo=None)
    return current.isoformat() + "Z"
def new_task_id():
    """Build a task identifier from the current epoch time in milliseconds.

    Returns:
        A string of the form ``task_<milliseconds since the epoch>``.
    """
    millis = int(time.time() * 1000)
    return "task_" + str(millis)
def log(task_id, step, level, message, detail=None):
    """Record one task log entry via ``insert_log``, stamped with ``now_iso()``.

    Args:
        task_id: Identifier of the task the entry belongs to.
        step: Step label, e.g. ``"START"``.
        level: Severity label, e.g. ``"INFO"``.
        message: Log message text.
        detail: Optional extra data; any falsy value is stored as ``{}``.
    """
    extra = detail if detail else {}
    insert_log(task_id, step, level, message, extra, now_iso())
def _select_hdhive_resource(search_data, preferred_slug):
    """Pick the HDHive search hit to ingest, or return ``None`` if there is none."""
    # The search payload is handled both as a plain list of hits and as a
    # dict wrapping the hits under "items".
    if isinstance(search_data, dict):
        candidates = search_data.get("items") or []
    elif isinstance(search_data, list):
        candidates = search_data
    else:
        candidates = []
    if not candidates:
        return None
    if preferred_slug:
        for candidate in candidates:
            if (candidate or {}).get("slug") == preferred_slug:
                return candidate
    # No preferred slug, or it was not among the hits: use the first hit.
    return candidates[0]


def _media_item_record(payload, tmdb_result, hdhive_search, cms_id, ingest_status):
    """Build the record handed to ``upsert_media_item``."""
    normalized = tmdb_result["normalized"]
    return {
        "tmdbId": payload["tmdbId"],
        "type": payload["type"],
        "title": normalized["title"],
        "year": normalized["year"],
        "tmdbRaw": tmdb_result["data"],
        "hdhiveRaw": hdhive_search["data"],
        "cmsId": cms_id,
        "ingestStatus": ingest_status,
    }


def _build_cms_payload(payload, tmdb_result, normalized_resource, trace_id):
    """Build the request body handed to ``create_resource``."""
    normalized = tmdb_result["normalized"]
    return {
        "tmdbId": payload["tmdbId"],
        "mediaType": payload["type"],
        "title": normalized["title"],
        "originalTitle": normalized["originalTitle"],
        "year": normalized["year"],
        "overview": normalized["overview"],
        "posterPath": normalized["posterPath"],
        "rating": normalized["rating"],
        "genres": normalized["genres"],
        "resource": {
            "title": normalized_resource["resourceTitle"],
            "quality": normalized_resource["quality"],
            "size": normalized_resource["size"],
            "diskType": normalized_resource["diskType"],
            "slug": normalized_resource["slug"],
            "source": normalized_resource["source"],
            "subtitleLanguage": normalized_resource["subtitleLanguage"],
            "unlockUrl": normalized_resource["unlockUrl"],
        },
        "traceId": trace_id,
    }


def _extract_cms_id(cms_result, fallback):
    """Return the first truthy id found in the CMS response, else ``fallback``."""
    cms_data = cms_result.get("data") or {}
    nested = cms_data.get("data") or {}
    return (
        cms_data.get("id")
        or cms_data.get("resourceId")
        or nested.get("id")
        or nested.get("resourceId")
        or fallback
    )


def _finish_task(task, status, summary):
    """Stamp the final status, finish time and summary on ``task`` and persist it."""
    task["status"] = status
    task["finishedAt"] = now_iso()
    task["summary"] = summary
    upsert_task(task)
    return task


def run_ingest_task(payload):
    """Run one ingest task end to end and return the persisted task record.

    Steps, each written to the task log: local dedupe check, TMDB detail
    lookup, HDHive search and unlock, Emby existence check, CMS creation,
    and the final media-item upsert. Any exception raised after the task
    has been created is normalized, logged, and stored on the task instead
    of propagating to the caller.

    Args:
        payload: Dict with ``tmdbId`` and ``type``; an optional ``slug``
            names the preferred HDHive resource. When that slug is not
            among the search hits, the first hit is used instead.

    Returns:
        The task dict, with ``status`` set to ``"SUCCESS"`` (created or
        skipped) or ``"FAILED"`` and ``summary`` describing the outcome.
    """
    task_id = new_task_id()
    trace_id = f"{task_id}_{hex(int(time.time() * 1000))[-6:]}"
    task = {
        "taskId": task_id,
        "traceId": trace_id,
        "status": "RUNNING",
        "inputPayload": payload,
        "startedAt": now_iso(),
        "finishedAt": None,
        "summary": None,
    }
    upsert_task(task)
    log(task_id, "START", "INFO", "任务开始", {"payload": payload, "traceId": trace_id})
    try:
        # Local idempotency: a media item already ingested successfully is
        # reported as skipped without contacting any upstream service.
        deduped = find_media_item(payload["tmdbId"])
        if deduped and deduped.get("ingest_status") == "SUCCESS":
            _finish_task(
                task,
                "SUCCESS",
                {
                    "result": "SKIPPED_ALREADY_EXISTS",
                    "tmdbId": payload["tmdbId"],
                    "cmsId": deduped.get("cms_id"),
                },
            )
            log(task_id, "DEDUPE", "INFO", "命中本地幂等,跳过", task["summary"])
            return task

        tmdb_result = get_media_detail(payload["tmdbId"], payload["type"])
        log(task_id, "TMDB_DETAIL", "INFO", "TMDB 元数据获取成功", {"status": tmdb_result["status"]})

        hdhive_search = search_resource(payload["type"], payload["tmdbId"])
        search_data = hdhive_search.get("data") or {}
        preferred_slug = str(payload.get("slug") or "").strip()
        hdhive_first = _select_hdhive_resource(search_data, preferred_slug)
        if not hdhive_first:
            raise AppServiceError(
                "HDHIVE 未检索到可用资源",
                category="not_found",
                code="HDHIVE_RESOURCE_NOT_FOUND",
                provider="hdhive",
            )
        log(task_id, "HDHIVE_SEARCH", "INFO", "HDHIVE 检索成功")

        slug = hdhive_first.get("slug")
        if not slug:
            raise AppServiceError(
                "HDHIVE 返回资源缺少 slug无法解锁",
                category="validation",
                code="HDHIVE_INVALID_RESOURCE",
                provider="hdhive",
            )
        hdhive_unlock = unlock_link(slug)
        # Pass an empty dict rather than None when the unlock response has no
        # "data", so normalize_resource always receives a mapping.
        normalized_resource = normalize_resource(hdhive_first, hdhive_unlock.get("data") or {})
        log(task_id, "HDHIVE_UNLOCK", "INFO", "HDHIVE 解锁成功", {"unlockUrl": normalized_resource["unlockUrl"]})

        emby_exists = exists_by_tmdb_id(payload["tmdbId"])
        log(task_id, "EMBY_EXISTS", "INFO", "Emby 查询完成", {"exists": emby_exists["exists"]})
        if emby_exists["exists"]:
            # Already present in Emby: record the item locally, skip the CMS.
            upsert_media_item(
                _media_item_record(payload, tmdb_result, hdhive_search, None, "SKIPPED_ALREADY_EXISTS")
            )
            return _finish_task(
                task,
                "SUCCESS",
                {"result": "SKIPPED_ALREADY_EXISTS", "source": "EMBY"},
            )

        cms_result = create_resource(
            _build_cms_payload(payload, tmdb_result, normalized_resource, trace_id)
        )
        # Fall back to the resource slug when the CMS response carries no id.
        cms_id = _extract_cms_id(cms_result, normalized_resource["slug"])
        log(task_id, "CMS_CREATE", "INFO", "CMS 入库成功", {"cmsId": cms_id})

        upsert_media_item(
            _media_item_record(payload, tmdb_result, hdhive_search, cms_id, "SUCCESS")
        )
        return _finish_task(task, "SUCCESS", {"result": "CREATED", "cmsId": cms_id})
    except Exception as error:
        # Task boundary: every failure is normalized, logged and stored on
        # the task so the caller always gets a task record back.
        normalized_error = normalize_exception(error)
        log(task_id, "FAILED", "ERROR", str(normalized_error), normalized_error.to_dict())
        return _finish_task(task, "FAILED", {"error": normalized_error.to_dict()})