Implement full media crawler workflow with Flask backend and Vue frontend.

Add TMDB search and media detail pages, HDHive resource ingestion flow, unified error handling, Docker single-container runtime, and project docs/config updates for local deployment.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
renjue
2026-05-09 16:16:18 +08:00
parent d3550bf79b
commit 82581d2949
49 changed files with 4959 additions and 0 deletions

View File

View File

@@ -0,0 +1,146 @@
import time
from config import Config
from http_client import request_json
from error_handling import AppServiceError
# Module-level cache for the token obtained by logging in to the CMS.
# "token" is the last token returned by a login; "expires_at" is the Unix
# timestamp (seconds) after which the cached token is no longer reused.
_CMS_TOKEN_CACHE = {"token": "", "expires_at": 0}
def _resolve_login_url():
    """Return the URL of the CMS login endpoint.

    ``Config.CMS_LOGIN_URL`` takes precedence when set; otherwise the URL is
    derived from ``Config.CMS_BASE_URL``.

    Raises:
        AppServiceError: neither setting is configured.
    """
    explicit_url = Config.CMS_LOGIN_URL
    if explicit_url:
        return explicit_url

    base_url = Config.CMS_BASE_URL
    if not base_url:
        raise AppServiceError(
            "CMS login url is not configured",
            category="validation",
            code="CMS_CONFIG_MISSING",
            provider="cms",
        )
    # Strip trailing slashes so the joined URL never contains "//api".
    return base_url.rstrip("/") + "/api/auth/login"
def _resolve_add_share_url():
    """Return the URL of the CMS "add share download" endpoint.

    ``Config.CMS_ADD_SHARE_URL`` takes precedence when set; otherwise the URL
    is derived from ``Config.CMS_BASE_URL``.

    Raises:
        AppServiceError: neither setting is configured.
    """
    explicit_url = Config.CMS_ADD_SHARE_URL
    if explicit_url:
        return explicit_url

    base_url = Config.CMS_BASE_URL
    if not base_url:
        raise AppServiceError(
            "CMS add share url is not configured",
            category="validation",
            code="CMS_CONFIG_MISSING",
            provider="cms",
        )
    # Strip trailing slashes so the joined URL never contains "//api".
    return base_url.rstrip("/") + "/api/cloud/add_share_down"
def _headers(token):
headers = {"Content-Type": "application/json"}
if token:
headers["Authorization"] = f"Bearer {token}"
return headers
def _extract_cms_token(login_result):
data = login_result.get("data") or {}
token = (data.get("data") or {}).get("token")
if not token:
raise AppServiceError(
"CMS login succeeded but token missing",
category="upstream",
code="CMS_TOKEN_MISSING",
provider="cms",
detail={"response": data},
)
return token
def _login_and_get_token():
    """Obtain a CMS token, logging in with username/password if necessary.

    A configured ``Config.CMS_TOKEN`` is returned as-is without any request.
    Otherwise a login request is sent and the resulting token is stored in
    ``_CMS_TOKEN_CACHE`` with an expiry 3500 seconds from now.

    Returns:
        The token to use for CMS requests.

    Raises:
        AppServiceError: credentials are missing, or the login response
            carries no token.
    """
    static_token = Config.CMS_TOKEN
    if static_token:
        return static_token

    username, password = Config.CMS_USERNAME, Config.CMS_PASSWORD
    if not (username and password):
        raise AppServiceError(
            "CMS username/password is required when CMS_TOKEN is not provided",
            category="validation",
            code="CMS_CONFIG_MISSING",
            provider="cms",
        )

    response = request_json(
        _resolve_login_url(),
        method="POST",
        payload={"username": username, "password": password},
        headers={"Content-Type": "application/json"},
        max_retry=Config.MAX_RETRY,
        retry_delay_ms=Config.RETRY_DELAY_MS,
        provider="cms",
    )
    fresh_token = _extract_cms_token(response)
    _CMS_TOKEN_CACHE.update(
        token=fresh_token,
        expires_at=int(time.time()) + 3500,
    )
    return fresh_token
def _get_cached_token():
    """Return a usable CMS token, preferring configuration, then the cache.

    Order of preference: ``Config.CMS_TOKEN``; a cached token whose
    ``expires_at`` is still in the future; a fresh login.
    """
    if Config.CMS_TOKEN:
        return Config.CMS_TOKEN

    cached_token = _CMS_TOKEN_CACHE["token"]
    if cached_token:
        now = int(time.time())
        if _CMS_TOKEN_CACHE["expires_at"] > now:
            return cached_token
    return _login_and_get_token()
def _should_refresh_token(response_data):
code = (response_data or {}).get("code")
msg = (response_data or {}).get("msg") or (response_data or {}).get("message") or ""
return code != 200 and msg != "提取分享链接失败"
def _add_share(url_value, token):
    """POST a share URL to the CMS add-share endpoint.

    Args:
        url_value: The share link to submit.
        token: Bearer token for the request (may be falsy).

    Returns:
        Whatever ``request_json`` returns for the call.
    """
    endpoint = _resolve_add_share_url()
    body = {"url": url_value}
    request_headers = _headers(token)
    return request_json(
        endpoint,
        method="POST",
        payload=body,
        headers=request_headers,
        max_retry=Config.MAX_RETRY,
        retry_delay_ms=Config.RETRY_DELAY_MS,
        provider="cms",
    )
def _ingest_error(response_data):
    """Build the AppServiceError for a CMS add-share response that failed.

    The share-link extraction failure is a business-rule rejection of the
    link itself; every other failure is treated as an upstream problem. The
    message is read from ``msg`` or ``message``, matching
    ``_should_refresh_token``.
    """
    message = response_data.get("msg") or response_data.get("message") or ""
    category = "business_rule" if message == "提取分享链接失败" else "upstream"
    return AppServiceError(
        message or "CMS ingest failed",
        category=category,
        code=str(response_data.get("code") or "CMS_INGEST_FAILED"),
        provider="cms",
        detail={"response": response_data},
    )


def create_resource(payload):
    """Submit a share link to the CMS, retrying once with a fresh token.

    The link is taken from ``payload["resource"]["unlockUrl"]`` and falls
    back to ``payload["url"]``. If the first attempt fails in a way that
    ``_should_refresh_token`` attributes to the token, a new token is
    obtained and the request is sent once more.

    Args:
        payload: Mapping with the resource to ingest; ``None`` is treated as
            an empty mapping.

    Returns:
        The ``request_json`` result of the attempt that succeeded.

    Raises:
        AppServiceError: no share link was supplied, or the CMS answered with
            a code other than 200. The category is "business_rule" for a
            share-link extraction failure on either attempt, and "upstream"
            otherwise.
    """
    payload = payload or {}
    resource = payload.get("resource") or {}
    share_url = resource.get("unlockUrl") or payload.get("url")
    if not share_url:
        raise AppServiceError(
            "CMS ingest requires unlockUrl",
            category="validation",
            code="CMS_INPUT_INVALID",
            provider="cms",
        )

    result = _add_share(share_url, _get_cached_token())
    data = result.get("data") or {}

    if _should_refresh_token(data):
        # The cached token may be stale: log in again and retry exactly once.
        result = _add_share(share_url, _login_and_get_token())
        data = result.get("data") or {}

    if (data.get("code") or 0) != 200:
        raise _ingest_error(data)
    return result

View File

@@ -0,0 +1,23 @@
from config import Config
from http_client import request_json
def _headers():
    """Build the request headers for Emby calls.

    ``X-Emby-Token`` is included only when ``Config.EMBY_TOKEN`` is set.
    """
    token = Config.EMBY_TOKEN
    token_part = {"X-Emby-Token": token} if token else {}
    return {"Content-Type": "application/json", **token_part}
def exists_by_tmdb_id(tmdb_id):
    """Check whether Emby holds an item with the given TMDB id.

    Args:
        tmdb_id: TMDB identifier; it is converted to text and URL-escaped.

    Returns:
        The ``request_json`` result with an added boolean ``"exists"`` entry,
        which is true when the response's ``TotalRecordCount`` is above zero.
    """
    # Imported locally so this function does not depend on module-level
    # imports beyond Config and request_json.
    from urllib.parse import quote

    # Tolerate a trailing slash on the configured base URL.
    base_url = (Config.EMBY_BASE_URL or "").rstrip("/")
    # Escape the id so a caller-supplied value cannot add query parameters.
    provider_id = quote(f"Tmdb.{tmdb_id}", safe="")
    url = f"{base_url}/Items?AnyProviderIdEquals={provider_id}&Recursive=true&Limit=1"
    result = request_json(
        url,
        headers=_headers(),
        max_retry=Config.MAX_RETRY,
        retry_delay_ms=Config.RETRY_DELAY_MS,
        provider="emby",
    )
    data = result.get("data")
    # A non-dict body (e.g. an error string) counts as "no records".
    total = data.get("TotalRecordCount") if isinstance(data, dict) else 0
    result["exists"] = (total or 0) > 0
    return result

View File

@@ -0,0 +1,60 @@
from config import Config
from http_client import request_json
def _headers():
    """Build the request headers for HDHive calls.

    The API key is always sent; a bearer ``Authorization`` header is added
    only when ``Config.HDHIVE_ACCESS_TOKEN`` is set.
    """
    base_headers = {
        "Accept": "application/json",
        "X-API-Key": Config.HDHIVE_API_KEY,
    }
    access_token = Config.HDHIVE_ACCESS_TOKEN
    if not access_token:
        return base_headers
    return {**base_headers, "Authorization": f"Bearer {access_token}"}
def search_resource(media_type, tmdb_id):
    """Query HDHive for resources attached to a TMDB title.

    Args:
        media_type: Media type path segment.
        tmdb_id: TMDB identifier path segment.

    Returns:
        Whatever ``request_json`` returns for the GET request.
    """
    # Imported locally so this function does not depend on module-level
    # imports beyond Config and request_json.
    from urllib.parse import quote

    # Escape both segments so a caller-supplied value containing "/", "?" or
    # "#" cannot redirect the request to a different path or add a query.
    type_segment = quote(str(media_type), safe="")
    id_segment = quote(str(tmdb_id), safe="")
    url = f"{Config.HDHIVE_BASE_URL}/api/open/resources/{type_segment}/{id_segment}"
    return request_json(
        url,
        headers=_headers(),
        max_retry=Config.MAX_RETRY,
        retry_delay_ms=Config.RETRY_DELAY_MS,
        provider="hdhive",
    )
def unlock_link(slug):
    """Ask HDHive to unlock the resource identified by ``slug``.

    Args:
        slug: Resource slug sent in the JSON request body.

    Returns:
        Whatever ``request_json`` returns for the POST request.
    """
    endpoint = f"{Config.HDHIVE_BASE_URL}/api/open/resources/unlock"
    body = {"slug": slug}
    # Start from the shared headers and declare the JSON body type on top.
    request_headers = dict(_headers())
    request_headers["Content-Type"] = "application/json"
    return request_json(
        endpoint,
        method="POST",
        payload=body,
        headers=request_headers,
        max_retry=Config.MAX_RETRY,
        retry_delay_ms=Config.RETRY_DELAY_MS,
        provider="hdhive",
    )
def _join_list_field(value):
    """Render a list field as a ", "-separated string; non-lists give ""."""
    if not isinstance(value, list):
        return ""
    # str() keeps numeric entries from breaking join; None entries are dropped.
    return ", ".join(str(item) for item in value if item is not None)


def _empty_if_none(value):
    """Return "" for None so JSON nulls behave like missing keys."""
    return "" if value is None else value


def normalize_resource(search_data, unlock_data):
    """Flatten an HDHive search entry and unlock result into one record.

    Args:
        search_data: Mapping describing the resource (title, resolution,
            size, ...); anything that is not a dict is treated as empty.
        unlock_data: Mapping with the unlocked link (``full_url`` or
            ``url``); anything that is not a dict is treated as empty.

    Returns:
        A dict with the keys ``resourceTitle``, ``quality``, ``size``,
        ``diskType``, ``source``, ``subtitleLanguage``, ``slug``,
        ``unlockUrl``, ``availability`` and ``raw``. List-valued fields are
        joined with ", ". ``availability`` is "available" when an unlock URL
        is present and "unknown" otherwise. ``raw`` carries both inputs
        unchanged.
    """
    search = search_data if isinstance(search_data, dict) else {}
    unlock = unlock_data if isinstance(unlock_data, dict) else {}
    unlock_url = unlock.get("full_url") or unlock.get("url") or ""

    return {
        "resourceTitle": _empty_if_none(search.get("title")),
        "quality": _join_list_field(search.get("video_resolution")),
        "size": _empty_if_none(search.get("share_size")),
        "diskType": _empty_if_none(search.get("pan_type")),
        "source": _join_list_field(search.get("source")),
        "subtitleLanguage": _join_list_field(search.get("subtitle_language")),
        "slug": _empty_if_none(search.get("slug")),
        "unlockUrl": unlock_url,
        "availability": "available" if unlock_url else "unknown",
        "raw": {"searchData": search_data, "unlockData": unlock_data},
    }

View File

@@ -0,0 +1,56 @@
from config import Config
from http_client import request_json
from urllib.parse import quote
def _headers():
    """Build the request headers for TMDB calls.

    A bearer ``Authorization`` header is included only when
    ``Config.TMDB_TOKEN`` is set.
    """
    token = Config.TMDB_TOKEN
    auth_part = {"Authorization": f"Bearer {token}"} if token else {}
    return {"Content-Type": "application/json", **auth_part}
def search_media(query, media_type="movie", page=1):
    """Search TMDB for movies or TV shows matching ``query``.

    Args:
        query: Search text; converted to ``str`` and URL-escaped.
        media_type: "tv" searches TV shows; any other value searches movies.
        page: Result page to request; converted to ``str`` and URL-escaped.

    Returns:
        The ``request_json`` result with an added ``"items"`` entry that is
        always a list: the response's ``results`` when that is a list,
        otherwise an empty list.
    """
    normalized_type = "tv" if media_type == "tv" else "movie"
    encoded_query = quote(str(query))
    # Escape the page too so it cannot introduce extra query parameters.
    encoded_page = quote(str(page), safe="")
    url = (
        f"{Config.TMDB_BASE_URL}/search/{normalized_type}"
        f"?language=zh-CN&query={encoded_query}&page={encoded_page}"
    )
    result = request_json(
        url,
        headers=_headers(),
        max_retry=Config.MAX_RETRY,
        retry_delay_ms=Config.RETRY_DELAY_MS,
        provider="tmdb",
    )
    data = result.get("data")
    found = data.get("results") if isinstance(data, dict) else None
    # Callers get a list even when "results" is absent or null.
    result["items"] = found if isinstance(found, list) else []
    return result
def get_media_detail(tmdb_id, media_type):
    """Fetch one TMDB title and attach a normalized summary of it.

    Args:
        tmdb_id: TMDB identifier; converted to ``str`` and URL-escaped for
            the request path, and echoed unchanged as ``tmdbId``.
        media_type: "tv" fetches a TV show; any other value fetches a movie.

    Returns:
        The ``request_json`` result with an added ``"normalized"`` dict
        holding ``tmdbId``, ``type``, ``title``, ``originalTitle``,
        ``overview``, ``year``, ``rating``, ``posterPath``, ``genres``,
        ``seasons`` and ``raw``. A response body that is not a dict is
        treated as empty.
    """
    normalized_type = "tv" if media_type == "tv" else "movie"
    # Escape the id so it cannot alter the request path or add a query.
    id_segment = quote(str(tmdb_id), safe="")
    url = f"{Config.TMDB_BASE_URL}/{normalized_type}/{id_segment}?language=zh-CN"
    result = request_json(
        url,
        headers=_headers(),
        max_retry=Config.MAX_RETRY,
        retry_delay_ms=Config.RETRY_DELAY_MS,
        provider="tmdb",
    )
    body = result.get("data")
    data = body if isinstance(body, dict) else {}

    # "genres"/"seasons" may be null or absent; only real lists are walked.
    genres = data.get("genres")
    genre_names = (
        [g["name"] for g in genres if isinstance(g, dict) and g.get("name")]
        if isinstance(genres, list)
        else []
    )
    seasons = data.get("seasons")
    season_count = len(seasons) if isinstance(seasons, list) else 0

    # Movies carry title/release_date; TV shows carry name/first_air_date.
    release = data.get("release_date") or data.get("first_air_date") or ""

    result["normalized"] = {
        "tmdbId": tmdb_id,
        "type": normalized_type,
        "title": data.get("title") or data.get("name") or "",
        "originalTitle": data.get("original_title") or data.get("original_name") or "",
        "overview": data.get("overview") or "",
        "year": release[:4],
        "rating": data.get("vote_average"),
        "posterPath": data.get("poster_path") or "",
        "genres": genre_names,
        "seasons": season_count,
        "raw": data,
    }
    return result