From 7471c3d36dc9a123693a7c1ab76a96b9de8f974c Mon Sep 17 00:00:00 2001
From: LockeShor <75901583+LockeShor@users.noreply.github.com>
Date: Mon, 2 Mar 2026 18:31:01 -0500
Subject: [PATCH] use correct screenshots

---
 README.md  |   1 +
 watcher.py | 106 ++++++++++++++++++++++-------------------------------
 2 files changed, 45 insertions(+), 62 deletions(-)

diff --git a/README.md b/README.md
index eb93fe0..b4d05f4 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,7 @@ It also listens for Telegram commands from the configured chat:
 - `LOG_LEVEL` (default: `INFO`)
 - `MAX_SCREENSHOTS_PER_APP` (default: `3`)
 - `TELEGRAM_POLL_SECONDS` (default: `10`)
+- `MEDIA_BASE_URL` (default: `https://media.sys.truenas.net`)
 
 ## Build
 
diff --git a/watcher.py b/watcher.py
index 5189b27..12930c1 100644
--- a/watcher.py
+++ b/watcher.py
@@ -28,6 +28,7 @@ LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
 MAX_MESSAGE_LEN = 3900
 MAX_SCREENSHOTS_PER_APP = int(os.getenv("MAX_SCREENSHOTS_PER_APP", "3"))
 TELEGRAM_POLL_SECONDS = int(os.getenv("TELEGRAM_POLL_SECONDS", "10"))
+MEDIA_BASE_URL = os.getenv("MEDIA_BASE_URL", "https://media.sys.truenas.net")  # host probed for app screenshots
 
 last_telegram_update_id: Optional[int] = None
 
@@ -246,6 +247,47 @@ def truncate_text(value: str, limit: int) -> str:
     return f"{text[: max(0, limit - 1)].rstrip()}…"
 
 
+def extract_app_id_from_url(app_url: str) -> str:
+    """Return the path segment after "catalog", or the last segment (empty if none)."""
+    segments = [segment for segment in urlparse(app_url).path.split("/") if segment]
+    if not segments:
+        return ""
+
+    # Prefer the segment right after the first "catalog"; otherwise use the last one.
+    try:
+        return segments[segments.index("catalog") + 1]
+    except (ValueError, IndexError):
+        return segments[-1]
+
+
+def build_storj_screenshot_urls(session: requests.Session, app_id: str) -> List[str]:
+    """Return the numbered screenshot URLs the media host actually serves for app_id.
+
+    Probing stops at the first request error, non-200 status, or non-image
+    Content-Type, so at most MAX_SCREENSHOTS_PER_APP URLs are returned.
+    """
+    if not app_id:
+        return []
+
+    screenshots_root = f"{MEDIA_BASE_URL.rstrip('/')}/apps/{app_id}/screenshots"
+    probe_headers = {"User-Agent": USER_AGENT}
+    found: List[str] = []
+    for index in range(1, MAX_SCREENSHOTS_PER_APP + 1):
+        candidate = f"{screenshots_root}/screenshot{index}.png"
+        try:
+            reply = session.get(candidate, timeout=REQUEST_TIMEOUT_SECONDS, headers=probe_headers)
+        except requests.RequestException:
+            break
+        if reply.status_code != 200:
+            break
+        declared_type = str(reply.headers.get("Content-Type", "")).lower()
+        if declared_type and "image" not in declared_type:  # a missing header is tolerated
+            break
+        found.append(candidate)
+
+    return found
+
+
 def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[str, object]:
     response = session.get(
         app_url,
@@ -257,68 +299,8 @@ def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[
     soup = BeautifulSoup(response.text, "html.parser")
 
     page_title = normalize_text(soup.title.get_text(" ", strip=True)) if soup.title else ""
-
-    screenshot_candidates: List[Tuple[int, int, str]] = []
-    seen_urls: set[str] = set()
-
-    og_image = soup.find("meta", attrs={"property": "og:image"})
-    if og_image and og_image.get("content"):
-        og_image_url = urljoin(app_url, str(og_image["content"]).strip())
-        if og_image_url.startswith("http"):
-            screenshot_candidates.append((2, 0, og_image_url))
-
-    for index, tag in enumerate(soup.find_all("img", src=True), start=1):
-        raw_src = str(tag.get("src", "")).strip()
-        if not raw_src or raw_src.startswith("data:"):
-            continue
-
-        image_url = urljoin(app_url, raw_src)
-        if not image_url.startswith("http") or image_url in seen_urls:
-            continue
-        seen_urls.add(image_url)
-
-        width_value = str(tag.get("width", "")).strip()
-        height_value = str(tag.get("height", "")).strip()
-        if width_value.isdigit() and int(width_value) < 200:
-            continue
-        if height_value.isdigit() and int(height_value) < 120:
-            continue
-
-        descriptor = " ".join(
-            [
-                str(tag.get("alt", "")),
-                str(tag.get("title", "")),
-                " ".join(tag.get("class", [])),
-                str(tag.get("id", "")),
-                image_url,
-            ]
-        ).lower()
-
-        if any(skip in descriptor for skip in ["logo", "favicon", "icon", "avatar", "badge"]):
-            continue
-
-        score = 0
-        if "screenshot" in descriptor or "screen-shot" in descriptor or "screen shot" in descriptor:
-            score += 4
-        if "gallery" in descriptor or "carousel" in descriptor or "preview" in descriptor:
-            score += 2
-        if re.search(r"\.(png|jpe?g|webp)(\?|$)", image_url, flags=re.IGNORECASE):
-            score += 1
-
-        if score > 0:
-            screenshot_candidates.append((score, index, image_url))
-
-    screenshot_candidates.sort(key=lambda item: (-item[0], item[1]))
-
-    screenshot_urls: List[str] = []
-    emitted: set[str] = set()
-    for _, _, image_url in screenshot_candidates:
-        if image_url in emitted:
-            continue
-        emitted.add(image_url)
-        screenshot_urls.append(image_url)
-        if len(screenshot_urls) >= MAX_SCREENSHOTS_PER_APP:
-            break
+    app_id = extract_app_id_from_url(app_url)
+    screenshot_urls = build_storj_screenshot_urls(session, app_id)
 
     return {
         "page_title": page_title,