Compare commits

..

2 Commits

Author SHA1 Message Date
LockeShor
abf7d58157 add screenshots
All checks were successful
Docker Image / build (push) Successful in 1m2s
2026-03-02 16:51:08 -05:00
LockeShor
46476aba51 remove excess info 2026-03-02 16:50:24 -05:00
2 changed files with 109 additions and 91 deletions

View File

@@ -14,7 +14,8 @@ The watcher pulls `https://apps.truenas.com/catalog` over plain HTTP (no browser
On changes, it sends Telegram messages with:
- one detailed message per newly added app (name, URL, train, added date, catalog summary, plus extra details parsed from the app page when available)
- one detailed message per newly added app (name, URL, train, added date, catalog summary, and page title)
- screenshot images from the app page, posted as Telegram photos (up to the configured per-app limit)
- removed apps (`-`)
- updated apps (`~`) and field-level diffs
@@ -27,6 +28,7 @@ On changes, it sends Telegram messages with:
- `CATALOG_URL` (default: `https://apps.truenas.com/catalog`)
- `REQUEST_TIMEOUT_SECONDS` (default: `30`)
- `LOG_LEVEL` (default: `INFO`)
- `MAX_SCREENSHOTS_PER_APP` (default: `3`)
## Build

View File

@@ -25,6 +25,7 @@ USER_AGENT = os.getenv(
)
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
MAX_MESSAGE_LEN = 3900
MAX_SCREENSHOTS_PER_APP = int(os.getenv("MAX_SCREENSHOTS_PER_APP", "3"))
@dataclass
@@ -238,77 +239,76 @@ def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[
soup = BeautifulSoup(response.text, "html.parser")
page_title = normalize_text(soup.title.get_text(" ", strip=True)) if soup.title else ""
description = ""
for attrs in (
{"property": "og:description"},
{"name": "description"},
{"name": "twitter:description"},
):
tag = soup.find("meta", attrs=attrs)
if tag and tag.get("content"):
description = normalize_text(str(tag["content"]))
if description:
break
headings: List[str] = []
for tag in soup.find_all(["h1", "h2", "h3"]):
heading = normalize_text(tag.get_text(" ", strip=True))
if not heading:
screenshot_candidates: List[Tuple[int, int, str]] = []
seen_urls: set[str] = set()
og_image = soup.find("meta", attrs={"property": "og:image"})
if og_image and og_image.get("content"):
og_image_url = urljoin(app_url, str(og_image["content"]).strip())
if og_image_url.startswith("http"):
screenshot_candidates.append((2, 0, og_image_url))
for index, tag in enumerate(soup.find_all("img", src=True), start=1):
raw_src = str(tag.get("src", "")).strip()
if not raw_src or raw_src.startswith("data:"):
continue
if heading not in headings:
headings.append(heading)
if len(headings) >= 6:
image_url = urljoin(app_url, raw_src)
if not image_url.startswith("http") or image_url in seen_urls:
continue
seen_urls.add(image_url)
width_value = str(tag.get("width", "")).strip()
height_value = str(tag.get("height", "")).strip()
if width_value.isdigit() and int(width_value) < 200:
continue
if height_value.isdigit() and int(height_value) < 120:
continue
descriptor = " ".join(
[
str(tag.get("alt", "")),
str(tag.get("title", "")),
" ".join(tag.get("class", [])),
str(tag.get("id", "")),
image_url,
]
).lower()
if any(skip in descriptor for skip in ["logo", "favicon", "icon", "avatar", "badge"]):
continue
score = 0
if "screenshot" in descriptor or "screen-shot" in descriptor or "screen shot" in descriptor:
score += 4
if "gallery" in descriptor or "carousel" in descriptor or "preview" in descriptor:
score += 2
if re.search(r"\.(png|jpe?g|webp)(\?|$)", image_url, flags=re.IGNORECASE):
score += 1
if score > 0:
screenshot_candidates.append((score, index, image_url))
screenshot_candidates.sort(key=lambda item: (-item[0], item[1]))
screenshot_urls: List[str] = []
emitted: set[str] = set()
for _, _, image_url in screenshot_candidates:
if image_url in emitted:
continue
emitted.add(image_url)
screenshot_urls.append(image_url)
if len(screenshot_urls) >= MAX_SCREENSHOTS_PER_APP:
break
external_links: List[str] = []
seen_links = set()
for anchor in soup.find_all("a", href=True):
href = str(anchor.get("href", "")).strip()
if not href or href.startswith("#"):
continue
full_href = urljoin(app_url, href)
if not full_href.startswith("http"):
continue
if full_href.startswith(CATALOG_URL):
continue
if full_href in seen_links:
continue
seen_links.add(full_href)
label = normalize_text(anchor.get_text(" ", strip=True))
if not label:
label = full_href
label = truncate_text(label, 60)
external_links.append(f"{label} -> {full_href}")
if len(external_links) >= 5:
break
detected_fields: List[str] = []
body_text = normalize_text(soup.get_text(" ", strip=True))
label_patterns = {
"Version": r"(?:App\s+Version|Version)\s*[:\-]\s*([^\n\r|]{1,80})",
"Chart": r"(?:Chart\s+Version|Helm\s+Chart)\s*[:\-]\s*([^\n\r|]{1,80})",
"Category": r"Category\s*[:\-]\s*([^\n\r|]{1,80})",
"Maintainer": r"Maintainer(?:s)?\s*[:\-]\s*([^\n\r|]{1,120})",
"Homepage": r"Homepage\s*[:\-]\s*([^\n\r|]{1,160})",
"Source": r"Source\s*[:\-]\s*([^\n\r|]{1,160})",
}
for label, pattern in label_patterns.items():
match = re.search(pattern, body_text, flags=re.IGNORECASE)
if match:
value = truncate_text(match.group(1), 120)
detected_fields.append(f"{label}: {value}")
return {
"page_title": page_title,
"description": description,
"headings": headings,
"external_links": external_links,
"detected_fields": detected_fields,
"screenshot_urls": screenshot_urls,
}
def build_new_app_message(session: requests.Session, app: AppSnapshot) -> str:
def build_new_app_message(app: AppSnapshot, page_title: str = "", screenshot_count: int = 0) -> str:
lines: List[str] = [
"🆕 New TrueNAS app detected",
f"Detected: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}",
@@ -322,35 +322,10 @@ def build_new_app_message(session: requests.Session, app: AppSnapshot) -> str:
lines.append(f"Added date: {app.added}")
if app.summary:
lines.append(f"Catalog summary: {truncate_text(app.summary, 700)}")
try:
details = fetch_new_app_page_details(session, app.url)
except requests.RequestException as exc:
logging.warning("Unable to fetch app details for %s: %s", app.url, exc)
details = {}
page_title = str(details.get("page_title", "")) if details else ""
if page_title:
lines.append(f"Page title: {truncate_text(page_title, 180)}")
description = str(details.get("description", "")) if details else ""
if description:
lines.append(f"Description: {truncate_text(description, 1000)}")
detected_fields = details.get("detected_fields", []) if details else []
if isinstance(detected_fields, list):
for field in detected_fields[:6]:
lines.append(str(field))
headings = details.get("headings", []) if details else []
if isinstance(headings, list) and headings:
lines.append(f"Headings: {truncate_text(' | '.join(headings[:6]), 320)}")
external_links = details.get("external_links", []) if details else []
if isinstance(external_links, list) and external_links:
lines.append("External links:")
for link in external_links[:5]:
lines.append(f"- {truncate_text(str(link), 220)}")
if screenshot_count > 0:
lines.append(f"Screenshots: {screenshot_count} attached")
message = "\n".join(lines)
if len(message) <= MAX_MESSAGE_LEN:
@@ -398,6 +373,23 @@ def send_telegram_message(session: requests.Session, text: str) -> None:
response.raise_for_status()
def send_telegram_photo(session: requests.Session, photo_url: str, caption: str = "") -> None:
    """Post a single photo URL to the configured Telegram chat via sendPhoto.

    Logs a warning and returns without sending when the bot token or chat id
    is not configured. An optional caption is truncated before sending.
    Raises ``requests.HTTPError`` if Telegram responds with an error status.
    """
    # Bail out early instead of hitting the API with missing credentials.
    if not (TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID):
        logging.warning("Telegram token/chat id missing; skipping photo")
        return
    payload = {"chat_id": TELEGRAM_CHAT_ID, "photo": photo_url}
    if caption:
        # Keep captions within a safe length for Telegram.
        payload["caption"] = truncate_text(caption, 900)
    response = session.post(
        f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendPhoto",
        json=payload,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
def send_startup_notification(session: requests.Session) -> None:
message = (
"TrueNAS catalog watcher is running ✅\n"
@@ -437,7 +429,31 @@ def run_once(session: requests.Session, first_run: bool) -> bool:
for url in added_urls:
app = current_state[url]
send_telegram_message(session, build_new_app_message(session, app))
page_title = ""
screenshot_urls: List[str] = []
try:
details = fetch_new_app_page_details(session, app.url)
page_title = str(details.get("page_title", ""))
screenshot_data = details.get("screenshot_urls", [])
if isinstance(screenshot_data, list):
screenshot_urls = [str(item) for item in screenshot_data if str(item).startswith("http")]
except requests.RequestException as exc:
logging.warning("Unable to fetch app page details for %s: %s", app.url, exc)
send_telegram_message(
session,
build_new_app_message(app, page_title=page_title, screenshot_count=len(screenshot_urls)),
)
for index, screenshot_url in enumerate(screenshot_urls, start=1):
try:
send_telegram_photo(
session,
screenshot_url,
caption=f"{app.name} screenshot {index}/{len(screenshot_urls)}",
)
except requests.RequestException as exc:
logging.warning("Failed to send screenshot for %s: %s", app.name, exc)
header, summary_lines = build_summary_message(
added_count=len(added_urls),