This commit is contained in:
@@ -14,7 +14,8 @@ The watcher pulls `https://apps.truenas.com/catalog` over plain HTTP (no browser
|
||||
|
||||
On changes, it sends Telegram messages with:
|
||||
|
||||
- one detailed message per newly added app (name, URL, train, added date, catalog summary, plus extra details parsed from the app page when available)
|
||||
- one detailed message per newly added app (name, URL, train, added date, catalog summary, and page title)
|
||||
- screenshot images from the app page, posted as Telegram photos (up to the configured per-app limit)
|
||||
- removed apps (`-`)
|
||||
- updated apps (`~`) and field-level diffs
|
||||
|
||||
@@ -27,6 +28,7 @@ On changes, it sends Telegram messages with:
|
||||
- `CATALOG_URL` (default: `https://apps.truenas.com/catalog`)
|
||||
- `REQUEST_TIMEOUT_SECONDS` (default: `30`)
|
||||
- `LOG_LEVEL` (default: `INFO`)
|
||||
- `MAX_SCREENSHOTS_PER_APP` (default: `3`)
|
||||
|
||||
## Build
|
||||
|
||||
|
||||
178
watcher.py
178
watcher.py
@@ -25,6 +25,7 @@ USER_AGENT = os.getenv(
|
||||
)
|
||||
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
|
||||
MAX_MESSAGE_LEN = 3900
|
||||
MAX_SCREENSHOTS_PER_APP = int(os.getenv("MAX_SCREENSHOTS_PER_APP", "3"))
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -225,6 +226,9 @@ def truncate_text(value: str, limit: int) -> str:
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return f"{text[: max(0, limit - 1)].rstrip()}…"
|
||||
|
||||
|
||||
def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[str, object]:
|
||||
response = session.get(
|
||||
app_url,
|
||||
timeout=REQUEST_TIMEOUT_SECONDS,
|
||||
@@ -235,77 +239,76 @@ def truncate_text(value: str, limit: int) -> str:
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
page_title = normalize_text(soup.title.get_text(" ", strip=True)) if soup.title else ""
|
||||
description = ""
|
||||
for attrs in (
|
||||
{"property": "og:description"},
|
||||
{"name": "description"},
|
||||
{"name": "twitter:description"},
|
||||
):
|
||||
tag = soup.find("meta", attrs=attrs)
|
||||
if tag and tag.get("content"):
|
||||
description = normalize_text(str(tag["content"]))
|
||||
if description:
|
||||
|
||||
screenshot_candidates: List[Tuple[int, int, str]] = []
|
||||
seen_urls: set[str] = set()
|
||||
|
||||
og_image = soup.find("meta", attrs={"property": "og:image"})
|
||||
if og_image and og_image.get("content"):
|
||||
og_image_url = urljoin(app_url, str(og_image["content"]).strip())
|
||||
if og_image_url.startswith("http"):
|
||||
screenshot_candidates.append((2, 0, og_image_url))
|
||||
|
||||
for index, tag in enumerate(soup.find_all("img", src=True), start=1):
|
||||
raw_src = str(tag.get("src", "")).strip()
|
||||
if not raw_src or raw_src.startswith("data:"):
|
||||
continue
|
||||
|
||||
image_url = urljoin(app_url, raw_src)
|
||||
if not image_url.startswith("http") or image_url in seen_urls:
|
||||
continue
|
||||
seen_urls.add(image_url)
|
||||
|
||||
width_value = str(tag.get("width", "")).strip()
|
||||
height_value = str(tag.get("height", "")).strip()
|
||||
if width_value.isdigit() and int(width_value) < 200:
|
||||
continue
|
||||
if height_value.isdigit() and int(height_value) < 120:
|
||||
continue
|
||||
|
||||
descriptor = " ".join(
|
||||
[
|
||||
str(tag.get("alt", "")),
|
||||
str(tag.get("title", "")),
|
||||
" ".join(tag.get("class", [])),
|
||||
str(tag.get("id", "")),
|
||||
image_url,
|
||||
]
|
||||
).lower()
|
||||
|
||||
if any(skip in descriptor for skip in ["logo", "favicon", "icon", "avatar", "badge"]):
|
||||
continue
|
||||
|
||||
score = 0
|
||||
if "screenshot" in descriptor or "screen-shot" in descriptor or "screen shot" in descriptor:
|
||||
score += 4
|
||||
if "gallery" in descriptor or "carousel" in descriptor or "preview" in descriptor:
|
||||
score += 2
|
||||
if re.search(r"\.(png|jpe?g|webp)(\?|$)", image_url, flags=re.IGNORECASE):
|
||||
score += 1
|
||||
|
||||
if score > 0:
|
||||
screenshot_candidates.append((score, index, image_url))
|
||||
|
||||
screenshot_candidates.sort(key=lambda item: (-item[0], item[1]))
|
||||
|
||||
screenshot_urls: List[str] = []
|
||||
emitted: set[str] = set()
|
||||
for _, _, image_url in screenshot_candidates:
|
||||
if image_url in emitted:
|
||||
continue
|
||||
emitted.add(image_url)
|
||||
screenshot_urls.append(image_url)
|
||||
if len(screenshot_urls) >= MAX_SCREENSHOTS_PER_APP:
|
||||
break
|
||||
|
||||
headings: List[str] = []
|
||||
for tag in soup.find_all(["h1", "h2", "h3"]):
|
||||
heading = normalize_text(tag.get_text(" ", strip=True))
|
||||
if not heading:
|
||||
continue
|
||||
if heading not in headings:
|
||||
headings.append(heading)
|
||||
if len(headings) >= 6:
|
||||
break
|
||||
|
||||
external_links: List[str] = []
|
||||
seen_links = set()
|
||||
for anchor in soup.find_all("a", href=True):
|
||||
href = str(anchor.get("href", "")).strip()
|
||||
if not href or href.startswith("#"):
|
||||
continue
|
||||
full_href = urljoin(app_url, href)
|
||||
if not full_href.startswith("http"):
|
||||
continue
|
||||
if full_href.startswith(CATALOG_URL):
|
||||
continue
|
||||
if full_href in seen_links:
|
||||
continue
|
||||
seen_links.add(full_href)
|
||||
|
||||
label = normalize_text(anchor.get_text(" ", strip=True))
|
||||
if not label:
|
||||
label = full_href
|
||||
label = truncate_text(label, 60)
|
||||
external_links.append(f"{label} -> {full_href}")
|
||||
if len(external_links) >= 5:
|
||||
break
|
||||
|
||||
detected_fields: List[str] = []
|
||||
body_text = normalize_text(soup.get_text(" ", strip=True))
|
||||
label_patterns = {
|
||||
"Version": r"(?:App\s+Version|Version)\s*[:\-]\s*([^\n\r|]{1,80})",
|
||||
"Chart": r"(?:Chart\s+Version|Helm\s+Chart)\s*[:\-]\s*([^\n\r|]{1,80})",
|
||||
"Category": r"Category\s*[:\-]\s*([^\n\r|]{1,80})",
|
||||
"Maintainer": r"Maintainer(?:s)?\s*[:\-]\s*([^\n\r|]{1,120})",
|
||||
"Homepage": r"Homepage\s*[:\-]\s*([^\n\r|]{1,160})",
|
||||
"Source": r"Source\s*[:\-]\s*([^\n\r|]{1,160})",
|
||||
}
|
||||
for label, pattern in label_patterns.items():
|
||||
match = re.search(pattern, body_text, flags=re.IGNORECASE)
|
||||
if match:
|
||||
value = truncate_text(match.group(1), 120)
|
||||
detected_fields.append(f"{label}: {value}")
|
||||
|
||||
return {
|
||||
"page_title": page_title,
|
||||
"description": description,
|
||||
"headings": headings,
|
||||
"external_links": external_links,
|
||||
"detected_fields": detected_fields,
|
||||
"screenshot_urls": screenshot_urls,
|
||||
}
|
||||
|
||||
|
||||
def build_new_app_message(session: requests.Session, app: AppSnapshot) -> str:
|
||||
def build_new_app_message(app: AppSnapshot, page_title: str = "", screenshot_count: int = 0) -> str:
|
||||
lines: List[str] = [
|
||||
"🆕 New TrueNAS app detected",
|
||||
f"Detected: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}",
|
||||
@@ -319,6 +322,10 @@ def build_new_app_message(session: requests.Session, app: AppSnapshot) -> str:
|
||||
lines.append(f"Added date: {app.added}")
|
||||
if app.summary:
|
||||
lines.append(f"Catalog summary: {truncate_text(app.summary, 700)}")
|
||||
if page_title:
|
||||
lines.append(f"Page title: {truncate_text(page_title, 180)}")
|
||||
if screenshot_count > 0:
|
||||
lines.append(f"Screenshots: {screenshot_count} attached")
|
||||
|
||||
message = "\n".join(lines)
|
||||
if len(message) <= MAX_MESSAGE_LEN:
|
||||
@@ -366,6 +373,23 @@ def send_telegram_message(session: requests.Session, text: str) -> None:
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
def send_telegram_photo(session: requests.Session, photo_url: str, caption: str = "") -> None:
    """Post one photo URL to the configured Telegram chat via the sendPhoto API.

    Skips silently (with a warning) when the bot token or chat id is not
    configured, mirroring the behavior of send_telegram_message. Raises
    requests.HTTPError on a non-2xx response.
    """
    # Without credentials there is nothing useful to do — warn and bail out.
    if not TELEGRAM_BOT_TOKEN or not TELEGRAM_CHAT_ID:
        logging.warning("Telegram token/chat id missing; skipping photo")
        return

    endpoint = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendPhoto"
    payload = {"chat_id": TELEGRAM_CHAT_ID, "photo": photo_url}
    if caption:
        # Telegram rejects over-long captions; keep well under the 1024 limit.
        payload["caption"] = truncate_text(caption, 900)

    response = session.post(endpoint, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
|
||||
|
||||
|
||||
def send_startup_notification(session: requests.Session) -> None:
|
||||
message = (
|
||||
"TrueNAS catalog watcher is running ✅\n"
|
||||
@@ -405,7 +429,31 @@ def run_once(session: requests.Session, first_run: bool) -> bool:
|
||||
|
||||
for url in added_urls:
|
||||
app = current_state[url]
|
||||
send_telegram_message(session, build_new_app_message(session, app))
|
||||
page_title = ""
|
||||
screenshot_urls: List[str] = []
|
||||
try:
|
||||
details = fetch_new_app_page_details(session, app.url)
|
||||
page_title = str(details.get("page_title", ""))
|
||||
screenshot_data = details.get("screenshot_urls", [])
|
||||
if isinstance(screenshot_data, list):
|
||||
screenshot_urls = [str(item) for item in screenshot_data if str(item).startswith("http")]
|
||||
except requests.RequestException as exc:
|
||||
logging.warning("Unable to fetch app page details for %s: %s", app.url, exc)
|
||||
|
||||
send_telegram_message(
|
||||
session,
|
||||
build_new_app_message(app, page_title=page_title, screenshot_count=len(screenshot_urls)),
|
||||
)
|
||||
|
||||
for index, screenshot_url in enumerate(screenshot_urls, start=1):
|
||||
try:
|
||||
send_telegram_photo(
|
||||
session,
|
||||
screenshot_url,
|
||||
caption=f"{app.name} screenshot {index}/{len(screenshot_urls)}",
|
||||
)
|
||||
except requests.RequestException as exc:
|
||||
logging.warning("Failed to send screenshot for %s: %s", app.name, exc)
|
||||
|
||||
header, summary_lines = build_summary_message(
|
||||
added_count=len(added_urls),
|
||||
|
||||
Reference in New Issue
Block a user