add new item details
All checks were successful
Docker Image / build (push) Successful in 1m5s

This commit is contained in:
LockeShor
2026-03-02 16:42:28 -05:00
parent 48b17634b4
commit e505fbe25a
2 changed files with 183 additions and 14 deletions

View File

@@ -14,7 +14,7 @@ The watcher pulls `https://apps.truenas.com/catalog` over plain HTTP (no browser
On changes, it sends Telegram messages with:
- added apps (`+`) - one detailed message per newly added app (name, URL, train, added date, catalog summary, plus extra details parsed from the app page when available)
- removed apps (`-`)
- updated apps (`~`) and field-level diffs

View File

@@ -2,6 +2,7 @@ import hashlib
import json
import logging
import os
import re
import sys
import time
from dataclasses import dataclass, asdict
@@ -156,10 +157,10 @@ def format_field_change(label: str, old: str, new: str) -> str:
return f"{label}: '{old_clean}' -> '{new_clean}'" return f"{label}: '{old_clean}' -> '{new_clean}'"
def build_diff_message( def collect_diffs(
previous: Dict[str, AppSnapshot], previous: Dict[str, AppSnapshot],
current: Dict[str, AppSnapshot], current: Dict[str, AppSnapshot],
) -> Tuple[str, List[str], int]: ) -> Tuple[List[str], List[str], List[str], int]:
prev_urls = set(previous.keys()) prev_urls = set(previous.keys())
curr_urls = set(current.keys()) curr_urls = set(current.keys())
@@ -193,22 +194,172 @@ def build_diff_message(
for detail in details: for detail in details:
changed_lines.append(f" - {detail}") changed_lines.append(f" - {detail}")
return added_urls, removed_urls, changed_lines, updated_count
def build_summary_message(
    added_count: int,
    removed_urls: List[str],
    changed_lines: List[str],
    updated_count: int,
    previous: Dict[str, AppSnapshot],
) -> Tuple[str, List[str]]:
    """Compose the summary notification: a header plus its body lines.

    Added apps are announced with their own detailed messages elsewhere,
    so the summary body only lists removals (resolved against the
    ``previous`` snapshot map) followed by the field-level change lines;
    the header carries all three counts.
    """
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
    removed_count = len(removed_urls)
    header = (
        f"TrueNAS catalog changed at {timestamp}\n"
        f"Added: {added_count} | Removed: {removed_count} | Updated: {updated_count}"
    )
    lines: List[str] = []
    for url in removed_urls:
        snapshot = previous[url]
        lines.append(f"- {snapshot.name} ({snapshot.url})")
    lines.extend(changed_lines)
    return header, lines
def truncate_text(value: str, limit: int) -> str:
    """Normalize *value* and hard-cap its length below *limit* characters.

    Short-enough text is returned unchanged; otherwise it is sliced to at
    most ``limit - 1`` characters with trailing whitespace stripped.
    Fix: the truncating return wrapped the slice in a no-op f-string
    (``f"{...}"``), which is removed here.
    NOTE(review): no ellipsis is appended on truncation — the dangling
    f-string suggests a trailing "…" may have been dropped; confirm the
    intended behavior before adding one, since callers may rely on the
    current output.
    """
    text = normalize_text(value)
    if len(text) <= limit:
        return text
    # max(0, ...) guards against limit <= 0 producing a negative slice bound.
    return text[: max(0, limit - 1)].rstrip()
def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[str, object]:
    """Scrape a newly added app's catalog page for extra display details.

    Performs a GET of ``app_url`` through the shared session (a requests
    exception propagates on transport failure, and ``raise_for_status``
    raises on HTTP error codes) and parses the HTML with BeautifulSoup.

    Returns a dict with keys:
      - ``page_title``: normalized <title> text, "" if the page has none
      - ``description``: first non-empty og:/meta/twitter description
      - ``headings``: up to 6 distinct h1/h2/h3 texts in document order
      - ``external_links``: up to 5 "label -> absolute-url" strings
      - ``detected_fields``: "Label: value" strings matched from the
        flattened body text (Version/Chart/Category/Maintainer/Homepage/Source)
    """
    response = session.get(
        app_url,
        timeout=REQUEST_TIMEOUT_SECONDS,
        headers={"User-Agent": USER_AGENT},
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    page_title = normalize_text(soup.title.get_text(" ", strip=True)) if soup.title else ""

    # Take the first non-empty description, preferring og:description over
    # the plain meta description, then twitter's variant.
    description = ""
    for attrs in (
        {"property": "og:description"},
        {"name": "description"},
        {"name": "twitter:description"},
    ):
        tag = soup.find("meta", attrs=attrs)
        if tag and tag.get("content"):
            description = normalize_text(str(tag["content"]))
            if description:
                break

    # Collect up to 6 distinct section headings in document order.
    headings: List[str] = []
    for tag in soup.find_all(["h1", "h2", "h3"]):
        heading = normalize_text(tag.get_text(" ", strip=True))
        if not heading:
            continue
        if heading not in headings:
            headings.append(heading)
        if len(headings) >= 6:
            break

    # Collect up to 5 deduplicated outbound links: skip fragments, resolve
    # relative hrefs against the app URL, and drop non-http(s) URLs and
    # links pointing back into the catalog itself.
    external_links: List[str] = []
    seen_links = set()
    for anchor in soup.find_all("a", href=True):
        href = str(anchor.get("href", "")).strip()
        if not href or href.startswith("#"):
            continue
        full_href = urljoin(app_url, href)
        if not full_href.startswith("http"):
            continue
        if full_href.startswith(CATALOG_URL):
            continue
        if full_href in seen_links:
            continue
        seen_links.add(full_href)
        label = normalize_text(anchor.get_text(" ", strip=True))
        if not label:
            # Anchor has no visible text: fall back to the URL as the label.
            label = full_href
        label = truncate_text(label, 60)
        external_links.append(f"{label} -> {full_href}")
        if len(external_links) >= 5:
            break

    # Heuristic "Label: value" extraction from the whole page text; the
    # character classes stop at newlines and '|' separators, and lengths
    # are bounded so one match cannot swallow the rest of the page.
    detected_fields: List[str] = []
    body_text = normalize_text(soup.get_text(" ", strip=True))
    label_patterns = {
        "Version": r"(?:App\s+Version|Version)\s*[:\-]\s*([^\n\r|]{1,80})",
        "Chart": r"(?:Chart\s+Version|Helm\s+Chart)\s*[:\-]\s*([^\n\r|]{1,80})",
        "Category": r"Category\s*[:\-]\s*([^\n\r|]{1,80})",
        "Maintainer": r"Maintainer(?:s)?\s*[:\-]\s*([^\n\r|]{1,120})",
        "Homepage": r"Homepage\s*[:\-]\s*([^\n\r|]{1,160})",
        "Source": r"Source\s*[:\-]\s*([^\n\r|]{1,160})",
    }
    for label, pattern in label_patterns.items():
        match = re.search(pattern, body_text, flags=re.IGNORECASE)
        if match:
            value = truncate_text(match.group(1), 120)
            detected_fields.append(f"{label}: {value}")

    return {
        "page_title": page_title,
        "description": description,
        "headings": headings,
        "external_links": external_links,
        "detected_fields": detected_fields,
    }
def build_new_app_message(session: requests.Session, app: AppSnapshot) -> str:
    """Render the detailed Telegram message for one newly added app.

    Starts from the catalog snapshot fields, then best-effort enriches
    the message with scraped page details (fetch failures are logged and
    tolerated). An over-long message is shrunk to fit MAX_MESSAGE_LEN by
    first clamping individual lines to 280 chars, then dropping lines
    from the tail while keeping at least 8.
    """
    parts: List[str] = [
        "🆕 New TrueNAS app detected",
        f"Detected: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}",
        f"Name: {app.name}",
        f"URL: {app.url}",
    ]
    if app.train:
        parts.append(f"Train: {app.train}")
    if app.added:
        parts.append(f"Added date: {app.added}")
    if app.summary:
        parts.append(f"Catalog summary: {truncate_text(app.summary, 700)}")

    # Page scraping is best-effort: a network/HTTP error degrades to an
    # empty details dict instead of failing the notification.
    try:
        page_info = fetch_new_app_page_details(session, app.url)
    except requests.RequestException as exc:
        logging.warning("Unable to fetch app details for %s: %s", app.url, exc)
        page_info = {}

    title = str(page_info.get("page_title", "")) if page_info else ""
    if title:
        parts.append(f"Page title: {truncate_text(title, 180)}")

    blurb = str(page_info.get("description", "")) if page_info else ""
    if blurb:
        parts.append(f"Description: {truncate_text(blurb, 1000)}")

    fields = page_info.get("detected_fields", []) if page_info else []
    if isinstance(fields, list):
        parts.extend(str(field) for field in fields[:6])

    section_headings = page_info.get("headings", []) if page_info else []
    if isinstance(section_headings, list) and section_headings:
        parts.append(f"Headings: {truncate_text(' | '.join(section_headings[:6]), 320)}")

    links = page_info.get("external_links", []) if page_info else []
    if isinstance(links, list) and links:
        parts.append("External links:")
        for link in links[:5]:
            parts.append(f"- {truncate_text(str(link), 220)}")

    message = "\n".join(parts)
    if len(message) <= MAX_MESSAGE_LEN:
        return message

    # Too long: clamp long lines first, then drop from the tail.
    shrunk = [part if len(part) <= 280 else truncate_text(part, 280) for part in parts]
    while len("\n".join(shrunk)) > MAX_MESSAGE_LEN and len(shrunk) > 8:
        shrunk.pop()
    return "\n".join(shrunk)
def split_message(header: str, lines: List[str], max_len: int = MAX_MESSAGE_LEN) -> List[str]: def split_message(header: str, lines: List[str], max_len: int = MAX_MESSAGE_LEN) -> List[str]:
@@ -273,12 +424,30 @@ def run_once(session: requests.Session, first_run: bool) -> bool:
logging.info("Initial snapshot saved with %d apps", len(current_state)) logging.info("Initial snapshot saved with %d apps", len(current_state))
return False return False
header, diff_lines, _ = build_diff_message(previous_state, current_state) added_urls, removed_urls, changed_lines, updated_count = collect_diffs(previous_state, current_state)
changed = bool(diff_lines) changed = bool(added_urls or removed_urls or changed_lines)
if changed: if changed:
logging.info("Catalog change detected with %d line items", len(diff_lines)) logging.info(
for message in split_message(header, diff_lines): "Catalog change detected (added=%d, removed=%d, updated=%d)",
len(added_urls),
len(removed_urls),
updated_count,
)
for url in added_urls:
app = current_state[url]
send_telegram_message(session, build_new_app_message(session, app))
header, summary_lines = build_summary_message(
added_count=len(added_urls),
removed_urls=removed_urls,
changed_lines=changed_lines,
updated_count=updated_count,
previous=previous_state,
)
if summary_lines:
for message in split_message(header, summary_lines):
send_telegram_message(session, message) send_telegram_message(session, message)
else: else:
logging.info("No catalog changes detected") logging.info("No catalog changes detected")