This commit is contained in:
@@ -14,7 +14,7 @@ The watcher pulls `https://apps.truenas.com/catalog` over plain HTTP (no browser
|
|||||||
|
|
||||||
On changes, it sends Telegram messages with:
|
On changes, it sends Telegram messages with:
|
||||||
|
|
||||||
- added apps (`+`)
|
- one detailed message per newly added app (name, URL, train, added date, catalog summary, plus extra details parsed from the app page when available)
|
||||||
- removed apps (`-`)
|
- removed apps (`-`)
|
||||||
- updated apps (`~`) and field-level diffs
|
- updated apps (`~`) and field-level diffs
|
||||||
|
|
||||||
|
|||||||
195
watcher.py
195
watcher.py
@@ -2,6 +2,7 @@ import hashlib
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, asdict
|
from dataclasses import dataclass, asdict
|
||||||
@@ -156,10 +157,10 @@ def format_field_change(label: str, old: str, new: str) -> str:
|
|||||||
return f"{label}: '{old_clean}' -> '{new_clean}'"
|
return f"{label}: '{old_clean}' -> '{new_clean}'"
|
||||||
|
|
||||||
|
|
||||||
def build_diff_message(
|
def collect_diffs(
|
||||||
previous: Dict[str, AppSnapshot],
|
previous: Dict[str, AppSnapshot],
|
||||||
current: Dict[str, AppSnapshot],
|
current: Dict[str, AppSnapshot],
|
||||||
) -> Tuple[str, List[str], int]:
|
) -> Tuple[List[str], List[str], List[str], int]:
|
||||||
prev_urls = set(previous.keys())
|
prev_urls = set(previous.keys())
|
||||||
curr_urls = set(current.keys())
|
curr_urls = set(current.keys())
|
||||||
|
|
||||||
@@ -193,22 +194,172 @@ def build_diff_message(
|
|||||||
for detail in details:
|
for detail in details:
|
||||||
changed_lines.append(f" - {detail}")
|
changed_lines.append(f" - {detail}")
|
||||||
|
|
||||||
|
return added_urls, removed_urls, changed_lines, updated_count
|
||||||
|
|
||||||
|
|
||||||
|
def build_summary_message(
    added_count: int,
    removed_urls: List[str],
    changed_lines: List[str],
    updated_count: int,
    previous: Dict[str, AppSnapshot],
) -> Tuple[str, List[str]]:
    """Build the header and body lines of the catalog summary notification.

    Added apps are only counted here; they contribute no body lines.

    Args:
        added_count: Number of newly added apps, shown in the header.
        removed_urls: URLs of apps that disappeared; each must be a key of
            ``previous``.
        changed_lines: Pre-formatted lines describing updated apps, appended
            after the removed entries as-is.
        updated_count: Number of updated apps, shown in the header.
        previous: Prior snapshot, used to look up the name and URL of each
            removed app.

    Returns:
        ``(header, lines)`` where ``header`` is a two-line string (UTC
        timestamp line, then the counts line) and ``lines`` holds one
        ``- name (url)`` entry per removed app followed by ``changed_lines``.

    Raises:
        KeyError: If a removed URL is missing from ``previous``.
    """
    stamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
    counts = f"Added: {added_count} | Removed: {len(removed_urls)} | Updated: {updated_count}"
    header = f"TrueNAS catalog changed at {stamp}\n{counts}"

    # Removed apps no longer exist in the current snapshot, so their display
    # data has to come from the previous one.
    removed_entries = [
        f"- {gone.name} ({gone.url})"
        for gone in (previous[url] for url in removed_urls)
    ]
    return header, removed_entries + list(changed_lines)
|
||||||
|
|
||||||
|
|
||||||
|
def truncate_text(value: str, limit: int) -> str:
    """Normalize ``value`` and shorten it to at most ``limit`` characters.

    The text is first passed through ``normalize_text``. If the result fits
    within ``limit`` it is returned unchanged; otherwise it is cut to
    ``limit - 1`` characters, trailing whitespace is stripped, and a single
    ellipsis character is appended.

    Args:
        value: Text to shorten.
        limit: Maximum length of the returned string.

    Returns:
        The normalized text, possibly truncated and ending in "…". When
        ``limit`` is zero or negative and the text is non-empty, an empty
        string is returned, because not even the ellipsis fits.
    """
    text = normalize_text(value)
    if len(text) <= limit:
        return text
    if limit <= 0:
        # Previously this path produced "…", which is longer than the limit
        # the caller asked for.
        return ""
    return f"{text[: limit - 1].rstrip()}…"
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[str, object]:
    """Download an app page and scrape a few descriptive details from it.

    Args:
        session: HTTP session used for the GET request.
        app_url: URL of the app page; also the base for resolving relative
            links found on the page.

    Returns:
        A dict with these keys:

        * ``page_title``: normalized ``<title>`` text, or ``""``.
        * ``description``: first non-empty content among the
          ``og:description``, ``description`` and ``twitter:description``
          meta tags (checked in that order), or ``""``.
        * ``headings``: up to 6 distinct non-empty h1/h2/h3 texts in
          document order.
        * ``external_links``: up to 5 ``"label -> url"`` strings for distinct
          http(s) links that do not start with ``CATALOG_URL``.
        * ``detected_fields``: ``"Label: value"`` strings for each label
          pattern that matches the page text.

    Raises:
        Whatever ``session.get`` or ``response.raise_for_status`` raise; the
        caller visible in this file catches ``requests.RequestException``.
    """
    response = session.get(
        app_url,
        timeout=REQUEST_TIMEOUT_SECONDS,
        headers={"User-Agent": USER_AGENT},
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # --- <title> ---------------------------------------------------------
    title_tag = soup.title
    page_title = normalize_text(title_tag.get_text(" ", strip=True)) if title_tag else ""

    # --- meta description: first selector with non-empty content wins -----
    description = ""
    meta_selectors = (
        {"property": "og:description"},
        {"name": "description"},
        {"name": "twitter:description"},
    )
    for selector in meta_selectors:
        meta = soup.find("meta", attrs=selector)
        if not meta or not meta.get("content"):
            continue
        description = normalize_text(str(meta["content"]))
        if description:
            break

    # --- headings: distinct, capped at six --------------------------------
    headings: List[str] = []
    for node in soup.find_all(["h1", "h2", "h3"]):
        text = normalize_text(node.get_text(" ", strip=True))
        if text and text not in headings:
            headings.append(text)
            if len(headings) >= 6:
                break

    # --- outbound links: distinct, outside the catalog, capped at five ----
    external_links: List[str] = []
    visited = set()
    for anchor in soup.find_all("a", href=True):
        raw_href = str(anchor.get("href", "")).strip()
        if not raw_href or raw_href.startswith("#"):
            continue  # empty or same-page fragment link
        target = urljoin(app_url, raw_href)
        if (
            not target.startswith("http")
            or target.startswith(CATALOG_URL)
            or target in visited
        ):
            continue
        visited.add(target)

        # Fall back to the URL itself when the anchor has no visible text.
        caption = normalize_text(anchor.get_text(" ", strip=True)) or target
        external_links.append(f"{truncate_text(caption, 60)} -> {target}")
        if len(external_links) >= 5:
            break

    # --- "Label: value" pairs matched against the flattened page text -----
    # NOTE(review): text nodes are joined with single spaces here, so the
    # newline/pipe stop characters in these patterns may rarely occur and a
    # capture can run on into the text that follows the value; confirm
    # against real app pages.
    page_text = normalize_text(soup.get_text(" ", strip=True))
    field_patterns = (
        ("Version", r"(?:App\s+Version|Version)\s*[:\-]\s*([^\n\r|]{1,80})"),
        ("Chart", r"(?:Chart\s+Version|Helm\s+Chart)\s*[:\-]\s*([^\n\r|]{1,80})"),
        ("Category", r"Category\s*[:\-]\s*([^\n\r|]{1,80})"),
        ("Maintainer", r"Maintainer(?:s)?\s*[:\-]\s*([^\n\r|]{1,120})"),
        ("Homepage", r"Homepage\s*[:\-]\s*([^\n\r|]{1,160})"),
        ("Source", r"Source\s*[:\-]\s*([^\n\r|]{1,160})"),
    )
    detected_fields: List[str] = []
    for field_name, regex in field_patterns:
        found = re.search(regex, page_text, flags=re.IGNORECASE)
        if found is None:
            continue
        detected_fields.append(f"{field_name}: {truncate_text(found.group(1), 120)}")

    return {
        "page_title": page_title,
        "description": description,
        "headings": headings,
        "external_links": external_links,
        "detected_fields": detected_fields,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_new_app_message(session: requests.Session, app: AppSnapshot) -> str:
    """Compose the detailed notification text for one newly added app.

    The message always starts with a banner, the detection time (UTC), and
    the app's name and URL. Train, added date and catalog summary follow
    when they are set. Details scraped from the app page (title,
    description, detected fields, headings, external links) are appended
    when the page can be fetched; a ``requests.RequestException`` during
    the fetch is logged as a warning and the message is built without them.

    Args:
        session: HTTP session passed on to ``fetch_new_app_page_details``.
        app: Snapshot of the new app; ``name``, ``url``, ``train``,
            ``added`` and ``summary`` are read.

    Returns:
        The message text. If the full text exceeds ``MAX_MESSAGE_LEN``,
        every line longer than 280 characters is truncated and trailing
        lines are dropped until it fits or only 8 lines remain.
    """
    detected_at = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
    lines: List[str] = [
        "🆕 New TrueNAS app detected",
        f"Detected: {detected_at}",
        f"Name: {app.name}",
        f"URL: {app.url}",
    ]

    # Optional fields taken from the catalog snapshot itself.
    for caption, raw in (("Train", app.train), ("Added date", app.added)):
        if raw:
            lines.append(f"{caption}: {raw}")
    if app.summary:
        lines.append(f"Catalog summary: {truncate_text(app.summary, 700)}")

    # Page scraping is best-effort: a failed request must not block the
    # notification, it only makes it less detailed.
    try:
        details = fetch_new_app_page_details(session, app.url)
    except requests.RequestException as exc:
        logging.warning("Unable to fetch app details for %s: %s", app.url, exc)
        details = {}
    info = details or {}

    title = str(info.get("page_title", ""))
    if title:
        lines.append(f"Page title: {truncate_text(title, 180)}")

    blurb = str(info.get("description", ""))
    if blurb:
        lines.append(f"Description: {truncate_text(blurb, 1000)}")

    fields = info.get("detected_fields", [])
    if isinstance(fields, list):
        lines.extend(str(item) for item in fields[:6])

    section_titles = info.get("headings", [])
    if isinstance(section_titles, list) and section_titles:
        lines.append(f"Headings: {truncate_text(' | '.join(section_titles[:6]), 320)}")

    links = info.get("external_links", [])
    if isinstance(links, list) and links:
        lines.append("External links:")
        lines.extend(f"- {truncate_text(str(link), 220)}" for link in links[:5])

    full_text = "\n".join(lines)
    if len(full_text) <= MAX_MESSAGE_LEN:
        return full_text

    # Too long: shorten over-long lines first, then drop lines from the end,
    # always keeping at least the first eight.
    compact = [truncate_text(entry, 280) if len(entry) > 280 else entry for entry in lines]
    while len(compact) > 8 and len("\n".join(compact)) > MAX_MESSAGE_LEN:
        compact.pop()
    return "\n".join(compact)
|
||||||
|
|
||||||
|
|
||||||
def split_message(header: str, lines: List[str], max_len: int = MAX_MESSAGE_LEN) -> List[str]:
|
def split_message(header: str, lines: List[str], max_len: int = MAX_MESSAGE_LEN) -> List[str]:
|
||||||
@@ -273,13 +424,31 @@ def run_once(session: requests.Session, first_run: bool) -> bool:
|
|||||||
logging.info("Initial snapshot saved with %d apps", len(current_state))
|
logging.info("Initial snapshot saved with %d apps", len(current_state))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
header, diff_lines, _ = build_diff_message(previous_state, current_state)
|
added_urls, removed_urls, changed_lines, updated_count = collect_diffs(previous_state, current_state)
|
||||||
changed = bool(diff_lines)
|
changed = bool(added_urls or removed_urls or changed_lines)
|
||||||
|
|
||||||
if changed:
|
if changed:
|
||||||
logging.info("Catalog change detected with %d line items", len(diff_lines))
|
logging.info(
|
||||||
for message in split_message(header, diff_lines):
|
"Catalog change detected (added=%d, removed=%d, updated=%d)",
|
||||||
send_telegram_message(session, message)
|
len(added_urls),
|
||||||
|
len(removed_urls),
|
||||||
|
updated_count,
|
||||||
|
)
|
||||||
|
|
||||||
|
for url in added_urls:
|
||||||
|
app = current_state[url]
|
||||||
|
send_telegram_message(session, build_new_app_message(session, app))
|
||||||
|
|
||||||
|
header, summary_lines = build_summary_message(
|
||||||
|
added_count=len(added_urls),
|
||||||
|
removed_urls=removed_urls,
|
||||||
|
changed_lines=changed_lines,
|
||||||
|
updated_count=updated_count,
|
||||||
|
previous=previous_state,
|
||||||
|
)
|
||||||
|
if summary_lines:
|
||||||
|
for message in split_message(header, summary_lines):
|
||||||
|
send_telegram_message(session, message)
|
||||||
else:
|
else:
|
||||||
logging.info("No catalog changes detected")
|
logging.info("No catalog changes detected")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user