add new item details
All checks were successful
Docker Image / build (push) Successful in 1m5s

This commit is contained in:
LockeShor
2026-03-02 16:42:28 -05:00
parent 48b17634b4
commit e505fbe25a
2 changed files with 183 additions and 14 deletions

View File

@@ -2,6 +2,7 @@ import hashlib
import json
import logging
import os
import re
import sys
import time
from dataclasses import dataclass, asdict
@@ -156,10 +157,10 @@ def format_field_change(label: str, old: str, new: str) -> str:
return f"{label}: '{old_clean}' -> '{new_clean}'"
def build_diff_message(
def collect_diffs(
previous: Dict[str, AppSnapshot],
current: Dict[str, AppSnapshot],
) -> Tuple[str, List[str], int]:
) -> Tuple[List[str], List[str], List[str], int]:
prev_urls = set(previous.keys())
curr_urls = set(current.keys())
@@ -193,22 +194,172 @@ def build_diff_message(
for detail in details:
changed_lines.append(f" - {detail}")
return added_urls, removed_urls, changed_lines, updated_count
def build_summary_message(
    added_count: int,
    removed_urls: List[str],
    changed_lines: List[str],
    updated_count: int,
    previous: Dict[str, AppSnapshot],
) -> Tuple[str, List[str]]:
    """Build the header and body lines of the catalog-change summary.

    Added apps contribute only their count to the header; the body lists
    removed apps followed by the pre-formatted change lines.

    Args:
        added_count: Number of newly added apps (shown in the header only).
        removed_urls: URLs of removed apps; each must be a key of *previous*.
        changed_lines: Pre-formatted lines describing updated apps.
        updated_count: Number of updated apps (shown in the header).
        previous: Earlier snapshot, used to look up the name and URL of
            each removed app.

    Returns:
        A ``(header, lines)`` pair. ``lines`` is empty when nothing was
        removed and *changed_lines* is empty.

    Raises:
        KeyError: If a URL in *removed_urls* is missing from *previous*.
    """
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    header = (
        f"TrueNAS catalog changed at {timestamp}\n"
        f"Added: {added_count} | Removed: {len(removed_urls)} | Updated: {updated_count}"
    )

    lines: List[str] = []
    for url in removed_urls:
        app = previous[url]
        lines.append(f"- {app.name} ({app.url})")
    lines.extend(changed_lines)

    return header, lines
def truncate_text(value: str, limit: int) -> str:
    """Normalize *value* and shorten it to at most *limit* characters.

    The value is first passed through ``normalize_text``. If the result
    already fits it is returned unchanged; otherwise it is cut and an
    ellipsis is appended so the reader can tell the text was shortened.

    Args:
        value: Text to normalize and, if necessary, shorten.
        limit: Maximum length of the returned string.

    Returns:
        The normalized text, truncated with a trailing ``…`` when it was
        longer than *limit*. An empty string when *limit* is zero or less
        and the text does not fit.
    """
    text = normalize_text(value)
    if len(text) <= limit:
        return text
    if limit <= 0:
        # No room for any content, not even the ellipsis.
        return ""
    # Keep limit - 1 characters so that the appended ellipsis still fits
    # within the requested limit.
    return f"{text[: limit - 1].rstrip()}…"
# (label, compiled pattern) pairs searched, in this order, in the page text.
# Compiled once at import time instead of on every call.
_DETECTED_FIELD_PATTERNS = tuple(
    (label, re.compile(pattern, flags=re.IGNORECASE))
    for label, pattern in (
        ("Version", r"(?:App\s+Version|Version)\s*[:\-]\s*([^\n\r|]{1,80})"),
        ("Chart", r"(?:Chart\s+Version|Helm\s+Chart)\s*[:\-]\s*([^\n\r|]{1,80})"),
        ("Category", r"Category\s*[:\-]\s*([^\n\r|]{1,80})"),
        ("Maintainer", r"Maintainer(?:s)?\s*[:\-]\s*([^\n\r|]{1,120})"),
        ("Homepage", r"Homepage\s*[:\-]\s*([^\n\r|]{1,160})"),
        ("Source", r"Source\s*[:\-]\s*([^\n\r|]{1,160})"),
    )
)


def _find_meta_description(soup: "BeautifulSoup") -> str:
    """Return the first non-empty description found in the page's meta tags."""
    for attrs in (
        {"property": "og:description"},
        {"name": "description"},
        {"name": "twitter:description"},
    ):
        tag = soup.find("meta", attrs=attrs)
        if tag and tag.get("content"):
            description = normalize_text(str(tag["content"]))
            if description:
                return description
    return ""


def _collect_headings(soup: "BeautifulSoup", max_count: int = 6) -> List[str]:
    """Return up to *max_count* distinct, non-empty h1/h2/h3 texts in page order."""
    headings: List[str] = []
    for tag in soup.find_all(["h1", "h2", "h3"]):
        heading = normalize_text(tag.get_text(" ", strip=True))
        if heading and heading not in headings:
            headings.append(heading)
            if len(headings) >= max_count:
                break
    return headings


def _collect_external_links(
    soup: "BeautifulSoup", app_url: str, max_count: int = 5
) -> List[str]:
    """Return up to *max_count* ``"label -> url"`` entries for links leaving the catalog."""
    links: List[str] = []
    seen = set()
    for anchor in soup.find_all("a", href=True):
        href = str(anchor.get("href", "")).strip()
        if not href or href.startswith("#"):
            continue
        full_href = urljoin(app_url, href)
        # Only real web links; a bare "http" prefix test would also accept
        # unrelated schemes that merely start with those letters.
        if not full_href.startswith(("http://", "https://")):
            continue
        if full_href.startswith(CATALOG_URL) or full_href in seen:
            continue
        seen.add(full_href)
        # Fall back to the URL itself when the anchor has no visible text.
        label = normalize_text(anchor.get_text(" ", strip=True)) or full_href
        links.append(f"{truncate_text(label, 60)} -> {full_href}")
        if len(links) >= max_count:
            break
    return links


def _detect_labelled_fields(soup: "BeautifulSoup") -> List[str]:
    """Return ``"Label: value"`` strings for known labels found in the page text."""
    # NOTE(review): the value pattern stops at newlines or "|"; if
    # normalize_text collapses newlines, a value may run into the following
    # text up to the pattern's length cap -- confirm against normalize_text.
    body_text = normalize_text(soup.get_text(" ", strip=True))
    fields: List[str] = []
    for label, pattern in _DETECTED_FIELD_PATTERNS:
        match = pattern.search(body_text)
        if match:
            fields.append(f"{label}: {truncate_text(match.group(1), 120)}")
    return fields


def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[str, object]:
    """Download an app's page and extract descriptive details from its HTML.

    Args:
        session: Session used to issue the GET request.
        app_url: URL of the app page; also the base for resolving relative links.

    Returns:
        A dict with the keys ``page_title`` (str), ``description`` (str),
        ``headings`` (list of str), ``external_links`` (list of
        ``"label -> url"`` str) and ``detected_fields`` (list of
        ``"Label: value"`` str). Values that cannot be found are empty.

    Raises:
        requests.HTTPError: Via ``raise_for_status()`` on an HTTP error response.
        Any exception raised by ``session.get`` itself propagates unchanged.
    """
    response = session.get(
        app_url,
        timeout=REQUEST_TIMEOUT_SECONDS,
        headers={"User-Agent": USER_AGENT},
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    page_title = normalize_text(soup.title.get_text(" ", strip=True)) if soup.title else ""

    return {
        "page_title": page_title,
        "description": _find_meta_description(soup),
        "headings": _collect_headings(soup),
        "external_links": _collect_external_links(soup, app_url),
        "detected_fields": _detect_labelled_fields(soup),
    }
def build_new_app_message(session: requests.Session, app: AppSnapshot) -> str:
    """Compose the notification text announcing a newly detected app.

    The message starts with the snapshot data (name, URL and, when set,
    train, added date and catalog summary). It is then enriched with details
    fetched from the app's page. If that fetch fails with a
    ``requests.RequestException`` a warning is logged and the message is
    built from the snapshot data alone.

    Args:
        session: Session passed on to ``fetch_new_app_page_details``.
        app: Snapshot of the newly added app.

    Returns:
        The message text. When it exceeds ``MAX_MESSAGE_LEN``, long lines are
        shortened to 280 characters and trailing lines are dropped (never
        below eight lines) until it fits or only eight lines remain.
    """
    detected_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    parts: List[str] = [
        "🆕 New TrueNAS app detected",
        f"Detected: {detected_at}",
        f"Name: {app.name}",
        f"URL: {app.url}",
    ]
    for prefix, value in (("Train", app.train), ("Added date", app.added)):
        if value:
            parts.append(f"{prefix}: {value}")
    if app.summary:
        parts.append(f"Catalog summary: {truncate_text(app.summary, 700)}")

    try:
        details = fetch_new_app_page_details(session, app.url)
    except requests.RequestException as exc:
        logging.warning("Unable to fetch app details for %s: %s", app.url, exc)
        # An empty mapping makes every lookup below fall back to its default.
        details = {}

    title = str(details.get("page_title", ""))
    if title:
        parts.append(f"Page title: {truncate_text(title, 180)}")

    summary_text = str(details.get("description", ""))
    if summary_text:
        parts.append(f"Description: {truncate_text(summary_text, 1000)}")

    # The details mapping is typed as Dict[str, object], so list-valued
    # entries are checked before being iterated.
    fields = details.get("detected_fields", [])
    if isinstance(fields, list):
        parts.extend(str(entry) for entry in fields[:6])

    page_headings = details.get("headings", [])
    if isinstance(page_headings, list) and page_headings:
        joined_headings = " | ".join(page_headings[:6])
        parts.append(f"Headings: {truncate_text(joined_headings, 320)}")

    links = details.get("external_links", [])
    if isinstance(links, list) and links:
        parts.append("External links:")
        parts.extend(f"- {truncate_text(str(entry), 220)}" for entry in links[:5])

    message = "\n".join(parts)
    if len(message) <= MAX_MESSAGE_LEN:
        return message

    # Too long: shorten each over-long line first, then drop lines from the
    # end while keeping at least the first eight.
    shortened = [
        truncate_text(entry, 280) if len(entry) > 280 else entry
        for entry in parts
    ]
    while len(shortened) > 8 and len("\n".join(shortened)) > MAX_MESSAGE_LEN:
        shortened.pop()
    return "\n".join(shortened)
def split_message(header: str, lines: List[str], max_len: int = MAX_MESSAGE_LEN) -> List[str]:
@@ -273,13 +424,31 @@ def run_once(session: requests.Session, first_run: bool) -> bool:
logging.info("Initial snapshot saved with %d apps", len(current_state))
return False
header, diff_lines, _ = build_diff_message(previous_state, current_state)
changed = bool(diff_lines)
added_urls, removed_urls, changed_lines, updated_count = collect_diffs(previous_state, current_state)
changed = bool(added_urls or removed_urls or changed_lines)
if changed:
logging.info("Catalog change detected with %d line items", len(diff_lines))
for message in split_message(header, diff_lines):
send_telegram_message(session, message)
logging.info(
"Catalog change detected (added=%d, removed=%d, updated=%d)",
len(added_urls),
len(removed_urls),
updated_count,
)
for url in added_urls:
app = current_state[url]
send_telegram_message(session, build_new_app_message(session, app))
header, summary_lines = build_summary_message(
added_count=len(added_urls),
removed_urls=removed_urls,
changed_lines=changed_lines,
updated_count=updated_count,
previous=previous_state,
)
if summary_lines:
for message in split_message(header, summary_lines):
send_telegram_message(session, message)
else:
logging.info("No catalog changes detected")