This commit is contained in:
@@ -34,6 +34,7 @@ It also listens for Telegram commands from the configured chat:
|
|||||||
- `LOG_LEVEL` (default: `INFO`)
|
- `LOG_LEVEL` (default: `INFO`)
|
||||||
- `MAX_SCREENSHOTS_PER_APP` (default: `3`)
|
- `MAX_SCREENSHOTS_PER_APP` (default: `3`)
|
||||||
- `TELEGRAM_POLL_SECONDS` (default: `10`)
|
- `TELEGRAM_POLL_SECONDS` (default: `10`)
|
||||||
|
- `MEDIA_BASE_URL` (default: `https://media.sys.truenas.net`)
|
||||||
|
|
||||||
## Build
|
## Build
|
||||||
|
|
||||||
|
|||||||
106
watcher.py
106
watcher.py
@@ -28,6 +28,7 @@ LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
|
|||||||
MAX_MESSAGE_LEN = 3900
|
MAX_MESSAGE_LEN = 3900
|
||||||
MAX_SCREENSHOTS_PER_APP = int(os.getenv("MAX_SCREENSHOTS_PER_APP", "3"))
|
MAX_SCREENSHOTS_PER_APP = int(os.getenv("MAX_SCREENSHOTS_PER_APP", "3"))
|
||||||
TELEGRAM_POLL_SECONDS = int(os.getenv("TELEGRAM_POLL_SECONDS", "10"))
|
TELEGRAM_POLL_SECONDS = int(os.getenv("TELEGRAM_POLL_SECONDS", "10"))
|
||||||
|
MEDIA_BASE_URL = os.getenv("MEDIA_BASE_URL", "https://media.sys.truenas.net")
|
||||||
|
|
||||||
last_telegram_update_id: Optional[int] = None
|
last_telegram_update_id: Optional[int] = None
|
||||||
|
|
||||||
@@ -246,6 +247,47 @@ def truncate_text(value: str, limit: int) -> str:
|
|||||||
return f"{text[: max(0, limit - 1)].rstrip()}…"
|
return f"{text[: max(0, limit - 1)].rstrip()}…"
|
||||||
|
|
||||||
|
|
||||||
|
def extract_app_id_from_url(app_url: str) -> str:
    """Derive an app identifier from a catalog page URL.

    The id is the path segment immediately following ``catalog`` when one
    exists; otherwise the last non-empty path segment.  An empty path
    yields an empty string.
    """
    segments = [segment for segment in urlparse(app_url).path.strip("/").split("/") if segment]
    if not segments:
        return ""

    # Prefer the segment right after "catalog"; fall back to the final
    # segment when "catalog" is absent or is itself the last segment.
    try:
        after_catalog = segments.index("catalog") + 1
    except ValueError:
        return segments[-1]
    if after_catalog < len(segments):
        return segments[after_catalog]
    return segments[-1]
|
def build_storj_screenshot_urls(session: requests.Session, app_id: str) -> List[str]:
    """Probe the media CDN for sequentially numbered screenshot URLs.

    Requests ``screenshot1.png``, ``screenshot2.png``, ... under the app's
    media path and keeps each URL that answers 200 with an image (or
    unspecified) Content-Type.  Probing stops at the first gap, network
    error, or non-image response; at most MAX_SCREENSHOTS_PER_APP URLs
    are returned.  An empty ``app_id`` yields an empty list.
    """
    if not app_id:
        return []

    base = MEDIA_BASE_URL.rstrip("/")
    found: List[str] = []
    for number in range(1, MAX_SCREENSHOTS_PER_APP + 1):
        candidate = f"{base}/apps/{app_id}/screenshots/screenshot{number}.png"
        try:
            reply = session.get(
                candidate,
                timeout=REQUEST_TIMEOUT_SECONDS,
                headers={"User-Agent": USER_AGENT},
            )
        except requests.RequestException:
            # Network failure: assume later numbers are unreachable too.
            break
        if reply.status_code != 200:
            break
        # A missing Content-Type header is tolerated; an explicit
        # non-image type ends the sequence.
        content_type = str(reply.headers.get("Content-Type", "")).lower()
        if content_type and "image" not in content_type:
            break
        found.append(candidate)
    return found
def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[str, object]:
|
def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[str, object]:
|
||||||
response = session.get(
|
response = session.get(
|
||||||
app_url,
|
app_url,
|
||||||
@@ -257,68 +299,8 @@ def fetch_new_app_page_details(session: requests.Session, app_url: str) -> Dict[
|
|||||||
soup = BeautifulSoup(response.text, "html.parser")
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
|
|
||||||
page_title = normalize_text(soup.title.get_text(" ", strip=True)) if soup.title else ""
|
page_title = normalize_text(soup.title.get_text(" ", strip=True)) if soup.title else ""
|
||||||
|
app_id = extract_app_id_from_url(app_url)
|
||||||
screenshot_candidates: List[Tuple[int, int, str]] = []
|
screenshot_urls = build_storj_screenshot_urls(session, app_id)
|
||||||
seen_urls: set[str] = set()
|
|
||||||
|
|
||||||
og_image = soup.find("meta", attrs={"property": "og:image"})
|
|
||||||
if og_image and og_image.get("content"):
|
|
||||||
og_image_url = urljoin(app_url, str(og_image["content"]).strip())
|
|
||||||
if og_image_url.startswith("http"):
|
|
||||||
screenshot_candidates.append((2, 0, og_image_url))
|
|
||||||
|
|
||||||
for index, tag in enumerate(soup.find_all("img", src=True), start=1):
|
|
||||||
raw_src = str(tag.get("src", "")).strip()
|
|
||||||
if not raw_src or raw_src.startswith("data:"):
|
|
||||||
continue
|
|
||||||
|
|
||||||
image_url = urljoin(app_url, raw_src)
|
|
||||||
if not image_url.startswith("http") or image_url in seen_urls:
|
|
||||||
continue
|
|
||||||
seen_urls.add(image_url)
|
|
||||||
|
|
||||||
width_value = str(tag.get("width", "")).strip()
|
|
||||||
height_value = str(tag.get("height", "")).strip()
|
|
||||||
if width_value.isdigit() and int(width_value) < 200:
|
|
||||||
continue
|
|
||||||
if height_value.isdigit() and int(height_value) < 120:
|
|
||||||
continue
|
|
||||||
|
|
||||||
descriptor = " ".join(
|
|
||||||
[
|
|
||||||
str(tag.get("alt", "")),
|
|
||||||
str(tag.get("title", "")),
|
|
||||||
" ".join(tag.get("class", [])),
|
|
||||||
str(tag.get("id", "")),
|
|
||||||
image_url,
|
|
||||||
]
|
|
||||||
).lower()
|
|
||||||
|
|
||||||
if any(skip in descriptor for skip in ["logo", "favicon", "icon", "avatar", "badge"]):
|
|
||||||
continue
|
|
||||||
|
|
||||||
score = 0
|
|
||||||
if "screenshot" in descriptor or "screen-shot" in descriptor or "screen shot" in descriptor:
|
|
||||||
score += 4
|
|
||||||
if "gallery" in descriptor or "carousel" in descriptor or "preview" in descriptor:
|
|
||||||
score += 2
|
|
||||||
if re.search(r"\.(png|jpe?g|webp)(\?|$)", image_url, flags=re.IGNORECASE):
|
|
||||||
score += 1
|
|
||||||
|
|
||||||
if score > 0:
|
|
||||||
screenshot_candidates.append((score, index, image_url))
|
|
||||||
|
|
||||||
screenshot_candidates.sort(key=lambda item: (-item[0], item[1]))
|
|
||||||
|
|
||||||
screenshot_urls: List[str] = []
|
|
||||||
emitted: set[str] = set()
|
|
||||||
for _, _, image_url in screenshot_candidates:
|
|
||||||
if image_url in emitted:
|
|
||||||
continue
|
|
||||||
emitted.add(image_url)
|
|
||||||
screenshot_urls.append(image_url)
|
|
||||||
if len(screenshot_urls) >= MAX_SCREENSHOTS_PER_APP:
|
|
||||||
break
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"page_title": page_title,
|
"page_title": page_title,
|
||||||
|
|||||||
Reference in New Issue
Block a user