This commit is contained in:
198
sync.py
Normal file
198
sync.py
Normal file
@@ -0,0 +1,198 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
from caldav import DAVClient
|
||||
import hashlib
|
||||
import re
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# load environment from .env when present
|
||||
load_dotenv()
|
||||
import hashlib
|
||||
|
||||
# Workhive endpoint and credentials. Every value is read from the environment
# (optionally populated from a .env file); nothing secret is hard-coded here.
# Required: BASE_URL, WORKHIVE_TOKEN, CALDAV_USER, CALDAV_PASSWORD, CALDAV_LOCATION
BASE_URL = os.environ.get('BASE_URL')
TOKEN = os.environ.get('WORKHIVE_TOKEN')
# Name searched for in schedule list items to find this person's shifts.
NAME = os.environ.get('NAME', 'Jonathan Slivka')

# CalDAV credentials (must be provided via env)
USER = os.environ.get('CALDAV_USER')
PASSWORD = os.environ.get('CALDAV_PASSWORD')
LOCATION = os.environ.get('CALDAV_LOCATION')

# timezone to use for parsed datetimes (script runs in EST by default)
TIMEZONE = os.environ.get('TIMEZONE', 'America/New_York')

# Fail fast with a clear message when a required setting is missing.
# BASE_URL is checked too: without it every request URL would start with the
# literal text "None" and only fail later, inside the HTTP call.
if not BASE_URL:
    raise SystemExit('BASE_URL not set in environment')
if not TOKEN:
    raise SystemExit('WORKHIVE_TOKEN not set in environment')
if not USER or not PASSWORD or not LOCATION:
    raise SystemExit('CALDAV_USER, CALDAV_PASSWORD, and CALDAV_LOCATION must be set in environment')

# The session cookie is sent as a raw header (avoids requests' cookie encoding issues).
headers = {"Cookie": f"workhive_session={TOKEN}"}
|
||||
|
||||
# Week-start dates (Mondays) to query, formatted YYYY-MM-DD: the Monday of
# last week plus the three Mondays that follow it (this week and the next two).
today = datetime.today()
# weekday() is 0 on Monday, so the first subtraction rewinds to this week's
# Monday; the extra week back catches any late-posted shifts.
monday = today - timedelta(days=today.weekday()) - timedelta(weeks=1)
periods = [
    (monday + timedelta(weeks=week_offset)).strftime("%Y-%m-%d")
    for week_offset in range(4)
]

print("Periods to check: " + ", ".join(periods))
|
||||
|
||||
# Workhive facility ids mapped to the display names used for calendar entries.
facilities = {
    "fac_8ed0d011c748": "Cary Swim Club",
    "fac_f38dd7211e7e": "Scottish Hills",
}

# One schedule URL per (facility, week) pair: all weeks of the first facility,
# then all weeks of the next one.
urls = [
    f"{BASE_URL}?facility_id={facility_id}&period={week_start}"
    for facility_id in facilities
    for week_start in periods
]
|
||||
|
||||
# Formats tried when parsing "<year> <month> <day> <time>": abbreviated month
# name first ("Jun"), then the full month name ("June").
SHIFT_DATETIME_FORMATS = ("%Y %b %d %I:%M %p", "%Y %B %d %I:%M %p")


def query_param(url, key):
    """Return the raw value of query parameter *key* in *url*, or None if absent.

    Plain string splitting is used; the value is not URL-decoded.
    """
    marker = f"{key}="
    if marker not in url:
        return None
    return url.split(marker)[1].split("&")[0]


def parse_clock(year, month, day, clock):
    """Parse one clock time on the given date; return None if no format fits."""
    for fmt in SHIFT_DATETIME_FORMATS:
        try:
            return datetime.strptime(f"{year} {month} {day} {clock}", fmt)
        except ValueError:
            continue
    return None


def parse_shift_datetimes(month, day, shift_time, anchor):
    """Turn a header date and a "start - end" time range into naive datetimes.

    Args:
        month: month name from the column header ("Jun" or "June").
        day: day-of-month text from the column header.
        shift_time: time range text, e.g. "2:50 PM -8:15 PM".
        anchor: datetime of the week being viewed; used to choose the year.

    Returns:
        A (start, end) tuple. end is None when the text has no parseable
        end time.

    Raises:
        ValueError: if the start time cannot be parsed.
    """
    parts = [p.strip() for p in shift_time.split('-')]
    start_str = parts[0]
    end_str = parts[1] if len(parts) >= 2 else None

    # The page only shows month and day. A week that straddles New Year holds
    # dates from two calendar years, so choose the year that puts the date
    # closest to the week being viewed instead of blindly reusing its year.
    candidates = []
    for year in (anchor.year - 1, anchor.year, anchor.year + 1):
        candidate = parse_clock(year, month, day, start_str)
        if candidate is not None:
            candidates.append((abs(candidate - anchor), year, candidate))
    if not candidates:
        shift_date = f"{month} {day}"
        raise ValueError(f"Could not parse shift time: {shift_time!r} with date {shift_date!r}")
    _, year, start_dt = min(candidates, key=lambda item: item[0])

    end_dt = parse_clock(year, month, day, end_str) if end_str else None
    # An end time earlier than the start means the shift runs past midnight.
    if end_dt is not None and end_dt < start_dt:
        end_dt += timedelta(days=1)
    return start_dt, end_dt


# Scrape every schedule page and collect the shifts that mention NAME.
shifts = []

for url in urls:
    # A timeout keeps a stalled server from hanging the run forever, and
    # raise_for_status() stops an HTTP error page from being parsed as an
    # empty schedule.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Facility and week are constant for the whole page, so resolve them once.
    facility_name = facilities.get(query_param(url, "facility_id"), "Unknown Facility")
    period_param = query_param(url, "period")
    anchor = datetime.strptime(period_param, "%Y-%m-%d") if period_param else datetime.today()

    for li in soup.find_all("li"):
        entry_text = li.get_text().strip()
        if NAME not in entry_text:
            continue

        # Walk up from the list item to the shift card, its table cell, the
        # row, and the table header row that carries the column dates.
        card = li.find_parent("div", class_="text-white")
        td = li.find_parent('td')
        row = td.find_parent('tr') if td is not None else None
        table = row.find_parent('table') if row is not None else None
        thead = table.find('thead') if table is not None else None
        header_row = thead.find('tr') if thead is not None else None
        if card is None or header_row is None:
            raise ValueError(f"Unexpected page structure around schedule entry {entry_text!r} in {url}")

        # The time range is the second line of the card's second child node.
        try:
            shift_time = list(card.children)[1].get_text().strip().split("\n")[1].strip()
        except IndexError as exc:
            raise ValueError(f"Unexpected shift card layout for entry {entry_text!r} in {url}") from exc

        # The cell's position within its row selects the matching date header.
        cells = row.find_all(['td', 'th'], recursive=False)
        col_index = next((i for i, cell in enumerate(cells) if cell is td), None)
        header_ths = header_row.find_all('th', recursive=False)
        if col_index is None or col_index >= len(header_ths):
            raise ValueError(f"Could not match schedule entry {entry_text!r} to a date column in {url}")

        # extract date text from header (<th> may contain month and day on separate lines)
        shift_date_text = header_ths[col_index].get_text().strip()
        month, day = shift_date_text.split("\n")
        day = day.split(" ")[1]
        shift_date = f"{month} {day}"

        # e.g. date=Jun 18, time=2:50 PM -8:15 PM
        start_dt, end_dt = parse_shift_datetimes(month, day, shift_time, anchor)

        shifts.append({
            "facility": facility_name,
            "date": shift_date,
            "time": shift_time,
            "start_datetime": start_dt,
            "end_datetime": end_dt,
        })
|
||||
|
||||
|
||||
|
||||
|
||||
# Echo every collected shift (facility, date, time text and parsed datetimes)
# so a run can be checked by eye.
for shift in shifts:
    headline = f"{shift['facility']} - {shift['date']} - {shift['time']}"
    starts_at = shift['start_datetime']
    ends_at = shift['end_datetime']
    print(headline)
    print(f" Start: {starts_at}")
    print(f" End: {ends_at}")
    print()
|
||||
|
||||
# push to CalDAV server with duplicate checking

# Patterns used to read identifying properties out of stored iCalendar text.
UID_LINE = re.compile(r'^UID:(.+)$', re.M)
SUMMARY_LINE = re.compile(r'^SUMMARY:(.+)$', re.M)
DTSTART_LINE = re.compile(r'^DTSTART(?:;TZID=[^:]+)?:([0-9T]+)', re.M)
# Local date-time form used for DTSTART/DTEND values (15 characters).
ICAL_STAMP = '%Y%m%dT%H%M%S'


def ical_escape(text):
    """Escape *text* for use as an iCalendar TEXT value (RFC 5545, 3.3.11)."""
    return (
        text.replace('\\', '\\\\')
        .replace(';', '\\;')
        .replace(',', '\\,')
        .replace('\r\n', '\\n')
        .replace('\n', '\\n')
        .replace('\r', '\\n')
    )


def index_existing_events(calendar):
    """Fetch the calendar's events once and index them for duplicate checks.

    Returns:
        A (uids, slots) tuple: the set of UID values found, and the set of
        (SUMMARY value, first 15 characters of a DTSTART value) pairs found.
    """
    uids, slots = set(), set()
    for event in calendar.events():
        raw = event.data
        txt = raw.decode('utf-8', 'ignore') if isinstance(raw, bytes) else str(raw)

        uid_match = UID_LINE.search(txt)
        if uid_match:
            uids.add(uid_match.group(1).strip())

        summary_match = SUMMARY_LINE.search(txt)
        if summary_match:
            title = summary_match.group(1).strip()
            # Every DTSTART line is indexed, not just the first one: when the
            # data embeds a VTIMEZONE, its DTSTART lines can come before the
            # event's own.
            for start_match in DTSTART_LINE.finditer(txt):
                slots.add((title, start_match.group(1).strip()[:15]))
    return uids, slots


client = DAVClient(LOCATION, username=USER, password=PASSWORD)
cals = client.principal().calendars()

# Pick the calendar whose URL equals CALDAV_LOCATION (trailing slashes ignored).
# str() is applied because the library may expose the URL as an object.
wanted_url = LOCATION.rstrip('/')
work_calendar = next(
    (c for c in cals if str(getattr(c, 'url', '')).rstrip('/') == wanted_url),
    None,
)

if work_calendar is None:
    print("No CalDAV calendar found; skipping push")
else:
    print(f"Using calendar: {getattr(work_calendar, 'id', 'unknown')}")

    # One fetch for the whole run instead of one per shift; skipped entirely
    # when there is nothing to push.
    known_uids, known_slots = index_existing_events(work_calendar) if shifts else (set(), set())

    for shift in shifts:
        summary = f"{shift['facility']} Shift"
        start_dt = shift['start_datetime']
        # A shift without a parsed end time becomes a zero-length event.
        end_dt = shift['end_datetime'] or start_dt

        # Deterministic UID: the same shift always hashes to the same id, so
        # re-running the script does not create duplicates.
        uid_src = f"{shift['facility']}|{start_dt.isoformat()}|{end_dt.isoformat()}"
        uid = hashlib.sha1(uid_src.encode()).hexdigest() + "@workhive-sync"

        dtstart_str = start_dt.strftime(ICAL_STAMP)
        dtend_str = end_dt.strftime(ICAL_STAMP)
        summary_text = ical_escape(summary)

        # Duplicate if the UID is known, or (fallback for events that carry a
        # different UID) an event with the same summary starts at the same time.
        if uid in known_uids or (summary_text, dtstart_str) in known_slots:
            print(f"Skipping existing event: {summary} at {start_dt}")
            continue

        description = ical_escape(
            f'Work shift at {shift["facility"]} on {shift["date"]} from {shift["time"]}'
        )
        # TZID comes from the TIMEZONE setting (default America/New_York).
        ical = (
            'BEGIN:VCALENDAR\r\n'
            'VERSION:2.0\r\n'
            'PRODID:-//workhive-caldav-sync//EN\r\n'
            'BEGIN:VEVENT\r\n'
            f'UID:{uid}\r\n'
            f'SUMMARY:{summary_text}\r\n'
            f'DTSTART;TZID={TIMEZONE}:{dtstart_str}\r\n'
            f'DTEND;TZID={TIMEZONE}:{dtend_str}\r\n'
            f'DESCRIPTION:{description}\r\n'
            'END:VEVENT\r\n'
            'END:VCALENDAR\r\n'
        )

        work_calendar.add_event(ical)
        # Record the new event so a repeated entry later in this run is skipped.
        known_uids.add(uid)
        known_slots.add((summary_text, dtstart_str))
        print(f"Added event for shift on {shift['date']} at {shift['facility']}")
|
||||
Reference in New Issue
Block a user