scraper-hafensommer/scraper.py

107 lines
2.7 KiB
Python

import json
import os
import re
from typing import Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
URL = "https://www.adticket.de/Hafensommer-Wurzburg.html"
BASE_URL = "https://www.adticket.de"
# Locally saved copy of the listing page that the scraper parses.
SNAPSHOT_PATH = "hafensommer-2025.html"

# First number in German notation: optional "." thousands groups followed by
# optional "," decimals (e.g. "25", "25,00", "1.250,00").
_PRICE_RE = re.compile(r"\d{1,3}(?:\.\d{3})+(?:,\d+)?|\d+(?:,\d+)?")
# Anything outside this set is replaced before an ID is used in a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r"[^\w.-]")


def parse_price(text: str) -> Optional[float]:
    """Return the first German-formatted price found in *text*.

    Thousands separators (".") are dropped and the decimal comma is converted
    to a decimal point, so "ab 1.250,50" yields 1250.5.

    Returns:
        The price as a float, or None when *text* contains no number.
    """
    match = _PRICE_RE.search(text)
    if match is None:
        return None
    return float(match.group(0).replace(".", "").replace(",", "."))


def absolute_url(link: Optional[str]) -> Optional[str]:
    """Resolve *link* against BASE_URL; return None for a missing/empty link.

    Links that are already absolute are returned unchanged by ``urljoin``.
    """
    if not link:
        return None
    return urljoin(BASE_URL, link)


def build_event(event_node):
    """Convert one ``.c-list-item-event`` node into a schema.org Event.

    Args:
        event_node: parsed HTML element for a single event (as returned by
            BeautifulSoup's ``select_one``).

    Returns:
        A ``(sync_id, event_json)`` tuple, or None when the node lacks a
        ``data-sync-id``, a headline, or a ``<time datetime=...>`` element.
    """
    # ID from data-sync-id
    sync_id = event_node.get('data-sync-id')
    if not sync_id:
        return None

    # Name: only the part of the headline before the first "|" is used.
    headline = event_node.select_one('h3.c-list-item-event__headline')
    if headline is None:
        return None
    raw_name = headline.text.strip().split('|')[0].strip()

    # Start date
    time_elem = event_node.select_one('time[datetime]')
    if time_elem is None:
        return None

    event_json = {
        "@context": "https://schema.org",
        "@type": "Event",
        "name": f"Hafensommer: {raw_name}",
        "startDate": time_elem['datetime'],
        "performer": {
            "@type": "Person",
            "name": raw_name
        },
        "location": {
            "@type": "PostalAddress",
            "name": "Freitreppe Alter Hafen",
            "streetAddress": "Oskar-Laredo-Platz 1",
            "postalCode": "97080",
            "addressLocality": "Würzburg"
        },
        "superEvent": {
            "@type": "Event",
            "name": "Hafensommer Würzburg 2025"
        }
    }

    # Image: .get() so an <img> without a src attribute is simply skipped.
    img_elem = event_node.select_one('img.c-list-item-event__image')
    image_url = absolute_url(img_elem.get('src')) if img_elem is not None else None
    if image_url:
        event_json["image"] = image_url

    # Offer: emitted only when both a link and a parsable price are present.
    offer_url = absolute_url(event_node.get('href'))
    price_elem = event_node.select_one('.c-list-item-event__event-min-price span')
    price = parse_price(price_elem.text) if price_elem is not None else None
    if offer_url and price is not None:
        event_json["offers"] = {
            "@type": "Offer",
            "url": offer_url,
            "price": price,
            "priceCurrency": "EUR"
        }

    return sync_id, event_json


def main(snapshot_path: str = SNAPSHOT_PATH) -> None:
    """Parse the saved listing page and write one JSON file per event.

    Each event is saved as ``hafensommer-<sync_id>.json`` in the current
    working directory and the file name is printed.

    To scrape the live page instead of the snapshot, fetch it with
    ``requests.get(URL)``, call ``raise_for_status()`` on the response and
    pass ``response.text`` to BeautifulSoup in place of the file contents.
    """
    with open(snapshot_path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    for list_item in soup.select('.w-paged-listing__list-item'):
        event_node = list_item.select_one('.c-list-item-event')
        if event_node is None:
            continue

        built = build_event(event_node)
        if built is None:
            continue
        sync_id, event_json = built

        # The ID comes from scraped HTML; keep it from escaping the directory.
        safe_id = _UNSAFE_FILENAME_CHARS.sub('_', sync_id)
        filename = f"hafensommer-{safe_id}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(event_json, f, ensure_ascii=False, indent=2)
        print(f"Saved {filename}")


if __name__ == "__main__":
    main()