"""Export Hafensommer Würzburg events as schema.org ``Event`` JSON files.

Parses a saved copy of the adticket.de listing page and writes one
``hafensommer-<sync id>.json`` file per event into the working directory.
"""

import json
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

URL = "https://www.adticket.de/Hafensommer-Wurzburg.html"
BASE_URL = "https://www.adticket.de"

# The live download is disabled; the listing is read from a local snapshot.
# To scrape the live page instead, replace the file read in main() with:
#     response = requests.get(URL)
#     response.raise_for_status()
#     soup = BeautifulSoup(response.text, 'html.parser')
SNAPSHOT_PATH = 'hafensommer-2025.html'

# German-formatted amount: optional "." thousands groups, optional "," decimals.
# The grouped alternative comes first so "1.234,50" is not cut off at "1".
_PRICE_RE = re.compile(r'\d{1,3}(?:\.\d{3})+(?:,\d+)?|\d+(?:,\d+)?')


def parse_price(text):
    """Return the first German-formatted amount found in *text* as a float.

    Args:
        text: Free text such as ``"ab 25,00 €"``.

    Returns:
        The parsed amount (``25.0`` for the example above), or ``None`` when
        *text* contains no digits.
    """
    match = _PRICE_RE.search(text)
    if match is None:
        return None
    # Drop thousands separators, then turn the decimal comma into a point.
    return float(match.group().replace('.', '').replace(',', '.'))


def _build_event(event_node):
    """Build the schema.org ``Event`` dict for one ``.c-list-item-event`` node.

    Args:
        event_node: The BeautifulSoup tag of a single listing entry.

    Returns:
        A JSON-serialisable dict, or ``None`` when the node has no headline
        or no ``<time datetime=...>`` element.
    """
    headline = event_node.select_one('h3.c-list-item-event__headline')
    time_elem = event_node.select_one('time[datetime]')
    if headline is None or time_elem is None:
        return None

    # Only the part of the headline before the first "|" is used as the name.
    raw_name = headline.text.strip().split('|')[0].strip()

    event_json = {
        "@context": "https://schema.org",
        "@type": "Event",
        "name": f"Hafensommer: {raw_name}",
        "startDate": time_elem['datetime'],
        "performer": {
            "@type": "Person",
            "name": raw_name,
        },
        "location": {
            "@type": "PostalAddress",
            "name": "Freitreppe Alter Hafen",
            "streetAddress": "Oskar-Laredo-Platz 1",
            "postalCode": "97080",
            "addressLocality": "Würzburg",
        },
        "superEvent": {
            "@type": "Event",
            "name": "Hafensommer Würzburg 2025",
        },
    }

    # .get() instead of ['src']: an <img> without a src attribute must not
    # abort the whole run.
    img_elem = event_node.select_one('img.c-list-item-event__image')
    image_src = img_elem.get('src') if img_elem is not None else None
    if image_src:
        # urljoin leaves absolute URLs untouched and resolves relative ones.
        event_json["image"] = urljoin(BASE_URL, image_src)

    price_elem = event_node.select_one('.c-list-item-event__event-min-price span')
    price = parse_price(price_elem.text) if price_elem is not None else None

    # An offer is only emitted when both a link and a price are available.
    offer_href = event_node.get('href')
    if offer_href and price is not None:
        event_json["offers"] = {
            "@type": "Offer",
            "url": urljoin(BASE_URL, offer_href),
            "price": price,
            "priceCurrency": "EUR",
        }

    return event_json


def main():
    """Read the snapshot and write one JSON file per listed event."""
    with open(SNAPSHOT_PATH, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    for list_item in soup.select('.w-paged-listing__list-item'):
        event_node = list_item.select_one('.c-list-item-event')
        if event_node is None:
            continue

        # The data-sync-id attribute names the output file; skip entries
        # that do not carry one.
        sync_id = event_node.get('data-sync-id')
        if not sync_id:
            continue

        event_json = _build_event(event_node)
        if event_json is None:
            continue

        filename = f"hafensommer-{sync_id}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(event_json, f, ensure_ascii=False, indent=2)
        print(f"Saved {filename}")


if __name__ == "__main__":
    main()