From bfed2d431e7d5158c9763277aa8470c91429ed8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Raupach?= Date: Sun, 26 Jan 2025 13:52:29 +0100 Subject: [PATCH] Proof of Concept (POC) --- .gitignore | 38 ++++++++++ .idea/.gitignore | 8 +++ .idea/encodings.xml | 7 ++ .idea/misc.xml | 14 ++++ README.md | 1 + pom.xml | 29 ++++++++ src/main/java/org/wuevents/App.java | 15 ++++ src/main/java/org/wuevents/Cairo.java | 79 +++++++++++++++++++++ src/main/java/org/wuevents/Kellerperle.java | 62 ++++++++++++++++ src/schema/schema.json | 52 ++++++++++++++ 10 files changed, 305 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 .idea/encodings.xml create mode 100644 .idea/misc.xml create mode 100644 README.md create mode 100644 pom.xml create mode 100644 src/main/java/org/wuevents/App.java create mode 100644 src/main/java/org/wuevents/Cairo.java create mode 100644 src/main/java/org/wuevents/Kellerperle.java create mode 100644 src/schema/schema.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5ff6309 --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### IntelliJ IDEA ### +.idea/modules.xml +.idea/jarRepositories.xml +.idea/compiler.xml +.idea/libraries/ +*.iws +*.iml +*.ipr + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..aa00ffa --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..c5be1cd --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,14 @@ + + + + + + + + + + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5af2783 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +...TODO \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..9d8585c --- /dev/null +++ b/pom.xml @@ -0,0 +1,29 @@ + + + 4.0.0 + + org.wuevents + wuevents + 1.0-SNAPSHOT + + + 23 + 23 + UTF-8 + + + + org.jsoup + jsoup + 1.17.2 + + + com.fasterxml.jackson.core + jackson-databind + 2.18.2 + + + + \ No newline at end of file diff --git a/src/main/java/org/wuevents/App.java b/src/main/java/org/wuevents/App.java new file mode 100644 index 0000000..bef2dbc --- /dev/null +++ b/src/main/java/org/wuevents/App.java @@ -0,0 +1,15 @@ +package org.wuevents; + +import java.io.IOException; + +public class App { + + public static void main(String[] args) { + try { + //new Kellerperle().fetch(); + new Cairo().fetch(); + } catch (IOException | InterruptedException e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/main/java/org/wuevents/Cairo.java b/src/main/java/org/wuevents/Cairo.java new file mode 100644 index 0000000..73dccd1 --- /dev/null +++ b/src/main/java/org/wuevents/Cairo.java @@ -0,0 +1,79 @@ +package org.wuevents; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Element; + +import java.io.IOException; +import java.time.Instant; +import java.util.function.Consumer; +import java.util.logging.Logger; + +class Cairo { + + static String venue_name = "Cairo"; + static String venue_url = "https://cairo.wue.de"; + static String venue_address = "Fred-Joseph-Platz 3, 97082 Würzburg"; + + Logger logger = Logger.getLogger("org.wuevents"); + Connection connection; + ObjectMapper objectMapper; + + public Cairo() { + this.connection = Jsoup.connect("https://cairo.wue.de/programm"); + this.objectMapper = new ObjectMapper(); + } + + /** + * Extract, transform, load (ETL) + */ + void fetch() throws IOException, InterruptedException { + + var input = connection.get(); + var output = objectMapper.createObjectNode(); + + output.put("created", Instant.now().toString()); + + var venue = objectMapper.createObjectNode(); + venue.put("name", venue_name); + venue.put("url", venue_url); + venue.put("address", venue_address); + output.put("venue", venue); + + var events = objectMapper.createArrayNode(); + var items = input.select("h2.event-name > a"); + Consumer consumer = element -> events.add(event((element))); + items.forEach(consumer); + + output.put("events", events); + objectMapper.writeValue(System.out, output); + + logger.info("Cairo...................................................................[DONE]"); + } + + JsonNode event(Element link) { + var event = objectMapper.createObjectNode(); + try { + var document = connection.newRequest(venue_url + link.attr("href")).get(); + var title = document.title(); + var url = venue_url + link.attr("href"); + var date = document.select(".event-date > .data-date").text(); + var begin = document.select(".event-start > .data-start").text(); + var doors = document.select(".event-entrance > .data-entrance").text(); + var description = document.select(".event-info-right").remove().text(); + + event.put("title", title); + event.put("url", url); + event.put("description", description); + event.put("date", date); + event.put("doors", doors); + event.put("begin", begin); + return event; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/src/main/java/org/wuevents/Kellerperle.java b/src/main/java/org/wuevents/Kellerperle.java new file mode 100644 index 0000000..ac08cca --- /dev/null +++ b/src/main/java/org/wuevents/Kellerperle.java @@ -0,0 +1,62 @@ +package org.wuevents; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.time.Instant; +import java.util.logging.Logger; + +class Kellerperle { + + static String venue_name = "Kellerperle"; + static String venue_url = "https://www.kellerperle.de"; + static String venue_address = "Am Studentenhaus 1, 97072 Würzburg"; + + Logger logger = Logger.getLogger("org.wuevents"); + + /** + * Extract, transform, load (ETL) + */ + void fetch() throws IOException, InterruptedException { + var uri = URI.create("https://www.kellerperle.de/programm.json"); + var http = HttpClient.newHttpClient(); + var request = HttpRequest.newBuilder(uri).build(); + var response = http.send(request, HttpResponse.BodyHandlers.ofInputStream()); + + var responseInputStream = response.body(); + var parser = new ObjectMapper(); + var input = parser.readTree(responseInputStream); + var output = parser.createObjectNode(); + + output.put("created", Instant.now().toString()); + + var venue = parser.createObjectNode(); + venue.put("name", venue_name); + venue.put("url", venue_url); + venue.put("address", venue_address); + output.put("venue", venue); + + var events = parser.createArrayNode(); + for (var item : input) { + var event = parser.createObjectNode(); + event.put("title", item.get("title")); + event.put("url", "https://www.kellerperle.de/#/" + item.get("vaId").asText()); + event.put("description", item.get("text")); + event.put("date", item.get("date")); + event.put("doors", item.get("einlass")); + event.put("begin", item.get("beginn")); + event.put("price", item.get("ak")); + events.add(event); + } + output.put("events", events); + + parser.writeValue(System.out, output); + + logger.info("Kellerperle.............................................................[DONE]"); + } + +} diff --git a/src/schema/schema.json b/src/schema/schema.json new file mode 100644 index 0000000..db21018 --- /dev/null +++ b/src/schema/schema.json @@ -0,0 +1,52 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://events.wue.social/schema.json", + "title": "wuevents", + "description": "A description of wuevents.", + "type": "object", + "properties": { + "created": { + "type": "string" + }, + "venue": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "url": { + "type": "string" + }, + "address": { + "type": "string" + } + }, + "required": ["name", "url", "address"] + }, + "events": { + "type": "array", + "items": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "url": { + "type": "string" + }, + "description": { + "type": "string" + }, + "date": { + "type": "string" + }, + "doors": { + "type": "string" + } + } + } + } + }, + "required": ["created", "venue", "events"] +} +