Initial Commit
This commit is contained in:
commit
7ca161caeb
46
.gitignore
vendored
Normal file
46
.gitignore
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
.gradle
|
||||
build/
|
||||
!gradle/wrapper/gradle-wrapper.jar
|
||||
!**/src/main/**/build/
|
||||
!**/src/test/**/build/
|
||||
|
||||
### IntelliJ IDEA ###
|
||||
.idea/modules.xml
|
||||
.idea/jarRepositories.xml
|
||||
.idea/compiler.xml
|
||||
.idea/libraries/
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
out/
|
||||
!**/src/main/**/out/
|
||||
!**/src/test/**/out/
|
||||
|
||||
### Eclipse ###
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
bin/
|
||||
!**/src/main/**/bin/
|
||||
!**/src/test/**/bin/
|
||||
|
||||
### NetBeans ###
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
|
||||
### VS Code ###
|
||||
.vscode/
|
||||
|
||||
### Mac OS ###
|
||||
.DS_Store
|
||||
|
||||
|
||||
/run
|
||||
/run_
|
||||
42
build.gradle
Normal file
42
build.gradle
Normal file
@ -0,0 +1,42 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id 'application'
|
||||
id("com.gradleup.shadow") version "9.0.0-rc1"
|
||||
}
|
||||
|
||||
group = 'de.bommels05'
|
||||
version = '1.0-SNAPSHOT'
|
||||
|
||||
repositories {
|
||||
mavenCentral()
|
||||
mavenLocal()
|
||||
}
|
||||
|
||||
dependencies {
|
||||
testImplementation platform('org.junit:junit-bom:5.10.0')
|
||||
testImplementation 'org.junit.jupiter:junit-jupiter'
|
||||
|
||||
implementation("org.slf4j:slf4j-simple:2.0.12")
|
||||
implementation("io.javalin:javalin:6.7.0")
|
||||
implementation("com.google.code.gson:gson:2.10.1")
|
||||
implementation("com.github.ben-manes.caffeine:caffeine:3.2.2")
|
||||
implementation("org.jsoup:jsoup:1.21.1")
|
||||
|
||||
implementation("it.uniroma1.dis.wsngroup.gexf4j:gexf4j:1.0.0")
|
||||
|
||||
implementation("de.bommels05:DBLib:1.0-SNAPSHOT")
|
||||
}
|
||||
|
||||
application {
|
||||
mainClass = 'de.bommels05.befatorweb.BefatorWeb'
|
||||
}
|
||||
|
||||
shadowJar {
|
||||
manifest {
|
||||
attributes 'Main-Class': 'de.bommels05.befatorweb.BefatorWeb'
|
||||
}
|
||||
}
|
||||
|
||||
test {
|
||||
useJUnitPlatform()
|
||||
}
|
||||
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
Binary file not shown.
6
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
6
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
#Tue Feb 25 20:26:04 CET 2025
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.11-bin.zip
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
234
gradlew
vendored
Normal file
234
gradlew
vendored
Normal file
@ -0,0 +1,234 @@
|
||||
#!/bin/sh
|
||||
|
||||
#
|
||||
# Copyright © 2015-2021 the original authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
##############################################################################
|
||||
#
|
||||
# Gradle start up script for POSIX generated by Gradle.
|
||||
#
|
||||
# Important for running:
|
||||
#
|
||||
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
||||
# noncompliant, but you have some other compliant shell such as ksh or
|
||||
# bash, then to run this script, type that shell name before the whole
|
||||
# command line, like:
|
||||
#
|
||||
# ksh Gradle
|
||||
#
|
||||
# Busybox and similar reduced shells will NOT work, because this script
|
||||
# requires all of these POSIX shell features:
|
||||
# * functions;
|
||||
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
||||
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
||||
# * compound commands having a testable exit status, especially «case»;
|
||||
# * various built-in commands including «command», «set», and «ulimit».
|
||||
#
|
||||
# Important for patching:
|
||||
#
|
||||
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
||||
# by Bash, Ksh, etc; in particular arrays are avoided.
|
||||
#
|
||||
# The "traditional" practice of packing multiple parameters into a
|
||||
# space-separated string is a well documented source of bugs and security
|
||||
# problems, so this is (mostly) avoided, by progressively accumulating
|
||||
# options in "$@", and eventually passing that to Java.
|
||||
#
|
||||
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
||||
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
||||
# see the in-line comments for details.
|
||||
#
|
||||
# There are tweaks for specific operating systems such as AIX, CygWin,
|
||||
# Darwin, MinGW, and NonStop.
|
||||
#
|
||||
# (3) This script is generated from the Groovy template
|
||||
# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||
# within the Gradle project.
|
||||
#
|
||||
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||
#
|
||||
##############################################################################
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
|
||||
# Resolve links: $0 may be a link
|
||||
app_path=$0
|
||||
|
||||
# Need this for daisy-chained symlinks.
|
||||
while
|
||||
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
||||
[ -h "$app_path" ]
|
||||
do
|
||||
ls=$( ls -ld "$app_path" )
|
||||
link=${ls#*' -> '}
|
||||
case $link in #(
|
||||
/*) app_path=$link ;; #(
|
||||
*) app_path=$APP_HOME$link ;;
|
||||
esac
|
||||
done
|
||||
|
||||
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
|
||||
|
||||
APP_NAME="Gradle"
|
||||
APP_BASE_NAME=${0##*/}
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD=maximum
|
||||
|
||||
warn () {
|
||||
echo "$*"
|
||||
} >&2
|
||||
|
||||
die () {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
} >&2
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
nonstop=false
|
||||
case "$( uname )" in #(
|
||||
CYGWIN* ) cygwin=true ;; #(
|
||||
Darwin* ) darwin=true ;; #(
|
||||
MSYS* | MINGW* ) msys=true ;; #(
|
||||
NONSTOP* ) nonstop=true ;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD=$JAVA_HOME/jre/sh/java
|
||||
else
|
||||
JAVACMD=$JAVA_HOME/bin/java
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD=java
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||
case $MAX_FD in #(
|
||||
max*)
|
||||
MAX_FD=$( ulimit -H -n ) ||
|
||||
warn "Could not query maximum file descriptor limit"
|
||||
esac
|
||||
case $MAX_FD in #(
|
||||
'' | soft) :;; #(
|
||||
*)
|
||||
ulimit -n "$MAX_FD" ||
|
||||
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||
esac
|
||||
fi
|
||||
|
||||
# Collect all arguments for the java command, stacking in reverse order:
|
||||
# * args from the command line
|
||||
# * the main class name
|
||||
# * -classpath
|
||||
# * -D...appname settings
|
||||
# * --module-path (only if needed)
|
||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
||||
|
||||
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||
if "$cygwin" || "$msys" ; then
|
||||
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
||||
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
||||
|
||||
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
||||
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
for arg do
|
||||
if
|
||||
case $arg in #(
|
||||
-*) false ;; # don't mess with options #(
|
||||
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
||||
[ -e "$t" ] ;; #(
|
||||
*) false ;;
|
||||
esac
|
||||
then
|
||||
arg=$( cygpath --path --ignore --mixed "$arg" )
|
||||
fi
|
||||
# Roll the args list around exactly as many times as the number of
|
||||
# args, so each arg winds up back in the position where it started, but
|
||||
# possibly modified.
|
||||
#
|
||||
# NB: a `for` loop captures its iteration list before it begins, so
|
||||
# changing the positional parameters here affects neither the number of
|
||||
# iterations, nor the values presented in `arg`.
|
||||
shift # remove old arg
|
||||
set -- "$@" "$arg" # push replacement arg
|
||||
done
|
||||
fi
|
||||
|
||||
# Collect all arguments for the java command;
|
||||
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
|
||||
# shell script including quotes and variable substitutions, so put them in
|
||||
# double quotes to make sure that they get re-expanded; and
|
||||
# * put everything else in single quotes, so that it's not re-expanded.
|
||||
|
||||
set -- \
|
||||
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||
-classpath "$CLASSPATH" \
|
||||
org.gradle.wrapper.GradleWrapperMain \
|
||||
"$@"
|
||||
|
||||
# Use "xargs" to parse quoted args.
|
||||
#
|
||||
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||
#
|
||||
# In Bash we could simply go:
|
||||
#
|
||||
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
||||
# set -- "${ARGS[@]}" "$@"
|
||||
#
|
||||
# but POSIX shell has neither arrays nor command substitution, so instead we
|
||||
# post-process each arg (as a line of input to sed) to backslash-escape any
|
||||
# character that might be a shell metacharacter, then use eval to reverse
|
||||
# that process (while maintaining the separation between arguments), and wrap
|
||||
# the whole thing up as a single "set" statement.
|
||||
#
|
||||
# This will of course break if any of these variables contains a newline or
|
||||
# an unmatched quote.
|
||||
#
|
||||
|
||||
eval "set -- $(
|
||||
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
||||
xargs -n1 |
|
||||
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
||||
tr '\n' ' '
|
||||
)" '"$@"'
|
||||
|
||||
exec "$JAVACMD" "$@"
|
||||
89
gradlew.bat
vendored
Normal file
89
gradlew.bat
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
@rem
|
||||
@rem Copyright 2015 the original author or authors.
|
||||
@rem
|
||||
@rem Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@rem you may not use this file except in compliance with the License.
|
||||
@rem You may obtain a copy of the License at
|
||||
@rem
|
||||
@rem https://www.apache.org/licenses/LICENSE-2.0
|
||||
@rem
|
||||
@rem Unless required by applicable law or agreed to in writing, software
|
||||
@rem distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@rem See the License for the specific language governing permissions and
|
||||
@rem limitations under the License.
|
||||
@rem
|
||||
|
||||
@if "%DEBUG%" == "" @echo off
|
||||
@rem ##########################################################################
|
||||
@rem
|
||||
@rem Gradle startup script for Windows
|
||||
@rem
|
||||
@rem ##########################################################################
|
||||
|
||||
@rem Set local scope for the variables with windows NT shell
|
||||
if "%OS%"=="Windows_NT" setlocal
|
||||
|
||||
set DIRNAME=%~dp0
|
||||
if "%DIRNAME%" == "" set DIRNAME=.
|
||||
set APP_BASE_NAME=%~n0
|
||||
set APP_HOME=%DIRNAME%
|
||||
|
||||
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
|
||||
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
|
||||
|
||||
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
|
||||
|
||||
@rem Find java.exe
|
||||
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||
|
||||
set JAVA_EXE=java.exe
|
||||
%JAVA_EXE% -version >NUL 2>&1
|
||||
if "%ERRORLEVEL%" == "0" goto execute
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:findJavaFromJavaHome
|
||||
set JAVA_HOME=%JAVA_HOME:"=%
|
||||
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||
|
||||
if exist "%JAVA_EXE%" goto execute
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
if "%ERRORLEVEL%"=="0" goto mainEnd
|
||||
|
||||
:fail
|
||||
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||
rem the _cmd.exe /c_ return code!
|
||||
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
||||
exit /b 1
|
||||
|
||||
:mainEnd
|
||||
if "%OS%"=="Windows_NT" endlocal
|
||||
|
||||
:omega
|
||||
2
settings.gradle
Normal file
2
settings.gradle
Normal file
@ -0,0 +1,2 @@
|
||||
rootProject.name = 'BefatorWeb'
|
||||
|
||||
220
src/main/java/de/bommels05/befatorweb/BefatorRewriter.java
Normal file
220
src/main/java/de/bommels05/befatorweb/BefatorRewriter.java
Normal file
@ -0,0 +1,220 @@
|
||||
package de.bommels05.befatorweb;
|
||||
|
||||
import de.bommels05.befatorweb.links.LinkDatabase;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Attribute;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class BefatorRewriter {
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(BefatorRewriter.class);
|
||||
private final Consumer<String> pageRequester;
|
||||
|
||||
/**
 * Creates a rewriter.
 *
 * @param pageRequester callback accepting a page URL; this constructor only stores it
 */
public BefatorRewriter(Consumer<String> pageRequester) {
    this.pageRequester = pageRequester;
}
|
||||
|
||||
public byte[] rewrite(byte[] input, String url, SavedPaged.ContentType contentType) {
|
||||
String inputString = new String(input, StandardCharsets.UTF_8);
|
||||
if (contentType == SavedPaged.ContentType.HTML) {
|
||||
LOGGER.info("Starting Rewrite...");
|
||||
Document html = Jsoup.parse(inputString);
|
||||
boolean foundIcon = false;
|
||||
|
||||
Elements dataLinks = html.getElementsByTag("link");
|
||||
for (Element link : dataLinks) {
|
||||
Attribute href = link.attribute("href");
|
||||
if (href != null) {
|
||||
Attribute rel = link.attribute("rel");
|
||||
if (rel != null) {
|
||||
String newLink = rewriteLink(href.getValue());
|
||||
if (rel.getValue().equalsIgnoreCase("stylesheet")) {
|
||||
LOGGER.info("Rewriting Stylesheet Link: {} -> {}", href.getValue(), newLink);
|
||||
href.setValue(newLink);
|
||||
} else if (rel.getValue().equals("icon")) {
|
||||
foundIcon = true;
|
||||
LOGGER.info("Rewriting Favicon Link: {} -> {}", href.getValue(), newLink);
|
||||
href.setValue(newLink);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Elements metas = html.getElementsByTag("meta");
|
||||
for (Element meta : metas) {
|
||||
Attribute content = meta.attribute("content");
|
||||
if (content != null) {
|
||||
Attribute type = meta.attribute("http-equiv");
|
||||
if (type != null) {
|
||||
if (type.getValue().equalsIgnoreCase("refresh")) {
|
||||
String link = content.getValue().substring(content.getValue().indexOf("=") + 1);
|
||||
String newLink = rewriteLink(link);
|
||||
LOGGER.info("Rewriting <meta> refresh Link: {} -> {}", link, newLink);
|
||||
content.setValue(content.getValue().replaceFirst(link, newLink));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Elements bases = html.getElementsByTag("base");
|
||||
for (Element base : bases) {
|
||||
Attribute href = base.attribute("href");
|
||||
if (href != null) {
|
||||
String newLink = rewriteLink(href.getValue());
|
||||
LOGGER.info("Rewriting <base> Link: {} -> {}", href.getValue(), newLink);
|
||||
href.setValue(newLink);
|
||||
}
|
||||
}
|
||||
|
||||
Elements scripts = html.getElementsByTag("script");
|
||||
for (Element script : scripts) {
|
||||
Attribute src = script.attribute("src");
|
||||
if (src != null) {
|
||||
String newLink = rewriteLink(src.getValue());
|
||||
LOGGER.info("Rewriting <script> Link: {} -> {}", src.getValue(), newLink);
|
||||
src.setValue(newLink);
|
||||
}
|
||||
}
|
||||
|
||||
Elements images = html.getElementsByTag("img");
|
||||
for (Element img : images) {
|
||||
Attribute src = img.attribute("src");
|
||||
if (src != null) {
|
||||
String newLink = rewriteLink(src.getValue());
|
||||
LOGGER.info("Rewriting <img> Link: {} -> {}", src.getValue(), newLink);
|
||||
src.setValue(newLink);
|
||||
}
|
||||
}
|
||||
|
||||
Elements links = html.getElementsByTag("a");
|
||||
for (Element link : links) {
|
||||
Attribute href = link.attribute("href");
|
||||
if (href != null) {
|
||||
String newLink = rewriteLink(href.getValue());
|
||||
LOGGER.info("Rewriting <a> Link: {} -> {}", href.getValue(), newLink);
|
||||
href.setValue(newLink);
|
||||
LinkDatabase.addLink(url, newLink);
|
||||
}
|
||||
}
|
||||
|
||||
Elements iframes = html.getElementsByTag("iframe");
|
||||
for (Element iframe : iframes) {
|
||||
Attribute src = iframe.attribute("src");
|
||||
if (src != null) {
|
||||
String newLink = rewriteLink(src.getValue());
|
||||
LOGGER.info("Rewriting <iframe> Link: {} -> {}", src.getValue(), newLink);
|
||||
src.setValue(newLink);
|
||||
}
|
||||
}
|
||||
|
||||
Elements styles = html.getElementsByTag("style");
|
||||
for (Element style : styles) {
|
||||
style.html(new String(rewrite(style.html().getBytes(StandardCharsets.UTF_8), url, SavedPaged.ContentType.CSS), StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
if (!foundIcon) {
|
||||
html.head().append("<link rel=\"icon\" href=\"/proxy/" + (url.contains("/") ? url.substring(0, url.indexOf("/")) : url) + "/favicon.ico\">");
|
||||
}
|
||||
input = html.toString().getBytes(StandardCharsets.UTF_8);
|
||||
} else if (contentType == SavedPaged.ContentType.CSS) {
|
||||
Matcher matcher = Pattern.compile("url\\([\"']?(.*?)[\"']?\\)").matcher(inputString);
|
||||
while (matcher.find()) {
|
||||
String newLink = rewriteLink(matcher.group(1));
|
||||
LOGGER.info("Rewriting CSS url() Link: {} -> {}", matcher.group(1), newLink);
|
||||
inputString = inputString.replace(matcher.group(), "url(" + newLink + ")");
|
||||
}
|
||||
|
||||
matcher = Pattern.compile("@import \"(.*?)\"").matcher(inputString);
|
||||
while (matcher.find()) {
|
||||
String newLink = rewriteLink(matcher.group(1));
|
||||
LOGGER.info("Rewriting CSS @import Link: {} -> {}", matcher.group(1), newLink);
|
||||
inputString = inputString.replace(matcher.group(), "@import \"" + newLink + "\"");
|
||||
}
|
||||
input = inputString.getBytes(StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
if (contentType == SavedPaged.ContentType.HTML || contentType == SavedPaged.ContentType.CSS || contentType == SavedPaged.ContentType.JS) {
|
||||
inputString = new String(input, StandardCharsets.UTF_8);
|
||||
while (inputString.contains("\"https://web.archive.org/web/")) {
|
||||
int i = inputString.indexOf("\"https://web.archive.org/web/");
|
||||
String link = inputString.substring(i + 1, inputString.indexOf("\"", i + 1));
|
||||
String newLink = rewriteLink(link);
|
||||
LOGGER.info("Rewriting undetected Link: {} -> {}", link, newLink);
|
||||
inputString = inputString.replace("\"" + link + "\"", newLink);
|
||||
}
|
||||
}
|
||||
|
||||
return input;
|
||||
}
|
||||
|
||||
public static String rewriteLink(String input) {
|
||||
if (input.startsWith("https://web-static.archive.org")) {
|
||||
return input/*.replace("https://web-static.archive.org", "/invalid")*/;
|
||||
}
|
||||
if (input.startsWith("https://web.archive.org")) {
|
||||
return rewriteLink(input.replace("https://web.archive.org", ""));
|
||||
}
|
||||
|
||||
if (input.startsWith("/web/")) {
|
||||
String s = input.replaceFirst("/web/", "");
|
||||
String original = s.substring(s.indexOf("/") + 1);
|
||||
|
||||
String newLink = rewriteLink(original);
|
||||
if (!newLink.startsWith("/proxy/")) {
|
||||
newLink = "/proxy/" + newLink;
|
||||
}
|
||||
return newLink;
|
||||
}
|
||||
|
||||
if (input.startsWith("http://")) {
|
||||
return input.replaceFirst("http://", "/proxy/");
|
||||
}
|
||||
if (input.startsWith("https://")) {
|
||||
return input.replaceFirst("https://", "/proxy/");
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
public static boolean sameTargets(String link1, String link2) {
|
||||
if (link1.startsWith("https://web.archive.org/web/") && link2.startsWith("https://web.archive.org/web/")) {
|
||||
link1 = link1.replaceFirst("https://web.archive.org/web/", "");
|
||||
link1 = link1.substring(link1.indexOf("/") + 1);
|
||||
link2 = link2.replaceFirst("https://web.archive.org/web/", "");
|
||||
link2 = link2.substring(link2.indexOf("/") + 1);
|
||||
|
||||
if (link1.startsWith("http://")) {
|
||||
link1 = link1.replaceFirst("http://", "");
|
||||
}
|
||||
if (link1.startsWith("https://")) {
|
||||
link1 = link1.replaceFirst("https://", "");
|
||||
}
|
||||
if (link2.startsWith("http://")) {
|
||||
link2 = link2.replaceFirst("http://", "");
|
||||
}
|
||||
if (link2.startsWith("https://")) {
|
||||
link2 = link2.replaceFirst("https://", "");
|
||||
}
|
||||
|
||||
return link1/*.replaceAll("/", "")*/.equals(link2/*.replaceAll("/", "")*/);
|
||||
}
|
||||
return link1.equals(link2);
|
||||
}
|
||||
|
||||
public static String stripProtocol(String url) {
|
||||
if (url.startsWith("https://")) {
|
||||
return url.replaceFirst("https://", "");
|
||||
} else if (url.startsWith("http://")) {
|
||||
return url.replaceFirst("http://", "");
|
||||
}
|
||||
return url;
|
||||
}
|
||||
|
||||
}
|
||||
188
src/main/java/de/bommels05/befatorweb/BefatorWeb.java
Normal file
188
src/main/java/de/bommels05/befatorweb/BefatorWeb.java
Normal file
@ -0,0 +1,188 @@
|
||||
package de.bommels05.befatorweb;
|
||||
|
||||
import com.github.benmanes.caffeine.cache.Caffeine;
|
||||
import com.github.benmanes.caffeine.cache.LoadingCache;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.JsonArray;
|
||||
import com.google.gson.JsonElement;
|
||||
import com.google.gson.JsonObject;
|
||||
import de.bommels05.befatorweb.links.LinkDatabase;
|
||||
import io.javalin.Javalin;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class BefatorWeb {
|
||||
public static final int TARGET_YEAR = 2004;
|
||||
public static final int TARGET_MONTH = Calendar.OCTOBER;
|
||||
public static final int REQUEST_DELAY = 5000;
|
||||
public static final String TARGET_URL = "www.microsoft.com/"/*"www.mozilla.org/releases/mozilla1.0/"*/;
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(BefatorWeb.class);
|
||||
private static final LoadingCache<String, String> urlCache = Caffeine.newBuilder().build(BefatorWeb::getClosestArchiveUrl);
|
||||
private static final ReentrantLock requestLock = new ReentrantLock();
|
||||
|
||||
public static void main(String[] args) {
|
||||
Javalin app = Javalin.create().start(3636);
|
||||
|
||||
app.get("/", ctx -> {
|
||||
ctx.result("Befator Inc grüßt Sie!");
|
||||
});
|
||||
|
||||
app.get("/api/progressMessage", ctx -> {
|
||||
String target = ctx.queryParam("targetPage") == null ? TARGET_URL : ctx.queryParam("targetPage");
|
||||
String url = ctx.queryParam("currentPage");
|
||||
if (url == null) {
|
||||
ctx.result("Dein aktuelles Ziel ist: " + target);
|
||||
} else {
|
||||
int distance = LinkDatabase.getDistance(url, target);
|
||||
if (distance == -1) {
|
||||
ctx.result("Es ist (noch) kein sicherer Weg zu deinem Ziel (" + target + ") bekannt");
|
||||
} else {
|
||||
ctx.result("Du bist " + distance + " Links von deinem Ziel (" + target + ") entfernt");
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/*app.before(ctx -> {
|
||||
String path = ctx.path();
|
||||
|
||||
if (!path.replaceFirst("/proxy/", "").contains("/")/* && !path.matches(".*\\.[a-zA-Z0-9]+$")*//*) {
|
||||
String query = ctx.queryString() != null ? "?" + ctx.queryString() : "";
|
||||
ctx.redirect(path + "/" + query);
|
||||
}
|
||||
});*/
|
||||
|
||||
app.get("/proxy/*", ctx -> {
|
||||
String url = BefatorRewriter.stripProtocol(ctx.path().replaceFirst("/proxy/", ""));
|
||||
/*if (url.endsWith("/")) {
|
||||
url = url.substring(0, url.length() - 1);
|
||||
}*/
|
||||
url = url + (ctx.queryString() != null ? "?" + ctx.queryString() : "");
|
||||
|
||||
if (Pattern.compile("^[^\\/.]+(?:\\/|$)").matcher(url).find()) {
|
||||
LOGGER.warn("Tried to request invalid Page {}", url);
|
||||
ctx.status(404);
|
||||
return;
|
||||
}
|
||||
|
||||
LOGGER.info("Requesting Page {}", url);
|
||||
|
||||
if (!SiteCache.containsPage(url)) {
|
||||
try {
|
||||
requestLock.lock();
|
||||
|
||||
String archiveUrl = urlCache.get(url);
|
||||
if (archiveUrl == null) {
|
||||
ctx.result("Page not found!");
|
||||
ctx.status(404);
|
||||
return;
|
||||
} else {
|
||||
PageDownloadData content = downloadPage(archiveUrl);
|
||||
SavedPaged.ContentType contentType = content.contentType();
|
||||
LOGGER.info("Downloaded page {} from {} with type {}", url, archiveUrl, contentType);
|
||||
SiteCache.addPage(url, content.content(), contentType, content.status());
|
||||
}
|
||||
} finally {
|
||||
requestLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
SavedPaged page = SiteCache.getPage(url);
|
||||
|
||||
if (page.status() == SavedPaged.StatusCode.REDIRECT) {
|
||||
ctx.redirect(BefatorRewriter.rewriteLink(new String(page.getContent(), StandardCharsets.UTF_8)));
|
||||
return;
|
||||
}
|
||||
|
||||
ctx.result(page.getContent());
|
||||
ctx.res().setCharacterEncoding(StandardCharsets.UTF_8.name());
|
||||
ctx.contentType(page.type().toString());
|
||||
});
|
||||
}
|
||||
|
||||
private static PageDownloadData downloadPage(String url) {
|
||||
try {
|
||||
//Thread.sleep(REQUEST_DELAY);
|
||||
URLConnection connection = new URL(url).openConnection();
|
||||
connection.connect();
|
||||
connection.getInputStream();
|
||||
|
||||
String finalLocation = connection.getURL().toString();
|
||||
if (!BefatorRewriter.sameTargets(finalLocation, url)) {
|
||||
LinkDatabase.addLink(BefatorRewriter.rewriteLink(url), BefatorRewriter.rewriteLink(finalLocation));
|
||||
return new PageDownloadData(finalLocation.getBytes(StandardCharsets.UTF_8), SavedPaged.ContentType.REDIRECT, SavedPaged.StatusCode.REDIRECT);
|
||||
} else {
|
||||
return new PageDownloadData(connection.getInputStream().readAllBytes(), SavedPaged.ContentType.fromString(connection.getContentType()), SavedPaged.StatusCode.OK);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static JsonObject getJsonContent(String url) {
|
||||
try {
|
||||
URLConnection connection = new URL(url).openConnection();
|
||||
connection.connect();
|
||||
return new Gson().fromJson(new String(connection.getInputStream().readAllBytes(), StandardCharsets.UTF_8), JsonObject.class);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static String toTwoDigitString(int i) {
|
||||
return i < 10 ? "0" + i : String.valueOf(i);
|
||||
}
|
||||
|
||||
|
||||
private static String getClosestArchiveUrl(String url) {
|
||||
String archiveUrl = getClosestArchiveUrl(url, TARGET_YEAR, TARGET_MONTH);
|
||||
if (archiveUrl == null) {
|
||||
return getClosestArchiveUrl(url, TARGET_YEAR + 1, 1);
|
||||
}
|
||||
return archiveUrl;
|
||||
}
|
||||
|
||||
private static String getClosestArchiveUrl(String url, int targetYear, int targetMonth) {
|
||||
String encoded = URLEncoder.encode(url, StandardCharsets.UTF_8);
|
||||
JsonObject yearInfo = getJsonContent("https://web.archive.org/__wb/calendarcaptures/2?url=" + encoded + "&date=" + targetYear + "&groupby=day");
|
||||
if (yearInfo.has("items")) {
|
||||
JsonArray days = yearInfo.get("items").getAsJsonArray();
|
||||
if (!days.isEmpty()) {
|
||||
Calendar calendar = Calendar.getInstance();
|
||||
calendar.setTimeInMillis(0);
|
||||
List<Long> times = new ArrayList<>();
|
||||
for (JsonElement day : days) {
|
||||
char[] chars = day.getAsJsonArray().get(0).getAsString().toCharArray();
|
||||
int dayOfMonth = Integer.parseInt(String.valueOf(chars[chars.length - 2]) + chars[chars.length - 1]);
|
||||
int month = Integer.parseInt(chars.length == 3 ? String.valueOf(chars[0]) : String.valueOf(chars[0]) + chars[1]);
|
||||
calendar.set(targetYear, month - 1, dayOfMonth);
|
||||
times.add(calendar.getTimeInMillis());
|
||||
}
|
||||
|
||||
calendar.set(targetYear, targetMonth, 15);
|
||||
Long closest = times.stream().map(millis -> millis - calendar.getTimeInMillis()).map(millis -> millis < 0 ? -millis : millis).sorted().findFirst().orElseThrow();
|
||||
calendar.setTimeInMillis(calendar.getTimeInMillis() + (times.contains(closest + calendar.getTimeInMillis()) ? closest : -closest));
|
||||
|
||||
JsonObject dayInfo = getJsonContent("https://web.archive.org/__wb/calendarcaptures/2?url=" + encoded + "&date=" + targetYear + toTwoDigitString(calendar.get(Calendar.MONTH) + 1) + toTwoDigitString(calendar.get(Calendar.DAY_OF_MONTH)));
|
||||
String timeOfDay = dayInfo.get("items").getAsJsonArray().get(0).getAsJsonArray().get(0).getAsString();
|
||||
timeOfDay = timeOfDay.length() == 5 ? "0" + timeOfDay : timeOfDay;
|
||||
return "https://web.archive.org/web/" + targetYear + toTwoDigitString(calendar.get(Calendar.MONTH) + 1) + toTwoDigitString(calendar.get(Calendar.DAY_OF_MONTH)) + timeOfDay + "/" + url;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Groups the raw bytes of a page with its content type and status code. */
private record PageDownloadData(byte[] content, SavedPaged.ContentType contentType, SavedPaged.StatusCode status) {}
|
||||
|
||||
}
|
||||
79
src/main/java/de/bommels05/befatorweb/SavedPaged.java
Normal file
79
src/main/java/de/bommels05/befatorweb/SavedPaged.java
Normal file
@ -0,0 +1,79 @@
|
||||
package de.bommels05.befatorweb;
|
||||
|
||||
public record SavedPaged(String url, ContentType type, StatusCode status) {
|
||||
|
||||
public byte[] getContent() {
|
||||
return SiteCache.getPageContent(url);
|
||||
}
|
||||
|
||||
/**
 * Content types a saved page can have, each with its primary MIME type and optional aliases.
 */
public enum ContentType {
    HTML("text/html"),
    CSS("text/css"),
    JS("application/x-javascript", "text/javascript"),

    GIF("image/gif"),
    JPEG("image/jpeg"),
    PNG("image/png"),
    ICO("image/x-icon"),

    BINARY("application/octet-stream"),
    PLAIN("text/plain"),

    REDIRECT("befator/redirect");

    private final String value;
    private final String[] aliases;

    ContentType(String value, String... aliases) {
        this.value = value;
        this.aliases = aliases;
    }

    /**
     * Maps a {@code Content-Type} header value to the matching constant.
     *
     * <p>The header only has to start with a known MIME type, so parameters such as
     * {@code "; charset=UTF-8"} are ignored. Matching ignores case and surrounding
     * whitespace because MIME type names are case-insensitive.
     *
     * @param contentType the header value, may be {@code null}
     * @return the matching type, or {@link #PLAIN} when {@code contentType} is {@code null}
     * @throws IllegalArgumentException if no constant matches
     */
    public static ContentType fromString(String contentType) {
        if (contentType == null) {
            return PLAIN;
        }

        String header = contentType.strip();
        for (ContentType type : values()) {
            if (startsWithIgnoreCase(header, type.value)) {
                return type;
            }
            for (String alias : type.aliases) {
                if (startsWithIgnoreCase(header, alias)) {
                    return type;
                }
            }
        }
        throw new IllegalArgumentException("Invalid content type: " + contentType);
    }

    /** Case-insensitive variant of {@link String#startsWith(String)}. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /** Returns the primary MIME type of this constant. */
    @Override
    public String toString() {
        return value;
    }
}
|
||||
|
||||
/**
 * HTTP status codes a saved page can carry.
 */
public enum StatusCode {
    OK(200),
    REDIRECT(301);

    private final int value;

    StatusCode(int value) {
        this.value = value;
    }

    /**
     * Finds the constant for a numeric status code.
     *
     * @param status the numeric status code
     * @return the constant whose code equals {@code status}
     * @throws IllegalArgumentException if no constant has that code
     */
    public static StatusCode fromInt(int status) {
        StatusCode[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            StatusCode candidate = candidates[i];
            if (candidate.value == status) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Invalid status code: " + status);
    }

    /** Returns the numeric status code of this constant. */
    public int toInt() {
        return this.value;
    }
}
|
||||
}
|
||||
110
src/main/java/de/bommels05/befatorweb/SiteCache.java
Normal file
110
src/main/java/de/bommels05/befatorweb/SiteCache.java
Normal file
@ -0,0 +1,110 @@
|
||||
package de.bommels05.befatorweb;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.JsonArray;
|
||||
import com.google.gson.JsonElement;
|
||||
import com.google.gson.JsonObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class SiteCache {
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(SiteCache.class);
|
||||
private static final List<SavedPaged> pages = new ArrayList<>();
|
||||
private static final BefatorRewriter rewriter = new BefatorRewriter(url -> {});
|
||||
static {
|
||||
try {
|
||||
File cache = new File("cache");
|
||||
if (!cache.exists()) {
|
||||
Files.createDirectory(cache.toPath());
|
||||
}
|
||||
File cacheIndex = new File("cache_index.json");
|
||||
if (cacheIndex.exists()) {
|
||||
loadIndex();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static SavedPaged getPage(String url) {
|
||||
for (SavedPaged page : pages) {
|
||||
if (page.url().equals(url)) {
|
||||
return page;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static boolean containsPage(String url) {
|
||||
return getPage(url) != null;
|
||||
}
|
||||
|
||||
public static byte[] getPageContent(String url) {
|
||||
return readFile("cache/" + pages.indexOf(getPage(url)));
|
||||
}
|
||||
|
||||
public static void addPage(String url, byte[] content, SavedPaged.ContentType contentType, SavedPaged.StatusCode status) {
|
||||
if (!containsPage(url)) {
|
||||
pages.add(new SavedPaged(url, contentType, status));
|
||||
overwriteFile("cache/" + pages.indexOf(getPage(url)), rewriter.rewrite(content, url, contentType));
|
||||
saveIndex();
|
||||
} else {
|
||||
throw new IllegalArgumentException("Page " + url + " is already cached");
|
||||
}
|
||||
}
|
||||
|
||||
private static byte[] readFile(String fileName) {
|
||||
try {
|
||||
FileInputStream stream = new FileInputStream(fileName);
|
||||
byte[] content = stream.readAllBytes();
|
||||
stream.close();
|
||||
return content;
|
||||
} catch (FileNotFoundException e) {
|
||||
LOGGER.error("Cache seems to be invalid - Resetting", e);
|
||||
pages.clear();
|
||||
throw new RuntimeException(e);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static void overwriteFile(String fileName, byte[] content) {
|
||||
try {
|
||||
File file = new File(fileName);
|
||||
Files.deleteIfExists(file.toPath());
|
||||
|
||||
FileOutputStream stream = new FileOutputStream(file);
|
||||
stream.write(content);
|
||||
stream.close();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static void saveIndex() {
|
||||
Gson gson = new Gson();
|
||||
JsonObject index = new JsonObject();
|
||||
JsonArray array = new JsonArray();
|
||||
for (SavedPaged page : pages) {
|
||||
array.add(gson.toJsonTree(page));
|
||||
}
|
||||
index.add("pages", array);
|
||||
overwriteFile("cache_index.json", gson.toJson(index).getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
private static void loadIndex() {
|
||||
Gson gson = new Gson();
|
||||
JsonObject index = gson.fromJson(new String(readFile("cache_index.json"), StandardCharsets.UTF_8), JsonObject.class);
|
||||
JsonArray array = index.get("pages").getAsJsonArray();
|
||||
for (JsonElement page : array) {
|
||||
pages.add(gson.fromJson(page, SavedPaged.class));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,68 @@
|
||||
package de.bommels05.befatorweb.links;
|
||||
|
||||
import it.uniroma1.dis.wsngroup.gexf4j.core.Gexf;
|
||||
import it.uniroma1.dis.wsngroup.gexf4j.core.Graph;
|
||||
import it.uniroma1.dis.wsngroup.gexf4j.core.Mode;
|
||||
import it.uniroma1.dis.wsngroup.gexf4j.core.Node;
|
||||
import it.uniroma1.dis.wsngroup.gexf4j.core.impl.GexfImpl;
|
||||
import it.uniroma1.dis.wsngroup.gexf4j.core.impl.StaxGraphWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class GexfExporter {
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
Gexf gexf = new GexfImpl();
|
||||
gexf.setVisualization(true);
|
||||
|
||||
Graph graph = gexf.getGraph();
|
||||
graph.setMode(Mode.DYNAMIC);
|
||||
|
||||
List<String> pages = new ArrayList<>();
|
||||
for (LinkDBEntry linkDBEntry : LinkDatabase.LINKS_TABLE.getAll()) {
|
||||
String source = linkDBEntry.getSource();
|
||||
String destination = linkDBEntry.getDestination();
|
||||
if (!pages.contains(source)) {
|
||||
pages.add(source);
|
||||
}
|
||||
if (!pages.contains(destination)) {
|
||||
pages.add(destination);
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, Node> nodes = new HashMap<>();
|
||||
for (String page : pages) {
|
||||
//if (!LinkDatabase.getLinks(page).toList().isEmpty()) {
|
||||
Node node = graph.createNode(String.valueOf(pages.indexOf(page)));
|
||||
node.setLabel(page);
|
||||
node.setSize(LinkDatabase.LINKS_TABLE.getAll().stream().filter(link -> link.getDestination().equals(page)).toList().size());
|
||||
nodes.put(page, node);
|
||||
//}
|
||||
}
|
||||
|
||||
for (String page : nodes.keySet()) {
|
||||
Node node = nodes.get(page);
|
||||
LinkDatabase.getLinks(page).forEach(s -> {
|
||||
Node target = nodes.get(s);
|
||||
if (target != null) {
|
||||
node.connectTo(target);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
StaxGraphWriter graphWriter = new StaxGraphWriter();
|
||||
File file = new File("links.gexf");
|
||||
FileOutputStream out = new FileOutputStream(file);
|
||||
graphWriter.writeToStream(gexf, out, "UTF-8");
|
||||
out.close();
|
||||
}
|
||||
|
||||
}
|
||||
34
src/main/java/de/bommels05/befatorweb/links/LinkDBEntry.java
Normal file
34
src/main/java/de/bommels05/befatorweb/links/LinkDBEntry.java
Normal file
@ -0,0 +1,34 @@
|
||||
package de.bommels05.befatorweb.links;
|
||||
|
||||
import de.bommels05.dblib.core.DBEntry;
|
||||
import de.bommels05.dblib.core.DBEntryField;
|
||||
import de.bommels05.dblib.core.QueryHolder;
|
||||
import de.bommels05.dblib.core.Table;
|
||||
|
||||
public class LinkDBEntry extends DBEntry<LinkDBEntry, Integer> {
|
||||
@DBEntryField(name = "source")
|
||||
private String source;
|
||||
@DBEntryField(name = "destination")
|
||||
private String destination;
|
||||
|
||||
protected LinkDBEntry(Table<LinkDBEntry, Integer> table, QueryHolder queryHolder) {
|
||||
super(table, queryHolder);
|
||||
finalize(this);
|
||||
fill();
|
||||
}
|
||||
|
||||
protected LinkDBEntry(Table<LinkDBEntry, Integer> table, String source, String destination) {
|
||||
super(table, null);
|
||||
this.source = source;
|
||||
this.destination = destination;
|
||||
finalize(this);
|
||||
}
|
||||
|
||||
public String getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public String getDestination() {
|
||||
return destination;
|
||||
}
|
||||
}
|
||||
133
src/main/java/de/bommels05/befatorweb/links/LinkDatabase.java
Normal file
133
src/main/java/de/bommels05/befatorweb/links/LinkDatabase.java
Normal file
@ -0,0 +1,133 @@
|
||||
package de.bommels05.befatorweb.links;
|
||||
|
||||
import de.bommels05.befatorweb.BefatorRewriter;
|
||||
import de.bommels05.befatorweb.links.calc.BefatorDistanceCalculator;
|
||||
import de.bommels05.befatorweb.links.calc.DistanceCalculator;
|
||||
import de.bommels05.befatorweb.links.calc.V2DistanceCalculator;
|
||||
import de.bommels05.dblib.core.DBEntry;
|
||||
import de.bommels05.dblib.core.Database;
|
||||
import de.bommels05.dblib.core.QueryHolder;
|
||||
import de.bommels05.dblib.core.Table;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class LinkDatabase {
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(LinkDatabase.class);
|
||||
private static final List<DistanceCalculator> calculators = new ArrayList<>();
|
||||
public static final Database LINKS_DB = new Database("links");
|
||||
|
||||
static {
|
||||
LINKS_DB.registerTable("links", "source String", "destination String");
|
||||
//calculators.add(new BefatorDistanceCalculator());
|
||||
calculators.add(new V2DistanceCalculator());
|
||||
}
|
||||
|
||||
public static final Table<LinkDBEntry, Integer> LINKS_TABLE = LINKS_DB.getTable("links", LinkDBEntry::new);
|
||||
|
||||
public static void addLink(String source, String destination) {
|
||||
if (destination.startsWith("#")) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (source.startsWith("/proxy/")) {
|
||||
source = source.replaceFirst("/proxy/", "");
|
||||
}
|
||||
if (destination.startsWith("/proxy/")) {
|
||||
destination = destination.replaceFirst("/proxy/", "https://");
|
||||
}
|
||||
|
||||
try {
|
||||
URI base = new URI(source);
|
||||
URI relative = new URI(destination);
|
||||
URI result = base.resolve(relative);
|
||||
|
||||
String finalDestination = result.toString();
|
||||
if (finalDestination.startsWith("https://")) {
|
||||
finalDestination = finalDestination.replaceFirst("https://", "");
|
||||
}
|
||||
|
||||
if (source.equals(finalDestination) || hasDirectLink(source, finalDestination)) {
|
||||
return;
|
||||
}
|
||||
|
||||
new LinkDBEntry(LINKS_TABLE, source, finalDestination).save();
|
||||
} catch (URISyntaxException e) {
|
||||
LOGGER.error("Invalid Link URL", e);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean hasDirectLink(String source, String destination) {
|
||||
return LINKS_TABLE.getAll().stream().filter(link -> link.getSource().equals(source)).anyMatch(link -> link.getDestination().equals(destination));
|
||||
}
|
||||
|
||||
public static int getDistance(String source, String destination) {
|
||||
if (source.startsWith("/proxy/")) {
|
||||
source = source.replaceFirst("/proxy/", "");
|
||||
}
|
||||
source = BefatorRewriter.stripProtocol(source);
|
||||
/*if (destination.endsWith("/")) {
|
||||
destination = destination.substring(0, destination.length() - 1);
|
||||
}*/
|
||||
|
||||
for (DistanceCalculator calculator : calculators) {
|
||||
/*LOGGER.info("Trying {}", calculator.getName());
|
||||
long millis = System.currentTimeMillis();
|
||||
int distance = calculator.getDistance(source, destination, LinkDatabase::getLinks);
|
||||
LOGGER.info("Distance took {}ms ({})", System.currentTimeMillis() - millis, distance);*/
|
||||
long millis = System.currentTimeMillis();
|
||||
List<String> path = calculator.getPath(source, destination, LinkDatabase::getLinks);
|
||||
StringJoiner joiner = new StringJoiner(" <- ");
|
||||
if (path != null) {
|
||||
path.forEach(joiner::add);
|
||||
} else {
|
||||
joiner.add("null");
|
||||
}
|
||||
LOGGER.info("Path took {}ms ({})", System.currentTimeMillis() - millis, joiner);
|
||||
if (path != null) {
|
||||
return path.size() - 1;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return calculators.get(0).getDistance(source, destination, LinkDatabase::getLinks);
|
||||
|
||||
/*if (USE_V2) {
|
||||
return getDistance2(source, destination);
|
||||
} else {*/
|
||||
/*List<String> path = new BefatorDistanceCalculator.getPath(source, destination, LinkDatabase::getLinks);
|
||||
if (path != null) {
|
||||
System.out.print(destination);
|
||||
for (String link : path) {
|
||||
System.out.print(" <- " + link);
|
||||
}
|
||||
System.out.println();
|
||||
return path.size();
|
||||
} else {
|
||||
return -1;
|
||||
}*/
|
||||
/*}*/
|
||||
}
|
||||
|
||||
public static Stream<String> getLinks(String source) {
|
||||
try {
|
||||
QueryHolder result = LINKS_DB.executeQuery("SELECT * FROM links WHERE source = ?", source);
|
||||
List<String> links = new ArrayList<>();
|
||||
while(result.getResultSet().next()) {
|
||||
links.add(new LinkDBEntry(LINKS_TABLE,result).getDestination());
|
||||
}
|
||||
result.close();
|
||||
return links.stream();
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
//return LINKS_TABLE.getAll().stream().filter(link -> link.getSource().equals(source)).map(LinkDBEntry::getDestination);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,60 @@
|
||||
package de.bommels05.befatorweb.links.calc;
|
||||
|
||||
import de.bommels05.befatorweb.links.LinkDBEntry;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class BefatorDistanceCalculator implements DistanceCalculator {
|
||||
@Override
|
||||
public int getDistance(String source, String destination, Function<String, Stream<String>> linkGetter) {
|
||||
List<String> path = getPath(source, destination, linkGetter);
|
||||
if (path != null) {
|
||||
return path.size() - 1;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getPath(String source, String destination, Function<String, Stream<String>> linkGetter) {
|
||||
List<String> path = getPath(source, destination, new ArrayList<>(), linkGetter);
|
||||
if (path != null) {
|
||||
path.add(0, destination);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
private static List<String> getPath(String source, String destination, List<String> blacklist, Function<String, Stream<String>> linkGetter) {
|
||||
if (source.equals(destination)) {
|
||||
return List.of();
|
||||
}
|
||||
|
||||
List<String> outgoing = linkGetter.apply(source).filter(link -> !blacklist.contains(link)).toList();
|
||||
if (outgoing.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Optional<String> finalLink = outgoing.stream().filter(link -> link.equals(destination)).findFirst();
|
||||
if (finalLink.isPresent()) {
|
||||
return List.of(source);
|
||||
} else {
|
||||
blacklist.addAll(outgoing);
|
||||
|
||||
Optional<List<String>> shortest = outgoing.stream().map(link -> getPath(link, destination, new ArrayList<>(blacklist), linkGetter)).filter(Objects::nonNull).min(Comparator.comparingInt(List::size));
|
||||
if (shortest.isPresent()) {
|
||||
ArrayList<String> path = new ArrayList<>(shortest.get());
|
||||
path.add(source);
|
||||
return path;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "Befator";
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,15 @@
|
||||
package de.bommels05.befatorweb.links.calc;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public interface DistanceCalculator {
|
||||
|
||||
public int getDistance(String source, String destination, Function<String, Stream<String>> linkGetter);
|
||||
|
||||
public List<String> getPath(String source, String destination, Function<String, Stream<String>> linkGetter);
|
||||
|
||||
public String getName();
|
||||
|
||||
}
|
||||
@ -0,0 +1,72 @@
|
||||
package de.bommels05.befatorweb.links.calc;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class V2DistanceCalculator implements DistanceCalculator {
|
||||
|
||||
@Override
|
||||
public int getDistance(String source, String destination, Function<String, Stream<String>> linkGetter) {
|
||||
Deque<String> queue = new ArrayDeque<>();
|
||||
queue.add(source);
|
||||
|
||||
Map<String, Integer> distances = new HashMap<>();
|
||||
distances.put(source, 0);
|
||||
|
||||
while (!queue.isEmpty()) {
|
||||
String target = queue.poll();
|
||||
for (String link : linkGetter.apply(target).toList()) {
|
||||
if (!distances.containsKey(link)) {
|
||||
distances.put(link, distances.get(target) + 1);
|
||||
queue.add(link);
|
||||
}
|
||||
if (link.equals(destination)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return distances.getOrDefault(destination, -1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getPath(String source, String destination, Function<String, Stream<String>> linkGetter) {
|
||||
Deque<String> queue = new ArrayDeque<>();
|
||||
queue.add(source);
|
||||
|
||||
Map<String, String> sources = new HashMap<>();
|
||||
sources.put(source, null);
|
||||
|
||||
while (!queue.isEmpty()) {
|
||||
String target = queue.poll();
|
||||
for (String link : linkGetter.apply(target).toList()) {
|
||||
if (!sources.containsKey(link)) {
|
||||
sources.put(link, target);
|
||||
queue.add(link);
|
||||
}
|
||||
if (link.equals(destination)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<String> path = new ArrayList<>();
|
||||
String current = destination;
|
||||
while (current != null) {
|
||||
path.add(current);
|
||||
current = sources.get(current);
|
||||
}
|
||||
|
||||
if (path.get(path.size() - 1).equals(source)) {
|
||||
return path;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "V2";
|
||||
}
|
||||
}
|
||||
3
src/main/resources/META-INF/MANIFEST.MF
Normal file
3
src/main/resources/META-INF/MANIFEST.MF
Normal file
@ -0,0 +1,3 @@
|
||||
Manifest-Version: 1.0
|
||||
Main-Class: de.bommels05.befatorweb.BefatorWeb
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user