diff --git a/docs/concepts/other.md b/docs/concepts/other.md
index 0fac3aec1..382cd20f2 100644
--- a/docs/concepts/other.md
+++ b/docs/concepts/other.md
@@ -208,6 +208,10 @@ Vector _("Embedding")_ databases of possible future interest:
## AI
+### Retrieval-Augmented Generation (RAG) and [Entity Resolution](https://github.com/GavinMendelGleason/blog/blob/main/entries/entity_resolution.md)
+
+* [TerminusDB VectorLink](https://terminusdb.com/vectorlink/), see [Overview](https://terminusdb.com/blog/terminusdb-internals/) #commercial #[opensource](https://github.com/terminusdb/terminusdb) #trial
+
### Symbolic AI Reasoning (GOFAI?)
[Semantic Reasoning](https://en.wikipedia.org/wiki/Semantic_reasoner) through [modus ponens](https://en.wikipedia.org/wiki/Modus_ponens) of an [Inference Engine](https://en.wikipedia.org/wiki/Inference_engine) by
@@ -215,6 +219,7 @@ _Forward Chaining; also see Backward Chaining, Backtracking, Backpropagation -
[TBD](https://en.wikipedia.org/wiki/Symbolic_artificial_intelligence)._
* [Mangle](https://github.com/google/mangle) #OpenSource #Google
+* [Leuchtkraft](https://github.com/Wuelle/Leuchtkraft) #OpenSource
## Platforms
diff --git a/java/dev/enola/common/io/iri/IRI.java b/java/dev/enola/common/io/iri/IRI.java
index d95c81388..d809cb6d7 100644
--- a/java/dev/enola/common/io/iri/IRI.java
+++ b/java/dev/enola/common/io/iri/IRI.java
@@ -18,7 +18,6 @@
package dev.enola.common.io.iri;
import com.google.common.base.CharMatcher;
-import com.google.common.collect.ImmutableMultimap;
import com.google.common.net.HostAndPort;
import com.google.common.net.HostSpecifier;
import com.google.common.net.InetAddresses;
@@ -45,10 +44,10 @@
*
This class strictly speaking represents an IRI Reference, not just an
* IRI; meaning that it can be either absolute with a scheme:
, or relative.
*
- *
This class is logically (but not technically, for efficiency) immutable. It has a {@link
- * Builder} to programmatically configure instances of it. You can also just construct it from a
- * String with {@link #parseUnencoded(String)}. To modify, use {@link #newBuilder()}, set what you
- * need, and {@link Builder#build()} it.
+ *
This class is thread safe, because it is logically (but not technically, for efficiency)
+ * immutable. It has a {@link Builder} to programmatically configure instances of it. You can also
+ * just construct it from a String with {@link #parseUnencoded(String)}. To modify, use {@link
+ * #newBuilder()}, set what you need, and {@link Builder#build()} it.
*
*
This class never throws any runtime exceptions for supposedly "invalid" input. It allows e.g.
* "http://example.org/~{username}" (e.g. URI Templates à la RFC 6570, or other similar syntaxes) or
@@ -81,6 +80,9 @@
* add support for that, if you need it.
*
*
This class is null-safe. Its accessor methods never return null, but empty Strings instead.
+ * This means callers cannot distinguish e.g. between "schema:" and "schema:/" and "schema:/?" and
+ * "schema:/?#"; but this is an intentional design decision, for a simpler and more convenient null
+ * friendly API.
*
*
This class never makes any network access! (Yes, looking at you, {@link
* java.net.URL#equals(Object)} - OMG!)
@@ -111,6 +113,8 @@ public final class IRI implements Comparable {
// TODO Actually fully read https://url.spec.whatwg.org first.. :=)
// TODO Research existing implementations for inspiration...
+ // - https://developer.android.com/reference/android/net/Uri, with
+ // https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/java/android/net/Uri.java
// - https://github.com/square/okhttp/issues/1486
// - https://github.com/palominolabs/url-builder
// - https://github.com/dmfs/uri-toolkit
@@ -233,67 +237,69 @@ public Builder fragment(String fragment) {
// TODO private @Nullable Multimap queryMap;
private @Nullable String fragment;
private @Nullable String string;
+ private @Nullable IRI normalized;
+ // TODO Naming? android.net.Uri calls this buildUpon() ...
public Builder newBuilder() {
var builder = new Builder();
+ builder.scheme(scheme());
+ builder.authority(authority());
+ builder.path(path());
+ builder.query(query());
+ builder.fragment(fragment());
return builder;
}
public String scheme() {
- if (scheme == null) scheme = find_scheme();
+ if (scheme == null) parse();
return scheme;
}
- private String find_scheme() {
- return null; // TODO
- }
-
public boolean hasScheme(String scheme) {
- // TODO Implement more optimized
- return scheme().equals(scheme);
+ assert scheme.contains(":");
+ return normalizeScheme(scheme()).equals(normalizeScheme(scheme));
}
public boolean isAbsolute() {
return !scheme().isBlank();
}
- // /** Scheme specific part is just everything after the : colon of the scheme. */
- /* public CharSequence schemeSpecificPart() {
- return null; // TODO
- } */
+ public boolean isRelative() {
+ return !isAbsolute();
+ }
public String authority() {
- return null; // TODO
+ if (authority == null) parse();
+ return authority;
}
public String path() {
- return null; // TODO
+ if (path == null) parse();
+ return path;
}
public String query() {
- return null; // TODO
+ if (query == null) parse();
+ return query;
}
public String fragment() {
- return null; // TODO
+ if (fragment == null) parse();
+ return fragment;
}
- // TODO Allow both & and ; as query delimiters?!
- public ImmutableMultimap queryMap() {
- return null; // TODO
- }
+ // Allows (interprets) only '&' and not ';' as query delimiter!
+ // public ImmutableMultimap queryMap() { return null; } // TODO needed?
- public ImmutableMultimap queryParameter(String key) {
- return null; // TODO
- }
+ // public ImmutableMultimap queryParameter(String key) { return null; } // TODO?
public IRI base() {
- return null; // TODO as in URIs.base()
+ throw new UnsupportedOperationException("TODO"); // TODO as in URIs.base()
}
/** Resolve, e.g. as in {@link URI#resolve(URI)}. */
public IRI resolve(IRI iri) {
- return null; // TODO
+ throw new UnsupportedOperationException("TODO"); // TODO
}
/** Resolve, e.g. as in {@link URI#resolve(URI)}. */
@@ -303,30 +309,125 @@ public IRI resolve(String string) {
/** Relativize, e.g. as in {@link URI#relativize(URI)}. */
public IRI relativize(IRI iri) {
- return null; // TODO
+ throw new UnsupportedOperationException("TODO"); // TODO
+ }
+
+ private void parse() {
+ try {
+ parse_();
+ } catch (StringIndexOutOfBoundsException e) {
+ throw new IllegalArgumentException("TODO FIXME: " + string, e);
+ }
+ }
+
+ private void parse_() {
+ if (string == null) {
+ scheme = "";
+ authority = "";
+ path = "";
+ query = "";
+ fragment = "";
+ return;
+ }
+
+ // Scheme! Note that it is (and must be) very limited ASCII only; there's no decoding for scheme.
+ int end = string.indexOf(':');
+ if (end == -1) scheme = "";
+ else scheme = normalizeScheme(string.substring(0, end).trim());
+
+ // Authority! TODO Decoding..
+ var len = string.length();
+ var start = end + 1;
+ if (start >= len) authority = "";
+ else {
+ while (start < len) {
+ if (string.charAt(start) == '/') ++start;
+ else break;
+ }
+ end = start + 1;
+ while (end < len) {
+ if (string.charAt(end) != '/') ++end;
+ else break;
+ }
+ authority = string.substring(start, end);
+ }
+
+ // Path! TODO Decoding..
+ start = end;
+ if (start >= len) path = "";
+ else {
+ end = start + 1;
+ while (end < len) {
+ if (string.charAt(end) != '?' && string.charAt(end) != '#') ++end;
+ else break;
+ }
+ path = string.substring(start, end);
+ }
+
+ // Query! TODO Decoding..
+ start = end + 1;
+ if (start >= len) query = "";
+ else {
+ end = start + 1;
+ while (end < len) {
+ if (string.charAt(end) != '#') ++end;
+ else break;
+ }
+ query = string.substring(start, end);
+ }
+
+ // Fragment! TODO Decoding..
+ if (end >= len) fragment = "";
+ else fragment = string.substring(end + 1);
}
@Override
public String toString() {
- if (string == null) string = stringify();
+ // TODO Encoding! Offer x2 different toString!
+ if (string == null) {
+ var sb = new StringBuilder();
+ if (!scheme().isBlank()) {
+ sb.append(scheme());
+ sb.append(':');
+ }
+ if (!authority().isBlank()) {
+ sb.append("//");
+ sb.append(authority());
+ }
+ if (!path().isBlank()) {
+ sb.append(path);
+ } else sb.append('/');
+ if (!query().isBlank()) {
+ sb.append('?');
+ sb.append(query());
+ }
+ if (!fragment().isBlank()) {
+ sb.append('#');
+ sb.append(fragment());
+ }
+ string = sb.toString();
+ }
return string;
}
- private String stringify() {
- return "TODO";
- }
-
public URI toURI() throws URISyntaxException {
return new URI(toString());
}
public IRI normalize() {
- // TODO Keep result in a lazily initialized field? But... memory?!
- var builder = newBuilder();
- builder.scheme(scheme().toLowerCase(Locale.ROOT));
- // TODO ... FIXME
- // TODO Should we drop default ports for a few well-known schemes?
- return builder.build();
+ if (normalized == null) {
+ // TODO Keep result in a lazily initialized field? But... memory?!
+ var builder = newBuilder();
+ builder.scheme(normalizeScheme(scheme()));
+ // TODO ... FIXME
+ // TODO Should we drop default ports for a few well-known schemes?
+ normalized = builder.build();
+ }
+ return normalized;
+ }
+
+ private static String normalizeScheme(String scheme) {
+ return scheme.toLowerCase(Locale.ROOT);
}
/** Equality check, with {@link #normalize()}-ation. */
@@ -364,7 +465,7 @@ public void validate() throws ValidationException {
var scheme = scheme();
if (isAbsolute() && scheme.isBlank()) throw new ValidationException(this, "Blank scheme");
if (isAbsolute() && !CharAscii.INSTANCE.matchesAllOf(scheme))
- throw new ValidationException(this, "Invalid scheme: " + scheme);
+ throw new ValidationException(this, "Invalid non-ASCII [a-zA-Z0-9] scheme: " + scheme);
try {
var authority = authority();
diff --git a/java/dev/enola/common/io/iri/IRITest.java b/java/dev/enola/common/io/iri/IRITest.java
index 6c5680213..aca4acab5 100644
--- a/java/dev/enola/common/io/iri/IRITest.java
+++ b/java/dev/enola/common/io/iri/IRITest.java
@@ -18,6 +18,7 @@
package dev.enola.common.io.iri;
import static com.google.common.truth.Truth.assertThat;
+import static com.google.common.truth.Truth.assertWithMessage;
import static org.junit.Assert.assertThrows;
@@ -29,6 +30,9 @@
public class IRITest {
+ // TODO https://github.com/web-platform-tests/wpt/blob/master/url/resources/urltestdata.json
+ // see https://github.com/web-platform-tests/wpt/tree/master/url
+
record TestIRI(
boolean validIRI,
boolean validURI,
@@ -40,19 +44,65 @@ record TestIRI(
String query,
String fragment) {}
- // TODO Use e.g. a CSV instead of coding these out here?
+ // TODO Instead of coding this out here, use (another) JSON like WHATWG urltestdata.json
TestIRI[] tests =
new TestIRI[] {
new TestIRI(
true,
true,
+ "hTtPs://enola.dev/index.html?query#fragment",
+ "https://enola.dev/index.html?query#fragment",
+ "https",
+ "enola.dev",
+ "/index.html",
+ "query",
+ "fragment"),
+ new TestIRI(
+ true,
+ true,
+ "hTtPs://enola.dev/index.html?query",
+ "https://enola.dev/index.html?query",
+ "https",
+ "enola.dev",
+ "/index.html",
+ "query",
+ ""),
+ // TODO FIXME
+ // new TestIRI(
+ // true,
+ // true,
+ // "hTtPs://enola.dev/index.html#fragment",
+ // "https://enola.dev/index.html#fragment",
+ // "https",
+ // "enola.dev",
+ // "/index.html",
+ // "",
+ // "fragment"),
+ new TestIRI(
+ true,
+ false, // java.net.URI does not append trailing /
"https://enola.dev",
"https://enola.dev/",
"https",
"enola.dev",
"",
"",
- "")
+ ""),
+ new TestIRI(
+ true,
+ true,
+ "schema:authority",
+ "schema:authority",
+ "schema",
+ "authority",
+ "",
+ "",
+ ""),
+ new TestIRI(true, true, "schema:", "schema:", "schema", "", "", "", ""),
+ new TestIRI(true, true, "relative", "relative", "", "", "relative", "", ""),
+ new TestIRI(false, true, "?query", "", "", "", "", "query", ""),
+ new TestIRI(true, true, "#fragment", "", "", "", "", "", "fragment"),
+ new TestIRI(true, true, "", "", "", "", "", "", ""),
};
// TODO Test handling of + or %20 for space in path, query and fragment
@@ -78,32 +128,33 @@ public void iri() throws URISyntaxException, IRI.ValidationException {
builder.path(test.path);
builder.query(test.query);
builder.fragment(test.fragment);
- check2(builder.build(), test);
+ check(builder.build(), test, test.normalized);
}
}
void check2(IRI iri, TestIRI test) throws URISyntaxException, IRI.ValidationException {
- check(iri, test);
+ check(iri, test, test.text);
var rebuiltIRI = iri.newBuilder().build();
- check(rebuiltIRI, test);
+ check(rebuiltIRI, test, test.normalized);
}
- void check(IRI iri, TestIRI test) throws URISyntaxException, IRI.ValidationException {
- assertThat(iri.toString()).isEqualTo(test.text);
+ void check(IRI iri, TestIRI test, String string)
+ throws URISyntaxException, IRI.ValidationException {
+ assertThat(iri.toString()).isEqualTo(string);
- assertThat(iri.scheme()).isEqualTo(test.scheme());
- assertThat(iri.authority()).isEqualTo(test.authority());
- assertThat(iri.path()).isEqualTo(test.path());
- assertThat(iri.query()).isEqualTo(test.query());
- assertThat(iri.fragment()).isEqualTo(test.fragment());
+ assertWithMessage(iri.toString()).that(iri.scheme()).isEqualTo(test.scheme());
+ assertWithMessage(iri.toString()).that(iri.authority()).isEqualTo(test.authority());
+ assertWithMessage(iri.toString()).that(iri.path()).isEqualTo(test.path());
+ assertWithMessage(iri.toString()).that(iri.query()).isEqualTo(test.query());
+ assertWithMessage(iri.toString()).that(iri.fragment()).isEqualTo(test.fragment());
if (test.validIRI) iri.validate();
else assertThrows(IRI.ValidationException.class, iri::validate);
if (test.validURI) {
- assertThat(iri.toURI()).isEqualTo(new URI(test.text));
- assertThat(IRI.from(iri.toURI())).isEqualTo(iri);
+ assertWithMessage(iri.toString()).that(iri.toURI()).isEqualTo(new URI(test.text));
+ assertWithMessage(iri.toString()).that(IRI.from(iri.toURI())).isEqualTo(iri);
}
}
}