diff --git a/docs/concepts/other.md b/docs/concepts/other.md index 0fac3aec1..382cd20f2 100644 --- a/docs/concepts/other.md +++ b/docs/concepts/other.md @@ -208,6 +208,10 @@ Vector _("Embedding")_ databases of possible future interest: ## AI +### Retrieval-Augmented Generation (RAG) and [Entity Resolution](https://github.com/GavinMendelGleason/blog/blob/main/entries/entity_resolution.md) + +* [TerminusDB VectorLink](https://terminusdb.com/vectorlink/), see [Overview](https://terminusdb.com/blog/terminusdb-internals/) #commercial #[opensource](https://github.com/terminusdb/terminusdb) #trial + ### Symbolic AI Reasoning (GOFAI?) [Semantic Reasoning](https://en.wikipedia.org/wiki/Semantic_reasoner) through [modus ponens](https://en.wikipedia.org/wiki/Modus_ponens) of an [Inference Engine](https://en.wikipedia.org/wiki/Inference_engine) by @@ -215,6 +219,7 @@ _Forward Chaining; also see Backward Chaining, Backtracking, Backpropagation - [TBD](https://en.wikipedia.org/wiki/Symbolic_artificial_intelligence)._ * [Mangle](https://github.com/google/mangle) #OpenSource #Google +* [Leuchtkraft](https://github.com/Wuelle/Leuchtkraft) #OpenSource ## Platforms diff --git a/java/dev/enola/common/io/iri/IRI.java b/java/dev/enola/common/io/iri/IRI.java index d95c81388..d809cb6d7 100644 --- a/java/dev/enola/common/io/iri/IRI.java +++ b/java/dev/enola/common/io/iri/IRI.java @@ -18,7 +18,6 @@ package dev.enola.common.io.iri; import com.google.common.base.CharMatcher; -import com.google.common.collect.ImmutableMultimap; import com.google.common.net.HostAndPort; import com.google.common.net.HostSpecifier; import com.google.common.net.InetAddresses; @@ -45,10 +44,10 @@ *

This class strictly speaking represents an IRI Reference, not just an * IRI; meaning that it can be either absolute, with a scheme:, or relative. * - *

This class is logically (but not technically, for efficiency) immutable. It has a {@link - * Builder} to programmatically configure instances of it. You can also just construct it from a - * String with {@link #parseUnencoded(String)}. To modify, use {@link #newBuilder()}, set what you - * need, and {@link Builder#build()} it. + *

This class is thread safe, because it is logically (but not technically, for efficiency) + * immutable. It has a {@link Builder} to programmatically configure instances of it. You can also + * just construct it from a String with {@link #parseUnencoded(String)}. To modify, use {@link + * #newBuilder()}, set what you need, and {@link Builder#build()} it. * *

This class never throws any runtime exceptions for supposedly "invalid" input. It allows e.g. * "http://example.org/~{username}" (e.g. URI Templates à la RFC 6570, or other similar syntaxes) or @@ -81,6 +80,9 @@ * add support for that, if you need it. * *

This class is null-safe. Its accessor methods never return null, but empty Strings instead. + * This means callers cannot distinguish e.g. between "schema:" and "schema:/" and "schema:/?" and + * "schema:/?#"; but this is an intentional design decision, for a simpler and more convenient null + * friendly API. * *

This class never makes any network access! (Yes, looking at you, {@link * java.net.URL#equals(Object)} - OMG!) @@ -111,6 +113,8 @@ public final class IRI implements Comparable { // TODO Actually fully read https://url.spec.whatwg.org first.. :=) // TODO Research existing implementations for inspiration... + // - https://developer.android.com/reference/android/net/Uri, with + // https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/java/android/net/Uri.java // - https://github.com/square/okhttp/issues/1486 // - https://github.com/palominolabs/url-builder // - https://github.com/dmfs/uri-toolkit @@ -233,67 +237,69 @@ public Builder fragment(String fragment) { // TODO private @Nullable Multimap queryMap; private @Nullable String fragment; private @Nullable String string; + private @Nullable IRI normalized; + // TODO Naming? android.net.Uri calls this buildUpon() ... public Builder newBuilder() { var builder = new Builder(); + builder.scheme(scheme()); + builder.authority(authority()); + builder.path(path()); + builder.query(query()); + builder.fragment(fragment()); return builder; } public String scheme() { - if (scheme == null) scheme = find_scheme(); + if (scheme == null) parse(); return scheme; } - private String find_scheme() { - return null; // TODO - } - public boolean hasScheme(String scheme) { - // TODO Implement more optimized - return scheme().equals(scheme); + assert scheme.contains(":"); + return normalizeScheme(scheme()).equals(normalizeScheme(scheme)); } public boolean isAbsolute() { return !scheme().isBlank(); } - // /** Scheme specific part is just everything after the : colon of the scheme. 
*/ - /* public CharSequence schemeSpecificPart() { - return null; // TODO - } */ + public boolean isRelative() { + return !isAbsolute(); + } public String authority() { - return null; // TODO + if (authority == null) parse(); + return authority; } public String path() { - return null; // TODO + if (path == null) parse(); + return path; } public String query() { - return null; // TODO + if (query == null) parse(); + return query; } public String fragment() { - return null; // TODO + if (fragment == null) parse(); + return fragment; } - // TODO Allow both & and ; as query delimiters?! - public ImmutableMultimap queryMap() { - return null; // TODO - } + // Allows (interprets) only '&' and not ';' as query delimiter! + // public ImmutableMultimap queryMap() { return null; } // TODO needed? - public ImmutableMultimap queryParameter(String key) { - return null; // TODO - } + // public ImmutableMultimap queryParameter(String key) { return null; } // TODO? public IRI base() { - return null; // TODO as in URIs.base() + throw new UnsupportedOperationException("TODO"); // TODO as in URIs.base() } /** Resolve, e.g. as in {@link URI#resolve(URI)}. */ public IRI resolve(IRI iri) { - return null; // TODO + throw new UnsupportedOperationException("TODO"); // TODO } /** Resolve, e.g. as in {@link URI#resolve(URI)}. */ @@ -303,30 +309,125 @@ public IRI resolve(String string) { /** Relativize, e.g. as in {@link URI#relativize(URI)}. */ public IRI relativize(IRI iri) { - return null; // TODO + throw new UnsupportedOperationException("TODO"); // TODO + } + + private void parse() { + try { + parse_(); + } catch (StringIndexOutOfBoundsException e) { + throw new IllegalArgumentException("TODO FIXME: " + string, e); + } + } + + private void parse_() { + if (string == null) { + scheme = ""; + authority = ""; + path = ""; + query = ""; + fragment = ""; + return; + } + + // Scheme! Note it [must be] is (very limited) ASCII, only; there's no decoding for scheme. 
+ int end = string.indexOf(':'); + if (end == -1) scheme = ""; + else scheme = normalizeScheme(string.substring(0, end).trim()); + + // Authority! TODO Decoding.. + var len = string.length(); + var start = end + 1; + if (start >= len) authority = ""; + else { + while (start < len) { + if (string.charAt(start) == '/') ++start; + else break; + } + end = start + 1; + while (end < len) { + if (string.charAt(end) != '/') ++end; + else break; + } + authority = string.substring(start, end); + } + + // Path! TODO Decoding.. + start = end; + if (start >= len) path = ""; + else { + end = start + 1; + while (end < len) { + if (string.charAt(end) != '?' && string.charAt(end) != '#') ++end; + else break; + } + path = string.substring(start, end); + } + + // Query! TODO Decoding.. + start = end + 1; + if (start >= len) query = ""; + else { + end = start + 1; + while (end < len) { + if (string.charAt(end) != '#') ++end; + else break; + } + query = string.substring(start, end); + } + + // Fragment! TODO Decoding.. + if (end >= len) fragment = ""; + else fragment = string.substring(end + 1); } @Override public String toString() { - if (string == null) string = stringify(); + // TODO Encoding! Offer x2 different toString! + if (string == null) { + var sb = new StringBuilder(); + if (!scheme().isBlank()) { + sb.append(scheme()); + sb.append(':'); + } + if (!authority().isBlank()) { + sb.append("//"); + sb.append(authority()); + } + if (!path().isBlank()) { + sb.append(path); + } else sb.append('/'); + if (!query().isBlank()) { + sb.append('?'); + sb.append(query()); + } + if (!fragment().isBlank()) { + sb.append('#'); + sb.append(fragment()); + } + string = sb.toString(); + } return string; } - private String stringify() { - return "TODO"; - } - public URI toURI() throws URISyntaxException { return new URI(toString()); } public IRI normalize() { - // TODO Keep result in a lazily initialized field? But... memory?! 
- var builder = newBuilder(); - builder.scheme(scheme().toLowerCase(Locale.ROOT)); - // TODO ... FIXME - // TODO Should we drop default ports for a few well-known schemes? - return builder.build(); + if (normalized == null) { + // TODO Keep result in a lazily initialized field? But... memory?! + var builder = newBuilder(); + builder.scheme(normalizeScheme(scheme())); + // TODO ... FIXME + // TODO Should we drop default ports for a few well-known schemes? + normalized = builder.build(); + } + return normalized; + } + + private static String normalizeScheme(String scheme) { + return scheme.toLowerCase(Locale.ROOT); } /** Equality check, with {@link #normalize()}-ation. */ @@ -364,7 +465,7 @@ public void validate() throws ValidationException { var scheme = scheme(); if (isAbsolute() && scheme.isBlank()) throw new ValidationException(this, "Blank scheme"); if (isAbsolute() && !CharAscii.INSTANCE.matchesAllOf(scheme)) - throw new ValidationException(this, "Invalid scheme: " + scheme); + throw new ValidationException(this, "Invalid non-ASCII [a-zA-Z0-9] scheme: " + scheme); try { var authority = authority(); diff --git a/java/dev/enola/common/io/iri/IRITest.java b/java/dev/enola/common/io/iri/IRITest.java index 6c5680213..aca4acab5 100644 --- a/java/dev/enola/common/io/iri/IRITest.java +++ b/java/dev/enola/common/io/iri/IRITest.java @@ -18,6 +18,7 @@ package dev.enola.common.io.iri; import static com.google.common.truth.Truth.assertThat; +import static com.google.common.truth.Truth.assertWithMessage; import static org.junit.Assert.assertThrows; @@ -29,6 +30,9 @@ public class IRITest { + // TODO https://github.com/web-platform-tests/wpt/blob/master/url/resources/urltestdata.json + // see https://github.com/web-platform-tests/wpt/tree/master/url + record TestIRI( boolean validIRI, boolean validURI, @@ -40,19 +44,65 @@ record TestIRI( String query, String fragment) {} - // TODO Use e.g. a CSV instead of coding these out here? 
+ // TODO Instead of coding this out here, use (another) JSON like WHATWG urltestdata.json TestIRI[] tests = new TestIRI[] { new TestIRI( true, true, + "hTtPs://enola.dev/index.html?query#fragment", + "https://enola.dev/index.html?query#fragment", + "https", + "enola.dev", + "/index.html", + "query", + "fragment"), + new TestIRI( + true, + true, + "hTtPs://enola.dev/index.html?query", + "https://enola.dev/index.html?query", + "https", + "enola.dev", + "/index.html", + "query", + ""), + // TODO FIXME + // new TestIRI( + // true, + // true, + // "hTtPs://enola.dev/index.html#fragment", + // "https://enola.dev/index.html#fragment", + // "https", + // "enola.dev", + // "/index.html", + // "", + // "fragment"), + new TestIRI( + true, + false, // java.net.URI does not append trailing / "https://enola.dev", "https://enola.dev/", "https", "enola.dev", "", "", - "") + ""), + new TestIRI( + true, + true, + "schema:authority", + "schema:authority", + "schema", + "authority", + "", + "", + ""), + new TestIRI(true, true, "schema:", "schema:", "schema", "", "", "", ""), + new TestIRI(true, true, "relative", "relative", "", "", "relative", "", ""), + new TestIRI(false, true, "?query", "", "", "", "", "query", ""), + new TestIRI(true, true, "#fragment", "", "", "", "", "", "fragment"), + new TestIRI(true, true, "", "", "", "", "", "", ""), }; // TODO Test handling of + or %20 for space in path, query and fragment @@ -78,32 +128,33 @@ public void iri() throws URISyntaxException, IRI.ValidationException { builder.path(test.path); builder.query(test.query); builder.fragment(test.fragment); - check2(builder.build(), test); + check(builder.build(), test, test.normalized); } } void check2(IRI iri, TestIRI test) throws URISyntaxException, IRI.ValidationException { - check(iri, test); + check(iri, test, test.text); var rebuiltIRI = iri.newBuilder().build(); - check(rebuiltIRI, test); + check(rebuiltIRI, test, test.normalized); } - void check(IRI iri, TestIRI test) throws 
URISyntaxException, IRI.ValidationException { - assertThat(iri.toString()).isEqualTo(test.text); + void check(IRI iri, TestIRI test, String string) + throws URISyntaxException, IRI.ValidationException { + assertThat(iri.toString()).isEqualTo(string); - assertThat(iri.scheme()).isEqualTo(test.scheme()); - assertThat(iri.authority()).isEqualTo(test.authority()); - assertThat(iri.path()).isEqualTo(test.path()); - assertThat(iri.query()).isEqualTo(test.query()); - assertThat(iri.fragment()).isEqualTo(test.fragment()); + assertWithMessage(iri.toString()).that(iri.scheme()).isEqualTo(test.scheme()); + assertWithMessage(iri.toString()).that(iri.authority()).isEqualTo(test.authority()); + assertWithMessage(iri.toString()).that(iri.path()).isEqualTo(test.path()); + assertWithMessage(iri.toString()).that(iri.query()).isEqualTo(test.query()); + assertWithMessage(iri.toString()).that(iri.fragment()).isEqualTo(test.fragment()); if (test.validIRI) iri.validate(); else assertThrows(IRI.ValidationException.class, iri::validate); if (test.validURI) { - assertThat(iri.toURI()).isEqualTo(new URI(test.text)); - assertThat(IRI.from(iri.toURI())).isEqualTo(iri); + assertWithMessage(iri.toString()).that(iri.toURI()).isEqualTo(new URI(test.text)); + assertWithMessage(iri.toString()).that(IRI.from(iri.toURI())).isEqualTo(iri); } } }