From e1cd34bb03526a09c2221df84ea01b9e75ac63ba Mon Sep 17 00:00:00 2001 From: Jark Wu Date: Wed, 5 Mar 2025 18:15:42 +0800 Subject: [PATCH 1/5] [website] Support multi-version for documentation (#63) --- .gitignore | 3 + website/build_versioned_docs.sh | 133 ++++++++++++++++++ website/deploy.sh | 22 --- website/docusaurus.config.ts | 5 + .../DocsVersionDropdownNavbarItem.js | 34 +++++ 5 files changed, 175 insertions(+), 22 deletions(-) create mode 100755 website/build_versioned_docs.sh delete mode 100755 website/deploy.sh create mode 100644 website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js diff --git a/.gitignore b/.gitignore index 292d6faaec..6facb4983b 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,6 @@ website/npm-debug.log* website/yarn-debug.log* website/yarn-error.log* website/package-lock.json +website/versioned_docs +website/versioned_sidebars +website/versions.json diff --git a/website/build_versioned_docs.sh b/website/build_versioned_docs.sh new file mode 100755 index 0000000000..c2a89c9a9a --- /dev/null +++ b/website/build_versioned_docs.sh @@ -0,0 +1,133 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2025 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SCRIPT_PATH=$(cd "$(dirname "$0")" && pwd) +VERSIONED_DOCS="$SCRIPT_PATH/versioned_docs" +VERSIONED_SIDEBARS="$SCRIPT_PATH/versioned_sidebars" + +mkdir -p "$VERSIONED_DOCS" +mkdir -p "$VERSIONED_SIDEBARS" + +# Configure the remote repository URL and temporary directory +REPO_URL="https://github.com/alibaba/fluss.git" +TEMP_DIR=$(mktemp -d) + +# Check if the temporary directory was successfully created +if [ ! 
-d "$TEMP_DIR" ]; then + echo "Failed to create temporary directory" + exit 1 +fi + +echo "Cloning remote repository to temporary directory: $TEMP_DIR" +git clone "$REPO_URL" "$TEMP_DIR" + +# Enter the temporary directory +cd "$TEMP_DIR" || { echo "Failed to enter temporary directory"; exit 1; } + + +# Match branches in the format "release-x.y" +regex='release-[0-9]+\.[0-9]+$' # Regular expression to match release-x.y +branches=$(git branch -a | grep -E "$regex") # Filter branches that match the criteria + +# Exit the script if no matching branches are found +if [ -z "$branches" ]; then + echo "No branches matching 'release-x.y' format found" + exit 0 +fi + +echo "Matched branches:" +echo "$branches" + +################################################################################################## +# Generate versions.json file +################################################################################################## + +# Initialize JSON array +versions_json="[" + +# Iterate over each matched branch +for branch in $branches; do + # Extract the version number part (remove the "release-" prefix) + version=$(echo "$branch" | sed 's|remotes/origin/release-||') + + # Add to the JSON array + versions_json+="\"$version\", " +done + +# Remove the last comma and space, and close the JSON array +versions_json="${versions_json%, }]" +echo "Generated the versions JSON: $versions_json" + +# Output to versions.json file +echo "$versions_json" > "$SCRIPT_PATH/versions.json" +echo "Operation completed! Versions information has been saved to $SCRIPT_PATH/versions.json file." + +################################################################################################## +# Generate versioned sidebars JSON file +################################################################################################## + +sidebar_json='{ + "docsSidebar": [ + { + "type": "autogenerated", + "dirName": "." + } + ] +}' + +# handle OS-specific cp command +if [ "$(uname)" == "Darwin" ]; then + CP_CMD="cp -R website/docs/ " +else + CP_CMD="cp -r website/docs/* " +fi + +# Iterate over each matched branch +for branch in $branches; do + # Remove the remote branch prefix "remotes/origin/" + clean_branch_name=$(echo "$branch" | sed 's|remotes/origin/||') + version=$(echo "$branch" | sed 's|remotes/origin/release-||') + + echo "Processing branch: $clean_branch_name" + + # 检出分支 + git checkout "$clean_branch_name" || { echo "Failed to checkout branch: $clean_branch_name"; continue; } + + version_sidebar_file="$VERSIONED_SIDEBARS/version-$version-sidebars.json" + echo "$sidebar_json" > "$version_sidebar_file" || { echo "Failed to generate sidebar file for version '$version'"; continue; } + echo "Generated sidebar file for version '$version': $version_sidebar_file" + + # Check if the website/docs directory exists + if [ -d "website/docs" ]; then + # Create the target subdirectory (named after the branch) + version_dir="$VERSIONED_DOCS/version-$version" + mkdir -p "$version_dir" + + # Copy the website/docs directory to the target directory + $CP_CMD "$version_dir/" || { echo "Failed to copy for branch: $clean_branch_name"; continue; } + echo "Copied documentation for branch '$clean_branch_name' to '$version_dir'" + else + echo "The website/docs directory does not exist in branch '$clean_branch_name', skipping..." + fi +done + +# Clean up the temporary directory +echo "Cleaning up temporary directory: $TEMP_DIR" + +rm -rf "$TEMP_DIR" + +echo "Build versioned docs completed!" 
\ No newline at end of file diff --git a/website/deploy.sh b/website/deploy.sh deleted file mode 100755 index 61affb51d0..0000000000 --- a/website/deploy.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (c) 2024 Alibaba Group Holding Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# docusaurus currently has bug with css minify, so we need to skip it -npm run clear -npm install -npm run build -- --no-minify -npm run deploy -- --skip-build \ No newline at end of file diff --git a/website/docusaurus.config.ts b/website/docusaurus.config.ts index 39ff0a1641..e185512593 100644 --- a/website/docusaurus.config.ts +++ b/website/docusaurus.config.ts @@ -123,6 +123,11 @@ const config: Config = { {to: '/community', label: 'Community', position: 'left'}, {to: '/roadmap', label: 'Roadmap', position: 'left'}, {to: '/downloads', label: 'Downloads', position: 'left'}, + { + type: 'docsVersionDropdown', + position: 'right', + dropdownActiveClassDisabled: true, + }, { href: 'https://github.com/alibaba/fluss', position: 'right', diff --git a/website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js b/website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js new file mode 100644 index 0000000000..a4e0c3da11 --- /dev/null +++ b/website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+
+import React from 'react';
+import DocsVersionDropdownNavbarItem from '@theme-original/NavbarItem/DocsVersionDropdownNavbarItem';
+import useRouteContext from '@docusaurus/useRouteContext';
+
+export default function DocsVersionDropdownNavbarItemWrapper(props) {
+  const {plugin} = useRouteContext();
+
+  // Only render the version dropdown on docs pages.
+  if (plugin.name === "docusaurus-plugin-content-docs") {
+    return (
+      <>
+        <DocsVersionDropdownNavbarItem {...props} />
+      </>
+    );
+  } else {
+    return null;
+  }
+}
\ No newline at end of file

From fd67f03396e9d5f1cfe34ed2b853ea097c384f5d Mon Sep 17 00:00:00 2001
From: Jark Wu
Date: Wed, 5 Mar 2025 18:16:03 +0800
Subject: [PATCH 2/5] [docs] Support dynamic version expression in documentation

Given the current doc version is 0.6.0:

$FLUSS_VERSION$       => 0.6.0
$FLUSS_VERSION_SHORT$ => 0.6
---
 website/docusaurus.config.ts                  | 13 +--
 website/fluss-versions.json                   | 17 ++++
 website/package.json                          |  4 +-
 .../plugins/remark-version-replace/index.js   | 84 +++++++++++++++++++
 4 files changed, 108 insertions(+), 10 deletions(-)
 create mode 100644 website/fluss-versions.json
 create mode 100644 website/src/plugins/remark-version-replace/index.js

diff --git a/website/docusaurus.config.ts b/website/docusaurus.config.ts
index e185512593..7fb5e59b3d 100644
--- a/website/docusaurus.config.ts
+++ b/website/docusaurus.config.ts
@@ -17,6 +17,7 @@
 import {themes as prismThemes} from 'prism-react-renderer';
 import type {Config} from '@docusaurus/types';
 import type * as Preset from '@docusaurus/preset-classic';
+import versionReplace from './src/plugins/remark-version-replace/index';
 
 const config: Config = {
   title: 'Fluss',
@@ -55,10 +56,9 @@ const config: Config = {
     {
       docs: {
         sidebarPath: './sidebars.ts',
-        // Please change this to your repo.
-        // Remove this to remove the "edit this page" links.
-        // editUrl:
-        //   'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
+        editUrl: ({versionDocsDirPath, docPath}) =>
+          `https://github.com/alibaba/fluss/tree/main/website/docs/${docPath}`,
+        remarkPlugins: [versionReplace],
       },
       blog: {
         showReadingTime: false,
         feedOptions: {
           type: ['rss', 'atom'],
           xslt: true,
         },
-        // Please change this to your repo.
-        // Remove this to remove the "edit this page" links.
- //editUrl: - // 'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/', - // Useful options to enforce blogging best practices onInlineTags: 'warn', onInlineAuthors: 'warn', onUntruncatedBlogPosts: 'warn', diff --git a/website/fluss-versions.json b/website/fluss-versions.json new file mode 100644 index 0000000000..74a1f3dbf9 --- /dev/null +++ b/website/fluss-versions.json @@ -0,0 +1,17 @@ +[ + { + "versionName": "next", + "fullVersion": "0.7-SNAPSHOT", + "shortVersion": "0.7-SNAPSHOT" + }, + { + "versionName": "version-0.6", + "fullVersion": "0.6.0", + "shortVersion": "0.6" + }, + { + "versionName": "version-0.5", + "fullVersion": "0.5.0", + "shortVersion": "0.5" + } +] \ No newline at end of file diff --git a/website/package.json b/website/package.json index 56571ed445..d2a2f22114 100644 --- a/website/package.json +++ b/website/package.json @@ -23,7 +23,9 @@ "clsx": "^2.0.0", "prism-react-renderer": "^2.3.0", "react": "^18.0.0", - "react-dom": "^18.0.0" + "react-dom": "^18.0.0", + "escape-string-regexp": "^4.0.0", + "unist-util-visit": "^5.0.0" }, "devDependencies": { "@docusaurus/module-type-aliases": "^3.6.1", diff --git a/website/src/plugins/remark-version-replace/index.js b/website/src/plugins/remark-version-replace/index.js new file mode 100644 index 0000000000..645311a143 --- /dev/null +++ b/website/src/plugins/remark-version-replace/index.js @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {visit} from 'unist-util-visit' +import escapeStringRegexp from 'escape-string-regexp' +const VERSIONS = require('../../../fluss-versions.json'); + +// convert the versions into a Map +const versionsMap = new Map(); +VERSIONS.map((version) => { + versionsMap.set(version.versionName, version); +}); + +function getDocsVersionName(pathname) { + const parts = pathname.split('/'); + const websiteIndex = parts.lastIndexOf('website'); + + if (websiteIndex === -1 || websiteIndex + 1 >= parts.length) return ''; + + const docsName = parts[websiteIndex + 1]; + if (docsName === 'docs') { + return 'next'; + } else if (docsName === 'versioned_docs' && websiteIndex + 2 < parts.length) { + return parts[websiteIndex + 2]; + } else { + return ''; + } +} + +const plugin = (options) => { + const transformer = async (ast, vfile) => { + const versionName = getDocsVersionName(vfile.path); + const version = versionsMap.get(versionName); + + if (!version) { + return; + } + + const replacements = { + "$FLUSS_VERSION$": version.fullVersion, + "$FLUSS_VERSION_SHORT$": version.shortVersion, + }; + + // RegExp to find any replacement keys. + const regexp = RegExp( + '(' + + Object.keys(replacements) + .map(key => escapeStringRegexp(key)) + .join('|') + + ')', + 'g', + ) + + const replacer = (_match, name) => replacements[name] + + // Go through all text, html, code, inline code, and links. 
+    visit(ast, ['text', 'html', 'code', 'inlineCode', 'link'], node => {
+      if (node.type === 'link') {
+        // For links, the text value is carried by a child text node, so we
+        // change the URL value here instead.
+        node.url = node.url.replace(regexp, replacer)
+      } else {
+        // For all other nodes, replace the node value.
+        node.value = node.value.replace(regexp, replacer)
+      }
+    });
+  };
+  return transformer;
+};
+
+export default plugin;

From 62eb8ba5571227880b4909f30933b6680cc8d01f Mon Sep 17 00:00:00 2001
From: Jark Wu
Date: Wed, 5 Mar 2025 18:16:27 +0800
Subject: [PATCH 3/5] [docs] Use $FLUSS_VERSION$ expr in docs and fix broken links

---
 website/docs/engine-flink/ddl.md              |  8 +--
 website/docs/engine-flink/getting-started.md  | 10 ++--
 website/docs/engine-flink/lookups.md          |  4 +-
 .../deploying-distributed-cluster.md          | 10 ++--
 .../install-deploy/deploying-local-cluster.md |  8 +--
 .../install-deploy/deploying-with-docker.md   | 51 ++++++++-----------
 website/docs/install-deploy/overview.md       | 26 +++++-----
 website/docs/intro.md                         | 10 ++--
 website/docs/maintenance/configuration.md     |  2 +-
 .../docs/maintenance/observability/logging.md |  2 +-
 .../observability/monitor-metrics.md          |  2 +-
 .../maintenance/observability/quickstart.md   | 18 +++----
 .../tiered-storage/remote-storage.md          |  2 +-
 website/docs/quickstart/flink.md              | 28 ++++------
 .../integrate-data-lakes/paimon.md            |  2 +-
 website/docs/streaming-lakehouse/overview.md  |  2 +-
 .../data-distribution/bucketing.md            |  2 +-
 .../data-distribution/partitioning.md         |  4 +-
 .../table-design/data-distribution/ttl.md     |  2 +-
 website/docs/table-design/overview.md         |  2 +-
 .../table-design/table-types/log-table.md     |  2 +-
 .../table-types/pk-table/index.md             | 10 ++--
 .../pk-table/merge-engines/index.md           |  4 +-
 23 files changed, 98 insertions(+), 113 deletions(-)

diff --git a/website/docs/engine-flink/ddl.md b/website/docs/engine-flink/ddl.md
index 2a0ced6da2..2a5811dbd5 100644
--- a/website/docs/engine-flink/ddl.md
+++ b/website/docs/engine-flink/ddl.md
@@ -118,7 +118,7 @@ CREATE TABLE my_part_log_table (
 ) PARTITIONED BY (dt);
 ```
 :::note
-After the Partitioned (PrimaryKey/Log) Table is created, you need first manually create the corresponding partition using the [Add Partition](/docs/engine-flink/ddl.md#add-partition) statement
+After the Partitioned (PrimaryKey/Log) Table is created, you need to first manually create the corresponding partition using the [Add Partition](engine-flink/ddl.md#add-partition) statement
 before you write/read data into this partition.
 :::
@@ -157,7 +157,7 @@ CREATE TABLE my_auto_part_log_table (
 );
 ```
 
-For more details about Auto Partitioned (PrimaryKey/Log) Table, refer to [Auto Partitioning Options](/docs/table-design/data-distribution/partitioning/#auto-partitioning-options).
+For more details about Auto Partitioned (PrimaryKey/Log) Table, refer to [Auto Partitioning Options](table-design/data-distribution/partitioning.md#auto-partitioning-options).
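For instance, creating a partition by hand could look like the following sketch. It reuses the `my_part_log_table` example above; treat the exact statement shape as an assumption and see the [Add Partition](engine-flink/ddl.md#add-partition) section for the authoritative syntax:

```sql
-- Create the 2025-03-05 partition before writing to or reading from it.
ALTER TABLE my_part_log_table ADD PARTITION (dt = '2025-03-05');
```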
### Options

@@ -167,8 +167,8 @@
 The supported options in "with" parameters when creating a table are as follows:
 
 | Option     | Type   | Required | Default                             | Description |
 |------------|--------|----------|-------------------------------------|-------------|
 | bucket.num | int    | optional | The bucket number of Fluss cluster. | The number of buckets of a Fluss table. |
 | bucket.key | String | optional | (none)                              | Specify the distribution policy of the Fluss table. Data will be distributed to each bucket according to the hash value of the bucket-key. If you specify multiple fields, the delimiter is ','. If the table is with primary key, you can't specify a bucket key currently. The bucket keys will always be the primary key (excluding the partition key). If the table is not with primary key, you can specify a bucket key, and when the bucket key is not specified, the data will be distributed to each bucket randomly. |
-| table.*    |        |          |                                     | All the [`table.` prefix configurations](/docs/maintenance/configuration.md) are supported to be defined in "with" options. |
-| client.*   |        |          |                                     | All the [`client.` prefix configurations](/docs/maintenance/configuration.md) are supported to be defined in "with" options. |
+| table.*    |        |          |                                     | All the [`table.` prefix configurations](maintenance/configuration.md) are supported to be defined in "with" options. |
+| client.*   |        |          |                                     | All the [`client.` prefix configurations](maintenance/configuration.md) are supported to be defined in "with" options. |
 
 ## Create Table Like

diff --git a/website/docs/engine-flink/getting-started.md b/website/docs/engine-flink/getting-started.md
index 72659ad886..af382b23ff 100644
--- a/website/docs/engine-flink/getting-started.md
+++ b/website/docs/engine-flink/getting-started.md
@@ -5,13 +5,13 @@ sidebar_position: 1
 # Getting Started with Flink Engine
 
 ## Quick Start
-For a quick introduction to running Flink, refer to the [Quick Start](/docs/quickstart/flink.md) guide.
+For a quick introduction to running Flink, refer to the [Quick Start](quickstart/flink.md) guide.
 
 ## Support Flink Versions
 
 | Fluss Connector Versions | Supported Flink Versions |
 |--------------------------|--------------------------|
-| 0.5                      | 1.18, 1.19, 1.20         |
+| $FLUSS_VERSION_SHORT$    | 1.18, 1.19, 1.20         |
 
 ## Feature Support
 
@@ -43,10 +43,10 @@ tar -xzf flink-1.20.1-bin-scala_2.12.tgz
 
 Download [Fluss connector jar](/downloads#fluss-connector) and copy it to the lib directory of your Flink home.
```shell
-cp fluss-connector-flink-<FLUSS_VERSION>.jar <FLINK_HOME>/lib/
+cp fluss-connector-flink-$FLUSS_VERSION$.jar <FLINK_HOME>/lib/
```
:::note
If you use [Amazon S3](http://aws.amazon.com/s3/), [Aliyun OSS](https://www.aliyun.com/product/oss) or [HDFS (Hadoop Distributed File System)](https://hadoop.apache.org/docs/stable/) as Fluss's [remote storage](maintenance/tiered-storage/remote-storage.md),
you should download the corresponding [Fluss filesystem jar](/downloads#filesystem-jars) and also copy it to the lib directory of your Flink home.
:::

@@ -79,7 +79,7 @@ CREATE CATALOG fluss_catalog WITH (
 
 :::note
 1. The `bootstrap.servers` means the Fluss server address. Before you configure the `bootstrap.servers`,
    you should start the Fluss server first. See [Deploying Fluss](install-deploy/overview.md#how-to-deploy-fluss)
    for how to build a Fluss cluster. Here, it is assumed that there is a Fluss cluster running on your local machine and the CoordinatorServer port is 9123.
 2. The `bootstrap.servers` configuration is used to discover all nodes within the Fluss cluster. It can be set with one or more (up to three) Fluss server addresses (either CoordinatorServer or TabletServer) separated by commas.

diff --git a/website/docs/engine-flink/lookups.md b/website/docs/engine-flink/lookups.md
index ed0a320ebf..22ad0661c8 100644
--- a/website/docs/engine-flink/lookups.md
+++ b/website/docs/engine-flink/lookups.md
@@ -122,7 +122,7 @@ FOR SYSTEM_TIME AS OF `o`.`ptime` AS `c`
 ON `o`.`o_custkey` = `c`.`c_custkey` AND `o`.`o_dt` = `c`.`dt`;
 ```
-For more details about Fluss partitioned table, see [Partitioned Tables](/docs/table-design/data-distribution/partitioning.md).
+For more details about Fluss partitioned table, see [Partitioned Tables](table-design/data-distribution/partitioning.md).
 
 ### Lookup Options
 
@@ -266,4 +266,4 @@ ON `o`.`o_custkey` = `c`.`c_custkey` AND `o`.`o_dt` = `c`.`dt`;
 -- join key is a prefix set of dimension table primary keys (excluding partition key) + partition key.
 ```
-For more details about Fluss partitioned table, see [Partitioned Tables](/docs/table-design/data-distribution/partitioning.md).
\ No newline at end of file
+For more details about Fluss partitioned table, see [Partitioned Tables](table-design/data-distribution/partitioning.md).
\ No newline at end of file
diff --git a/website/docs/install-deploy/deploying-distributed-cluster.md b/website/docs/install-deploy/deploying-distributed-cluster.md
index 8c7de83cc1..24a1fc0181 100644
--- a/website/docs/install-deploy/deploying-distributed-cluster.md
+++ b/website/docs/install-deploy/deploying-distributed-cluster.md
@@ -47,8 +47,8 @@ Node1 will deploy the CoordinatorServer and one TabletServer, Node2 and Node3 wi
 Go to the [downloads page](/downloads) and download the latest Fluss release.
After downloading the latest release, copy the archive to all the nodes and extract it: ```shell -tar -xzf fluss--bin.tgz -cd fluss-/ +tar -xzf fluss-$FLUSS_VERSION$-bin.tgz +cd fluss-$FLUSS_VERSION$/ ``` ### Configuring Fluss @@ -86,7 +86,7 @@ tablet-server.id: 3 :::note - `tablet-server.id` is the unique id of the TabletServer, if you have multiple TabletServers, you should set different id for each TabletServer. -- In this example, we only set the properties that must be configured, and for some other properties, you can refer to [Configuration](/docs/maintenance/configuration/) for more details. +- In this example, we only set the properties that must be configured, and for some other properties, you can refer to [Configuration](maintenance/configuration.md) for more details. ::: ### Starting Fluss @@ -121,7 +121,7 @@ Using Flink SQL Client to interact with Fluss. #### Preparation -You can start a Flink standalone cluster refer to [Flink Environment Preparation](/docs/engine-flink/getting-started#preparation-when-using-flink-sql-client) +You can start a Flink standalone cluster refer to [Flink Environment Preparation](engine-flink/getting-started.md#preparation-when-using-flink-sql-client) **Note**: Make sure the [Fluss connector jar](/downloads/) already has copied to the `lib` directory of your Flink home. @@ -138,4 +138,4 @@ CREATE CATALOG fluss_catalog WITH ( #### Do more with Fluss After the catalog is created, you can use Flink SQL Client to do more with Fluss, for example, create a table, insert data, query data, etc. -More details please refer to [Flink Getting Started](/docs/engine-flink/getting-started/). +More details please refer to [Flink Getting Started](engine-flink/getting-started.md). diff --git a/website/docs/install-deploy/deploying-local-cluster.md b/website/docs/install-deploy/deploying-local-cluster.md index 6b5cd315e7..ee3620685f 100644 --- a/website/docs/install-deploy/deploying-local-cluster.md +++ b/website/docs/install-deploy/deploying-local-cluster.md @@ -25,8 +25,8 @@ Go to the [downloads page](/downloads) and download the latest Fluss release. Ma package **matching your Java version**. After downloading the latest release, extract it: ```shell -tar -xzf fluss--bin.tgz -cd fluss-/ +tar -xzf fluss-$FLUSS_VERSION$-bin.tgz +cd fluss-$FLUSS_VERSION$/ ``` ## Starting Fluss Local Cluster @@ -49,7 +49,7 @@ Using Flink SQL Client to interact with Fluss. #### Preparation -You can start a Flink standalone cluster refer to [Flink Environment Preparation](/docs/engine-flink/getting-started#preparation-when-using-flink-sql-client) +You can start a Flink standalone cluster refer to [Flink Environment Preparation](engine-flink/getting-started.md#preparation-when-using-flink-sql-client) **Note**: Make sure the [Fluss connector jar](/downloads/) already has copied to the `lib` directory of your Flink home. @@ -66,4 +66,4 @@ CREATE CATALOG fluss_catalog WITH ( #### Do more with Fluss After the catalog is created, you can use Flink SQL Client to do more with Fluss, for example, create a table, insert data, query data, etc. 
-More details please refer to [Flink Getting started](/docs/engine-flink/getting-started/) \ No newline at end of file +More details please refer to [Flink Getting started](engine-flink/getting-started.md) \ No newline at end of file diff --git a/website/docs/install-deploy/deploying-with-docker.md b/website/docs/install-deploy/deploying-with-docker.md index 3c6a504e89..9b13c8a826 100644 --- a/website/docs/install-deploy/deploying-with-docker.md +++ b/website/docs/install-deploy/deploying-with-docker.md @@ -22,15 +22,6 @@ Recommended configuration: 4 cores, 16GB memory. Docker and the Docker Compose plugin. All commands were tested with Docker version 27.4.0 and Docker Compose version v2.30.3. -**Environment Variables** - -Set the following environment variables in the shell where you execute the commands. - -```bash -export FLUSS_VERSION=0.5.0 -export FLUSS_QUICKSTART_FLINK_VERSION=1.20-0.5 -``` - ## Deploy with Docker The following is a brief overview of how to quickly create a complete Fluss testing cluster @@ -75,7 +66,7 @@ docker run \ --env FLUSS_PROPERTIES="zookeeper.address: zookeeper:2181 coordinator.host: coordinator-server" \ -p 9123:9123 \ - -d fluss/fluss:${FLUSS_VERSION} coordinatorServer + -d fluss/fluss:$FLUSS_VERSION$ coordinatorServer ``` ### Start Fluss TabletServer @@ -99,7 +90,7 @@ data.dir: /tmp/fluss/data remote.data.dir: /tmp/fluss/remote-data" \ -p 9124:9124 \ --volume shared-tmpfs:/tmp/fluss \ - -d fluss/fluss:${FLUSS_VERSION} tabletServer + -d fluss/fluss:$FLUSS_VERSION$ tabletServer ``` #### Start with Multiple TabletServer @@ -120,7 +111,7 @@ data.dir: /tmp/fluss/data/tablet-server-0 remote.data.dir: /tmp/fluss/remote-data" \ -p 9124:9124 \ --volume shared-tmpfs:/tmp/fluss \ - -d fluss/fluss:${FLUSS_VERSION} tabletServer + -d fluss/fluss:$FLUSS_VERSION$ tabletServer ``` 2. start tablet-server-1 @@ -136,7 +127,7 @@ data.dir: /tmp/fluss/data/tablet-server-1 remote.data.dir: /tmp/fluss/remote-data" \ -p 9125:9125 \ --volume shared-tmpfs:/tmp/fluss \ - -d fluss/fluss:${FLUSS_VERSION} tabletServer + -d fluss/fluss:$FLUSS_VERSION$ tabletServer ``` 3. start tablet-server-2 @@ -152,7 +143,7 @@ data.dir: /tmp/fluss/data/tablet-server-2 remote.data.dir: /tmp/fluss/remote-data" \ -p 9126:9126 \ --volume shared-tmpfs:/tmp/fluss \ - -d fluss/fluss:${FLUSS_VERSION} tabletServer + -d fluss/fluss:$FLUSS_VERSION$ tabletServer ``` Now all the Fluss related components are running. @@ -180,7 +171,7 @@ docker run \ --env FLINK_PROPERTIES=" jobmanager.rpc.address: jobmanager" \ -p 8083:8081 \ --volume shared-tmpfs:/tmp/fluss \ - -d fluss/quickstart-flink:${FLUSS_QUICKSTART_FLINK_VERSION} jobmanager + -d fluss/quickstart-flink:1.20-$FLUSS_VERSION_SHORT$ jobmanager ``` 2. start taskManager @@ -191,7 +182,7 @@ docker run \ --network=fluss-demo \ --env FLINK_PROPERTIES=" jobmanager.rpc.address: jobmanager" \ --volume shared-tmpfs:/tmp/fluss \ - -d fluss/quickstart-flink:${FLUSS_QUICKSTART_FLINK_VERSION} taskmanager + -d fluss/quickstart-flink:1.20-$FLUSS_VERSION_SHORT$ taskmanager ``` #### Enter into SQL-Client @@ -222,7 +213,7 @@ USE CATALOG fluss_catalog; #### Do more with Fluss After the catalog is created, you can use Flink SQL Client to do more with Fluss, for example, create a table, insert data, query data, etc. 
-More details please refer to [Flink Getting started](/docs/engine-flink/getting-started/) +More details please refer to [Flink Getting started](engine-flink/getting-started.md) ## Deploy with Docker Compose @@ -237,7 +228,7 @@ You can use the following `docker-compose.yml` file to start a Fluss cluster wit ```yaml services: coordinator-server: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: coordinatorServer depends_on: - zookeeper @@ -248,7 +239,7 @@ services: coordinator.host: coordinator-server remote.data.dir: /tmp/fluss/remote-data tablet-server: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: tabletServer depends_on: - coordinator-server @@ -282,7 +273,7 @@ You can use the following `docker-compose.yml` file to start a Fluss cluster wit ```yaml services: coordinator-server: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: coordinatorServer depends_on: - zookeeper @@ -293,7 +284,7 @@ services: coordinator.host: coordinator-server remote.data.dir: /tmp/fluss/remote-data tablet-server-0: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: tabletServer depends_on: - coordinator-server @@ -309,7 +300,7 @@ services: volumes: - shared-tmpfs:/tmp/fluss tablet-server-1: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: tabletServer depends_on: - coordinator-server @@ -325,7 +316,7 @@ services: volumes: - shared-tmpfs:/tmp/fluss tablet-server-2: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: tabletServer depends_on: - coordinator-server @@ -371,7 +362,7 @@ The changed `docker-compose.yml` file is as follows: ```yaml services: coordinator-server: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: coordinatorServer depends_on: - zookeeper @@ -382,7 +373,7 @@ services: coordinator.host: coordinator-server remote.data.dir: /tmp/fluss/remote-data tablet-server-0: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: tabletServer depends_on: - coordinator-server @@ -398,7 +389,7 @@ services: volumes: - shared-tmpfs:/tmp/fluss tablet-server-1: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: tabletServer depends_on: - coordinator-server @@ -414,7 +405,7 @@ services: volumes: - shared-tmpfs:/tmp/fluss tablet-server-2: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: tabletServer depends_on: - coordinator-server @@ -433,7 +424,7 @@ services: restart: always image: zookeeper:3.9.2 jobmanager: - image: fluss/quickstart-flink:${FLUSS_QUICKSTART_FLINK_VERSION} + image: fluss/quickstart-flink:1.20-$FLUSS_VERSION_SHORT$ ports: - "8083:8081" command: jobmanager @@ -444,7 +435,7 @@ services: volumes: - shared-tmpfs:/tmp/fluss taskmanager: - image: fluss/quickstart-flink:${FLUSS_QUICKSTART_FLINK_VERSION} + image: fluss/quickstart-flink:1.20-$FLUSS_VERSION_SHORT$ depends_on: - jobmanager command: taskmanager @@ -486,4 +477,4 @@ USE CATALOG fluss_catalog; #### Do more with Fluss After the catalog is created, you can use Flink SQL Client to do more with Fluss, for example, create a table, insert data, query data, etc. 
-More details please refer to [Flink Getting started](/docs/engine-flink/getting-started/) \ No newline at end of file +More details please refer to [Flink Getting started](engine-flink/getting-started.md) \ No newline at end of file diff --git a/website/docs/install-deploy/overview.md b/website/docs/install-deploy/overview.md index 6f03c63ebd..e84706ebb8 100644 --- a/website/docs/install-deploy/overview.md +++ b/website/docs/install-deploy/overview.md @@ -41,7 +41,7 @@ We have listed them in the table below the figure.
    -
-  • [Flink Connector](/docs/engine-flink/getting-started/)
+  • [Flink Connector](engine-flink/getting-started.md)
@@ -60,9 +60,9 @@ We have listed them in the table below the figure.
    -
-  • [Local Cluster](/docs/install-deploy/deploying-local-cluster/)
-  • [Distributed Cluster](/docs/install-deploy/deploying-distributed-cluster/)
-  • [Docker run / Docker compose](/docs/install-deploy/deploying-with-docker/)
+  • [Local Cluster](install-deploy/deploying-local-cluster.md)
+  • [Distributed Cluster](install-deploy/deploying-distributed-cluster.md)
+  • [Docker run / Docker compose](install-deploy/deploying-with-docker.md)
@@ -101,9 +101,9 @@ We have listed them in the table below the figure. Fluss uses file systems as remote storage to store snapshots for Primary-Key Table and store tiered log segments for Log Table. -
-  • [HDFS](/docs/maintenance/filesystems/hdfs/)
-  • [Aliyun OSS](/docs/maintenance/filesystems/oss/)
-  • [Amazon S3](/docs/maintenance/filesystems/s3/)
+  • [HDFS](maintenance/filesystems/hdfs.md)
+  • [Aliyun OSS](maintenance/filesystems/oss.md)
+  • [Amazon S3](maintenance/filesystems/s3.md)
  • @@ -114,7 +114,7 @@ We have listed them in the table below the figure. by query engines such as Flink, Spark, StarRocks, Trino. -
-  • [Paimon](/docs/maintenance/tiered-storage/lakehouse-storage/)
+  • [Paimon](maintenance/tiered-storage/lakehouse-storage.md)
  • [Iceberg (Roadmap)](/roadmap/)
  • @@ -124,8 +124,8 @@ We have listed them in the table below the figure. CoordinatorServer/TabletServer report internal metrics and Fluss client (e.g., connector in Flink jobs) can report additional, client specific metrics as well. -
-  • [JMX](/docs/maintenance/observability/metric-reporters#jmx)
-  • [Prometheus](/docs/maintenance/observability/metric-reporters#prometheus)
+  • [JMX](maintenance/observability/metric-reporters.md#jmx)
+  • [Prometheus](maintenance/observability/metric-reporters.md#prometheus)
  • @@ -134,9 +134,9 @@ We have listed them in the table below the figure. ## How to deploy Fluss Fluss can be deployed in three different ways: -- [Local Cluster](/docs/install-deploy/deploying-local-cluster/) -- [Distributed Cluster](/docs/install-deploy/deploying-distributed-cluster/) -- [Docker run/ Docker compose](/docs/install-deploy/deploying-with-docker/) +- [Local Cluster](install-deploy/deploying-local-cluster.md) +- [Distributed Cluster](install-deploy/deploying-distributed-cluster.md) +- [Docker run/ Docker compose](install-deploy/deploying-with-docker.md) **NOTE**: - Local Cluster is for testing purpose only. \ No newline at end of file diff --git a/website/docs/intro.md b/website/docs/intro.md index 52704bc545..dc5f25f34f 100644 --- a/website/docs/intro.md +++ b/website/docs/intro.md @@ -20,8 +20,8 @@ With built-in replication for fault tolerance, horizontal scalability, and advan ## Where to go Next? -- [QuickStart](/docs/quickstart/flink/): Get started with Fluss in minutes. -- [Architecture](/docs/concepts/architecture/): Learn about Fluss's architecture. -- [Table Design](/docs/table-design/overview): Explore Fluss's table types, partitions and buckets. -- [Lakehouse](/docs/streaming-lakehouse/overview/): Integrate Fluss with your Lakehouse to bring low-latency data to your Lakehouse analytics. -- [Development](/docs/dev/ide-setup/): Set up your development environment and contribute to the community. +- [QuickStart](quickstart/flink.md): Get started with Fluss in minutes. +- [Architecture](concepts/architecture.md): Learn about Fluss's architecture. +- [Table Design](table-design/overview.md): Explore Fluss's table types, partitions and buckets. +- [Lakehouse](streaming-lakehouse/overview.md): Integrate Fluss with your Lakehouse to bring low-latency data to your Lakehouse analytics. +- [Development](dev/ide-setup.md): Set up your development environment and contribute to the community. diff --git a/website/docs/maintenance/configuration.md b/website/docs/maintenance/configuration.md index 774609c818..5f9eb8982b 100644 --- a/website/docs/maintenance/configuration.md +++ b/website/docs/maintenance/configuration.md @@ -184,7 +184,7 @@ during the Fluss cluster working. | table.log.tiered.local-segments | Integer | 2 | The number of log segments to retain in local for each table when log tiered storage is enabled. It must be greater that 0. The default is 2. | | table.datalake.enabled | Boolean | false | Whether enable lakehouse storage for the table. Disabled by default. When this option is set to ture and the datalake tiering service is up, the table will be tiered and compacted into datalake format stored on lakehouse storage. | | table.datalake.format | Enum | (None) | The data lake format of the table specifies the tiered Lakehouse storage format, such as Paimon, Iceberg, DeltaLake, or Hudi. Currently, only 'paimon' is supported. Once the `table.datalake.format` property is configured, Fluss adopts the key encoding and bucketing strategy used by the corresponding data lake format. This ensures consistency in key encoding and bucketing, enabling seamless **Union Read** functionality across Fluss and Lakehouse. The `table.datalake.format` can be pre-defined before enabling `table.datalake.enabled`. This allows the data lake feature to be dynamically enabled on the table without requiring table recreation. 
If `table.datalake.format` is not explicitly set during table creation, the table will default to the format specified by the `datalake.format` configuration in the Fluss cluster | -| table.merge-engine | Enum | (None) | Defines the merge engine for the primary key table. By default, primary key table doesn't have merge engine. The supported merge engines are 'first_row' and 'versioned'. The [first_row merge engine](/docs/table-design/table-types/pk-table/merge-engines/first-row) will keep the first row of the same primary key. The [versioned merge engine](/docs/table-design/table-types/pk-table/merge-engines/versioned) will keep the row with the largest version of the same primary key. | +| table.merge-engine | Enum | (None) | Defines the merge engine for the primary key table. By default, primary key table doesn't have merge engine. The supported merge engines are 'first_row' and 'versioned'. The [first_row merge engine](table-design/table-types/pk-table/merge-engines/first-row.md) will keep the first row of the same primary key. The [versioned merge engine](table-design/table-types/pk-table/merge-engines/versioned.md) will keep the row with the largest version of the same primary key. | | table.merge-engine.versioned.ver-column | String | (None) | The column name of the version column for the 'versioned' merge engine. If the merge engine is set to 'versioned', the version column must be set. | ### Config by Flink SQL diff --git a/website/docs/maintenance/observability/logging.md b/website/docs/maintenance/observability/logging.md index 7676cc7dd5..1e3b63b70f 100644 --- a/website/docs/maintenance/observability/logging.md +++ b/website/docs/maintenance/observability/logging.md @@ -21,7 +21,7 @@ Log4j periodically scans this file for changes and adjusts the logging behavior ### Log4j 2 configuration -The following [logging-related configuration options](../configuration.md) are available: +The following [logging-related configuration options](maintenance/configuration.md) are available: | Configuration | Description | Default | |---------------------------------|-------------------------------------------------------------------------|--------------------------------| diff --git a/website/docs/maintenance/observability/monitor-metrics.md b/website/docs/maintenance/observability/monitor-metrics.md index 4cc6865fd4..8eafb26986 100644 --- a/website/docs/maintenance/observability/monitor-metrics.md +++ b/website/docs/maintenance/observability/monitor-metrics.md @@ -31,7 +31,7 @@ number of order of variables. Variables are case-sensitive. ## Reporter -For information on how to set up Fluss's metric reporters please take a look at the [Metric Reporters](./metric-reporters.md) page. +For information on how to set up Fluss's metric reporters please take a look at the [Metric Reporters](metric-reporters.md) page. 
## Metrics List diff --git a/website/docs/maintenance/observability/quickstart.md b/website/docs/maintenance/observability/quickstart.md index 64068b6f61..f8307de50f 100644 --- a/website/docs/maintenance/observability/quickstart.md +++ b/website/docs/maintenance/observability/quickstart.md @@ -5,13 +5,13 @@ sidebar_position: 1 # Observability Quickstart Guides -On this page, you can find the following guides to set up an observability stack **based on the instructions in the [Flink quickstart guide](/docs/quickstart/flink)**: +On this page, you can find the following guides to set up an observability stack **based on the instructions in the [Flink quickstart guide](quickstart/flink.md)**: - [Observability with Prometheus, Loki and Grafana](#observability-with-prometheus-loki-and-grafana) :::warning Make sure the environment variables for the Fluss and the Quickstart version are set. - For further information, check the [Flink quickstart guide](/docs/quickstart/flink#starting-required-components). + For further information, check the [Flink quickstart guide](quickstart/flink.md#starting-required-components). ::: ## Observability with Prometheus, Loki and Grafana @@ -21,7 +21,7 @@ We provide a minimal quickstart configuration for application observability with The quickstart configuration comes with 2 metric dashboards. - `Fluss – overview`: Selected metrics to observe the overall cluster status -- `Fluss – detail`: Majority of metrics listed in [metrics list](./monitor-metrics.md#metrics-list) +- `Fluss – detail`: Majority of metrics listed in [metrics list](monitor-metrics.md#metrics-list) Follow the instructions below to add observability capabilities to your setup. @@ -52,7 +52,7 @@ The container manifest below configures Fluss to use Logback and Loki4j. Save it ```dockerfile ARG FLUSS_VERSION -FROM fluss/fluss:${FLUSS_VERSION} +FROM fluss/fluss:$FLUSS_VERSION$ # remove default logging backend from classpath and add logback to classpath RUN rm -rf ${FLUSS_HOME}/lib/log4j-slf4j-impl-*.jar && \ @@ -67,7 +67,7 @@ COPY fluss-quickstart-observability/slf4j/logback-loki-console.xml ${FLUSS_HOME} ``` :::note -Detailed configuration instructions for Fluss and Logback can be found [here](./logging.md#configuring-logback). +Detailed configuration instructions for Fluss and Logback can be found [here](logging.md#configuring-logback). ::: 3. 
Additionally, you need to adapt the `docker-compose.yml` and @@ -84,10 +84,10 @@ To do this, you can simply copy the manifest below into your `docker-compose.yml services: #begin Fluss cluster coordinator-server: - image: fluss-slf4j-logback:${FLUSS_VERSION} + image: fluss-slf4j-logback:$FLUSS_VERSION$ build: args: - FLUSS_VERSION: ${FLUSS_VERSION} + FLUSS_VERSION: $FLUSS_VERSION$ dockerfile: fluss-slf4j-logback.Dockerfile command: coordinatorServer depends_on: @@ -106,10 +106,10 @@ services: logback.configurationFile: logback-loki-console.xml - APP_NAME=coordinator-server tablet-server: - image: fluss-slf4j-logback:${FLUSS_VERSION} + image: fluss-slf4j-logback:$FLUSS_VERSION$ build: args: - FLUSS_VERSION: ${FLUSS_VERSION} + FLUSS_VERSION: $FLUSS_VERSION$ dockerfile: fluss-slf4j-logback.Dockerfile command: tabletServer depends_on: diff --git a/website/docs/maintenance/tiered-storage/remote-storage.md b/website/docs/maintenance/tiered-storage/remote-storage.md index 909e1b50a7..2f7f20c6d9 100644 --- a/website/docs/maintenance/tiered-storage/remote-storage.md +++ b/website/docs/maintenance/tiered-storage/remote-storage.md @@ -5,7 +5,7 @@ sidebar_position: 2 # Remote Storage Remote storage usually means a cost-efficient and fault-tolerant storage comparing to local disk, such as S3, HDFS, OSS. -See more detail about how to configure remote storage in documentation of [filesystems](../../maintenance/filesystems/overview.md). +See more detail about how to configure remote storage in documentation of [filesystems](maintenance/filesystems/overview.md). For log table, Fluss will use remote storage to store the tiered log segments of data. For primary key table, Fluss will use remote storage to store the snapshot as well as the tiered log segments for change log. diff --git a/website/docs/quickstart/flink.md b/website/docs/quickstart/flink.md index fee1f72ef9..d127192b44 100644 --- a/website/docs/quickstart/flink.md +++ b/website/docs/quickstart/flink.md @@ -32,21 +32,14 @@ mkdir fluss-quickstart-flink cd fluss-quickstart-flink ``` -Then, set the following environment variables. +2. Create a `docker-compose.yml` file with the following content: -```bash -export FLUSS_VERSION=0.5.0 -export FLUSS_QUICKSTART_FLINK_VERSION=1.20-0.5 -``` - -2. Next, create a `docker-compose.yml` file with the following content: -[//]: # (IMPORTANT NOTE TO CONTRIBUTORS: if you change config options or versions here, also change them in other quickstart guides that build upon this one!) 
```yaml services: #begin Fluss cluster coordinator-server: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: coordinatorServer depends_on: - zookeeper @@ -60,7 +53,7 @@ services: datalake.paimon.metastore: filesystem datalake.paimon.warehouse: /tmp/paimon tablet-server: - image: fluss/fluss:${FLUSS_VERSION} + image: fluss/fluss:$FLUSS_VERSION$ command: tabletServer depends_on: - coordinator-server @@ -81,7 +74,7 @@ services: #end #begin Flink cluster jobmanager: - image: fluss/quickstart-flink:${FLUSS_QUICKSTART_FLINK_VERSION} + image: fluss/quickstart-flink:1.20-$FLUSS_VERSION_SHORT$ ports: - "8083:8081" command: jobmanager @@ -92,7 +85,7 @@ services: volumes: - shared-tmpfs:/tmp/paimon taskmanager: - image: fluss/quickstart-flink:${FLUSS_QUICKSTART_FLINK_VERSION} + image: fluss/quickstart-flink:1.20-$FLUSS_VERSION_SHORT$ depends_on: - jobmanager command: taskmanager @@ -119,7 +112,8 @@ The Docker Compose environment consists of the following containers: - **Fluss Cluster:** a Fluss `CoordinatorServer`, a Fluss `TabletServer` and a `ZooKeeper` server. - **Flink Cluster**: a Flink `JobManager` and a Flink `TaskManager` container to execute queries. -**Note:** The `fluss/quickstart-flink` includes the [fluss-connector-flink](engine-flink/getting-started.md), [paimon-flink](https://paimon.apache.org/docs/master/flink/quick-start/) and +**Note:** The `fluss/quickstart-flink` image is based on [flink:1.20.1-java17](https://hub.docker.com/layers/library/flink/1.20-java17/images/sha256:bf1af6406c4f4ad8faa46efe2b3d0a0bf811d1034849c42c1e3484712bc83505) and +includes the [fluss-connector-flink](engine-flink/getting-started.md), [paimon-flink](https://paimon.apache.org/docs/1.0/flink/quick-start/) and [flink-connector-faker](https://flink-packages.org/packages/flink-faker) to simplify this guide. 3. To start all containers, run: @@ -136,9 +130,9 @@ to check whether all containers are running properly. You can also visit http://localhost:8083/ to see if Flink is running normally. -:::note -- If you want to additionally use an observability stack, follow one of the provided quickstart guides [here](../maintenance/observability/quickstart.md) and then continue with this guide. -- If you want to run with your own Flink environment, remember to download the [fluss-connector-flink](/downloads), [flink-connector-faker](https://github.com/knaufk/flink-faker/releases), [paimon-flink](https://paimon.apache.org/docs/master/flink/quick-start/) connector jars and then put them to `FLINK_HOME/lib/`. +:::note +- If you want to additionally use an observability stack, follow one of the provided quickstart guides [here](maintenance/observability/quickstart.md) and then continue with this guide. +- If you want to run with your own Flink environment, remember to download the [fluss-connector-flink](/downloads), [flink-connector-faker](https://github.com/knaufk/flink-faker/releases), [paimon-flink](https://paimon.apache.org/docs/1.0/flink/quick-start/) connector jars and then put them to `FLINK_HOME/lib/`. - All the following commands involving `docker compose` should be executed in the created working directory that contains the `docker-compose.yml` file. ::: @@ -496,4 +490,4 @@ docker compose down -v to stop all containers. 
## Learn more
Now that you're up and running with Fluss and Flink, check out the [Apache Flink Engine](engine-flink/getting-started.md) docs to learn more Flink features, or [this guide](maintenance/observability/quickstart.md) to learn how to set up an observability stack for Fluss and Flink.
\ No newline at end of file
diff --git a/website/docs/streaming-lakehouse/integrate-data-lakes/paimon.md b/website/docs/streaming-lakehouse/integrate-data-lakes/paimon.md
index cf2ee94d30..6f2d193299 100644
--- a/website/docs/streaming-lakehouse/integrate-data-lakes/paimon.md
+++ b/website/docs/streaming-lakehouse/integrate-data-lakes/paimon.md
@@ -92,7 +92,7 @@ SELECT * FROM paimon_catalog.fluss.enriched_orders$snapshots;
 ## Data Type Mapping
 
 When integrating with Paimon, Fluss automatically converts between Fluss data types and Paimon data types.
-The following content shows the mapping between [Fluss data type](/docs/table-design/data-types.md) and Paimon data type:
+The following content shows the mapping between [Fluss data type](table-design/data-types.md) and Paimon data type:
 
 | Fluss Data Type | Paimon Data Type |
 |-------------------------------|-------------------------------|
diff --git a/website/docs/streaming-lakehouse/overview.md b/website/docs/streaming-lakehouse/overview.md
index 3de60cb03e..c3dda8623d 100644
--- a/website/docs/streaming-lakehouse/overview.md
+++ b/website/docs/streaming-lakehouse/overview.md
@@ -43,4 +43,4 @@ Some powerful features it provided are:
 - **Analytical Streams**: The union reads help data streams to have the powerful analytics capabilities. This reduces complexity when developing streaming applications, simplifies debugging, and allows for immediate access to live data insights.
 - **Connect to Lakehouse Ecosystem**: Fluss keeps the table metadata in sync with data lake catalogs while compacting data into Lakehouse. This allows external engines like Spark, StarRocks, Flink, Trino to read the data directly by connecting to the data lake catalog.
 
-Currently, Fluss supports [Paimon as Lakehouse Storage](./integrate-data-lakes/paimon.md); more kinds of data lake formats are on the roadmap.
\ No newline at end of file
+Currently, Fluss supports [Paimon as Lakehouse Storage](integrate-data-lakes/paimon.md); more kinds of data lake formats are on the roadmap.
\ No newline at end of file
diff --git a/website/docs/table-design/data-distribution/bucketing.md b/website/docs/table-design/data-distribution/bucketing.md
index 17274d96e6..36b3dad819 100644
--- a/website/docs/table-design/data-distribution/bucketing.md
+++ b/website/docs/table-design/data-distribution/bucketing.md
@@ -7,7 +7,7 @@ sidebar_position: 1
 
 A bucketing strategy is a data distribution technique that divides table data into small pieces and distributes the data to multiple hosts and services.
 
-When creating a Fluss table, you can specify the number of buckets by setting the `'bucket.num' = '<num>'` property for the table, see more details in [DDL](/docs/engine-flink/ddl.md).
+When creating a Fluss table, you can specify the number of buckets by setting the `'bucket.num' = '<num>'` property for the table; see more details in [DDL](engine-flink/ddl.md).
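As a quick illustration, the bucket count is set when the table is created; the table and column names below are made up for this sketch:

```sql
-- Distribute this log table's data across 8 buckets.
CREATE TABLE access_log (
    ip STRING,
    url STRING,
    access_time TIMESTAMP(3)
) WITH (
    'bucket.num' = '8'
);
```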
Currently, Fluss supports 3 bucketing strategies: **Hash Bucketing**, **Sticky Bucketing** and **Round-Robin Bucketing**. Primary-Key Tables only allow to use **Hash Bucketing**. Log Tables use **Sticky Bucketing** by default but can use the other two bucketing strategies.

diff --git a/website/docs/table-design/data-distribution/partitioning.md b/website/docs/table-design/data-distribution/partitioning.md
index fbd38da26d..afc177844f 100644
--- a/website/docs/table-design/data-distribution/partitioning.md
+++ b/website/docs/table-design/data-distribution/partitioning.md
@@ -8,8 +8,8 @@ sidebar_position: 2
 
 In Fluss, a **Partitioned Table** organizes data based on one or more partition keys, providing a way to improve query performance and manageability for large datasets. Partitions allow the system to divide data into distinct segments, each corresponding to specific values of the partition keys. For partitioned tables, Fluss not only supports partitions managed by users, like creating/dropping partitions, but also supports automatically managed partitions.
-  - For manually managing partitions, user can create new partitions or drop exists partitions. Learn how to create or drop partitions please refer to [Add Partition](/docs/engine-flink/ddl.md#add-partition) and [Drop Partition](/docs/engine-flink/ddl.md#drop-partition).
-  - For automatically managing partitions, the partitions will be created based on the auto partitioning rules configured at the time of table creation, and expired partitions are automatically removed, ensuring data not expanding unlimited. See [Auto Partitioning Options](/docs/table-design/data-distribution/partitioning.md#auto-partitioning-options).
+  - For manually managing partitions, users can create new partitions or drop existing partitions. To learn how to create or drop partitions, please refer to [Add Partition](engine-flink/ddl.md#add-partition) and [Drop Partition](engine-flink/ddl.md#drop-partition).
+  - For automatically managing partitions, the partitions will be created based on the auto partitioning rules configured at the time of table creation, and expired partitions are automatically removed, ensuring the data does not grow without bound. See [Auto Partitioning Options](table-design/data-distribution/partitioning.md#auto-partitioning-options).
   - Manual management and automated management are orthogonal and can coexist on the same table.
 
 ### Key Benefits of Partitioned Tables

diff --git a/website/docs/table-design/data-distribution/ttl.md b/website/docs/table-design/data-distribution/ttl.md
index cea076b7db..d7c34217a8 100644
--- a/website/docs/table-design/data-distribution/ttl.md
+++ b/website/docs/table-design/data-distribution/ttl.md
@@ -7,4 +7,4 @@ sidebar_position: 3
 
 Fluss supports TTL for data by setting the TTL attribute for tables with `'table.log.ttl' = '<duration>'` (default is 7 days). Fluss can periodically and automatically check for and clean up expired data in the table.
 For log tables, this attribute indicates the expiration time of the log table data.
 For primary key tables, this attribute indicates the expiration time of their binlog and does not represent the expiration time of the primary key table data.
If you also want the data in the primary key table to expire automatically, please use [auto partitioning](partitioning.md). diff --git a/website/docs/table-design/overview.md b/website/docs/table-design/overview.md index 3974bb55ab..6d66ac3073 100644 --- a/website/docs/table-design/overview.md +++ b/website/docs/table-design/overview.md @@ -19,7 +19,7 @@ Tables are classified into two types based on the presence of a primary key: - Used for updating and managing data in business databases. - Support INSERT, UPDATE, and DELETE operations based on the defined primary key. -A Table becomes a [Partitioned Table](../table-design/data-distribution/partitioning.md) when a partition column is defined. Data with the same partition value is stored in the same partition. Partition columns can be applied to both Log Tables and PrimaryKey Tables, but with specific considerations: +A Table becomes a [Partitioned Table](table-design/data-distribution/partitioning.md) when a partition column is defined. Data with the same partition value is stored in the same partition. Partition columns can be applied to both Log Tables and PrimaryKey Tables, but with specific considerations: - **For Log Tables**, partitioning is commonly used for log data, typically based on date columns, to facilitate data separation and cleaning. - **For PrimaryKey Tables**, the partition column must be a subset of the primary key to ensure uniqueness. diff --git a/website/docs/table-design/table-types/log-table.md b/website/docs/table-design/table-types/log-table.md index c06d4e1268..482483c3d4 100644 --- a/website/docs/table-design/table-types/log-table.md +++ b/website/docs/table-design/table-types/log-table.md @@ -113,4 +113,4 @@ In the above example, we set the compression codec to `LZ4_FRAME` and the compre ::: ## Log Tiering -Log Table supports tiering data to different storage tiers. See more details about [Remote Log](/docs/maintenance/tiered-storage/remote-storage/). \ No newline at end of file +Log Table supports tiering data to different storage tiers. See more details about [Remote Log](maintenance/tiered-storage/remote-storage.md). \ No newline at end of file diff --git a/website/docs/table-design/table-types/pk-table/index.md b/website/docs/table-design/table-types/pk-table/index.md index 2f03cf24ac..aee466b7fd 100644 --- a/website/docs/table-design/table-types/pk-table/index.md +++ b/website/docs/table-design/table-types/pk-table/index.md @@ -82,8 +82,8 @@ However, users can specify a different merge engine to customize the merging beh The following merge engines are supported: -1. [FirstRow Merge Engine](/docs/table-design/table-types/pk-table/merge-engines/first-row) -2. [Versioned Merge Engine](/docs/table-design/table-types/pk-table/merge-engines/versioned) +1. [FirstRow Merge Engine](table-design/table-types/pk-table/merge-engines/first-row.md) +2. [Versioned Merge Engine](table-design/table-types/pk-table/merge-engines/versioned.md) ## Changelog Generation @@ -122,13 +122,13 @@ For primary key tables, Fluss supports various kinds of querying abilities. For a primary key table, the default read method is a full snapshot followed by incremental data. First, the snapshot data of the table is consumed, followed by the binlog data of the table. -It is also possible to only consume the binlog data of the table. For more details, please refer to the [Flink Reads](/docs/engine-flink/reads.md) +It is also possible to only consume the binlog data of the table. 
diff --git a/website/docs/table-design/table-types/pk-table/index.md b/website/docs/table-design/table-types/pk-table/index.md
index 2f03cf24ac..aee466b7fd 100644
--- a/website/docs/table-design/table-types/pk-table/index.md
+++ b/website/docs/table-design/table-types/pk-table/index.md
@@ -82,8 +82,8 @@ However, users can specify a different merge engine to customize the merging beh
 
 The following merge engines are supported:
 
-1. [FirstRow Merge Engine](/docs/table-design/table-types/pk-table/merge-engines/first-row)
-2. [Versioned Merge Engine](/docs/table-design/table-types/pk-table/merge-engines/versioned)
+1. [FirstRow Merge Engine](table-design/table-types/pk-table/merge-engines/first-row.md)
+2. [Versioned Merge Engine](table-design/table-types/pk-table/merge-engines/versioned.md)
 
 ## Changelog Generation
 
@@ -122,13 +122,13 @@ For primary key tables, Fluss supports various kinds of querying abilities.
 
 For a primary key table, the default read method is a full snapshot followed by incremental data. First, the snapshot data of the table is consumed, followed by the binlog data of the table.
 
-It is also possible to only consume the binlog data of the table. For more details, please refer to the [Flink Reads](/docs/engine-flink/reads.md)
+It is also possible to consume only the binlog data of the table. For more details, please refer to [Flink Reads](engine-flink/reads.md).
 
 ### Lookup
 
-Fluss primary key table can lookup data by the primary keys. If the key exists in Fluss, lookup will return a unique row. it always used in [Flink Lookup Join](/docs/engine-flink//lookups.md#lookup).
+A Fluss primary key table can look up data by primary key. If the key exists in Fluss, the lookup returns a unique row. It is commonly used in [Flink Lookup Join](engine-flink/lookups.md#lookup).
 
 ### Prefix Lookup
 
 A Fluss primary key table can also perform prefix lookup using a prefix subset of the primary key. Unlike lookup, prefix lookup
-will scan data based on the prefix of primary keys and may return multiple rows. It always used in [Flink Prefix Lookup Join](/docs/engine-flink/lookups.md#prefix-lookup).
\ No newline at end of file
+scans data based on a prefix of the primary key and may return multiple rows. It is commonly used in [Flink Prefix Lookup Join](engine-flink/lookups.md#prefix-lookup).
\ No newline at end of file
diff --git a/website/docs/table-design/table-types/pk-table/merge-engines/index.md b/website/docs/table-design/table-types/pk-table/merge-engines/index.md
index 11c6314879..d23fd5d9e3 100644
--- a/website/docs/table-design/table-types/pk-table/merge-engines/index.md
+++ b/website/docs/table-design/table-types/pk-table/merge-engines/index.md
@@ -11,5 +11,5 @@ However, users can specify a different merge engine to customize the merging beh
 
 The following merge engines are supported:
 
-1. [FirstRow Merge Engine](/docs/table-design/table-types/pk-table/merge-engines/first-row)
-2. [Versioned Merge Engine](/docs/table-design/table-types/pk-table/merge-engines/versioned)
\ No newline at end of file
+1. [FirstRow Merge Engine](table-design/table-types/pk-table/merge-engines/first-row.md)
+2. [Versioned Merge Engine](table-design/table-types/pk-table/merge-engines/versioned.md)
\ No newline at end of file
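To illustrate the lookup capability described in pk-table/index.md above, a Flink lookup join against a primary key table typically takes the following shape. This is a sketch with invented table and column names; `o.proc_time` is assumed to be a processing-time attribute (for example, declared as `proc_time AS PROCTIME()`) on the `orders` table.

```sql
-- Each order row probes the primary key table `customers` by its
-- primary key (customer_id) at processing time.
SELECT o.order_id, o.amount, c.customer_name
FROM orders AS o
JOIN customers FOR SYSTEM_TIME AS OF o.proc_time AS c
    ON o.customer_id = c.customer_id;
```

A prefix lookup join follows the same `FOR SYSTEM_TIME AS OF` pattern but joins on only a leading subset of the primary key columns, so it may emit multiple matching rows per input row.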
From 2b0f40590ad91a3a7d4a91333298c0a90d264227 Mon Sep 17 00:00:00 2001
From: Jark Wu
Date: Wed, 5 Mar 2025 18:23:50 +0800
Subject: [PATCH 4/5] [docs] Add GitHub Action to automatically check broken
 links in docs

---
 .github/workflows/docs-check.yaml | 43 +++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 .github/workflows/docs-check.yaml

diff --git a/.github/workflows/docs-check.yaml b/.github/workflows/docs-check.yaml
new file mode 100644
index 0000000000..0043cf9934
--- /dev/null
+++ b/.github/workflows/docs-check.yaml
@@ -0,0 +1,43 @@
+################################################################################
+# Copyright (c) 2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+# This workflow checks for broken links in the documentation.
+name: Check Documentation
+on:
+  pull_request:
+    branches: [main, release-*, ci-*]
+    paths:
+      - 'website/**'
+
+jobs:
+  test-deploy:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./website
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generate versioned docs
+        run: ./build_versioned_docs.sh
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 18
+      - name: Install dependencies
+        run: npm install
+      - name: Test build website
+        run: npm run build -- --no-minify
\ No newline at end of file

From 952d238ff385a8db17ca5f678c0eb4a74e5f4591 Mon Sep 17 00:00:00 2001
From: Jark Wu
Date: Wed, 5 Mar 2025 18:24:21 +0800
Subject: [PATCH 5/5] [website] Add GitHub Action to automatically deploy
 website

---
 .github/workflows/docs-deploy.yaml | 51 ++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 .github/workflows/docs-deploy.yaml

diff --git a/.github/workflows/docs-deploy.yaml b/.github/workflows/docs-deploy.yaml
new file mode 100644
index 0000000000..6798eae774
--- /dev/null
+++ b/.github/workflows/docs-deploy.yaml
@@ -0,0 +1,51 @@
+################################################################################
+# Copyright (c) 2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+name: Deploy Documentation
+on:
+  push:
+    branches: [main, release-*]
+    paths:
+      - 'website/**'
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./website
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generate versioned docs
+        run: ./build_versioned_docs.sh
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 18
+      - name: Install dependencies
+        run: npm install
+      - name: Build website
+        run: npm run build -- --no-minify
+      - uses: webfactory/ssh-agent@v0.5.0
+        with:
+          ssh-private-key: ${{ secrets.GH_PAGES_DEPLOY }}
+      - name: Deploy website
+        env:
+          USE_SSH: true
+        run: |
+          git config --global user.email "actions@github.com"
+          git config --global user.name "gh-actions"
+          npm run deploy -- --skip-build
\ No newline at end of file