diff --git a/.clang-format b/.clang-format new file mode 120000 index 00000000..9a13bb63 --- /dev/null +++ b/.clang-format @@ -0,0 +1 @@ +duckdb/.clang-format \ No newline at end of file diff --git a/.clang-tidy b/.clang-tidy new file mode 120000 index 00000000..b438d44f --- /dev/null +++ b/.clang-tidy @@ -0,0 +1 @@ +duckdb/.clang-tidy \ No newline at end of file diff --git a/.editorconfig b/.editorconfig new file mode 120000 index 00000000..ec7786c1 --- /dev/null +++ b/.editorconfig @@ -0,0 +1 @@ +duckdb/.editorconfig \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..eaa295a8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,122 @@ +name: Bug report +description: Create a report to help us improve +labels: + - needs triage +body: + - type: markdown + attributes: + value: | + **Disclaimer:** Please note that this is a research project. + While I am committed to improving the project, responses to issues and bug fixes might take longer than expected. + I appreciate your patience and understanding as I work to address issues. Thank you for helping make this project better! + + + - type: textarea + attributes: + label: What happens? + description: A short, clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + attributes: + label: To Reproduce + description: | + Please provide steps to reproduce the behavior, preferably a [minimal reproducible example](https://en.wikipedia.org/wiki/Minimal_reproducible_example). Please adhere the following guidelines: + + * Format the code and the output as [code blocks](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks) using triple backticks: + + ```` + ``` + CODE HERE + ``` + ```` + * Add all required imports for scripts, e.g., `import duckdb`, `import pandas as pd`. 
+ * Remove all prompts from the scripts. This includes DuckDB's 'D' prompt and Python's `>>>` prompt. Removing these prompts makes reproduction attempts quicker. + * Make sure that the script and its outputs are provided in separate code blocks. + * If applicable, please check whether the issue is reproducible via running plain SQL queries from the DuckDB CLI client. + validations: + required: true
+ options: + - Community extension version + - Latest version + - Built from source + validations: + required: true + + - type: dropdown + attributes: + label: Did you include all relevant data sets for reproducing the issue? + options: + - "No - Other reason (please specify in the issue body)" + - "No - I cannot share the data sets because they are confidential" + - "No - I cannot easily share my data sets due to their large size" + - "Not applicable - the reproduction does not require a data set" + - "Yes" + default: 0 + validations: + required: true + + - type: checkboxes + attributes: + label: Did you include all code required to reproduce the issue? + options: + - label: Yes, I have + + - type: checkboxes + attributes: + label: Did you include all relevant configuration (e.g., CPU architecture, Python version, Linux distribution) to reproduce the issue? + options: + - label: Yes, I have diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..760284fb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,9 @@ +blank_issues_enabled: false +contact_links: + - name: Feature Request + # manual template until discussion templates are GA + url: https://github.com/cwida/duckpgq-extension/discussions/new?category=ideas&title=Feature%20Request:%20...&labels=feature&body=Why%20do%20you%20want%20this%20feature%3F + about: Submit feature requests here + - name: Discussions + url: https://github.com/cwida/duckpgq-extension/discussions + about: Please ask and answer general questions here. 
\ No newline at end of file diff --git a/.github/workflows/ExtensionTemplate.yml b/.github/workflows/ExtensionTemplate.yml new file mode 100644 index 00000000..fa6b5ce3 --- /dev/null +++ b/.github/workflows/ExtensionTemplate.yml @@ -0,0 +1,162 @@ +# +# NOTE: this workflow is for testing the extension template itself, +# this workflow will be removed when scripts/bootstrap-template.py is run +# +name: Extension Template +on: [push, pull_request,repository_dispatch] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} + cancel-in-progress: true + +jobs: + linux: + name: Linux + if: ${{ vars.RUN_RENAME_TEST == 'true' || github.repository == 'duckdb/extension-template' }} + runs-on: ubuntu-latest + strategy: + matrix: + # Add commits/tags to build against other DuckDB versions + duckdb_version: [ 'v1.4.1' ] + env: + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + VCPKG_TARGET_TRIPLET: 'x64-linux' + GEN: ninja + ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + defaults: + run: + shell: bash + + steps: + - name: Install Ninja + shell: bash + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build + + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: 'true' + + - name: Checkout DuckDB to version + if: ${{ matrix.duckdb_version != ''}} + run: | + cd duckdb + git checkout ${{ matrix.duckdb_version }} + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: 5e5d0e1cd7785623065e77eff011afdeec1a3574 + + - name: Rename extension + run: | + python3 scripts/bootstrap-template.py ext_1_a_123b_b11 + + - name: Build + run: | + make + + - name: Test + run: | + make test + + macos: + name: MacOS + if: ${{ vars.RUN_RENAME_TEST == 'true' || github.repository == 'duckdb/extension-template' }} + runs-on: macos-latest + strategy: + matrix: + # Add commits/tags to build 
against other DuckDB versions + duckdb_version: [ 'v1.4.1'] + env: + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + VCPKG_TARGET_TRIPLET: 'x64-osx' + OSX_BUILD_ARCH: 'x86_64' + GEN: ninja + defaults: + run: + shell: bash + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: 'true' + + - name: Install Ninja + run: brew install ninja + + - uses: actions/setup-python@v2 + with: + python-version: '3.11' + + - name: Checkout DuckDB to version + if: ${{ matrix.duckdb_version != ''}} + run: | + cd duckdb + git checkout ${{ matrix.duckdb_version }} + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: 5e5d0e1cd7785623065e77eff011afdeec1a3574 + + - name: Rename extension + run: | + python scripts/bootstrap-template.py ext_1_a_123b_b11 + + - name: Build + run: | + make + + - name: Test + run: | + make test + + windows: + name: Windows + if: ${{ vars.RUN_RENAME_TEST == 'true' || github.repository == 'duckdb/extension-template' }} + runs-on: windows-latest + strategy: + matrix: + # Add commits/tags to build against other DuckDB versions + duckdb_version: [ '1.4.1' ] + env: + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + VCPKG_TARGET_TRIPLET: 'x64-windows-static-md' + defaults: + run: + shell: bash + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: 'true' + + - uses: actions/setup-python@v2 + with: + python-version: '3.7' + + - name: Checkout DuckDB to version + # Add commits/tags to build against other DuckDB versions + if: ${{ matrix.duckdb_version != ''}} + run: | + cd duckdb + git checkout ${{ matrix.duckdb_version }} + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: 5e5d0e1cd7785623065e77eff011afdeec1a3574 + + - name: Rename extension + run: | + python scripts/bootstrap-template.py ext_1_a_123b_b11 + + - name: Build + run: | + make + + - name: Test extension + run: | + 
build/release/test/Release/unittest.exe diff --git a/.github/workflows/LinuxRelease.yml b/.github/workflows/LinuxRelease.yml deleted file mode 100644 index a4e0f835..00000000 --- a/.github/workflows/LinuxRelease.yml +++ /dev/null @@ -1,122 +0,0 @@ -name: Linux -on: [push, pull_request,repository_dispatch] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} - cancel-in-progress: true - -jobs: - linux-extensions-64: - name: Linux Extensions - runs-on: ubuntu-latest - container: ${{ matrix.container }} - strategy: - matrix: - # empty string builds current submodule version, add commits/tags to build against other DuckDB versions - duckdb_version: [ '' ] - arch: ['linux_amd64', 'linux_arm64', 'linux_amd64_gcc4'] - include: - - arch: 'linux_amd64' - container: 'ubuntu:16.04' - - arch: 'linux_arm64' - container: 'ubuntu:18.04' - - arch: 'linux_amd64_gcc4' - container: 'quay.io/pypa/manylinux2014_x86_64' - env: - GEN: ninja - - steps: - - name: Install required ubuntu packages - if: ${{ matrix.arch == 'linux_amd64' || matrix.arch == 'linux_arm64' }} - shell: bash - run: | - apt-get update -y -qq - apt-get install -y -qq software-properties-common - add-apt-repository ppa:git-core/ppa - apt-get update -y -qq - apt-get install -y -qq ninja-build make gcc-multilib g++-multilib libssl-dev wget openjdk-8-jdk zip maven unixodbc-dev libc6-dev-i386 lib32readline6-dev libssl-dev libcurl4-gnutls-dev libexpat1-dev gettext unzip build-essential checkinstall libffi-dev curl libz-dev openssh-client - -# - name: Install cross compiler -# shell: bash -# if: ${{ matrix.arch == 'linux_arm64' }} -# run: | -# apt-get install -y -qq gcc-aarch64-linux-gnu g++-aarch64-linux-gnu - - - name: Install Git 2.18.5 - if: ${{ matrix.arch == 'linux_amd64' || matrix.arch == 'linux_arm64' }} - shell: bash - run: | - wget https://github.com/git/git/archive/refs/tags/v2.18.5.tar.gz - 
tar xvf v2.18.5.tar.gz - cd git-2.18.5 - make - make prefix=/usr install - git --version - - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - if: ${{ matrix.arch == 'linux_amd64_gcc4' }} - uses: ./duckdb/.github/actions/centos_7_setup - with: - openssl: 0 - - - if: ${{ matrix.arch == 'linux_amd64' || matrix.arch == 'linux_arm64' }} - uses: ./duckdb/.github/actions/ubuntu_16_setup - with: - aarch64_cross_compile: ${{ matrix.arch == 'linux_arm64' && 1 }} - - - name: Checkout DuckDB to version - if: ${{ matrix.duckdb_version != ''}} - shell: bash - run: | - cd duckdb - git checkout ${{ matrix.duckdb_version }} - - # Build extension - - name: Build extension - shell: bash - env: - GEN: ninja - STATIC_LIBCPP: 1 - CC: ${{ matrix.arch == 'linux_arm64' && 'aarch64-linux-gnu-gcc' || '' }} - CXX: ${{ matrix.arch == 'linux_arm64' && 'aarch64-linux-gnu-g++' || '' }} - run: | - make release - - - name: Build extension - if: ${{ matrix.arch != 'linux_arm64'}} - shell: bash - run: | - make test - - - uses: actions/upload-artifact@v2 - with: - name: ${{matrix.arch}}-extensions - path: | - build/release/extension/boilerplate/boilerplate.duckdb_extension - - - name: Deploy - shell: bash - env: - AWS_ACCESS_KEY_ID: ${{ secrets.S3_DEPLOY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DEPLOY_KEY }} - AWS_DEFAULT_REGION: ${{ secrets.S3_REGION }} - BUCKET_NAME: ${{ secrets.S3_BUCKET }} - run: | - git config --global --add safe.directory '*' - cd duckdb - git fetch --tags - export DUCKDB_VERSION=`git tag --points-at HEAD` - export DUCKDB_VERSION=${DUCKDB_VERSION:=`git log -1 --format=%h`} - cd .. 
- if [[ "$AWS_ACCESS_KEY_ID" == "" ]] ; then - echo 'No key set, skipping' - elif [[ "$GITHUB_REF" =~ ^(refs/tags/v.+)$ ]] ; then - python3 -m pip install pip awscli - ./scripts/extension-upload.sh boilerplate ${{ github.ref_name }} $DUCKDB_VERSION ${{matrix.arch}} $BUCKET_NAME - elif [[ "$GITHUB_REF" =~ ^(refs/heads/main)$ ]] ; then - python3 -m pip install pip awscli - ./scripts/extension-upload.sh boilerplate `git log -1 --format=%h` $DUCKDB_VERSION ${{matrix.arch}} $BUCKET_NAME - fi \ No newline at end of file diff --git a/.github/workflows/MacOS.yml b/.github/workflows/MacOS.yml deleted file mode 100644 index b7c57e4a..00000000 --- a/.github/workflows/MacOS.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: MacOS -on: [push, pull_request,repository_dispatch] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} - cancel-in-progress: true - -jobs: - macos-extensions: - name: OSX Extensions (Universal) - runs-on: macos-latest - strategy: - matrix: - # empty string builds current submodule version, add commits/tags to build against other DuckDB versions - duckdb_version: [ '' ] - env: - OSX_BUILD_UNIVERSAL: 1 - GEN: ninja - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - name: Install Ninja - run: brew install ninja - - - uses: actions/setup-python@v2 - with: - python-version: '3.7' - - - name: Checkout DuckDB to version - if: ${{ matrix.duckdb_version != ''}} - shell: bash - run: | - cd duckdb - git checkout ${{ matrix.duckdb_version }} - - # Build extension - - name: Build extension - shell: bash - run: | - make release - make test - - - name: Deploy - shell: bash - env: - AWS_ACCESS_KEY_ID: ${{ secrets.S3_DEPLOY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DEPLOY_KEY }} - AWS_DEFAULT_REGION: ${{ secrets.S3_REGION }} - BUCKET_NAME: ${{ secrets.S3_BUCKET }} - run: | - cd duckdb - git fetch --tags - export 
DUCKDB_VERSION=`git tag --points-at HEAD` - echo $DUCKDB_VERSION - export DUCKDB_VERSION=${DUCKDB_VERSION:=`git log -1 --format=%h`} - echo $DUCKDB_VERSION - cd .. - if [[ "$AWS_ACCESS_KEY_ID" == "" ]] ; then - echo 'No key set, skipping' - elif [[ "$GITHUB_REF" =~ ^(refs/tags/v.+)$ ]] ; then - python -m pip install awscli - ./scripts/extension-upload.sh boilerplate ${{ github.ref_name }} $DUCKDB_VERSION osx_amd64 $BUCKET_NAME - ./scripts/extension-upload.sh boilerplate ${{ github.ref_name }} $DUCKDB_VERSION osx_arm64 $BUCKET_NAME - elif [[ "$GITHUB_REF" =~ ^(refs/heads/main)$ ]] ; then - python -m pip install awscli - ./scripts/extension-upload.sh boilerplate `git log -1 --format=%h` $DUCKDB_VERSION osx_amd64 $BUCKET_NAME - ./scripts/extension-upload.sh boilerplate `git log -1 --format=%h` $DUCKDB_VERSION osx_arm64 $BUCKET_NAME - fi \ No newline at end of file diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml new file mode 100644 index 00000000..dfd5716d --- /dev/null +++ b/.github/workflows/MainDistributionPipeline.yml @@ -0,0 +1,38 @@ +# +# This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension +# +name: Main Extension Distribution Pipeline +on: + push: + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' && github.sha || '' }} + cancel-in-progress: true + +jobs: + duckdb-next-build: + name: Build extension binaries + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main + with: + duckdb_version: main + ci_tools_version: main + extension_name: duckpgq + + duckdb-stable-build: + name: Build extension binaries + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.1 + with: + duckdb_version: v1.4.1 + ci_tools_version: v1.4.1 + extension_name: 
duckpgq + + code-quality-check: + name: Code Quality Check + uses: duckdb/extension-ci-tools/.github/workflows/_extension_code_quality.yml@v1.4.1 + with: + duckdb_version: v1.4.1 + ci_tools_version: main + extension_name: duckpgq + format_checks: 'format;tidy' diff --git a/.github/workflows/NodeJS.yml b/.github/workflows/NodeJS.yml deleted file mode 100644 index 0a63e5c9..00000000 --- a/.github/workflows/NodeJS.yml +++ /dev/null @@ -1,41 +0,0 @@ -# -# NOTE: if NodeJS tests are unused, deleting this file or disabling the workflow on GitHub will speed up CI -# - -name: NodeJS -on: [push, pull_request,repository_dispatch] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} - cancel-in-progress: true - -defaults: - run: - shell: bash - -jobs: - nodejs: - name: NodeJS - runs-on: ubuntu-latest - env: - GEN: ninja - - steps: - - name: Install Ninja - run: | - sudo apt-get update -y -qq - sudo apt-get install -y -qq ninja-build - - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - submodules: 'true' - - - uses: actions/setup-python@v2 - with: - python-version: '3.9' - - - name: Build DuckDB NodeJS client - run: make debug_js - - - name: Run NodeJS client tests - run: make test_debug_js \ No newline at end of file diff --git a/.github/workflows/Python.yml b/.github/workflows/Python.yml deleted file mode 100644 index 50c4f1ae..00000000 --- a/.github/workflows/Python.yml +++ /dev/null @@ -1,45 +0,0 @@ -# -# NOTE: if python tests are unused, deleting this file or disabling the workflow on GitHub will speed up CI -# - -name: Python -on: [push, pull_request,repository_dispatch] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} - cancel-in-progress: true - -defaults: - run: - shell: bash - -jobs: - python: - name: Python - runs-on: 
ubuntu-latest - env: - GEN: ninja - - steps: - - name: Install Ninja - run: | - sudo apt-get update -y -qq - sudo apt-get install -y -qq ninja-build - - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - submodules: 'true' - - - uses: actions/setup-python@v2 - with: - python-version: '3.9' - - - name: Build DuckDB Python client - run: make debug_python - - - name: Install Python test dependencies - run: python -m pip install --upgrade pytest - - - name: Run Python client tests - run: | - make test_debug_python \ No newline at end of file diff --git a/.github/workflows/Windows.yml b/.github/workflows/Windows.yml deleted file mode 100644 index 895a96e9..00000000 --- a/.github/workflows/Windows.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: Windows -on: [push, pull_request,repository_dispatch] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} - cancel-in-progress: true - -jobs: - win-extensions-64: - name: Windows Extensions (x64) - runs-on: windows-latest - strategy: - matrix: - # empty string builds current submodule version, add commits/tags to build against other DuckDB versions - duckdb_version: [ '' ] - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - uses: actions/setup-python@v2 - with: - python-version: '3.7' - - - name: Checkout DuckDB to version - if: ${{ matrix.duckdb_version != ''}} - shell: bash - run: | - cd duckdb - git checkout ${{ matrix.duckdb_version }} - - - name: Build extension - shell: bash - run: | - make release - build/release/test/Release/unittest.exe - - - uses: actions/upload-artifact@v2 - with: - name: linux-extensions-64-aarch64 - path: | - build/release/extension/boilerplate/boilerplate.duckdb_extension - - - name: Deploy - shell: bash - env: - AWS_ACCESS_KEY_ID: ${{ secrets.S3_DEPLOY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DEPLOY_KEY }} - AWS_DEFAULT_REGION: ${{ 
secrets.S3_REGION }} - BUCKET_NAME: ${{ secrets.S3_BUCKET }} - TEST_VARIABLE: ${{ env.TEST_VARIABLE }} - run: | - cd duckdb - git fetch --tags - export DUCKDB_VERSION=`git tag --points-at HEAD` - export DUCKDB_VERSION=${DUCKDB_VERSION:=`git log -1 --format=%h`} - cd .. - if [[ "$AWS_ACCESS_KEY_ID" == "" ]] ; then - echo 'No key set, skipping' - elif [[ "$GITHUB_REF" =~ ^(refs/tags/v.+)$ ]] ; then - python -m pip install awscli - ./scripts/extension-upload.sh boilerplate ${{ github.ref_name }} $DUCKDB_VERSION windows_amd64 $BUCKET_NAME - elif [[ "$GITHUB_REF" =~ ^(refs/heads/main)$ ]] ; then - python -m pip install awscli - ./scripts/extension-upload.sh boilerplate `git log -1 --format=%h` $DUCKDB_VERSION windows_amd64 $BUCKET_NAME - fi \ No newline at end of file diff --git a/.gitignore b/.gitignore index b9f264b9..23bce775 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ duckdb_unittest_tempdir/ testext test/python/__pycache__/ .Rhistory +.venv \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index fd4e99e5..8d3ace76 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,8 @@ [submodule "duckdb"] path = duckdb - url = https://github.com/duckdb/duckdb - branch = master -[submodule "master"] - path = master - url = https://github.com/duckdb/duckdb + url = git@github.com:cwida/duckdb-pgq.git + branch = main +[submodule "extension-ci-tools"] + path = extension-ci-tools + url = git@github.com:duckdb/extension-ci-tools.git + branch = main \ No newline at end of file diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..2cb52bf7 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,8 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." 
+authors: +- family-names: "ten Wolde" + given-names: "Daniel" + orcid: "https://orcid.org/0009-0008-8502-1148" +title: "DuckPGQ" +url: "https://github.com/cwida/duckpgq-extension" diff --git a/CMakeLists.txt b/CMakeLists.txt index 15503395..c8351d4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,20 +1,31 @@ -cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 3.5) -# Set extension name here -set(TARGET_NAME boilerplate) +set(TARGET_NAME duckpgq) +set(CMAKE_CXX_STANDARD 11) + +# DuckDB's extension distribution supports vcpkg. As such, dependencies can be +# added in ./vcpkg.json and then used in cmake with find_package. Feel free to +# remove or replace with other dependencies. Note that it should also be removed +# from vcpkg.json to prevent needlessly installing it.. +find_package(OpenSSL REQUIRED) set(EXTENSION_NAME ${TARGET_NAME}_extension) +set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) + project(${TARGET_NAME}) include_directories(src/include) +add_subdirectory(src) +include_directories(../duckdb/third_party/libpg_query/include) -set(EXTENSION_SOURCES src/boilerplate_extension.cpp) -add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES}) +build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) +build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) -set(PARAMETERS "-warnings") -build_loadable_extension(${TARGET_NAME} ${PARAMETERS} ${EXTENSION_SOURCES}) +# Link OpenSSL in both the static library as the loadable extension +target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) +target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) install( TARGETS ${EXTENSION_NAME} EXPORT "${DUCKDB_EXPORT_SET}" LIBRARY DESTINATION "${INSTALL_LIB_DIR}" - ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") \ No newline at end of file + ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") diff --git a/LICENSE b/LICENSE index 5723ab97..a14aed31 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ 
-Copyright 2018-2022 DuckDB Labs BV +Copyright 2018-2025 Stichting DuckDB Foundation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/Makefile b/Makefile index 673a37fa..5e2664e2 100644 --- a/Makefile +++ b/Makefile @@ -1,98 +1,8 @@ -.PHONY: all clean format debug release duckdb_debug duckdb_release pull update +PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) -all: release +# Configuration of extension +EXT_NAME=duckpgq +EXT_CONFIG=${PROJ_DIR}extension_config.cmake -MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -PROJ_DIR := $(dir $(MKFILE_PATH)) - -OSX_BUILD_UNIVERSAL_FLAG= -ifeq (${OSX_BUILD_UNIVERSAL}, 1) - OSX_BUILD_UNIVERSAL_FLAG=-DOSX_BUILD_UNIVERSAL=1 -endif -ifeq (${STATIC_LIBCPP}, 1) - STATIC_LIBCPP=-DSTATIC_LIBCPP=TRUE -endif - -ifeq ($(GEN),ninja) - GENERATOR=-G "Ninja" - FORCE_COLOR=-DFORCE_COLORED_OUTPUT=1 -endif - -BUILD_FLAGS=-DEXTENSION_STATIC_BUILD=1 -DBUILD_TPCH_EXTENSION=1 -DBUILD_PARQUET_EXTENSION=1 ${OSX_BUILD_UNIVERSAL_FLAG} ${STATIC_LIBCPP} - -CLIENT_FLAGS := - -# These flags will make DuckDB build the extension -EXTENSION_FLAGS=-DDUCKDB_OOT_EXTENSION_NAMES="boilerplate" -DDUCKDB_OOT_EXTENSION_BOILERPLATE_PATH="$(PROJ_DIR)" -DDUCKDB_OOT_EXTENSION_BOILERPLATE_SHOULD_LINK="TRUE" -DDUCKDB_OOT_EXTENSION_BOILERPLATE_INCLUDE_PATH="$(PROJ_DIR)src/include" - -pull: - git submodule init - git submodule update --recursive --remote - -clean: - rm -rf build - rm -rf testext - cd duckdb && make clean - -# Main build -debug: - mkdir -p build/debug && \ - cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 
-DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} -S ./duckdb/ -B build/debug && \ - cmake --build build/debug --config Debug - -release: - mkdir -p build/release && \ - cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} -S ./duckdb/ -B build/release && \ - cmake --build build/release --config Release - -# Client build -debug_js: CLIENT_FLAGS=-DBUILD_NODE=1 -DBUILD_JSON_EXTENSION=1 -debug_js: debug - -debug_r: CLIENT_FLAGS=-DBUILD_R=1 -debug_r: debug - -debug_python: CLIENT_FLAGS=-DBUILD_PYTHON=1 -DBUILD_JSON_EXTENSION=1 -DBUILD_FTS_EXTENSION=1 -DBUILD_TPCH_EXTENSION=1 -DBUILD_VISUALIZER_EXTENSION=1 -DBUILD_TPCDS_EXTENSION=1 -debug_python: debug - -release_js: CLIENT_FLAGS=-DBUILD_NODE=1 -DBUILD_JSON_EXTENSION=1 -release_js: release - -release_r: CLIENT_FLAGS=-DBUILD_R=1 -release_r: release - -release_python: CLIENT_FLAGS=-DBUILD_PYTHON=1 -DBUILD_JSON_EXTENSION=1 -DBUILD_FTS_EXTENSION=1 -DBUILD_TPCH_EXTENSION=1 -DBUILD_VISUALIZER_EXTENSION=1 -DBUILD_TPCDS_EXTENSION=1 -release_python: debug - -# Main tests -test: test_release - -test_release: release - ./build/release/test/unittest --test-dir . "[sql]" - -test_debug: debug - ./build/debug/test/unittest --test-dir . 
"[sql]" - -# Client tests -test_js: test_debug_js -test_debug_js: debug_js - cd duckdb/tools/nodejs && npm run test-path -- "../../../test/nodejs/**/*.js" - -test_release_js: release_js - cd duckdb/tools/nodejs && npm run test-path -- "../../../test/nodejs/**/*.js" - -test_python: test_debug_python -test_debug_python: debug_python - cd test/python && python3 -m pytest - -test_release_python: release_python - cd test/python && python3 -m pytest - -# TODO make this clever -format: - clang-format --sort-includes=0 -style=file -i src/boilerplate_extension.cpp - cmake-format -i CMakeLists.txt - cmake-format -i src/CMakeLists.txt - -update: - git submodule update --remote --merge \ No newline at end of file +# Include the Makefile from extension-ci-tools +include extension-ci-tools/makefiles/duckdb_extension.Makefile diff --git a/README.md b/README.md index bf4793a5..9d2674f7 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,466 @@ -# WIP Disclaimer -This template is currently work-in-progress. Feel free to play around with it and give us feedback. Note also that this template depends on a development version of DuckDB. Follow https://duckdb.org/news for more information on official launch. +# DuckPGQ +A DuckDB extension for graph workloads that supports the SQL/PGQ standard. For more information, please see the [documentation page](https://duckpgq.org/). -# DuckDB Extension Template -The main goal of this template is to allow users to easily develop, test and distribute their own DuckDB extension. +[![Discord](https://discordapp.com/api/guilds/1225369321077866496/widget.png?style=banner3)](https://discord.gg/8X95XHhQB7) +## WIP Disclaimer +This repository is currently a research project and a work in progress. Feel free to play around with it and give us feedback. -## Build -To build the extension: +--- + +## Loading DuckPGQ +Since DuckDB v1.0.0, DuckPGQ can be loaded as a community extension without requiring the `unsigned` flag. 
From any DuckDB instance, the following two commands will allow you to install and load DuckPGQ: +```sql +install duckpgq from community; +load duckpgq; +``` +See the [DuckPGQ community extension page](https://community-extensions.duckdb.org/extensions/duckpgq.html) for more information. + +For older DuckDB versions there are two ways to install the DuckPGQ extension. +Both ways require DuckDB to be launched in the `unsigned` mode. +The first way is by setting the `custom_extension_repository` command (see below). The other way is by directly downloading the extension file for your OS + architecture (see the [DuckPGQ availibility section](#duckpgq-extension-availability)) + +For CLI: +```bash +duckdb -unsigned +``` +```bash +set custom_extension_repository = 'http://duckpgq.s3.eu-north-1.amazonaws.com'; +force install 'duckpgq'; +load 'duckpgq'; +``` + +For Python: +```python +import duckdb + +conn = duckdb.connect(config = {"allow_unsigned_extensions": "true"}) +conn.execute("set custom_extension_repository = 'http://duckpgq.s3.eu-north-1.amazonaws.com';") +conn.execute("force install 'duckpgq';") +conn.execute("load 'duckpgq';") +``` + +## Direct download +To use the extension, check the direct download links below. To install and load the extension, launch DuckDB in unsigned mode and execute the commands: +```sql +force install 'path/to/duckpgq_extension'; +load 'duckpgq'; +``` + +## DuckPGQ Extension Availability + +
+Version v1.2.2 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| amd64_musl | [linux_amd64_musl]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +### Windows + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [windows_amd64]() | +| amd64_mingw | [windows_amd64_mingw]() | + +
+ +
+Version v1.2.1 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| amd64_musl | [linux_amd64_musl]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +### Windows + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [windows_amd64]() | +| amd64_mingw | [windows_amd64_mingw]() | + +
+ +
+Version v1.2.0 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| amd64_musl | [linux_amd64_musl]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +### Windows + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [windows_amd64]() | +| amd64_mingw | [windows_amd64_mingw]() | + +
+ +
+Version v1.1.3 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +### Windows + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [windows_amd64]() | +| amd64_rtools | [windows_amd64_rtools]() | + +
+ +
+Version v1.1.2 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +### Windows + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [windows_amd64]() | +| amd64_rtools | [windows_amd64_rtools]() | + +
+ +
+Version v1.1.1 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +### Windows + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [windows_amd64]() | +| amd64_rtools | [windows_amd64_rtools]() | + +
+ +
+Version v1.1.0 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +### Windows + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [windows_amd64]() | +| amd64_rtools | [windows_amd64_rtools]() | + +
+ +
+Version v1.0.0 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| amd64_gcc4 | [linux_amd64_gcc4]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +### Windows + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [windows_amd64]() | +| amd64_rtools | [windows_amd64_rtools]() | + +
+ +
+Version v0.10.3 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| amd64_gcc4 | [linux_amd64_gcc4]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +
+ +
+Version v0.10.2 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| amd64_gcc4 | [linux_amd64_gcc4]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +
+ +
+Version v0.10.1 + +### Linux + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [linux_amd64]() | +| amd64_gcc4 | [linux_amd64_gcc4]() | +| arm64 | [linux_arm64]() | + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| amd64 | [osx_amd64]() | +| arm64 | [osx_arm64]() | + +### Wasm + +| Architecture | Download Link | +|--------------|---------------| +| eh | [wasm_eh]() | +| mvp | [wasm_mvp]() | +| threads | [wasm_threads]() | + +
+ +
+Version v0.10.0 + +### Osx + +| Architecture | Download Link | +|--------------|---------------| +| arm64 | [osx_arm64]() | +| arm64 | [osx_arm64]() | + +
+ +## Building +### Managing dependencies +DuckDB extensions uses VCPKG for dependency management. Enabling VCPKG is very simple: follow the [installation instructions](https://vcpkg.io/en/getting-started) or just run the following: +```shell +git clone https://github.com/Microsoft/vcpkg.git +./vcpkg/bootstrap-vcpkg.sh +export VCPKG_TOOLCHAIN_PATH=`pwd`/vcpkg/scripts/buildsystems/vcpkg.cmake +``` +Note: VCPKG is only required for extensions that want to rely on it for dependency management. If you want to develop an extension without dependencies, or want to do your own dependency management, just skip this step. Note that the example extension uses VCPKG to build with a dependency for instructive purposes, so when skipping this step the build may not work without removing the dependency. + +### Build steps +Checkout git submodules: ```sh -make +git submodule update --init --recursive ``` -The main binaries that will be built are: +Now to build the extension, run: ```sh -./build/release/duckdb/duckdb -./build/release/duckdb/test/unittest -./build/release/duckdb/extension//.duckdb_extension +make GEN=ninja ``` -- `duckdb` is the binary for the duckdb shell with the extension code automatically loaded. +The location of the binaries depends on the `BUILD_TYPE` flag (`release` or `debug`) specified during the build process. By default, the binaries are organized as follows: + +### For a **release build** (`make release`): +- DuckDB binary: `./build/release/duckdb` +- Unit tests: `./build/release/test/unittest` +- DuckPGQ extension: `./build/release/extension/duckpgq/duckpgq.duckdb_extension` + +### For a **debug build** (`make debug`): +- DuckDB binary: `./build/debug/duckdb` +- Unit tests: `./build/debug/test/unittest` +- DuckPGQ extension: `./build/debug/extension/duckpgq/duckpgq.duckdb_extension` + +Ensure you specify the appropriate `BUILD_TYPE` flag when configuring the build to place binaries in the corresponding directory. 
+- `duckdb` is the binary for the duckdb shell with the extension code automatically loaded. - `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary. -- `.duckdb_extension` is the loadable binary as it would be distributed. +- `duckpgq.duckdb_extension` is the loadable binary as it would be distributed. ## Running the extension To run the extension code, simply start the shell with `./build/release/duckdb`. -Now we can use the features from the extension directly in DuckDB. The template contains a single scalar function `do_a_boilerplate` that takes no arguments and returns a string: -``` -D select do_a_boilerplate() as result; -┌────────────────────┐ -│ result │ -│ varchar │ -├────────────────────┤ -│ I'm a boilerplate! │ -└────────────────────┘ -``` +> [!CAUTION] +> Any query containing SQL/PGQ syntax requires a `-` at the start of the query when building the extension from the source, otherwise, you will experience a segmentation fault. This is not the case when loading the extension from DuckDB. ## Running the tests Different tests can be created for DuckDB extensions. The primary way of testing DuckDB extensions should be the SQL tests in `./test/sql`. These SQL tests can be run using: @@ -39,37 +468,35 @@ Different tests can be created for DuckDB extensions. The primary way of testing make test ``` -## Getting started with your own extension -After creating a repository from this template, the first step is to name your extension. To rename the extension, run: -``` -./scripts/set_extension_name.sh -``` -Feel free to delete the script after this step. +### Installing the deployed binaries +To install your extension binaries from S3, you will need to do two things. Firstly, DuckDB should be launched with the +`allow_unsigned_extensions` option set to true. How to set this will depend on the client you're using. Some examples: -Now you're good to go! 
After a (re)build, you should now be able to use your duckdb extension: +CLI: +```shell +duckdb -unsigned ``` -./build/release/duckdb/duckdb -D select do_a_() as result; -┌───────────────────────────────────┐ -│ result │ -│ varchar │ -├───────────────────────────────────┤ -│ I'm a ! │ -└───────────────────────────────────┘ -``` - -For inspiration/examples on how to extend DuckDB in a more meaningful way, check out the in-tree extensions in https://github.com/duckdb/duckdb and the out-of-tree extensions in https://github.com/duckdblabs! -## Distributing your extension -Easy distribution of extensions built with this template is facilitated using a similar process used by DuckDB itself. Binaries are generated for various versions/platforms allowing duckdb to automatically install the correct binary. +Python: +```python +con = duckdb.connect(':memory:', config={'allow_unsigned_extensions' : 'true'}) +``` -This step requires that you pass the following 4 parameters to your github repo as action secrets: +NodeJS: +```js +db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"}); +``` -secret name | description ---- | --- -S3_REGION | s3 region holding your bucket -S3_BUCKET | the name of the bucket to deploy to -S3_DEPLOY_ID | the S3 key id -S3_DEPLOY_KEY | the S3 key secret +Secondly, you will need to set the repository endpoint in DuckDB to the HTTP url of your bucket + version of the extension +you want to install. To do this run the following SQL query in DuckDB: +```sql +SET custom_extension_repository='bucket.s3.eu-west-1.amazonaws.com//latest'; +``` +Note that the `/latest` path will allow you to install the latest extension version available for your current version of +DuckDB. To specify a specific version, you can pass the version instead. -After setting these variables, all pushes to master will trigger a new (dev) release. 
+After running these steps, you can install and load your extension using the regular INSTALL/LOAD commands in DuckDB: +```sql +INSTALL duckpgq; +LOAD duckpgq; +``` diff --git a/docs/UPDATING.md b/docs/UPDATING.md new file mode 100644 index 00000000..a3ac73ef --- /dev/null +++ b/docs/UPDATING.md @@ -0,0 +1,23 @@ +# Extension updating +When cloning this template, the target version of DuckDB should be the latest stable release of DuckDB. However, there +will inevitably come a time when a new DuckDB is released and the extension repository needs updating. This process goes +as follows: + +- Bump submodules + - `./duckdb` should be set to latest tagged release + - `./extension-ci-tools` should be set to updated branch corresponding to latest DuckDB release. So if you're building for DuckDB `v1.1.0` there will be a branch in `extension-ci-tools` named `v1.1.0` which you should check out. +- Bump versions in `./github/workflows` + - `duckdb_version` input in `duckdb-stable-build` job in `MainDistributionPipeline.yml` should be set to latest tagged release + - `duckdb_version` input in `duckdb-stable-deploy` job in `MainDistributionPipeline.yml` should be set to latest tagged release + - the reusable workflow `duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml` for the `duckdb-stable-build` job should be set to latest tagged release + +# API changes +DuckDB extensions built with this extension template are built against the internal C++ API of DuckDB. This API is not guaranteed to be stable. +What this means for extension development is that when updating your extension's DuckDB target version using the above steps, you may find that your extension no longer builds properly. + +Currently, DuckDB does not (yet) provide a specific change log for these API changes, but it is generally not too hard to figure out what has changed. 
+ +For figuring out how and why the C++ API changed, we recommend using the following resources: +- DuckDB's [Release Notes](https://github.com/duckdb/duckdb/releases) +- DuckDB's history of [Core extension patches](https://github.com/duckdb/duckdb/commits/main/.github/patches/extensions) +- The git history of the relevant C++ Header file of the API that has changed \ No newline at end of file diff --git a/duckdb b/duckdb index c1f2d10c..ccaef277 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit c1f2d10c781e071fd446bdf68519113057ea5e87 +Subproject commit ccaef2778428109a2ceecff90a159c2b9215c372 diff --git a/extension-ci-tools b/extension-ci-tools new file mode 160000 index 00000000..c098325d --- /dev/null +++ b/extension-ci-tools @@ -0,0 +1 @@ +Subproject commit c098325d7e622b52747a0df810a8146ab10a9ab5 diff --git a/extension_config.cmake b/extension_config.cmake new file mode 100644 index 00000000..76c40e7c --- /dev/null +++ b/extension_config.cmake @@ -0,0 +1,10 @@ +# This file is included by DuckDB's build system. 
It specifies which extension to load + +# Extension from this repo +duckdb_extension_load(duckpgq + LOAD_TESTS + SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} +) + +# Any extra extensions that should be built +# e.g.: duckdb_extension_load(json) diff --git a/index.html b/index.html new file mode 100644 index 00000000..7bf0ebc4 --- /dev/null +++ b/index.html @@ -0,0 +1 @@ +This is the SQL/PGQ extension for DuckPGQ diff --git a/scripts/copy_tests.py b/scripts/copy_tests.py new file mode 100644 index 00000000..f6b683c9 --- /dev/null +++ b/scripts/copy_tests.py @@ -0,0 +1,53 @@ +from os import listdir, mkdir +from os.path import isfile, join, exists + +import shutil +from textwrap import dedent +import sys +import getopt +import os + + +def main(argv): + mode = '' + opts, args = getopt.getopt(argv, "hm:", ["mode=", "ofile="]) + for opt, arg in opts: + if opt == '-h': + print('copy_tests.py -m ') + sys.exit() + elif opt in ("-m", "--mode"): + mode = arg + + if mode != "release" and mode != "debug": + raise Exception("Invalid parameter, --mode should be release or debug") + abspath = os.path.abspath(__file__) + dname = os.path.dirname(abspath) + os.chdir(dname) + test_path_duckpgq = "../test/sql" + test_path_duckdb = "../duckdb/test/extension/duckpgq" + + onlyfiles = [str(f) for f in listdir(test_path_duckpgq) if isfile(join(test_path_duckpgq, f))] + + if not exists(test_path_duckdb): + mkdir(test_path_duckdb) + else: + shutil.rmtree(test_path_duckdb) + mkdir(test_path_duckdb) + + for file in onlyfiles: + f = open(test_path_duckpgq + "/" + file, "r") + content = f.read() + content = content.replace("require duckpgq\n", + dedent("statement ok\n" + "force install '__BUILD_DIRECTORY__/../../../build/"+mode+"/extension/duckpgq/duckpgq.duckdb_extension';\n" + "\n" + "statement ok\n" + "load 'duckpgq';\n")) + + new_file = open(test_path_duckdb + "/" + file, "w") + new_file.write(content) + new_file.close() + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git 
a/scripts/extension-upload.sh b/scripts/extension-upload.sh index cc2fc219..05c11a85 100755 --- a/scripts/extension-upload.sh +++ b/scripts/extension-upload.sh @@ -1,13 +1,87 @@ #!/bin/bash -# Usage: ./extension-upload.sh +# Extension upload script + +# Usage: ./extension-upload.sh +# : Name of the extension +# : Version (commit / version tag) of the extension +# : Version (commit / version tag) of DuckDB +# : Architecture target of the extension binary +# : S3 bucket to upload to +# : Set this as the latest version ("true" / "false", default: "false") +# : Set this as a versioned version that will prevent its deletion set -e -ext="build/release/extension/$1/$1.duckdb_extension" +if [[ $4 == wasm* ]]; then + ext="/tmp/extension/$1.duckdb_extension.wasm" +else + ext="/tmp/extension/$1.duckdb_extension" +fi + +echo $ext + +script_dir="$(dirname "$(readlink -f "$0")")" + +# calculate SHA256 hash of extension binary +cat $ext > $ext.append + +if [[ $4 == wasm* ]]; then + # 0 for custom section + # 113 in hex = 275 in decimal, total lenght of what follows (1 + 16 + 2 + 256) + # [1(continuation) + 0010011(payload) = \x93, 0(continuation) + 10(payload) = \x02] + echo -n -e '\x00' >> $ext.append + echo -n -e '\x93\x02' >> $ext.append + # 10 in hex = 16 in decimal, lenght of name, 1 byte + echo -n -e '\x10' >> $ext.append + echo -n -e 'duckdb_signature' >> $ext.append + # the name of the WebAssembly custom section, 16 bytes + # 100 in hex, 256 in decimal + # [1(continuation) + 0000000(payload) = ff, 0(continuation) + 10(payload)], + # for a grand total of 2 bytes + echo -n -e '\x80\x02' >> $ext.append +fi + +# (Optionally) Sign binary +if [ "$DUCKDB_EXTENSION_SIGNING_PK" != "" ]; then + echo "$DUCKDB_EXTENSION_SIGNING_PK" > private.pem + $script_dir/../duckdb/scripts/compute-extension-hash.sh $ext.append > $ext.hash + openssl pkeyutl -sign -in $ext.hash -inkey private.pem -pkeyopt digest:sha256 -out $ext.sign + rm -f private.pem +fi + +# Signature is always there, 
potentially defaulting to 256 zeros +truncate -s 256 $ext.sign # compress extension binary -gzip < $ext > "$1.duckdb_extension.gz" +if [[ $4 == wasm_* ]]; then + brotli < $ext.append > "$ext.compressed" +else + gzip < $ext.append > "$ext.compressed" +fi + +set -e + +# Abort if AWS key is not set +if [ -z "$AWS_ACCESS_KEY_ID" ]; then + echo "No AWS key found, skipping.." + exit 0 +fi + +# upload versioned version +if [[ $7 = 'true' ]]; then + if [[ $4 == wasm* ]]; then + aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" + else + aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz --acl public-read + fi +fi -# upload compressed extension binary to S3 -aws s3 cp $1.duckdb_extension.gz s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz --acl public-read \ No newline at end of file +# upload to latest version +if [[ $6 = 'true' ]]; then + if [[ $4 == wasm* ]]; then + aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" + else + aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.gz --acl public-read + fi +fi diff --git a/scripts/python_helpers.py b/scripts/python_helpers.py new file mode 100644 index 00000000..eb605f83 --- /dev/null +++ b/scripts/python_helpers.py @@ -0,0 +1,23 @@ +def open_utf8(fpath, flags): + import sys + if sys.version_info[0] < 3: + return open(fpath, flags) + else: + return open(fpath, flags, encoding="utf8") + +def normalize_path(path): + import os + + def normalize(p): + return os.path.sep.join(p.split('/')) + + if isinstance(path, list): + normed = map(lambda p: normalize(p), path) + return list(normed) + + if (isinstance, str): + return normalize(path) + + raise Exception("Can only be called with a str or list argument") + + \ No newline at end of file diff --git a/scripts/s3_availability.py b/scripts/s3_availability.py new file mode 100644 index 
00000000..fd7aab8b --- /dev/null +++ b/scripts/s3_availability.py @@ -0,0 +1,53 @@ +import boto3 + +s3_client = boto3.client('s3') +bucket_name = 'duckpgq' +prefix = 'v' + +def list_extensions(bucket, prefix): + response = s3_client.list_objects_v2(Bucket=bucket, Prefix=prefix) + extensions = {} + + for obj in response.get('Contents', []): + path_parts = obj['Key'].split('/') + if len(path_parts) == 3: + version = path_parts[0] + os_arch = path_parts[1] + parts = os_arch.split('_') + if len(parts) > 2: + os = parts[0] + arch = '_'.join(parts[1:]) + else: + os, arch = parts + url = f'https://{bucket}.s3.eu-north-1.amazonaws.com/{obj["Key"]}' + + if version not in extensions: + extensions[version] = {} + if os not in extensions[version]: + extensions[version][os] = [] + extensions[version][os].append((arch, url)) + + return extensions + +def generate_markdown_table(extensions): + table = '## DuckPGQ Extension Availability\n\n' + + for version in sorted(extensions.keys(), reverse=True): + os_dict = extensions[version] + table += f'
\nVersion {version}\n\n' + for os, builds in os_dict.items(): + table += f'### {os.capitalize()}\n\n' + table += '| Architecture | Download Link |\n' + table += '|--------------|---------------|\n' + for arch, url in builds: + table += f'| {arch} | [{os}_{arch}](<{url}>) |\n' + table += '\n' + table += '
\n\n' + + return table + +extensions = list_extensions(bucket_name, prefix) +markdown_table = generate_markdown_table(extensions) + +with open('extension_availability.md', 'w') as readme_file: + readme_file.write(markdown_table) diff --git a/scripts/set_extension_name.py b/scripts/set_extension_name.py old mode 100644 new mode 100755 index a99b1e6d..14b6391f --- a/scripts/set_extension_name.py +++ b/scripts/set_extension_name.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import sys, os from pathlib import Path @@ -6,14 +6,14 @@ if (len(sys.argv) != 2): raise Exception('usage: python3 set_extension_name.py ') -string_to_find = "boilerplate" +string_to_find = "quack" string_to_replace = sys.argv[1] def replace(file_name, to_find, to_replace): - with open(file_name, 'r') as file : + with open(file_name, 'r', encoding="utf8") as file : filedata = file.read() filedata = filedata.replace(to_find, to_replace) - with open(file_name, 'w') as file: + with open(file_name, 'w', encoding="utf8") as file: file.write(filedata) files_to_search = [] @@ -36,8 +36,8 @@ def replace(file_name, to_find, to_replace): replace("./README.md", string_to_find, string_to_replace) # rename files -os.rename(f'test/python/{string_to_find}_test.py',f'test/python/{string_to_replace}_test.py') -os.rename(f'test/sql/{string_to_find}.test',f'test/sql/{string_to_replace}.test') -os.rename(f'src/{string_to_find}_extension.cpp',f'src/{string_to_replace}_extension.cpp') -os.rename(f'src/include/{string_to_find}_extension.hpp',f'src/include/{string_to_replace}_extension.hpp') -os.rename(f'test/nodejs/{string_to_find}_test.js',f'test/nodejs/{string_to_replace}_test.js') \ No newline at end of file +os.rename(f'test/python/{string_to_find}_test.py', f'test/python/{string_to_replace}_test.py') +os.rename(f'test/sql/{string_to_find}.test', f'test/sql/{string_to_replace}.test') +os.rename(f'src/{string_to_find}_extension.cpp', f'src/{string_to_replace}_extension.cpp') 
+os.rename(f'src/include/{string_to_find}_extension.hpp', f'src/include/{string_to_replace}_extension.hpp') +os.rename(f'test/nodejs/{string_to_find}_test.js', f'test/nodejs/{string_to_replace}_test.js') diff --git a/scripts/set_tag.sh b/scripts/set_tag.sh new file mode 100755 index 00000000..3aa3a11e --- /dev/null +++ b/scripts/set_tag.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -eu +set -o pipefail + +cd "$( cd "$( dirname "${BASH_SOURCE[0]:-${(%):-%x}}" )" >/dev/null 2>&1 && pwd )" +cd .. + +cd duckdb + +TAG_NAME="$1" +REMOTE_NAME="origin" # Explicitly define the target remote + +echo "--- Updating tag '$TAG_NAME' on remote '$REMOTE_NAME' ---" + +# 1. Delete the remote tag on your fork, ignoring errors if it doesn't exist. +git push --delete "$REMOTE_NAME" "$TAG_NAME" || true + +# 2. Force-delete the local tag to ensure it can be recreated. +git tag -d "$TAG_NAME" || true + +# 3. Create the new annotated tag locally on your current commit. +# The '-f' flag will force replacement if it somehow still exists. +git tag -fa "$TAG_NAME" -m "DuckPGQ custom tag '$TAG_NAME'" + +# 4. Force-push the new tag to your fork, overwriting the remote one if it exists. 
+git push "$REMOTE_NAME" --tags --force + +echo "--- Successfully updated tag '$TAG_NAME' on remote '$REMOTE_NAME' ---" \ No newline at end of file diff --git a/scripts/test_header_generation.py b/scripts/test_header_generation.py new file mode 100644 index 00000000..c7bda00c --- /dev/null +++ b/scripts/test_header_generation.py @@ -0,0 +1,38 @@ +import os +def generate_headers(base_dir): + # Ensure to include the 'test' directory in the path + test_dir = os.path.join(base_dir, "test") # Adjust this if the base_dir does not already include 'test' + + for root, dirs, files in os.walk(test_dir): + for file in files: + if ".test" not in file: + continue + file_path = os.path.join(root, file) + relative_path = os.path.relpath(file_path, base_dir) # This assumes base_dir is the parent of 'test' + group = "duckpgq" + subpath = os.path.dirname(relative_path) + if subpath: # Check if there is a subdirectory path + group += "_" + subpath.replace('/', '_').replace('test_', '') # Correct the group to exclude 'test_' + + with open(file_path, 'r+') as f: + content = f.read() + + # Create the header content + header = f"# name: {relative_path}\n" \ + f"# description: ""\n" \ + f"# group: [{group}]\n\n" + + # Search and replace old header if exists + if header.split("\n")[0] in content: + continue + if content.startswith("# name:"): + end_of_header = content.find('\n\n') + 2 + content = content[end_of_header:] # Remove old header + f.seek(0) + f.write(header + content) # Write new header and original content + f.truncate() # Truncate file in case new content is shorter than old + + +# Usage +base_dir = "/Users/dljtw/git/duckpgq" # Adjust this path to the correct directory which includes the 'test' folder +generate_headers(base_dir) \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 00000000..e77b2df2 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,6 @@ +add_subdirectory(core) + +set(EXTENSION_SOURCES + 
${CMAKE_CURRENT_SOURCE_DIR}/duckpgq_extension.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/duckpgq_state.cpp ${EXTENSION_SOURCES} + PARENT_SCOPE) diff --git a/src/boilerplate_extension.cpp b/src/boilerplate_extension.cpp deleted file mode 100644 index 4b5308c6..00000000 --- a/src/boilerplate_extension.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#define DUCKDB_EXTENSION_MAIN - -#include "boilerplate_extension.hpp" -#include "duckdb.hpp" - -#include - -namespace duckdb { - -inline void BoilerplateScalarFun(DataChunk &args, ExpressionState &state, Vector &result) { - result.Reference(Value("I'm a boilerplate!")); -} - -static void LoadInternal(DatabaseInstance &instance) { - Connection con(instance); - con.BeginTransaction(); - - auto &catalog = Catalog::GetSystemCatalog(*con.context); - - CreateScalarFunctionInfo boilerplate_fun_info( - ScalarFunction("boilerplate", {}, LogicalType::VARCHAR, BoilerplateScalarFun)); - boilerplate_fun_info.on_conflict = OnCreateConflict::ALTER_ON_CONFLICT; - catalog.CreateFunction(*con.context, &boilerplate_fun_info); - con.Commit(); -} - -void BoilerplateExtension::Load(DuckDB &db) { - LoadInternal(*db.instance); -} -std::string BoilerplateExtension::Name() { - return "boilerplate"; -} - -} // namespace duckdb - -extern "C" { - -DUCKDB_EXTENSION_API void boilerplate_init(duckdb::DatabaseInstance &db) { - LoadInternal(db); -} - -DUCKDB_EXTENSION_API const char *boilerplate_version() { - return duckdb::DuckDB::LibraryVersion(); -} -} - -#ifndef DUCKDB_EXTENSION_MAIN -#error DUCKDB_EXTENSION_MAIN not defined -#endif diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt new file mode 100644 index 00000000..fb14d31f --- /dev/null +++ b/src/core/CMakeLists.txt @@ -0,0 +1,9 @@ +add_subdirectory(functions) +add_subdirectory(operator) +add_subdirectory(parser) +add_subdirectory(pragma) +add_subdirectory(utils) + +set(EXTENSION_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/module.cpp ${EXTENSION_SOURCES} + PARENT_SCOPE) diff --git 
a/src/core/functions/CMakeLists.txt b/src/core/functions/CMakeLists.txt new file mode 100644 index 00000000..f34aa0cf --- /dev/null +++ b/src/core/functions/CMakeLists.txt @@ -0,0 +1,7 @@ +add_subdirectory(function_data) +add_subdirectory(scalar) +add_subdirectory(table) + +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} + PARENT_SCOPE) diff --git a/src/core/functions/function_data/CMakeLists.txt b/src/core/functions/function_data/CMakeLists.txt new file mode 100644 index 00000000..b3e34a8f --- /dev/null +++ b/src/core/functions/function_data/CMakeLists.txt @@ -0,0 +1,8 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/cheapest_path_length_function_data.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/iterative_length_function_data.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/local_clustering_coefficient_function_data.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pagerank_function_data.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/weakly_connected_component_function_data.cpp + PARENT_SCOPE) diff --git a/src/core/functions/function_data/cheapest_path_length_function_data.cpp b/src/core/functions/function_data/cheapest_path_length_function_data.cpp new file mode 100644 index 00000000..d050a888 --- /dev/null +++ b/src/core/functions/function_data/cheapest_path_length_function_data.cpp @@ -0,0 +1,43 @@ +#include "duckpgq/core/functions/function_data/cheapest_path_length_function_data.hpp" +#include "duckpgq/core/utils/duckpgq_utils.hpp" +#include "duckdb/execution/expression_executor.hpp" + +namespace duckdb { + +unique_ptr +CheapestPathLengthFunctionData::CheapestPathLengthBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + + if (!arguments[0]->IsFoldable()) { + throw InvalidInputException("Id must be constant."); + } + + auto duckpgq_state = GetDuckPGQState(context); + + int32_t csr_id = ExpressionExecutor::EvaluateScalar(context, *arguments[0]).GetValue(); + CSR *csr = duckpgq_state->GetCSR(csr_id); + duckpgq_state->csr_to_delete.insert(csr_id); + + if 
(!(csr->initialized_v && csr->initialized_e && csr->initialized_w)) { + throw ConstraintException("Need to initialize CSR before doing cheapest path"); + } + + if (csr->w.empty()) { + bound_function.return_type = LogicalType::DOUBLE; + } else { + bound_function.return_type = LogicalType::BIGINT; + } + + return make_uniq(context, csr_id); +} + +unique_ptr CheapestPathLengthFunctionData::Copy() const { + return make_uniq(context, csr_id); +} + +bool CheapestPathLengthFunctionData::Equals(const FunctionData &other_p) const { + auto &other = other_p.Cast(); + return other.csr_id == csr_id; +} + +} // namespace duckdb diff --git a/src/core/functions/function_data/iterative_length_function_data.cpp b/src/core/functions/function_data/iterative_length_function_data.cpp new file mode 100644 index 00000000..bd68a963 --- /dev/null +++ b/src/core/functions/function_data/iterative_length_function_data.cpp @@ -0,0 +1,32 @@ +#include "duckpgq/core/functions/function_data/iterative_length_function_data.hpp" +#include "duckdb/execution/expression_executor.hpp" +#include "duckpgq/common.hpp" + +#include + +namespace duckdb { + +unique_ptr IterativeLengthFunctionData::Copy() const { + return make_uniq(context, csr_id); +} + +bool IterativeLengthFunctionData::Equals(const FunctionData &other_p) const { + auto &other = other_p.Cast(); + return other.csr_id == csr_id; +} + +unique_ptr IterativeLengthFunctionData::IterativeLengthBind(ClientContext &context, + ScalarFunction &bound_function, + vector> &arguments) { + if (!arguments[0]->IsFoldable()) { + throw InvalidInputException("Id must be constant."); + } + + int32_t csr_id = ExpressionExecutor::EvaluateScalar(context, *arguments[0]).GetValue(); + auto duckpgq_state = GetDuckPGQState(context); + duckpgq_state->csr_to_delete.insert(csr_id); + + return make_uniq(context, csr_id); +} + +} // namespace duckdb diff --git a/src/core/functions/function_data/local_clustering_coefficient_function_data.cpp 
b/src/core/functions/function_data/local_clustering_coefficient_function_data.cpp new file mode 100644 index 00000000..1c25adc7 --- /dev/null +++ b/src/core/functions/function_data/local_clustering_coefficient_function_data.cpp @@ -0,0 +1,33 @@ +#include "duckpgq/core/functions/function_data/local_clustering_coefficient_function_data.hpp" +#include "duckdb/execution/expression_executor.hpp" + +#include + +namespace duckdb { + +LocalClusteringCoefficientFunctionData::LocalClusteringCoefficientFunctionData(ClientContext &context, int32_t csr_id) + : context(context), csr_id(csr_id) { +} + +unique_ptr LocalClusteringCoefficientFunctionData::LocalClusteringCoefficientBind( + ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { + if (!arguments[0]->IsFoldable()) { + throw InvalidInputException("Id must be constant."); + } + + int32_t csr_id = ExpressionExecutor::EvaluateScalar(context, *arguments[0]).GetValue(); + auto duckpgq_state = GetDuckPGQState(context); + duckpgq_state->csr_to_delete.insert(csr_id); + return make_uniq(context, csr_id); +} + +unique_ptr LocalClusteringCoefficientFunctionData::Copy() const { + return make_uniq(context, csr_id); +} + +bool LocalClusteringCoefficientFunctionData::Equals(const FunctionData &other_p) const { + auto &other = other_p.Cast(); + return other.csr_id == csr_id; +} + +} // namespace duckdb diff --git a/src/core/functions/function_data/pagerank_function_data.cpp b/src/core/functions/function_data/pagerank_function_data.cpp new file mode 100644 index 00000000..69c0e242 --- /dev/null +++ b/src/core/functions/function_data/pagerank_function_data.cpp @@ -0,0 +1,70 @@ +#include "duckpgq/core/functions/function_data/pagerank_function_data.hpp" + +#include + +namespace duckdb { + +// Constructor +PageRankFunctionData::PageRankFunctionData(ClientContext &ctx, int32_t csr) + : context(ctx), csr_id(csr), damping_factor(0.85), convergence_threshold(1e-6), iteration_count(0), + state_initialized(false), 
converged(false) { +} + +unique_ptr PageRankFunctionData::PageRankBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + if (!arguments[0]->IsFoldable()) { + throw InvalidInputException("Id must be constant."); + } + + int32_t csr_id = ExpressionExecutor::EvaluateScalar(context, *arguments[0]).GetValue(); + auto duckpgq_state = GetDuckPGQState(context); + duckpgq_state->csr_to_delete.insert(csr_id); + + return make_uniq(context, csr_id); +} + +// Copy method +unique_ptr PageRankFunctionData::Copy() const { + auto result = make_uniq(context, csr_id); + result->rank = rank; // Deep copy of rank vector + result->temp_rank = temp_rank; // Deep copy of temp_rank vector + result->damping_factor = damping_factor; + result->convergence_threshold = convergence_threshold; + result->iteration_count = iteration_count; + result->state_initialized = state_initialized; + result->converged = converged; + // Note: state_lock is not copied as mutexes are not copyable + return std::move(result); +} + +// Equals method +bool PageRankFunctionData::Equals(const FunctionData &other_p) const { + auto &other = other_p.Cast(); + if (csr_id != other.csr_id) { + return false; + } + if (rank != other.rank) { + return false; + } + if (temp_rank != other.temp_rank) { + return false; + } + if (damping_factor != other.damping_factor) { + return false; + } + if (convergence_threshold != other.convergence_threshold) { + return false; + } + if (iteration_count != other.iteration_count) { + return false; + } + if (state_initialized != other.state_initialized) { + return false; + } + if (converged != other.converged) { + return false; + } + return true; +} + +} // namespace duckdb diff --git a/src/core/functions/function_data/weakly_connected_component_function_data.cpp b/src/core/functions/function_data/weakly_connected_component_function_data.cpp new file mode 100644 index 00000000..26329c76 --- /dev/null +++ 
b/src/core/functions/function_data/weakly_connected_component_function_data.cpp @@ -0,0 +1,42 @@ +#include "duckpgq/core/functions/function_data/weakly_connected_component_function_data.hpp" + +#include + +namespace duckdb { + +WeaklyConnectedComponentFunctionData::WeaklyConnectedComponentFunctionData(ClientContext &context, int32_t csr_id) + : context(context), csr_id(csr_id) { + state_converged = false; // Initialize state + state_initialized = false; +} + +unique_ptr WeaklyConnectedComponentFunctionData::WeaklyConnectedComponentBind( + ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { + if (!arguments[0]->IsFoldable()) { + throw InvalidInputException("Id must be constant."); + } + + int32_t csr_id = ExpressionExecutor::EvaluateScalar(context, *arguments[0]).GetValue(); + auto duckpgq_state = GetDuckPGQState(context); + duckpgq_state->csr_to_delete.insert(csr_id); + + return make_uniq(context, csr_id); +} + +unique_ptr WeaklyConnectedComponentFunctionData::Copy() const { + auto result = make_uniq(context, csr_id); + return std::move(result); +} + +bool WeaklyConnectedComponentFunctionData::Equals(const FunctionData &other_p) const { + auto &other = other_p.Cast(); + if (csr_id != other.csr_id) { + return false; + } + if (state_converged != other.state_converged) { + return false; + } + return true; +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/CMakeLists.txt b/src/core/functions/scalar/CMakeLists.txt new file mode 100644 index 00000000..0d3e9cd9 --- /dev/null +++ b/src/core/functions/scalar/CMakeLists.txt @@ -0,0 +1,16 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/cheapest_path_length.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/csr_creation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/csr_deletion.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/csr_get_w_type.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength2.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/iterativelength_bidirectional.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pagerank.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/reachability.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/shortest_path.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/csr_creation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/local_clustering_coefficient.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/weakly_connected_component.cpp + PARENT_SCOPE) diff --git a/src/core/functions/scalar/cheapest_path_length.cpp b/src/core/functions/scalar/cheapest_path_length.cpp new file mode 100644 index 00000000..3aadc555 --- /dev/null +++ b/src/core/functions/scalar/cheapest_path_length.cpp @@ -0,0 +1,173 @@ +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/functions/function_data/cheapest_path_length_function_data.hpp" +#include +#include + +#include + +namespace duckdb { + +template +static int16_t InitialiseBellmanFord(const DataChunk &args, int64_t input_size, const UnifiedVectorFormat &vdata_src, + const int64_t *src_data, idx_t result_size, vector> &dists) { + dists.resize(input_size, std::vector(lane_limit, std::numeric_limits::max() / 2)); + + int16_t lanes = 0; + for (idx_t i = result_size; i < args.size() && lanes < lane_limit; i++) { + auto src_index = vdata_src.sel->get_index(i); + if (vdata_src.validity.RowIsValid(src_index)) { + const int64_t &src_entry = src_data[src_index]; + dists[src_entry][lanes] = 0; + lanes++; + } + } + return lanes; +} + +template +int64_t UpdateOneLane(T &n_dist, T v_dist, T weight) { + T new_dist = v_dist + weight; + bool better = new_dist < n_dist; + T min = better ? 
new_dist : n_dist; + n_dist = min; + return better; +} + +template +bool UpdateLanes(vector> &dists, T v, T n, T weight) { + std::vector &v_dists = dists[v]; + std::vector &n_dists = dists[n]; + size_t num_lanes = dists[v].size(); + size_t lane_idx = 0; + bool xor_diff = false; + while (lane_idx < num_lanes) { + xor_diff |= UpdateOneLane(n_dists[lane_idx], v_dists[lane_idx], weight); + ++lane_idx; + } + return xor_diff; +} + +template +int16_t TemplatedBatchBellmanFord(CSR *csr, DataChunk &args, int64_t input_size, UnifiedVectorFormat &vdata_src, + int64_t *src_data, const UnifiedVectorFormat &vdata_target, int64_t *target_data, + const std::vector &weight_array, int16_t result_size, T *result_data, + ValidityMask &result_validity) { + vector> dists; + int16_t curr_batch_size = + InitialiseBellmanFord(args, input_size, vdata_src, src_data, result_size, dists); + bool changed = true; + while (changed) { + changed = false; + //! For every v in the input + for (int64_t v = 0; v < input_size; v++) { + //! Loop through all the n neighbours of v + for (auto index = (int64_t)csr->v[v]; index < (int64_t)csr->v[v + 1]; index++) { + //! 
Get weight of (v,n) + changed = UpdateLanes(dists, v, csr->e[index], weight_array[index]) | changed; + } + } + } + for (idx_t i = result_size; i < (idx_t)(result_size + curr_batch_size); i++) { + auto target_index = vdata_target.sel->get_index(i); + if (!vdata_target.validity.RowIsValid(target_index)) { + result_validity.SetInvalid(i); + } + + const auto &target_entry = target_data[target_index]; + auto resulting_distance = dists[target_entry][i % lane_limit]; + + if (resulting_distance == std::numeric_limits::max() / 2) { + result_validity.SetInvalid(i); + } else { + result_data[i] = resulting_distance; + } + } + dists.clear(); + return curr_batch_size; +} + +template +void TemplatedBellmanFord(CSR *csr, DataChunk &args, int64_t input_size, Vector &result, UnifiedVectorFormat &vdata_src, + int64_t *src_data, const UnifiedVectorFormat &vdata_target, int64_t *target_data, + const std::vector &weight_array) { + idx_t result_size = 0; + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + auto &result_validity = FlatVector::Validity(result); + vector> final_dists(input_size, std::vector(args.size(), std::numeric_limits::max() / 2)); + + while (result_size < args.size()) { + if ((args.size() - result_size) / 256 >= 1) { + result_size += + TemplatedBatchBellmanFord(csr, args, input_size, vdata_src, src_data, vdata_target, target_data, + weight_array, result_size, result_data, result_validity); + } else if ((args.size() - result_size) / 128 >= 1) { + result_size += + TemplatedBatchBellmanFord(csr, args, input_size, vdata_src, src_data, vdata_target, target_data, + weight_array, result_size, result_data, result_validity); + } else if ((args.size() - result_size) / 64 >= 1) { + result_size += + TemplatedBatchBellmanFord(csr, args, input_size, vdata_src, src_data, vdata_target, target_data, + weight_array, result_size, result_data, result_validity); + } else if ((args.size() - result_size) / 16 >= 1) { + result_size += + 
TemplatedBatchBellmanFord(csr, args, input_size, vdata_src, src_data, vdata_target, target_data, + weight_array, result_size, result_data, result_validity); + } else if ((args.size() - result_size) / 8 >= 1) { + result_size += + TemplatedBatchBellmanFord(csr, args, input_size, vdata_src, src_data, vdata_target, target_data, + weight_array, result_size, result_data, result_validity); + } else if ((args.size() - result_size) / 4 >= 1) { + result_size += + TemplatedBatchBellmanFord(csr, args, input_size, vdata_src, src_data, vdata_target, target_data, + weight_array, result_size, result_data, result_validity); + } else if ((args.size() - result_size) / 2 >= 1) { + result_size += + TemplatedBatchBellmanFord(csr, args, input_size, vdata_src, src_data, vdata_target, target_data, + weight_array, result_size, result_data, result_validity); + } else { + result_size += + TemplatedBatchBellmanFord(csr, args, input_size, vdata_src, src_data, vdata_target, target_data, + weight_array, result_size, result_data, result_validity); + } + } +} + +static void CheapestPathLengthFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + int64_t input_size = args.data[1].GetValue(0).GetValue(); + auto duckpgq_state = GetDuckPGQState(info.context); + + CSR *csr = duckpgq_state->GetCSR(info.csr_id); + auto &src = args.data[2]; + + UnifiedVectorFormat vdata_src, vdata_target; + src.ToUnifiedFormat(args.size(), vdata_src); + + auto src_data = reinterpret_cast(vdata_src.data); + + auto &target = args.data[3]; + target.ToUnifiedFormat(args.size(), vdata_target); + auto target_data = reinterpret_cast(vdata_target.data); + if (csr->w.empty()) { + TemplatedBellmanFord(csr, args, input_size, result, vdata_src, src_data, vdata_target, target_data, + csr->w_double); + } else { + TemplatedBellmanFord(csr, args, input_size, result, vdata_src, src_data, vdata_target, target_data, + csr->w); + } + 
duckpgq_state->csr_to_delete.insert(info.csr_id); +} +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterCheapestPathLengthScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction( + "cheapest_path_length", {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::ANY, CheapestPathLengthFunction, CheapestPathLengthFunctionData::CheapestPathLengthBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/csr_creation.cpp b/src/core/functions/scalar/csr_creation.cpp new file mode 100644 index 00000000..02e1e4a9 --- /dev/null +++ b/src/core/functions/scalar/csr_creation.cpp @@ -0,0 +1,218 @@ +#include "duckdb/common/vector_operations/quaternary_executor.hpp" +#include "duckdb/main/client_data.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/utils/compressed_sparse_row.hpp" +#include +#include +#include +#include +#include + +namespace duckdb { + +static void CsrInitializeVertex(DuckPGQState &context, int32_t id, int64_t v_size) { + lock_guard csr_init_lock(context.csr_lock); + + auto csr_entry = context.csr_list.find(id); + if (csr_entry != context.csr_list.end()) { + if (csr_entry->second->initialized_v) { + return; + } + } + try { + auto csr = make_uniq(); + // extra 2 spaces required for CSR padding + // data contains a vector of elements so will need an anonymous function to + // apply the first element id is repeated across, can I access the value + // directly? 
+ csr->v = new std::atomic[v_size + 2]; + csr->vsize = v_size + 2; + + for (idx_t i = 0; i < (idx_t)v_size + 2; i++) { + csr->v[i] = 0; + } + csr->initialized_v = true; + context.csr_list[id] = std::move(csr); + } catch (std::bad_alloc const &) { + throw Exception(ExceptionType::INTERNAL, "Unable to initialize vector of size for csr vertex table " + "representation"); + } +} + +static void CsrInitializeEdge(DuckPGQState &context, int32_t id, int64_t v_size, int64_t e_size) { + const lock_guard csr_init_lock(context.csr_lock); + + auto csr_entry = context.csr_list.find(id); + if (csr_entry->second->initialized_e) { + return; + } + try { + csr_entry->second->e.resize(e_size, 0); + csr_entry->second->edge_ids.resize(e_size, 0); + } catch (std::bad_alloc const &) { + throw Exception(ExceptionType::INTERNAL, "Unable to initialize vector of size for csr edge table " + "representation"); + } + for (auto i = 1; i < v_size + 2; i++) { + csr_entry->second->v[i] += csr_entry->second->v[i - 1]; + } + csr_entry->second->initialized_e = true; +} + +static void CsrInitializeWeight(DuckPGQState &context, int32_t id, int64_t e_size, PhysicalType weight_type) { + const lock_guard csr_init_lock(context.csr_lock); + auto csr_entry = context.csr_list.find(id); + + if (csr_entry->second->initialized_w) { + return; + } + try { + if (weight_type == PhysicalType::INT64) { + csr_entry->second->w.resize(e_size, 0); + } else if (weight_type == PhysicalType::DOUBLE) { + csr_entry->second->w_double.resize(e_size, 0); + } else { + throw NotImplementedException("Unrecognized weight type detected."); + } + } catch (std::bad_alloc const &) { + throw Exception(ExceptionType::INTERNAL, "Unable to initialize vector of size for csr weight table " + "representation"); + } + + csr_entry->second->initialized_w = true; +} + +static void CreateCsrVertexFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + + 
auto duckpgq_state = GetDuckPGQState(info.context); + int64_t input_size = args.data[1].GetValue(0).GetValue(); + auto csr_entry = duckpgq_state->csr_list.find(info.id); + + if (csr_entry == duckpgq_state->csr_list.end()) { + CsrInitializeVertex(*duckpgq_state, info.id, input_size); + csr_entry = duckpgq_state->csr_list.find(info.id); + } else { + if (!csr_entry->second->initialized_v) { + CsrInitializeVertex(*duckpgq_state, info.id, input_size); + } + } + + BinaryExecutor::Execute(args.data[2], args.data[3], result, args.size(), + [&](int64_t src, int64_t cnt) { + int64_t edge_count = 0; + csr_entry->second->v[src + 2] = cnt; + edge_count = edge_count + cnt; + return edge_count; + }); +} + +static void CreateCsrEdgeFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + + auto duckpgq_state = GetDuckPGQState(info.context, true); + + int64_t vertex_size = args.data[1].GetValue(0).GetValue(); + int64_t edge_size = args.data[2].GetValue(0).GetValue(); + int64_t edge_size_count = args.data[3].GetValue(0).GetValue(); + if (edge_size != edge_size_count) { + duckpgq_state->csr_to_delete.insert(info.id); + throw ConstraintException("Non-existent/non-unique vertices detected. 
Make sure all " + "vertices referred by edge tables exist and are unique for path-finding queries."); + } + + auto csr_entry = duckpgq_state->csr_list.find(info.id); + if (!csr_entry->second->initialized_e) { + CsrInitializeEdge(*duckpgq_state, info.id, vertex_size, edge_size); + } + if (info.weight_type == LogicalType::SQLNULL) { + TernaryExecutor::Execute( + args.data[4], args.data[5], args.data[6], result, args.size(), + [&](int64_t src, int64_t dst, int64_t edge_id) { + auto pos = ++csr_entry->second->v[src + 1]; + csr_entry->second->e[(int64_t)pos - 1] = dst; + csr_entry->second->edge_ids[(int64_t)pos - 1] = edge_id; + return 1; + }); + return; + } + auto weight_type = args.data[7].GetType().InternalType(); + if (!csr_entry->second->initialized_w) { + CsrInitializeWeight(*duckpgq_state, info.id, edge_size, weight_type); + } + if (weight_type == PhysicalType::INT64) { + QuaternaryExecutor::Execute( + args.data[4], args.data[5], args.data[6], args.data[7], result, args.size(), + [&](int64_t src, int64_t dst, int64_t edge_id, int64_t weight) { + auto pos = ++csr_entry->second->v[src + 1]; + csr_entry->second->e[(int64_t)pos - 1] = dst; + csr_entry->second->edge_ids[(int64_t)pos - 1] = edge_id; + csr_entry->second->w[(int64_t)pos - 1] = weight; + return weight; + }); + return; + } + + QuaternaryExecutor::Execute( + args.data[4], args.data[5], args.data[6], args.data[7], result, args.size(), + [&](int64_t src, int64_t dst, int64_t edge_id, double_t weight) { + auto pos = ++csr_entry->second->v[src + 1]; + csr_entry->second->e[(int64_t)pos - 1] = dst; + csr_entry->second->edge_ids[(int64_t)pos - 1] = edge_id; + csr_entry->second->w_double[(int64_t)pos - 1] = weight; + return weight; + }); +} + +ScalarFunctionSet GetCSRVertexFunction() { + ScalarFunctionSet set("create_csr_vertex"); + + set.AddFunction(ScalarFunction( + "create_csr_vertex", {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BIGINT, 
CreateCsrVertexFunction, CSRFunctionData::CSRVertexBind)); + + return set; +} + +ScalarFunctionSet GetCSREdgeFunction() { + ScalarFunctionSet set("create_csr_edge"); + /* 1. CSR ID + * 2. Vertex size + * 3. Sum of the edges (assuming all unique vertices) + * 4. Edge size (to ensure all vertices are unique this should equal point 3) + * 4. source rowid + * 5. destination rowid + * 6. edge rowid + * 7. edge weight (INT OR DOUBLE) + */ + + //! No edge weight + set.AddFunction(ScalarFunction({LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::INTEGER, CreateCsrEdgeFunction, CSRFunctionData::CSREdgeBind)); + + //! Integer for edge weight + set.AddFunction(ScalarFunction({LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::INTEGER, CreateCsrEdgeFunction, CSRFunctionData::CSREdgeBind)); + + //! 
Double for edge weight + set.AddFunction(ScalarFunction({LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::DOUBLE}, + LogicalType::INTEGER, CreateCsrEdgeFunction, CSRFunctionData::CSREdgeBind)); + + return set; +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterCSRCreationScalarFunctions(ExtensionLoader &loader) { + loader.RegisterFunction(GetCSREdgeFunction()); + loader.RegisterFunction(GetCSRVertexFunction()); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/csr_deletion.cpp b/src/core/functions/scalar/csr_deletion.cpp new file mode 100644 index 00000000..be067c37 --- /dev/null +++ b/src/core/functions/scalar/csr_deletion.cpp @@ -0,0 +1,30 @@ +#include "duckdb/main/client_data.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include +#include +#include + +namespace duckdb { + +static void DeleteCsrFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + + auto duckpgq_state = GetDuckPGQState(info.context); + + auto flag = duckpgq_state->csr_list.erase(info.id); + result.SetVectorType(VectorType::CONSTANT_VECTOR); + auto result_data = ConstantVector::GetData(result); + result_data[0] = flag == 1; +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterCSRDeletionScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction("delete_csr", {LogicalType::INTEGER}, LogicalType::BOOLEAN, + DeleteCsrFunction, 
CSRFunctionData::CSRBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/csr_get_w_type.cpp b/src/core/functions/scalar/csr_get_w_type.cpp new file mode 100644 index 00000000..9001d74b --- /dev/null +++ b/src/core/functions/scalar/csr_get_w_type.cpp @@ -0,0 +1,46 @@ +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" + +#include +#include + +namespace duckdb { + +enum class CSRWType : int32_t { + // possible weight types of a csr + UNWEIGHTED, //! unweighted + INTWEIGHT, //! integer + DOUBLEWEIGHT, //! double +}; + +static void GetCsrWTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + + auto duckpgq_state = GetDuckPGQState(info.context); + + result.SetVectorType(VectorType::CONSTANT_VECTOR); + auto result_data = ConstantVector::GetData(result); + auto csr = duckpgq_state->GetCSR(info.id); + int32_t flag; + if (!csr->initialized_w) { + flag = static_cast(CSRWType::UNWEIGHTED); + } else if (!csr->w.empty()) { + flag = static_cast(CSRWType::INTWEIGHT); + } else if (!csr->w_double.empty()) { + flag = static_cast(CSRWType::DOUBLEWEIGHT); + } else { + throw InternalException("Corrupted weight vector"); + } + result_data[0] = flag; +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterGetCSRWTypeScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction("csr_get_w_type", {LogicalType::INTEGER}, LogicalType::INTEGER, + GetCsrWTypeFunction, CSRFunctionData::CSRBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/iterativelength.cpp b/src/core/functions/scalar/iterativelength.cpp new file mode 100644 index 00000000..276e034d --- /dev/null +++ 
b/src/core/functions/scalar/iterativelength.cpp @@ -0,0 +1,303 @@ +#include "duckdb/main/client_data.hpp" +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/functions/function_data/iterative_length_function_data.hpp" + +#include +#include + +namespace duckdb { + +static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, vector> &seen, + vector> &visit, vector> &next) { + bool change = false; + for (auto i = 0; i < v_size; i++) { + next[i] = 0; + } + for (auto i = 0; i < v_size; i++) { + if (visit[i].any()) { + for (auto offset = v[i]; offset < v[i + 1]; offset++) { + auto n = e[offset]; + next[n] = next[n] | visit[i]; + } + } + } + for (auto i = 0; i < v_size; i++) { + next[i] = next[i] & ~seen[i]; + seen[i] = seen[i] | next[i]; + change |= next[i].any(); + } + return change; +} + +static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + auto duckpgq_state = GetDuckPGQState(info.context); + + D_ASSERT(duckpgq_state->csr_list[info.csr_id]); + + if (info.csr_id + 1 > duckpgq_state->csr_list.size()) { + throw ConstraintException("Invalid ID"); + } + auto csr_entry = duckpgq_state->csr_list.find(info.csr_id); + if (csr_entry == duckpgq_state->csr_list.end()) { + throw ConstraintException("Need to initialize CSR before doing shortest path"); + } + + if (!csr_entry->second->initialized_v) { + throw ConstraintException("Need to initialize CSR before doing shortest path"); + } + int64_t v_size = args.data[1].GetValue(0).GetValue(); + int64_t *v = reinterpret_cast(duckpgq_state->csr_list[info.csr_id]->v); + vector &e = duckpgq_state->csr_list[info.csr_id]->e; + + // get src and dst vectors for searches + auto &src = args.data[2]; + auto &dst = args.data[3]; + UnifiedVectorFormat vdata_src; + 
UnifiedVectorFormat vdata_dst; + src.ToUnifiedFormat(args.size(), vdata_src); + dst.ToUnifiedFormat(args.size(), vdata_dst); + auto src_data = reinterpret_cast(vdata_src.data); + auto dst_data = reinterpret_cast(vdata_dst.data); + + ValidityMask &result_validity = FlatVector::Validity(result); + + // create result vector + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + + // create temp SIMD arrays + vector> seen(v_size); + vector> visit1(v_size); + vector> visit2(v_size); + + // maps lane to search number + int64_t lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + + int64_t started_searches = 0; + while (started_searches < args.size()) { + + // empty visit vectors + for (auto i = 0; i < v_size; i++) { + seen[i] = 0; + visit1[i] = 0; + } + + // add search jobs to free lanes + uint64_t active = 0; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; + while (started_searches < args.size()) { + int64_t search_num = started_searches++; + auto src_pos = vdata_src.sel->get_index(search_num); + auto dst_pos = vdata_dst.sel->get_index(search_num); + if (!vdata_src.validity.RowIsValid(src_pos)) { + result_validity.SetInvalid(search_num); + result_data[search_num] = -1; /* no path */ + } else if (src_data[src_pos] == dst_data[dst_pos]) { + result_data[search_num] = 0; // path of length 0 does not require a search + } else { + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; + } + } + } + + // make passes while a lane is still active + for (int64_t iter = 1; active; iter++) { + if (!IterativeLength(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? 
visit2 : visit1)) { + break; + } + // detect lanes that finished + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + auto dst_pos = vdata_dst.sel->get_index(search_num); + if (seen[dst_data[dst_pos]][lane]) { + result_data[search_num] = iter; /* found at iter => iter = path length */ + lane_to_num[lane] = -1; // mark inactive + active--; + } + } + } + } + + // no changes anymore: any still active searches have no path + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + result_validity.SetInvalid(search_num); + result_data[search_num] = (int64_t)-1; /* no path */ + lane_to_num[lane] = -1; // mark inactive + } + } + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +static bool IterativeLengthBoundedStep(int64_t v_size, int64_t *v, vector &e, + vector> &visit, + vector> &next) { + bool change = false; + for (auto i = 0; i < v_size; i++) { + next[i] = 0; + } + for (auto i = 0; i < v_size; i++) { + if (visit[i].any()) { + for (auto offset = v[i]; offset < v[i + 1]; offset++) { + auto n = e[offset]; + next[n] = next[n] | visit[i]; + } + } + } + for (auto i = 0; i < v_size; i++) { + change |= next[i].any(); + } + return change; +} + +static void IterativeLengthBoundedFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + auto duckpgq_state = GetDuckPGQState(info.context); + + D_ASSERT(duckpgq_state->csr_list[info.csr_id]); + + if (info.csr_id + 1 > duckpgq_state->csr_list.size()) { + throw ConstraintException("Invalid ID"); + } + auto csr_entry = duckpgq_state->csr_list.find(info.csr_id); + if (csr_entry == duckpgq_state->csr_list.end()) { + throw ConstraintException("Need to initialize CSR before doing shortest path"); + } + + if (!csr_entry->second->initialized_v) { + throw 
ConstraintException("Need to initialize CSR before doing shortest path"); + } + int64_t v_size = args.data[1].GetValue(0).GetValue(); + int64_t *v = reinterpret_cast(duckpgq_state->csr_list[info.csr_id]->v); + vector &e = duckpgq_state->csr_list[info.csr_id]->e; + int64_t lower_limit = args.data[4].GetValue(0).GetValue(); + int64_t upper_limit = args.data[5].GetValue(0).GetValue(); + + if (lower_limit < 0) { + lower_limit = 0; + } + if (upper_limit < lower_limit) { + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + for (idx_t row = 0; row < args.size(); row++) { + result_data[row] = false; + } + return; + } + + // get src and dst vectors for searches + auto &src = args.data[2]; + auto &dst = args.data[3]; + UnifiedVectorFormat vdata_src; + UnifiedVectorFormat vdata_dst; + src.ToUnifiedFormat(args.size(), vdata_src); + dst.ToUnifiedFormat(args.size(), vdata_dst); + auto src_data = reinterpret_cast(vdata_src.data); + auto dst_data = reinterpret_cast(vdata_dst.data); + + ValidityMask &result_validity = FlatVector::Validity(result); + + // create result vector + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + + // create temp SIMD arrays + vector> visit1(v_size); + vector> visit2(v_size); + + // maps lane to search number + int64_t lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + + int64_t started_searches = 0; + while (started_searches < args.size()) { + for (auto i = 0; i < v_size; i++) { + visit1[i] = 0; + } + + uint64_t active = 0; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; + while (started_searches < args.size()) { + int64_t search_num = started_searches++; + auto src_pos = vdata_src.sel->get_index(search_num); + auto dst_pos = vdata_dst.sel->get_index(search_num); + if (!vdata_src.validity.RowIsValid(src_pos)) { + result_validity.SetInvalid(search_num); + 
result_data[search_num] = false; + } else if (src_data[src_pos] == dst_data[dst_pos] && lower_limit <= 0) { + result_data[search_num] = true; + } else { + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; + active++; + break; + } + } + } + + for (int64_t iter = 1; active && iter <= upper_limit; iter++) { + if (!IterativeLengthBoundedStep(v_size, v, e, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { + break; + } + if (iter >= lower_limit) { + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { + auto dst_pos = vdata_dst.sel->get_index(search_num); + if ((iter & 1) ? visit2[dst_data[dst_pos]][lane] : visit1[dst_data[dst_pos]][lane]) { + result_data[search_num] = true; + lane_to_num[lane] = -1; + active--; + } + } + } + } + } + + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { + result_data[search_num] = false; + lane_to_num[lane] = -1; + } + } + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterIterativeLengthScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction( + "iterativelength", {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BIGINT, IterativeLengthFunction, IterativeLengthFunctionData::IterativeLengthBind)); +} + +void CoreScalarFunctions::RegisterIterativeLengthBoundedScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction( + "iterativelengthbounded", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT}, + LogicalType::BOOLEAN, IterativeLengthBoundedFunction, 
IterativeLengthFunctionData::IterativeLengthBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/iterativelength2.cpp b/src/core/functions/scalar/iterativelength2.cpp new file mode 100644 index 00000000..14b6d60e --- /dev/null +++ b/src/core/functions/scalar/iterativelength2.cpp @@ -0,0 +1,142 @@ +#include "duckdb/main/client_data.hpp" +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/functions/function_data/iterative_length_function_data.hpp" +#include + +#include +#include + +namespace duckdb { + +static bool IterativeLength2(int64_t v_size, int64_t *V, vector &E, vector> &seen, + vector> &visit, vector> &next) { + std::bitset change; + for (auto v = 0; v < v_size; v++) { + seen[v] |= visit[v]; + next[v] = 0; + } + for (auto v = 0; v < v_size; v++) { + if (visit[v].any()) { + for (auto e = V[v]; e < V[v + 1]; e++) { + auto n = E[e]; + auto unseen = visit[v] & ~seen[n]; + next[n] |= unseen; + change |= unseen; + } + } + } + return change.any(); +} + +static void IterativeLength2Function(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + + auto duckpgq_state = GetDuckPGQState(info.context); + + D_ASSERT(duckpgq_state->csr_list[info.csr_id]); + int64_t v_size = args.data[1].GetValue(0).GetValue(); + int64_t *v = reinterpret_cast(duckpgq_state->csr_list[info.csr_id]->v); + vector &e = duckpgq_state->csr_list[info.csr_id]->e; + + // get src and dst vectors for searches + auto &src = args.data[2]; + auto &dst = args.data[3]; + UnifiedVectorFormat vdata_src; + UnifiedVectorFormat vdata_dst; + src.ToUnifiedFormat(args.size(), vdata_src); + dst.ToUnifiedFormat(args.size(), vdata_dst); + auto src_data = reinterpret_cast(vdata_src.data); + auto dst_data = reinterpret_cast(vdata_dst.data); + + // create result vector + 
result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + + ValidityMask &result_validity = FlatVector::Validity(result); + + // create temp SIMD arrays + vector> seen(v_size); + vector> visit1(v_size); + vector> visit2(v_size); + + // maps lane to search number + int64_t lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + + int64_t started_searches = 0; + while (started_searches < args.size()) { + + // empty visit vectors + for (auto i = 0; i < v_size; i++) { + seen[i] = 0; + visit1[i] = 0; + } + + // add search jobs to free lanes + uint64_t active = 0; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; + while (started_searches < args.size()) { + int64_t search_num = started_searches++; + auto src_pos = vdata_src.sel->get_index(search_num); + auto dst_pos = vdata_dst.sel->get_index(search_num); + if (!vdata_src.validity.RowIsValid(src_pos)) { + result_validity.SetInvalid(search_num); + result_data[search_num] = -1; // no path + } else if (src_data[src_pos] == dst_data[dst_pos]) { + result_data[search_num] = 0; // path of length 0 does not require a search + } else { + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; + } + } + } + + // make passes while a lane is still active + for (int64_t iter = 1; active; iter++) { + if (!IterativeLength2(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { + break; + } + // detect lanes that finished + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + auto dst_pos = vdata_dst.sel->get_index(search_num); + if ((iter & 1) ? 
visit2[dst_data[dst_pos]][lane] : visit1[dst_data[dst_pos]][lane]) { + result_data[search_num] = iter; /* found at iter => iter = path length */ + lane_to_num[lane] = -1; // mark inactive + active--; + } + } + } + } + // no changes anymore: any still active searches have no path + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + result_validity.SetInvalid(search_num); + result_data[search_num] = (int64_t)-1; /* no path */ + lane_to_num[lane] = -1; // mark inactive + } + } + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterIterativeLength2ScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction( + "iterativelength2", {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BIGINT, IterativeLength2Function, IterativeLengthFunctionData::IterativeLengthBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/iterativelength_bidirectional.cpp b/src/core/functions/scalar/iterativelength_bidirectional.cpp new file mode 100644 index 00000000..9c63898f --- /dev/null +++ b/src/core/functions/scalar/iterativelength_bidirectional.cpp @@ -0,0 +1,166 @@ +#include "duckdb/main/client_data.hpp" +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/functions/function_data/iterative_length_function_data.hpp" + +#include +#include + +namespace duckdb { + +static bool IterativeLengthBidirectional(int64_t v_size, int64_t *V, vector &E, + vector> &seen, vector> &visit, + vector> &next) { + bool change = false; + for (auto v = 0; v < v_size; v++) { + 
next[v] = 0; + } + for (auto v = 0; v < v_size; v++) { + if (visit[v].any()) { + for (auto e = V[v]; e < V[v + 1]; e++) { + auto n = E[e]; + next[n] = next[n] | visit[v]; + } + } + } + for (auto v = 0; v < v_size; v++) { + next[v] = next[v] & ~seen[v]; + seen[v] = seen[v] | next[v]; + change |= next[v].any(); + } + return change; +} +static std::bitset InterSectFronteers(int64_t v_size, vector> &src_seen, + vector> &dst_seen) { + std::bitset result; + for (auto v = 0; v < v_size; v++) { + result |= src_seen[v] & dst_seen[v]; + } + return result; +} + +static void IterativeLengthBidirectionalFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + + auto duckpgq_state = GetDuckPGQState(info.context); + + D_ASSERT(duckpgq_state->csr_list[info.csr_id]); + int64_t v_size = args.data[1].GetValue(0).GetValue(); + int64_t *v = reinterpret_cast(duckpgq_state->csr_list[info.csr_id]->v); + vector &e = duckpgq_state->csr_list[info.csr_id]->e; + + // get src and dst vectors for searches + auto &src = args.data[2]; + auto &dst = args.data[3]; + UnifiedVectorFormat vdata_src; + UnifiedVectorFormat vdata_dst; + src.ToUnifiedFormat(args.size(), vdata_src); + dst.ToUnifiedFormat(args.size(), vdata_dst); + auto src_data = vdata_src.data; + auto dst_data = vdata_dst.data; + + // create result vector + result.SetVectorType(VectorType::FLAT_VECTOR); + ValidityMask &result_validity = FlatVector::Validity(result); + auto result_data = FlatVector::GetData(result); + + // create temp SIMD arrays + vector> src_seen(v_size); + vector> src_visit1(v_size); + vector> src_visit2(v_size); + vector> dst_seen(v_size); + vector> dst_visit1(v_size); + vector> dst_visit2(v_size); + + // maps lane to search number + int64_t lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + + int64_t started_searches = 0; + while (started_searches < 
args.size()) { + + // empty visit vectors + for (auto i = 0; i < v_size; i++) { + src_seen[i] = 0; + dst_seen[i] = 0; + src_visit1[i] = 0; + dst_visit1[i] = 0; + } + + // add search jobs to free lanes + uint64_t active = 0; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; + while (started_searches < args.size()) { + int64_t search_num = started_searches++; + auto src_pos = vdata_src.sel->get_index(search_num); + auto dst_pos = vdata_dst.sel->get_index(search_num); + if (!vdata_src.validity.RowIsValid(src_pos)) { + result_validity.SetInvalid(search_num); + result_data[search_num] = -1; // no path + } else if (src_data[src_pos] == dst_data[dst_pos]) { + result_data[search_num] = 0; // path of length 0 does not require a search + } else { + src_visit1[src_data[src_pos]][lane] = true; + dst_visit1[dst_data[dst_pos]][lane] = true; + src_seen[src_data[src_pos]][lane] = true; + dst_seen[dst_data[dst_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; + } + } + } + + // make passes while a lane is still active + for (int64_t iter = 0; active; iter++) { + if (!IterativeLengthBidirectional(v_size, v, e, (iter & 1) ? dst_seen : src_seen, + (iter & 2) ? (iter & 1) ? dst_visit2 : src_visit2 + : (iter & 1) ? dst_visit1 + : src_visit1, + (iter & 2) ? (iter & 1) ? dst_visit1 : src_visit1 + : (iter & 1) ? 
dst_visit2 + : src_visit2)) { + break; + } + std::bitset done = InterSectFronteers(v_size, src_seen, dst_seen); + // detect lanes that finished + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + if (done[lane]) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { + result_data[search_num] = iter + 1; /* found at iter => iter = path length */ + lane_to_num[lane] = -1; // mark inactive + active--; + } + } + } + } + // no changes anymore: any still active searches have no path + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { + result_validity.SetInvalid(search_num); + result_data[search_num] = (int64_t)-1; /* no path */ + lane_to_num[lane] = -1; // mark inactive + } + } + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterIterativeLengthBidirectionalScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction( + "iterativelengthbidirectional", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT, + IterativeLengthBidirectionalFunction, IterativeLengthFunctionData::IterativeLengthBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/local_clustering_coefficient.cpp b/src/core/functions/scalar/local_clustering_coefficient.cpp new file mode 100644 index 00000000..11f0816f --- /dev/null +++ b/src/core/functions/scalar/local_clustering_coefficient.cpp @@ -0,0 +1,83 @@ +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/functions/function_data/local_clustering_coefficient_function_data.hpp" +#include "duckpgq/core/utils/duckpgq_bitmap.hpp" +#include "duckpgq/core/utils/duckpgq_utils.hpp" + 
+#include + +namespace duckdb { + +static void LocalClusteringCoefficientFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + auto duckpgq_state = GetDuckPGQState(info.context); + + auto csr_entry = duckpgq_state->csr_list.find(info.csr_id); + if (csr_entry == duckpgq_state->csr_list.end()) { + throw ConstraintException("CSR not found. Is the graph populated?"); + } + + if (!(csr_entry->second->initialized_v && csr_entry->second->initialized_e)) { + throw ConstraintException("Need to initialize CSR before doing local clustering coefficient."); + } + int64_t *v = reinterpret_cast(duckpgq_state->csr_list[info.csr_id]->v); + vector &e = duckpgq_state->csr_list[info.csr_id]->e; + size_t v_size = duckpgq_state->csr_list[info.csr_id]->vsize; + // get src and dst vectors for searches + auto &src = args.data[1]; + UnifiedVectorFormat vdata_src; + src.ToUnifiedFormat(args.size(), vdata_src); + auto src_data = reinterpret_cast(vdata_src.data); + + ValidityMask &result_validity = FlatVector::Validity(result); + // create result vector + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + + DuckPGQBitmap neighbors(v_size); + + for (idx_t n = 0; n < args.size(); n++) { + auto src_sel = vdata_src.sel->get_index(n); + if (!vdata_src.validity.RowIsValid(src_sel)) { + result_validity.SetInvalid(n); + } + int64_t src_node = src_data[src_sel]; + int64_t number_of_edges = v[src_node + 1] - v[src_node]; + if (number_of_edges < 2) { + result_data[n] = static_cast(0.0); + continue; + } + neighbors.reset(); + for (int64_t offset = v[src_node]; offset < v[src_node + 1]; offset++) { + neighbors.set(e[offset]); + } + + // Count connections between neighbors + int64_t count = 0; + for (int64_t offset = v[src_node]; offset < v[src_node + 1]; offset++) { + int64_t neighbor = e[offset]; + for (int64_t offset2 = v[neighbor]; offset2 < v[neighbor + 1]; 
offset2++) { + int is_connected = neighbors.test(e[offset2]); + count += is_connected; // Add 1 if connected, 0 otherwise + } + } + + const float num_edges_float = static_cast(number_of_edges); + float local_result = static_cast(count) / (num_edges_float * (num_edges_float - 1.0f)); + + result_data[n] = local_result; + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterLocalClusteringCoefficientScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction("local_clustering_coefficient", {LogicalType::INTEGER, LogicalType::BIGINT}, + LogicalType::FLOAT, LocalClusteringCoefficientFunction, + LocalClusteringCoefficientFunctionData::LocalClusteringCoefficientBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/pagerank.cpp b/src/core/functions/scalar/pagerank.cpp new file mode 100644 index 00000000..5ff05561 --- /dev/null +++ b/src/core/functions/scalar/pagerank.cpp @@ -0,0 +1,121 @@ +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/functions/function_data/pagerank_function_data.hpp" +#include +#include +#include +#include + +namespace duckdb { + +static void PageRankFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + auto duckpgq_state = GetDuckPGQState(info.context); + + // Locate the CSR representation of the graph + auto csr_entry = duckpgq_state->csr_list.find(info.csr_id); + if (csr_entry == duckpgq_state->csr_list.end()) { + throw ConstraintException("CSR not found. 
Is the graph populated?"); + } + + if (!(csr_entry->second->initialized_v && csr_entry->second->initialized_e)) { + throw ConstraintException("Need to initialize CSR before running PageRank."); + } + + auto *v = reinterpret_cast(duckpgq_state->csr_list[info.csr_id]->v); + vector &e = duckpgq_state->csr_list[info.csr_id]->e; + size_t v_size = duckpgq_state->csr_list[info.csr_id]->vsize; + + // State initialization (only once) + if (!info.state_initialized) { + info.rank.resize(v_size, 1.0 / static_cast(v_size)); // Initial rank for each node + info.temp_rank.resize(v_size, + 0.0); // Temporary storage for ranks during iteration + info.damping_factor = 0.85; // Typical damping factor + info.convergence_threshold = 1e-6; // Convergence threshold + info.state_initialized = true; + info.converged = false; + info.iteration_count = 0; + } + + // Check if already converged + if (!info.converged) { + std::lock_guard guard(info.state_lock); // Thread safety + + bool continue_iteration = true; + while (continue_iteration) { + fill(info.temp_rank.begin(), info.temp_rank.end(), 0.0); + + double total_dangling_rank = 0.0; // For dangling nodes + + for (size_t i = 0; i < v_size; i++) { + auto start_edge = v[i]; + auto end_edge = (i + 1 < v_size) ? 
v[i + 1] : e.size(); // Adjust end_edge + if (end_edge > start_edge) { + double rank_contrib = info.rank[i] / static_cast(end_edge - start_edge); + for (int64_t j = start_edge; j < end_edge; j++) { + int64_t neighbor = e[j]; + info.temp_rank[neighbor] += rank_contrib; + } + } else { + total_dangling_rank += info.rank[i]; + } + } + + // Apply damping factor and handle dangling node ranks + double correction_factor = total_dangling_rank / static_cast(v_size); + double max_delta = 0.0; + for (size_t i = 0; i < v_size; i++) { + info.temp_rank[i] = (1 - info.damping_factor) / static_cast(v_size) + + info.damping_factor * (info.temp_rank[i] + correction_factor); + max_delta = std::max(max_delta, std::abs(info.temp_rank[i] - info.rank[i])); + } + + info.rank.swap(info.temp_rank); + info.iteration_count++; + if (max_delta < info.convergence_threshold) { + info.converged = true; + continue_iteration = false; + } + } + } + + // Get the source vector for the current DataChunk + auto &src = args.data[1]; + UnifiedVectorFormat vdata_src; + src.ToUnifiedFormat(args.size(), vdata_src); + auto src_data = reinterpret_cast(vdata_src.data); + + // Create result vector + ValidityMask &result_validity = FlatVector::Validity(result); + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + + // Output the PageRank value corresponding to each source ID in the DataChunk + for (idx_t i = 0; i < args.size(); i++) { + auto id_pos = vdata_src.sel->get_index(i); + if (!vdata_src.validity.RowIsValid(id_pos)) { + result_validity.SetInvalid(i); + continue; // Skip invalid rows + } + auto node_id = src_data[id_pos]; + if (node_id < 0 || node_id >= v_size) { + result_validity.SetInvalid(i); + continue; + } + result_data[i] = info.rank[node_id]; + } + + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +//------------------------------------------------------------------------------ +// Register functions 
+//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterPageRankScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction("pagerank", {LogicalType::INTEGER, LogicalType::BIGINT}, LogicalType::DOUBLE, + PageRankFunction, PageRankFunctionData::PageRankBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/reachability.cpp b/src/core/functions/scalar/reachability.cpp new file mode 100644 index 00000000..85828a6d --- /dev/null +++ b/src/core/functions/scalar/reachability.cpp @@ -0,0 +1,266 @@ +#include "duckdb/main/client_data.hpp" +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/functions/function_data/iterative_length_function_data.hpp" +#include + +#include +#include + +namespace duckdb { + +typedef enum { NO_ARRAY, ARRAY, INTERMEDIATE } msbfs_modes_t; + +static int16_t InitialiseBfs(idx_t curr_batch, idx_t size, data_ptr_t src_data, const SelectionVector *src_sel, + const ValidityMask &src_validity, vector> &seen, + vector> &visit, vector> &visit_next, + unordered_map>> &lane_map) { + int16_t lanes = 0; + int16_t curr_batch_size = 0; + + for (idx_t i = curr_batch; i < size && lanes < LANE_LIMIT; i++) { + auto src_index = src_sel->get_index(i); + + if (src_validity.RowIsValid(src_index)) { + auto src_entry = src_data[src_index]; + auto entry = lane_map.find(src_entry); + if (entry == lane_map.end()) { + lane_map[src_entry].first = lanes; + seen[src_entry][lanes] = true; + visit[src_entry][lanes] = true; + lanes++; + } + lane_map[src_entry].second.push_back(i); + curr_batch_size++; + } + } + return curr_batch_size; +} + +static bool BfsWithoutArrayVariant(bool exit_early, CSR *csr, int64_t input_size, vector> &seen, + vector> &visit, vector> &visit_next, + vector &visit_list) { + for (int64_t i = 0; i < input_size; i++) { 
+ if (!visit[i].any()) { + continue; + } + + for (auto index = (int64_t)csr->v[i]; index < csr->v[i + 1]; index++) { + auto n = csr->e[index]; + visit_next[n] = visit_next[n] | visit[i]; + } + } + + for (int64_t i = 0; i < input_size; i++) { + if (visit_next[i].none()) { + continue; + } + visit_next[i] = visit_next[i] & ~seen[i]; + seen[i] = seen[i] | visit_next[i]; + if (exit_early && visit_next[i].any()) { + exit_early = false; + } + if (visit_next[i].any()) { + visit_list.push_back(i); + } + } + return exit_early; +} + +static bool BfsWithoutArray(bool exit_early, CSR *csr, int64_t input_size, vector> &seen, + vector> &visit, vector> &visit_next) { + for (int64_t i = 0; i < input_size; i++) { + if (!visit[i].any()) { + continue; + } + + for (auto index = (int64_t)csr->v[i]; index < (int64_t)csr->v[i + 1]; index++) { + auto n = csr->e[index]; + visit_next[n] = visit_next[n] | visit[i]; + } + } + + for (int64_t i = 0; i < input_size; i++) { + if (visit_next[i].none()) { + continue; + } + visit_next[i] = visit_next[i] & ~seen[i]; + seen[i] = seen[i] | visit_next[i]; + if (exit_early && visit_next[i].any()) { + exit_early = false; + } + } + return exit_early; +} + +static pair BfsTempStateVariant(bool exit_early, CSR *csr, int64_t input_size, + vector> &seen, + vector> &visit, + vector> &visit_next) { + size_t num_nodes_to_visit = 0; + for (int64_t i = 0; i < input_size; i++) { + if (!visit[i].any()) { + continue; + } + + for (auto index = (int64_t)csr->v[i]; index < (int64_t)csr->v[i + 1]; index++) { + auto n = csr->e[index]; + visit_next[n] = visit_next[n] | visit[i]; + } + } + + for (int64_t i = 0; i < input_size; i++) { + if (visit_next[i].none()) { + continue; + } + visit_next[i] = visit_next[i] & ~seen[i]; + seen[i] = seen[i] | visit_next[i]; + if (exit_early && visit_next[i].any()) { + exit_early = false; + } + if (visit_next[i].any()) { + num_nodes_to_visit++; + } + } + return pair(exit_early, num_nodes_to_visit); +} + +static bool BfsWithArrayVariant(bool 
exit_early, CSR *csr, vector> &seen, + vector> &visit, vector> &visit_next, + vector &visit_list) { + unordered_set neighbours_set; + for (int64_t i : visit_list) { + for (auto index = (int64_t)csr->v[i]; index < (int64_t)csr->v[i + 1]; index++) { + auto n = csr->e[index]; + visit_next[n] = visit_next[n] | visit[i]; + neighbours_set.insert(n); + } + } + visit_list.clear(); + for (int64_t i : neighbours_set) { + visit_next[i] = visit_next[i] & ~seen[i]; + seen[i] = seen[i] | visit_next[i]; + if (exit_early && visit_next[i].any()) { + exit_early = false; + } + if (visit_next[i].any()) { + visit_list.push_back(i); + } + } + return exit_early; +} + +static int FindMode(int mode, size_t visit_list_len, size_t visit_limit, size_t num_nodes_to_visit) { + if (mode == 0 && visit_list_len > 0) { + mode = 1; + } else if (mode == 1 && visit_list_len > visit_limit) { + mode = 2; + } else if (mode == 2 && num_nodes_to_visit < visit_limit) { + mode = 0; + } + return mode; +} + +static void ReachabilityFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + + bool is_variant = args.data[1].GetValue(0).GetValue(); + int64_t input_size = args.data[2].GetValue(0).GetValue(); + + auto &src = args.data[3]; + + UnifiedVectorFormat vdata_src, vdata_target; + src.ToUnifiedFormat(args.size(), vdata_src); + + auto src_data = vdata_src.data; + + auto &target = args.data[4]; + target.ToUnifiedFormat(args.size(), vdata_target); + auto target_data = vdata_target.data; + + idx_t result_size = 0; + vector visit_list; + size_t visit_limit = input_size / VISIT_SIZE_DIVISOR; + size_t num_nodes_to_visit = 0; + result.SetVectorType(VectorType::FLAT_VECTOR); + + auto result_data = FlatVector::GetData(result); + auto duckpgq_state = GetDuckPGQState(info.context); + + CSR *csr = duckpgq_state->GetCSR(info.csr_id); + + while (result_size < args.size()) { + vector> seen(input_size); + vector> visit(input_size); 
+ vector> visit_next(input_size); + + //! mapping of src_value -> (bfs_num/lane, vector of indices in src_data) + unordered_map>> lane_map; + auto curr_batch_size = InitialiseBfs(result_size, args.size(), src_data, vdata_src.sel, vdata_src.validity, + seen, visit, visit_next, lane_map); + int mode = 0; + bool exit_early = false; + while (!exit_early) { + exit_early = true; + if (is_variant) { + mode = FindMode(mode, visit_list.size(), visit_limit, num_nodes_to_visit); + switch (mode) { + case 1: + exit_early = BfsWithArrayVariant(exit_early, csr, seen, visit, visit_next, visit_list); + break; + case 0: + exit_early = + BfsWithoutArrayVariant(exit_early, csr, input_size, seen, visit, visit_next, visit_list); + break; + case 2: { + auto return_pair = BfsTempStateVariant(exit_early, csr, input_size, seen, visit, visit_next); + exit_early = return_pair.first; + num_nodes_to_visit = return_pair.second; + break; + } + default: + throw Exception(ExceptionType::INTERNAL, "Unknown reachability mode encountered"); + } + } else { + exit_early = BfsWithoutArray(exit_early, csr, input_size, seen, visit, visit_next); + } + + visit = visit_next; + for (auto i = 0; i < input_size; i++) { + visit_next[i] = 0; + } + } + + for (const auto &iter : lane_map) { + auto value = iter.first; + auto bfs_num = iter.second.first; + auto pos = iter.second.second; + for (auto index : pos) { + auto target_index = vdata_target.sel->get_index(index); + if (seen[target_data[target_index]][bfs_num] && seen[value][bfs_num]) { + // if(is_bit_set(seen[target_data[index]], bfs_num) & + // is_bit_set(seen[value], bfs_num) ) { + result_data[index] = true; + } else { + result_data[index] = false; + } + } + } + result_size = result_size + curr_batch_size; + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void 
CoreScalarFunctions::RegisterReachabilityScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction( + "reachability", + {LogicalType::INTEGER, LogicalType::BOOLEAN, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BOOLEAN, ReachabilityFunction, IterativeLengthFunctionData::IterativeLengthBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/shortest_path.cpp b/src/core/functions/scalar/shortest_path.cpp new file mode 100644 index 00000000..4d0866c6 --- /dev/null +++ b/src/core/functions/scalar/shortest_path.cpp @@ -0,0 +1,219 @@ +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/functions/function_data/iterative_length_function_data.hpp" + +#include +#include + +namespace duckdb { + +static bool IterativeLength(int64_t v_size, int64_t *V, vector &E, vector &edge_ids, + vector> &parents_v, vector> &parents_e, + vector> &seen, vector> &visit, + vector> &next) { + bool change = false; + for (auto v = 0; v < v_size; v++) { + next[v] = 0; + } + //! Keep track of edge id through which the node was reached + for (auto v = 0; v < v_size; v++) { + if (visit[v].any()) { + for (auto e = V[v]; e < V[v + 1]; e++) { + auto n = E[e]; + auto edge_id = edge_ids[e]; + next[n] = next[n] | visit[v]; + for (auto l = 0; l < LANE_LIMIT; l++) { + parents_v[n][l] = ((parents_v[n][l] == -1) && visit[v][l]) ? v : parents_v[n][l]; + parents_e[n][l] = ((parents_e[n][l] == -1) && visit[v][l]) ? 
edge_id : parents_e[n][l]; + } + } + } + } + + for (auto v = 0; v < v_size; v++) { + next[v] = next[v] & ~seen[v]; + seen[v] = seen[v] | next[v]; + change |= next[v].any(); + } + return change; +} + +static void ShortestPathFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + auto duckpgq_state = GetDuckPGQState(info.context); + + D_ASSERT(duckpgq_state->csr_list[info.csr_id]); + auto csr_entry = duckpgq_state->csr_list.find(info.csr_id); + if (csr_entry == duckpgq_state->csr_list.end()) { + throw ConstraintException("Invalid ID"); + } + auto &csr = csr_entry->second; + + if (!csr->initialized_v) { + throw ConstraintException("Need to initialize CSR before doing shortest path"); + } + int64_t v_size = args.data[1].GetValue(0).GetValue(); + + auto *v = reinterpret_cast(csr->v); + vector &e = csr->e; + vector &edge_ids = csr->edge_ids; + + auto &src = args.data[2]; + auto &target = args.data[3]; + + UnifiedVectorFormat vdata_src, vdata_dst; + src.ToUnifiedFormat(args.size(), vdata_src); + target.ToUnifiedFormat(args.size(), vdata_dst); + + auto src_data = reinterpret_cast(vdata_src.data); + auto dst_data = reinterpret_cast(vdata_dst.data); + + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + ValidityMask &result_validity = FlatVector::Validity(result); + + // create temp SIMD arrays + vector> seen(v_size); + vector> visit1(v_size); + vector> visit2(v_size); + vector> parents_v(v_size, std::vector(LANE_LIMIT, -1)); + vector> parents_e(v_size, std::vector(LANE_LIMIT, -1)); + + // maps lane to search number + int64_t lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + uint64_t total_len = 0; + + int64_t started_searches = 0; + while (started_searches < args.size()) { + + // empty visit vectors + for (auto i = 0; i < v_size; i++) { + seen[i] = 0; + 
visit1[i] = 0; + for (auto j = 0; j < LANE_LIMIT; j++) { + parents_v[i][j] = -1; + parents_e[i][j] = -1; + } + } + + // add search jobs to free lanes + uint64_t active = 0; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; + while (started_searches < args.size()) { + int64_t search_num = started_searches++; + auto src_pos = vdata_src.sel->get_index(search_num); + if (!vdata_src.validity.RowIsValid(src_pos)) { + result_validity.SetInvalid(search_num); + } else { + visit1[src_data[src_pos]][lane] = true; + parents_v[src_data[src_pos]][lane] = src_data[src_pos]; // Mark source with source id + parents_e[src_data[src_pos]][lane] = -2; // Mark the source with -2, there is no incoming edge for + // the source. + lane_to_num[lane] = search_num; // active lane + active++; + break; + } + } + } + + //! make passes while a lane is still active + for (int64_t iter = 1; active; iter++) { + //! Perform one step of bfs exploration + if (!IterativeLength(v_size, v, e, edge_ids, parents_v, parents_e, seen, (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1)) { + break; + } + int64_t finished_searches = 0; + // detect lanes that finished + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + //! Check if dst for a source has been seen + auto dst_pos = vdata_dst.sel->get_index(search_num); + if (seen[dst_data[dst_pos]][lane]) { + finished_searches++; + } + } + } + if (finished_searches == LANE_LIMIT) { + break; + } + } + //! Reconstruct the paths + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num == -1) { // empty lanes + continue; + } + + //! 
Searches that have stopped have found a path + auto src_pos = vdata_src.sel->get_index(search_num); + auto dst_pos = vdata_dst.sel->get_index(search_num); + if (src_data[src_pos] == dst_data[dst_pos]) { // Source == destination + unique_ptr output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + ListVector::PushBack(*output, src_data[src_pos]); + ListVector::Append(result, ListVector::GetEntry(*output), ListVector::GetListSize(*output)); + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + total_len += result_data[search_num].length; + continue; + } + std::vector output_vector; + std::vector output_edge; + auto source_v = src_data[src_pos]; // Take the source + + auto parent_vertex = parents_v[dst_data[dst_pos]][lane]; // Take the parent vertex of the destination vertex + auto parent_edge = parents_e[dst_data[dst_pos]][lane]; // Take the parent edge of the destination vertex + + output_vector.push_back(dst_data[dst_pos]); // Add destination vertex + output_vector.push_back(parent_edge); + while (parent_vertex != source_v) { // Continue adding vertices until we + // have reached the source vertex + //! 
-1 is used to signify no parent + if (parent_vertex == -1 || parent_vertex == parents_v[parent_vertex][lane]) { + result_validity.SetInvalid(search_num); + break; + } + output_vector.push_back(parent_vertex); + parent_edge = parents_e[parent_vertex][lane]; + parent_vertex = parents_v[parent_vertex][lane]; + output_vector.push_back(parent_edge); + } + + if (!result_validity.RowIsValid(search_num)) { + continue; + } + output_vector.push_back(source_v); + std::reverse(output_vector.begin(), output_vector.end()); + auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + for (auto val : output_vector) { + Value value_to_insert = val; + ListVector::PushBack(*output, value_to_insert); + } + + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + ListVector::Append(result, ListVector::GetEntry(*output), ListVector::GetListSize(*output)); + total_len += result_data[search_num].length; + } + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterShortestPathScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction( + "shortestpath", {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::LIST(LogicalType::BIGINT), ShortestPathFunction, + IterativeLengthFunctionData::IterativeLengthBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/scalar/weakly_connected_component.cpp b/src/core/functions/scalar/weakly_connected_component.cpp new file mode 100644 index 00000000..27a46361 --- /dev/null +++ b/src/core/functions/scalar/weakly_connected_component.cpp @@ -0,0 +1,115 @@ +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include 
"duckpgq/core/functions/function_data/local_clustering_coefficient_function_data.hpp" +#include +#include +#include +#include +#include +#include +#include + +namespace duckdb { + +// Helper function to find the root of a node with path compression +const static int64_t FindTreeRoot(std::vector &forest, int64_t node) { + while (true) { + int64_t parent = forest[node]; + if (parent == node) { + return node; // Found the root + } + forest[node] = forest[parent]; + node = parent; + } +} + +// Helper function to link two nodes in the same connected component +const static void Link(std::vector &forest, int64_t nodeA, int64_t nodeB) { + int64_t rootA = FindTreeRoot(forest, nodeA); + int64_t rootB = FindTreeRoot(forest, nodeB); + + if (rootA != rootB) { + forest[rootA] = rootB; + } +} + +static void WeaklyConnectedComponentFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &func_expr = state.expr.Cast(); + auto &info = func_expr.bind_info->Cast(); + auto duckpgq_state = GetDuckPGQState(info.context); + + auto csr_entry = duckpgq_state->csr_list.find(info.csr_id); + if (csr_entry == duckpgq_state->csr_list.end()) { + throw ConstraintException("CSR not found. 
Is the graph populated?"); + } + + if (!(csr_entry->second->initialized_v && csr_entry->second->initialized_e)) { + throw ConstraintException("Need to initialize CSR before doing weakly connected components."); + } + + // Retrieve CSR data + int64_t *v = reinterpret_cast(duckpgq_state->csr_list[info.csr_id]->v); + vector &e = duckpgq_state->csr_list[info.csr_id]->e; + size_t v_size = duckpgq_state->csr_list[info.csr_id]->vsize; + + // Get source vector for searches + auto &src = args.data[1]; + UnifiedVectorFormat vdata_src; + src.ToUnifiedFormat(args.size(), vdata_src); + auto src_data = reinterpret_cast(vdata_src.data); + ValidityMask &result_validity = FlatVector::Validity(result); + + // Create result vector + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + + if (!info.state_initialized) { + std::lock_guard guard(info.initialize_lock); // Thread safety + if (!info.state_converged) { + info.forest.resize(v_size); + info.state_initialized = true; // Initialize state + } + } + + // Check if already converged + if (!info.state_converged) { + std::lock_guard guard(info.wcc_lock); // Thread safety + if (!info.state_converged) { + // Initialize the forest for connected components + for (int64_t i = 0; i < v_size - 1; ++i) { + info.forest[i] = i; // Each node points to itself + } + // Process edges to link nodes + for (int64_t i = 0; i < v_size - 1; i++) { + for (int64_t edge_idx = v[i]; edge_idx < v[i + 1]; edge_idx++) { + int64_t neighbor = e[edge_idx]; + Link(info.forest, i, neighbor); + } + } + info.state_converged = true; + } + } + // Assign component IDs for the source nodes + for (size_t i = 0; i < args.size(); i++) { + int64_t src_node = src_data[i]; + if (src_node >= 0 && src_node < v_size) { + result_data[i] = FindTreeRoot(info.forest, src_node); // Assign component ID to the result + } else { + result_validity.SetInvalid(i); + } + } + + // Mark CSR for deletion + 
duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreScalarFunctions::RegisterWeaklyConnectedComponentScalarFunction(ExtensionLoader &loader) { + loader.RegisterFunction(ScalarFunction("weakly_connected_component", {LogicalType::INTEGER, LogicalType::BIGINT}, + LogicalType::BIGINT, WeaklyConnectedComponentFunction, + WeaklyConnectedComponentFunctionData::WeaklyConnectedComponentBind)); +} + +} // namespace duckdb diff --git a/src/core/functions/table/CMakeLists.txt b/src/core/functions/table/CMakeLists.txt new file mode 100644 index 00000000..c954e556 --- /dev/null +++ b/src/core/functions/table/CMakeLists.txt @@ -0,0 +1,12 @@ +set(EXTENSION_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/create_property_graph.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/describe_property_graph.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/drop_property_graph.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/local_clustering_coefficient.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/match.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pagerank.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pgq_scan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/summarize_property_graph.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/weakly_connected_component.cpp + ${EXTENSION_SOURCES} + PARENT_SCOPE) diff --git a/src/core/functions/table/create_property_graph.cpp b/src/core/functions/table/create_property_graph.cpp new file mode 100644 index 00000000..b51f3fbe --- /dev/null +++ b/src/core/functions/table/create_property_graph.cpp @@ -0,0 +1,416 @@ +#include "duckpgq/core/functions/table/create_property_graph.hpp" +#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" +#include "duckdb/parser/constraints/foreign_key_constraint.hpp" +#include "duckdb/parser/statement/create_statement.hpp" +#include "duckpgq/common.hpp" +#include +#include +#include "duckdb/main/connection_manager.hpp" +#include +#include 
"duckdb/catalog/catalog_entry/view_catalog_entry.hpp" +#include "duckdb/catalog/catalog.hpp" + +namespace duckdb { + +void CreatePropertyGraphFunction::CheckPropertyGraphTableLabels(const shared_ptr &pg_table, + optional_ptr &table) { + if (!pg_table->discriminator.empty()) { + if (!table->ColumnExists(pg_table->discriminator)) { + throw Exception(ExceptionType::INVALID, + "Column " + pg_table->discriminator + " not found in table " + pg_table->table_name); + } + auto &column = table->GetColumn(pg_table->discriminator); + if (!(column.GetType() == LogicalType::BIGINT || column.GetType() == LogicalType::INTEGER)) { + throw Exception(ExceptionType::INVALID, "The discriminator column " + pg_table->discriminator + + " of table " + pg_table->table_name + + " should be of type BIGINT or INTEGER"); + } + } +} + +void CreatePropertyGraphFunction::CheckPropertyGraphTableColumns(const shared_ptr &pg_table, + optional_ptr &table) { + if (pg_table->no_columns) { + return; + } + + if (pg_table->all_columns) { + for (auto &except_column : pg_table->except_columns) { + if (!table->ColumnExists(except_column)) { + throw Exception(ExceptionType::INVALID, + "EXCEPT column " + except_column + " not found in table " + pg_table->table_name); + } + } + + auto columns_of_table = table->GetColumns().GetColumnNames(); + + std::sort(std::begin(columns_of_table), std::end(columns_of_table)); + std::sort(std::begin(pg_table->except_columns), std::end(pg_table->except_columns)); + std::set_difference(columns_of_table.begin(), columns_of_table.end(), pg_table->except_columns.begin(), + pg_table->except_columns.end(), + std::inserter(pg_table->column_names, pg_table->column_names.begin())); + pg_table->column_aliases = pg_table->column_names; + return; + } + + for (auto &column : pg_table->column_names) { + if (!table->ColumnExists(column)) { + throw Exception(ExceptionType::INVALID, "Column " + column + " not found in table " + pg_table->table_name); + } + } +} + +// Helper function to validate 
source/destination keys +void CreatePropertyGraphFunction::ValidateKeys(shared_ptr &edge_table, const string &reference, + const string &key_type, vector &pk_columns, + vector &fk_columns, + const vector> &table_constraints) { + // todo(dtenwolde) add test case for attached databases or different schema that has pk-fk relationships + if (fk_columns.empty() && pk_columns.empty()) { + if (table_constraints.empty()) { + throw Exception(ExceptionType::INVALID, "No primary key - foreign key relationship found in " + + edge_table->table_name + " with " + + StringUtil::Upper(key_type) + " table " + reference); + } + + for (const auto &constraint : table_constraints) { + if (constraint->type == ConstraintType::FOREIGN_KEY) { + auto fk_constraint = constraint->Cast(); + if (fk_constraint.info.table != reference) { + continue; + } + // If a PK-FK relationship was found earlier, throw an ambiguity + // exception + if (!pk_columns.empty() && !fk_columns.empty()) { + throw Exception(ExceptionType::INVALID, "Multiple primary key - foreign key relationships " + "detected between " + + edge_table->table_name + " and " + reference + + ". 
" + "Please explicitly define the primary key and " + "foreign key columns using `" + + StringUtil::Upper(key_type) + + " KEY REFERENCES " + reference + + " `"); + } + pk_columns = fk_constraint.pk_columns; + fk_columns = fk_constraint.fk_columns; + } + } + + if (pk_columns.empty()) { + throw Exception(ExceptionType::INVALID, "The primary key for the " + StringUtil::Upper(key_type) + + " table " + reference + " is not defined in the edge table " + + edge_table->table_name); + } + + if (fk_columns.empty()) { + throw Exception(ExceptionType::INVALID, "The foreign key for the " + StringUtil::Upper(key_type) + + " table " + reference + " is not defined in the edge table " + + edge_table->table_name); + } + } +} + +void CreatePropertyGraphFunction::ValidateForeignKeyColumns(shared_ptr &edge_table, + const vector &fk_columns, + optional_ptr &table) { + for (const auto &fk : fk_columns) { + if (!table->ColumnExists(fk)) { + throw Exception(ExceptionType::INVALID, + "Foreign key " + fk + " does not exist in table " + edge_table->table_name); + } + } +} + +// Helper function to check if the vertex table is registered +void CreatePropertyGraphFunction::ValidateVertexTableRegistration(shared_ptr &pg_table, + const case_insensitive_set_t &v_table_names) { + if (v_table_names.find(pg_table->FullTableName()) == v_table_names.end()) { + throw Exception(ExceptionType::INVALID, "Referenced vertex table " + pg_table->FullTableName() + + " is not registered in the vertex tables."); + } +} + +// Helper function to validate primary keys in the source or destination tables +void CreatePropertyGraphFunction::ValidatePrimaryKeyInTable(ClientContext &context, + shared_ptr &pg_table, + const vector &pk_columns) { + auto table = Catalog::GetEntry(context, pg_table->catalog_name, pg_table->schema_name, + pg_table->table_name, OnEntryNotFound::RETURN_NULL); + if (!table) { + throw Exception(ExceptionType::INVALID, "Table with name " + pg_table->table_name + " does not exist"); + } + + for 
(const auto &pk : pk_columns) { + if (!table->ColumnExists(pk)) { + throw Exception(ExceptionType::INVALID, + "Primary key " + pk + " does not exist in table " + pg_table->table_name); + } + } +} + +unique_ptr CreatePropertyGraphFunction::CreatePropertyGraphBind(ClientContext &context, + TableFunctionBindInput &input, + vector &return_types, + vector &names) { + names.emplace_back("Success"); + return_types.emplace_back(LogicalType::BOOLEAN); + auto duckpgq_state = GetDuckPGQState(context); + + const auto duckpgq_parse_data = dynamic_cast(duckpgq_state->parse_data.get()); + + if (!duckpgq_parse_data) { + return {}; + } + auto statement = dynamic_cast(duckpgq_parse_data->statement.get()); + auto info = dynamic_cast(statement->info.get()); + auto pg_table = duckpgq_state->registered_property_graphs.find(info->property_graph_name); + + if (pg_table != duckpgq_state->registered_property_graphs.end() && + info->on_conflict == OnCreateConflict::ERROR_ON_CONFLICT) { + throw Exception(ExceptionType::INVALID, + "Property graph table with name " + info->property_graph_name + " already exists"); + } + + case_insensitive_set_t v_table_names; + for (auto &vertex_table : info->vertex_tables) { + try { + Binder::BindSchemaOrCatalog(context, vertex_table->catalog_name, vertex_table->schema_name); + auto table = + Catalog::GetEntry(context, vertex_table->catalog_name, vertex_table->schema_name, + vertex_table->table_name, OnEntryNotFound::RETURN_NULL); + if (!table) { + throw Exception(ExceptionType::INVALID, + "Table with name " + vertex_table->table_name + " does not exist"); + } + CheckPropertyGraphTableColumns(vertex_table, table); + CheckPropertyGraphTableLabels(vertex_table, table); + } catch (CatalogException &e) { + auto table = + Catalog::GetEntry(context, vertex_table->catalog_name, vertex_table->schema_name, + vertex_table->table_name, OnEntryNotFound::RETURN_NULL); + if (table) { + throw Exception(ExceptionType::INVALID, "Found a view with name " + 
vertex_table->table_name + + ". Creating property graph tables over views is " + "currently not supported."); + } + throw Exception(ExceptionType::INVALID, e.what()); + } catch (BinderException &e) { + throw Exception(ExceptionType::INVALID, "Catalog '" + vertex_table->catalog_name + "' does not exist!"); + } + v_table_names.insert(vertex_table->FullTableName()); + if (vertex_table->hasTableNameAlias()) { + v_table_names.insert(vertex_table->table_name_alias); + } + } + + for (auto &edge_table : info->edge_tables) { + try { + Binder::BindSchemaOrCatalog(context, edge_table->catalog_name, edge_table->schema_name); + auto table = + Catalog::GetEntry(context, edge_table->catalog_name, edge_table->schema_name, + edge_table->table_name, OnEntryNotFound::RETURN_NULL); + if (!table) { + throw Exception(ExceptionType::INVALID, + "Table with name " + edge_table->table_name + " does not exist"); + } + CheckPropertyGraphTableColumns(edge_table, table); + CheckPropertyGraphTableLabels(edge_table, table); + Binder::BindSchemaOrCatalog(context, edge_table->source_catalog, edge_table->source_schema); + Binder::BindSchemaOrCatalog(context, edge_table->destination_catalog, edge_table->destination_schema); + + auto &table_constraints = table->GetConstraints(); + + ValidateKeys(edge_table, edge_table->source_reference, "source", edge_table->source_pk, + edge_table->source_fk, table_constraints); + + // Check source foreign key columns exist in the table + ValidateForeignKeyColumns(edge_table, edge_table->source_fk, table); + + // Validate destination keys + ValidateKeys(edge_table, edge_table->destination_reference, "destination", edge_table->destination_pk, + edge_table->destination_fk, table_constraints); + + // Check destination foreign key columns exist in the table + ValidateForeignKeyColumns(edge_table, edge_table->destination_fk, table); + + // Validate source table registration + ValidateVertexTableRegistration(edge_table->source_pg_table, v_table_names); + + // Validate 
primary keys in the source table + ValidatePrimaryKeyInTable(context, edge_table->source_pg_table, edge_table->source_pk); + + // Validate destination table registration + ValidateVertexTableRegistration(edge_table->destination_pg_table, v_table_names); + + // Validate primary keys in the destination table + ValidatePrimaryKeyInTable(context, edge_table->destination_pg_table, edge_table->destination_pk); + } catch (CatalogException &e) { + auto table = Catalog::GetEntry(context, edge_table->catalog_name, edge_table->schema_name, + edge_table->table_name, OnEntryNotFound::RETURN_NULL); + if (table) { + throw Exception(ExceptionType::INVALID, "Found a view with name " + edge_table->table_name + + ". Creating property graph tables over views is " + "currently not supported."); + } + throw Exception(ExceptionType::INVALID, e.what()); + } catch (BinderException &e) { + throw Exception(ExceptionType::INVALID, "Catalog '" + edge_table->catalog_name + "' does not exist!"); + } + } + return make_uniq(info); +} + +unique_ptr +CreatePropertyGraphFunction::CreatePropertyGraphInit(ClientContext &context, TableFunctionInitInput &input) { + return make_uniq(); +} + +void CreatePropertyGraphFunction::CreatePropertyGraphFunc(ClientContext &context, TableFunctionInput &data_p, + DataChunk &output) { + auto &bind_data = data_p.bind_data->Cast(); + auto pg_info = bind_data.create_pg_info; + auto duckpgq_state = GetDuckPGQState(context); + + for (auto &local_client_context : ConnectionManager::Get(*context.db).GetConnectionList()) { + auto local_state = GetDuckPGQState(*local_client_context); + local_state->registered_property_graphs[pg_info->property_graph_name] = pg_info->Copy(); + } + + duckpgq_state->InitializeInternalTable(context); + auto new_conn = make_shared_ptr(*context.db); + auto retrieve_query = new_conn->Query("SELECT * FROM __duckpgq_internal where property_graph = '" + + pg_info->property_graph_name + "';"); + if (retrieve_query->HasError()) { + throw 
TransactionException(retrieve_query->GetError()); + } + auto &query_result = retrieve_query->Cast(); + if (query_result.RowCount() > 0) { + if (pg_info->on_conflict == OnCreateConflict::ERROR_ON_CONFLICT) { + throw Exception(ExceptionType::INVALID, + "Property graph " + pg_info->property_graph_name + " is already registered"); + } + if (pg_info->on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT) { + return; // Do nothing and silently return + } + if (pg_info->on_conflict == OnCreateConflict::REPLACE_ON_CONFLICT) { + // DELETE the old property graph and insert new one. + new_conn->Query("DELETE FROM __duckpgq_internal WHERE property_graph = '" + pg_info->property_graph_name + + "';"); + } + } + + string insert_info = "INSERT INTO __duckpgq_internal VALUES "; + for (const auto &v_table : pg_info->vertex_tables) { + insert_info += "("; + insert_info += "'" + pg_info->property_graph_name + "', "; + insert_info += "'" + v_table->table_name + "', "; + insert_info += "'" + v_table->main_label + "', "; + insert_info += "true, "; // is_vertex_table + insert_info += "NULL, "; // source_table + insert_info += "NULL, "; // source_pk + insert_info += "NULL, "; // source_fk + insert_info += "NULL, "; // destination_table + insert_info += "NULL, "; // destination_pk + insert_info += "NULL, "; // destination_fk + insert_info += v_table->discriminator.empty() ? "NULL, " : "'" + v_table->discriminator + "', "; + if (!v_table->discriminator.empty()) { + insert_info += "["; + for (idx_t i = 0; i < v_table->sub_labels.size(); i++) { + insert_info += "'" + v_table->sub_labels[i] + (i == v_table->sub_labels.size() - 1 ? 
"'" : "', "); + } + insert_info += "],"; + } else { + insert_info += "NULL,"; + } + insert_info += "'" + v_table->catalog_name + "', "; + insert_info += "'" + v_table->schema_name + "', "; + insert_info += "NULL,"; // source table catalog + insert_info += "NULL,"; // source table schema + insert_info += "NULL,"; // destination table catalog + insert_info += "NULL,"; // destination table schema + insert_info += "["; // Start of column names + for (idx_t i = 0; i < v_table->column_names.size(); i++) { + insert_info += "'" + v_table->column_names[i] + (i == v_table->column_names.size() - 1 ? "'" : "', "); + } + insert_info += "],"; // End of column names + insert_info += "["; // Start of column aliases + for (idx_t i = 0; i < v_table->column_aliases.size(); i++) { + insert_info += "'" + v_table->column_aliases[i] + (i == v_table->column_aliases.size() - 1 ? "'" : "', "); + } + insert_info += "]"; // End of column aliases + insert_info += "), "; + } + + for (const auto &e_table : pg_info->edge_tables) { + insert_info += "("; + insert_info += "'" + pg_info->property_graph_name + "', "; + insert_info += "'" + e_table->table_name + "', "; + insert_info += "'" + e_table->main_label + "', "; + insert_info += "false, "; // is_vertex_table + insert_info += "'" + e_table->source_reference + "', "; + insert_info += "["; + for (const auto &source_pk : e_table->source_pk) { + insert_info += "'" + source_pk + "', "; + } + insert_info += "], "; + insert_info += "["; + for (const auto &source_fk : e_table->source_fk) { + insert_info += "'" + source_fk + "', "; + } + insert_info += "], "; + + insert_info += "'" + e_table->destination_reference + "', "; + insert_info += "["; + for (const auto &destination_pk : e_table->destination_pk) { + insert_info += "'" + destination_pk + "', "; + } + insert_info += "], "; + insert_info += "["; + for (const auto &destination_fk : e_table->destination_fk) { + insert_info += "'" + destination_fk + "', "; + } + insert_info += "], "; + + insert_info 
+= e_table->discriminator.empty() ? "NULL, " : "'" + e_table->discriminator + "', "; + if (!e_table->discriminator.empty()) { + insert_info += "["; + for (idx_t i = 0; i < e_table->sub_labels.size(); i++) { + insert_info += "'" + e_table->sub_labels[i] + (i == e_table->sub_labels.size() - 1 ? "'" : "', "); + } + insert_info += "], "; + } else { + insert_info += "NULL, "; + } + insert_info += "'" + e_table->catalog_name + "', "; + insert_info += "'" + e_table->schema_name + "', "; + insert_info += "'" + e_table->source_catalog + "', "; + insert_info += "'" + e_table->source_schema + "', "; + insert_info += "'" + e_table->destination_catalog + "', "; + insert_info += "'" + e_table->destination_schema + "', "; + insert_info += "["; // Start of column names + for (idx_t i = 0; i < e_table->column_names.size(); i++) { + insert_info += "'" + e_table->column_names[i] + (i == e_table->column_names.size() - 1 ? "'" : "', "); + } + insert_info += "],"; // End of column names + insert_info += "["; // Start of column aliases + for (idx_t i = 0; i < e_table->column_aliases.size(); i++) { + insert_info += "'" + e_table->column_aliases[i] + (i == e_table->column_aliases.size() - 1 ? 
"'" : "', "); + } + insert_info += "]"; // End of column aliases + insert_info += "), "; + } + auto insert_query = new_conn->Query(insert_info); + if (insert_query->HasError()) { + throw TransactionException(insert_query->GetError()); + } +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreTableFunctions::RegisterCreatePropertyGraphTableFunction(ExtensionLoader &loader) { + loader.RegisterFunction(CreatePropertyGraphFunction()); +} + +} // namespace duckdb diff --git a/src/core/functions/table/describe_property_graph.cpp b/src/core/functions/table/describe_property_graph.cpp new file mode 100644 index 00000000..cfdcbdcc --- /dev/null +++ b/src/core/functions/table/describe_property_graph.cpp @@ -0,0 +1,166 @@ +#include "duckpgq/core/functions/table/describe_property_graph.hpp" +#include "duckdb/parser/parsed_data/create_property_graph_info.hpp" +#include "duckdb/parser/query_node/select_node.hpp" +#include "duckdb/parser/statement/create_statement.hpp" +#include "duckdb/parser/tableref/showref.hpp" +#include +#include +#include +#include + +namespace duckdb { + +unique_ptr DescribePropertyGraphFunction::DescribePropertyGraphBind(ClientContext &context, + TableFunctionBindInput &input, + vector &return_types, + vector &names) { + auto duckpgq_state = GetDuckPGQState(context); + + const auto duckpgq_parse_data = dynamic_cast(duckpgq_state->parse_data.get()); + + if (!duckpgq_parse_data) { + return {}; + } + auto statement = dynamic_cast(duckpgq_parse_data->statement.get()); + auto select_node = dynamic_cast(statement->node.get()); + auto show_ref = dynamic_cast(select_node->from_table.get()); + + auto pg_table = duckpgq_state->registered_property_graphs.find(show_ref->table_name); + if (pg_table == duckpgq_state->registered_property_graphs.end()) { + throw Exception(ExceptionType::INVALID, "Property graph " + 
show_ref->table_name + " does not exist."); + } + auto property_graph = dynamic_cast(pg_table->second.get()); + names.emplace_back("property_graph"); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("table_name"); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("label"); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("is_vertex_table"); + return_types.emplace_back(LogicalType::BOOLEAN); + names.emplace_back("source_table"); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("source_pk"); + return_types.emplace_back(LogicalType::LIST(LogicalType::VARCHAR)); + names.emplace_back("source_fk"); + return_types.emplace_back(LogicalType::LIST(LogicalType::VARCHAR)); + names.emplace_back("destination_table"); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("destination_pk"); + return_types.emplace_back(LogicalType::LIST(LogicalType::VARCHAR)); + names.emplace_back("destination_fk"); + return_types.emplace_back(LogicalType::LIST(LogicalType::VARCHAR)); + names.emplace_back("discriminator"); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("sub_labels"); + return_types.emplace_back(LogicalType::LIST(LogicalType::VARCHAR)); + names.emplace_back("catalog"); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("schema"); + return_types.emplace_back(LogicalType::VARCHAR); + + return make_uniq(property_graph); +} + +unique_ptr +DescribePropertyGraphFunction::DescribePropertyGraphInit(ClientContext &context, TableFunctionInitInput &input) { + return make_uniq(); +} + +void DescribePropertyGraphFunction::DescribePropertyGraphFunc(ClientContext &context, TableFunctionInput &data_p, + DataChunk &output) { + auto &bind_data = data_p.bind_data->Cast(); + auto &data = data_p.global_state->Cast(); + if (data.done) { + return; + } + auto pg_info = bind_data.describe_pg_info; + idx_t vector_idx = 0; + for (const auto &vertex_table : 
pg_info->vertex_tables) { + output.SetValue(0, vector_idx, Value(pg_info->property_graph_name)); + output.SetValue(1, vector_idx, Value(vertex_table->table_name)); + output.SetValue(2, vector_idx, Value(vertex_table->main_label)); + output.SetValue(3, vector_idx, Value(vertex_table->is_vertex_table)); + output.SetValue(4, vector_idx, Value()); + output.SetValue(5, vector_idx, Value()); + output.SetValue(6, vector_idx, Value()); + output.SetValue(7, vector_idx, Value()); + output.SetValue(8, vector_idx, Value()); + output.SetValue(9, vector_idx, Value()); + if (!vertex_table->discriminator.empty()) { + output.SetValue(10, vector_idx, Value(vertex_table->discriminator)); + vector sub_labels; + for (const auto &label : vertex_table->sub_labels) { + sub_labels.push_back(Value(label)); + } + output.SetValue(11, vector_idx, Value::LIST(LogicalType::VARCHAR, sub_labels)); + } else { + output.SetValue(10, vector_idx, Value()); + output.SetValue(11, vector_idx, Value()); + } + if (vertex_table->catalog_name.empty()) { + output.SetValue(12, vector_idx, Value()); + } else { + output.SetValue(12, vector_idx, Value(vertex_table->catalog_name)); + } + output.SetValue(13, vector_idx, Value(vertex_table->schema_name)); + vector_idx++; + } + for (const auto &edge_table : pg_info->edge_tables) { + output.SetValue(0, vector_idx, Value(pg_info->property_graph_name)); + output.SetValue(1, vector_idx, Value(edge_table->table_name)); + output.SetValue(2, vector_idx, Value(edge_table->main_label)); + output.SetValue(3, vector_idx, Value(edge_table->is_vertex_table)); + output.SetValue(4, vector_idx, Value(edge_table->source_reference)); + vector source_pk_list; + for (const auto &col : edge_table->source_pk) { + source_pk_list.push_back(Value(col)); + } + output.SetValue(5, vector_idx, Value::LIST(LogicalType::VARCHAR, source_pk_list)); + vector source_fk_list; + for (const auto &col : edge_table->source_fk) { + source_fk_list.push_back(Value(col)); + } + output.SetValue(6, vector_idx, 
Value::LIST(LogicalType::VARCHAR, source_fk_list)); + output.SetValue(7, vector_idx, Value(edge_table->destination_reference)); + vector destination_pk_list; + for (const auto &col : edge_table->destination_pk) { + destination_pk_list.push_back(Value(col)); + } + output.SetValue(8, vector_idx, Value::LIST(LogicalType::VARCHAR, destination_pk_list)); + vector destination_fk_list; + for (const auto &col : edge_table->destination_fk) { + destination_fk_list.push_back(Value(col)); + } + output.SetValue(9, vector_idx, Value::LIST(LogicalType::VARCHAR, destination_fk_list)); + if (!edge_table->discriminator.empty()) { + output.SetValue(10, vector_idx, Value(edge_table->discriminator)); + vector sub_labels; + for (const auto &label : edge_table->sub_labels) { + sub_labels.push_back(Value(label)); + } + output.SetValue(11, vector_idx, Value::LIST(LogicalType::VARCHAR, sub_labels)); + } else { + output.SetValue(10, vector_idx, Value()); + output.SetValue(11, vector_idx, Value()); + } + if (edge_table->catalog_name.empty()) { + output.SetValue(12, vector_idx, Value()); + } else { + output.SetValue(12, vector_idx, Value(edge_table->catalog_name)); + } + output.SetValue(13, vector_idx, Value(edge_table->schema_name)); + vector_idx++; + } + output.SetCardinality(vector_idx); + data.done = true; +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreTableFunctions::RegisterDescribePropertyGraphTableFunction(ExtensionLoader &loader) { + loader.RegisterFunction(DescribePropertyGraphFunction()); +} + +} // namespace duckdb diff --git a/src/core/functions/table/drop_property_graph.cpp b/src/core/functions/table/drop_property_graph.cpp new file mode 100644 index 00000000..a70faef4 --- /dev/null +++ b/src/core/functions/table/drop_property_graph.cpp @@ -0,0 +1,67 @@ +#include "duckpgq/core/functions/table/drop_property_graph.hpp" + 
+#include "duckdb/parser/parsed_data/drop_property_graph_info.hpp" +#include +#include +#include +#include "duckdb/main/connection_manager.hpp" + +namespace duckdb { + +unique_ptr DropPropertyGraphFunction::DropPropertyGraphBind(ClientContext &context, + TableFunctionBindInput &, + vector &return_types, + vector &names) { + names.emplace_back("success"); + return_types.emplace_back(LogicalType::VARCHAR); + auto duckpgq_state = GetDuckPGQState(context); + + auto duckpgq_parse_data = dynamic_cast(duckpgq_state->parse_data.get()); + + if (!duckpgq_parse_data) { + return {}; + } + auto statement = dynamic_cast(duckpgq_parse_data->statement.get()); + auto info = dynamic_cast(statement->info.get()); + return make_uniq(info); +} + +unique_ptr DropPropertyGraphFunction::DropPropertyGraphInit(ClientContext &, + TableFunctionInitInput &) { + return make_uniq(); +} + +void DropPropertyGraphFunction::DropPropertyGraphFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &) { + auto &bind_data = data_p.bind_data->Cast(); + + auto pg_info = bind_data.drop_pg_info; + auto duckpgq_state = GetDuckPGQState(context); + + auto registered_pg = duckpgq_state->registered_property_graphs.find(pg_info->property_graph_name); + if (registered_pg == duckpgq_state->registered_property_graphs.end()) { + if (pg_info->missing_ok) { + return; // Do nothing + } + throw BinderException("Property graph %s does not exist.", pg_info->property_graph_name); + } + + for (auto &connection : ConnectionManager::Get(*context.db).GetConnectionList()) { + auto local_state = connection->registered_state->Get("duckpgq"); + if (!local_state) { + continue; + } + local_state->registered_property_graphs.erase(pg_info->property_graph_name); + } + + auto new_conn = make_shared_ptr(*context.db); + new_conn->Query("DELETE FROM __duckpgq_internal where property_graph = '" + pg_info->property_graph_name + "'"); +} + +//------------------------------------------------------------------------------ +// Register 
functions +//------------------------------------------------------------------------------ +void CoreTableFunctions::RegisterDropPropertyGraphTableFunction(ExtensionLoader &loader) { + loader.RegisterFunction(DropPropertyGraphFunction()); +} + +} // namespace duckdb diff --git a/src/core/functions/table/local_clustering_coefficient.cpp b/src/core/functions/table/local_clustering_coefficient.cpp new file mode 100644 index 00000000..7091c332 --- /dev/null +++ b/src/core/functions/table/local_clustering_coefficient.cpp @@ -0,0 +1,47 @@ +#include "duckpgq/core/functions/table/local_clustering_coefficient.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckpgq_extension.hpp" +#include "duckpgq/core/utils/duckpgq_utils.hpp" + +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/function_expression.hpp" +#include "duckpgq/core/utils/compressed_sparse_row.hpp" +#include +#include +#include +#include + +namespace duckdb { + +// Main binding function +unique_ptr +LocalClusteringCoefficientFunction::LocalClusteringCoefficientBindReplace(ClientContext &context, + TableFunctionBindInput &input) { + auto pg_name = StringUtil::Lower(StringValue::Get(input.inputs[0])); + auto node_label = StringUtil::Lower(StringValue::Get(input.inputs[1])); + auto edge_label = StringUtil::Lower(StringValue::Get(input.inputs[2])); + + auto duckpgq_state = GetDuckPGQState(context); + auto pg_info = GetPropertyGraphInfo(duckpgq_state, pg_name); + auto edge_pg_entry = ValidateSourceNodeAndEdgeTable(pg_info, node_label, edge_label); + + auto select_node = CreateSelectNode(edge_pg_entry, "local_clustering_coefficient", "local_clustering_coefficient"); + + select_node->cte_map.map["csr_cte"] = CreateUndirectedCSRCTE(edge_pg_entry, select_node); + + auto subquery = make_uniq(); + subquery->node = std::move(select_node); + + auto result = make_uniq(std::move(subquery)); + result->alias = "lcc"; + // input.ref.alias = "lcc"; + return 
std::move(result); +} +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreTableFunctions::RegisterLocalClusteringCoefficientTableFunction(ExtensionLoader &loader) { + loader.RegisterFunction(LocalClusteringCoefficientFunction()); +} + +} // namespace duckdb diff --git a/src/core/functions/table/match.cpp b/src/core/functions/table/match.cpp new file mode 100644 index 00000000..acd621cb --- /dev/null +++ b/src/core/functions/table/match.cpp @@ -0,0 +1,1083 @@ +#include +#include "duckpgq/core/functions/table/match.hpp" + +#include "duckdb/common/string_util.hpp" +#include "duckdb/common/case_insensitive_map.hpp" +#include "duckdb/parser/tableref/matchref.hpp" +#include "duckdb/parser/tableref/subqueryref.hpp" +#include "duckdb/parser/tableref/joinref.hpp" +#include "duckdb/parser/tableref/basetableref.hpp" + +#include "duckdb/parser/expression/function_expression.hpp" +#include "duckdb/parser/expression/cast_expression.hpp" +#include "duckdb/parser/expression/between_expression.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/comparison_expression.hpp" +#include "duckdb/parser/expression/conjunction_expression.hpp" +#include "duckdb/parser/expression/star_expression.hpp" + +#include "duckdb/parser/query_node/set_operation_node.hpp" + +#include "duckdb/parser/query_node/select_node.hpp" +#include "duckdb/common/enums/subquery_type.hpp" +#include "duckdb/common/enums/joinref_type.hpp" +#include "duckpgq/core/utils/compressed_sparse_row.hpp" + +#include "duckdb/parser/property_graph_table.hpp" +#include "duckdb/parser/subpath_element.hpp" +#include +#include +#include + +namespace duckdb { + +namespace { + +// Get fully-qualified column names for the property graph [tbl], and insert +// into set [col_names]. 
+void PopulateFullyQualifiedColName(const vector> &tbls, + const case_insensitive_map_t> &tbl_name_to_aliases, + case_insensitive_set_t &col_names) { + for (const auto &cur_tbl : tbls) { + for (const auto &cur_col : cur_tbl->column_names) { + // It's legal to query by `` instead of `.`. + col_names.insert(cur_col); + + const string &tbl_name = cur_tbl->table_name; + auto iter = tbl_name_to_aliases.find(tbl_name); + // Prefer to use table alias specified in the statement, otherwise use + // table name. + if (iter == tbl_name_to_aliases.end()) { + col_names.insert(StringUtil::Format("%s.%s", tbl_name, cur_col)); + } else { + const auto &all_aliases = iter->second; + for (const auto &cur_alias : all_aliases) { + col_names.insert(StringUtil::Format("%s.%s", cur_alias, cur_col)); + } + } + } + } +} + +// Get fully-qualified column names from property graph. +case_insensitive_set_t +GetFullyQualifiedColFromPg(const CreatePropertyGraphInfo &pg, + const case_insensitive_map_t> &alias_map) { + case_insensitive_map_t> relation_name_to_aliases; + for (const auto &entry : alias_map) { + relation_name_to_aliases[entry.second->table_name].emplace_back(entry.first); + } + + case_insensitive_set_t col_names; + PopulateFullyQualifiedColName(pg.vertex_tables, relation_name_to_aliases, col_names); + PopulateFullyQualifiedColName(pg.edge_tables, relation_name_to_aliases, col_names); + return col_names; +} + +// Get all fully-qualified column names from the given property graph [pg] for +// the given relation [alias], only vertex table is selected. +// +// Return column reference expressions which represent columns to select. 
+vector> +GetColRefExprFromPg(const case_insensitive_map_t> &alias_map, const std::string &alias) { + vector> registered_col_names; + auto iter = alias_map.find(alias); + D_ASSERT(iter != alias_map.end()); + const auto &tbl = iter->second; + registered_col_names.reserve(tbl->column_names.size()); + for (const auto &cur_col : tbl->column_names) { + auto new_col_names = vector {"", ""}; + new_col_names[0] = alias; + new_col_names[1] = cur_col; + registered_col_names.emplace_back(make_uniq(std::move(new_col_names))); + } + return registered_col_names; +} + +// Get all fully-qualified column names from the given property graph [pg] for +// all vertex relations. +// +// Return column reference expressions which represent columns to select. +vector> +GetColRefExprFromPg(const case_insensitive_map_t> &alias_map) { + vector> registered_col_names; + for (const auto &alias_and_table : alias_map) { + const auto &alias = alias_and_table.first; + const auto &tbl = alias_and_table.second; + // Skip edge table. 
+ registered_col_names.reserve(registered_col_names.size() + tbl->column_names.size()); + for (const auto &cur_col : tbl->column_names) { + auto new_col_names = vector {"", ""}; + new_col_names[0] = alias; + new_col_names[1] = cur_col; + registered_col_names.emplace_back(make_uniq(std::move(new_col_names))); + } + } + return registered_col_names; +} + +} // namespace + +shared_ptr PGQMatchFunction::FindGraphTable(const string &label, + CreatePropertyGraphInfo &pg_table) { + const auto graph_table_entry = pg_table.label_map.find(label); + if (graph_table_entry == pg_table.label_map.end()) { + throw Exception(ExceptionType::BINDER, + "The label " + label + " is not registered in property graph " + pg_table.property_graph_name); + } + + return graph_table_entry->second; +} + +void PGQMatchFunction::CheckInheritance(const shared_ptr &tableref, PathElement *element, + vector> &conditions) { + if (tableref->main_label == element->label) { + return; + } + auto constant_expression_two = make_uniq(Value::INTEGER(2)); + const auto itr = std::find(tableref->sub_labels.begin(), tableref->sub_labels.end(), element->label); + + const auto idx_of_label = std::distance(tableref->sub_labels.begin(), itr); + auto constant_expression_idx_label = + make_uniq(Value::INTEGER(static_cast(idx_of_label))); + + vector> power_of_children; + power_of_children.push_back(std::move(constant_expression_two)); + power_of_children.push_back(std::move(constant_expression_idx_label)); + auto power_of_term = make_uniq("power", std::move(power_of_children)); + auto bigint_cast = make_uniq(LogicalType::INTEGER, std::move(power_of_term)); + auto subcategory_colref = make_uniq(tableref->discriminator, element->variable_binding); + vector> and_children; + and_children.push_back(std::move(subcategory_colref)); + and_children.push_back(std::move(bigint_cast)); + + auto and_expression = make_uniq("&", std::move(and_children)); + + auto constant_expression_idx_label_comparison = + 
make_uniq(Value::INTEGER(static_cast(std::pow(2, idx_of_label)))); + + auto subset_compare = make_uniq(ExpressionType::COMPARE_EQUAL, std::move(and_expression), + std::move(constant_expression_idx_label_comparison)); + conditions.push_back(std::move(subset_compare)); +} + +void PGQMatchFunction::CheckEdgeTableConstraints(const string &src_reference, const string &dst_reference, + const shared_ptr &edge_table) { + if (src_reference != edge_table->source_reference) { + throw BinderException("Label %s is not registered as a source reference " + "for edge pattern of table %s", + src_reference, edge_table->table_name); + } + if (dst_reference != edge_table->destination_reference) { + throw BinderException("Label %s is not registered as a destination " + "reference for edge pattern of table %s", + src_reference, edge_table->table_name); + } +} + +unique_ptr PGQMatchFunction::CreateMatchJoinExpression(vector vertex_keys, + vector edge_keys, + const string &vertex_alias, + const string &edge_alias) { + vector> conditions; + + if (vertex_keys.size() != edge_keys.size()) { + throw BinderException("Vertex columns and edge columns size mismatch"); + } + for (idx_t i = 0; i < vertex_keys.size(); i++) { + auto vertex_colref = make_uniq(vertex_keys[i], vertex_alias); + auto edge_colref = make_uniq(edge_keys[i], edge_alias); + conditions.push_back(make_uniq(ExpressionType::COMPARE_EQUAL, std::move(vertex_colref), + std::move(edge_colref))); + } + unique_ptr where_clause; + + for (auto &condition : conditions) { + if (where_clause) { + where_clause = make_uniq(ExpressionType::CONJUNCTION_AND, std::move(where_clause), + std::move(condition)); + } else { + where_clause = std::move(condition); + } + } + + return where_clause; +} + +PathElement *PGQMatchFunction::GetPathElement(const unique_ptr &path_reference) { + if (path_reference->path_reference_type == PGQPathReferenceType::PATH_ELEMENT) { + return reinterpret_cast(path_reference.get()); + } + if 
(path_reference->path_reference_type == PGQPathReferenceType::SUBPATH) { + return nullptr; + } + throw InternalException("Unknown path reference type detected"); +} + +SubPath *PGQMatchFunction::GetSubPath(const unique_ptr &path_reference) { + if (path_reference->path_reference_type == PGQPathReferenceType::PATH_ELEMENT) { + return nullptr; + } + if (path_reference->path_reference_type == PGQPathReferenceType::SUBPATH) { + return reinterpret_cast(path_reference.get()); + } + throw InternalException("Unknown path reference type detected"); +} + +unique_ptr PGQMatchFunction::CreateCountCTESubquery() { + //! BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x + + auto temp_cte_select_node = make_uniq(); + + auto cte_table_ref = make_uniq(); + + cte_table_ref->table_name = "cte1"; + temp_cte_select_node->from_table = std::move(cte_table_ref); + vector> children; + children.push_back(make_uniq("temp", "cte1")); + + auto count_function = make_uniq("count", std::move(children)); + + auto zero = make_uniq(Value::INTEGER(0)); + + vector> multiply_children; + + multiply_children.push_back(std::move(zero)); + multiply_children.push_back(std::move(count_function)); + auto multiply_function = make_uniq("multiply", std::move(multiply_children)); + multiply_function->alias = "temp"; + temp_cte_select_node->select_list.push_back(std::move(multiply_function)); + auto temp_cte_select_statement = make_uniq(); + temp_cte_select_statement->node = std::move(temp_cte_select_node); + + auto temp_cte_select_subquery = make_uniq(std::move(temp_cte_select_statement), "__x"); + //! 
END OF (SELECT count(cte1.temp) * 0 as temp from cte1) __x + return temp_cte_select_subquery; +} + +void PGQMatchFunction::EdgeTypeAny(const shared_ptr &edge_table, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions, + unique_ptr &from_clause) { + + // START SELECT src, dst, * from edge_table + auto src_dst_select_node = make_uniq(); + + auto edge_left_ref = edge_table->CreateBaseTableRef(edge_binding); + src_dst_select_node->from_table = std::move(edge_left_ref); + auto src_dst_children = vector>(); + src_dst_children.push_back(make_uniq(edge_table->source_fk[0], edge_binding)); + src_dst_children.push_back(make_uniq(edge_table->destination_fk[0], edge_binding)); + src_dst_children.push_back(make_uniq()); + + src_dst_select_node->select_list = std::move(src_dst_children); + // END SELECT src, dst, * from edge_table + + // START SELECT dst, src, * from edge_table + auto dst_src_select_node = make_uniq(); + + auto edge_right_ref = edge_table->CreateBaseTableRef(edge_binding); + auto dst_src_children = vector>(); + dst_src_select_node->from_table = std::move(edge_right_ref); + + dst_src_children.push_back(make_uniq(edge_table->destination_fk[0], edge_binding)); + dst_src_children.push_back(make_uniq(edge_table->source_fk[0], edge_binding)); + dst_src_children.push_back(make_uniq()); + + dst_src_select_node->select_list = std::move(dst_src_children); + // END SELECT dst, src, * from edge_table + + auto union_node = make_uniq(); + union_node->setop_type = SetOperationType::UNION; + union_node->setop_all = true; + union_node->children.push_back(std::move(src_dst_select_node)); + union_node->children.push_back(std::move(dst_src_select_node)); + auto union_select = make_uniq(); + union_select->node = std::move(union_node); + // (SELECT src, dst, * from edge_table UNION ALL SELECT dst, src, * from + // edge_table) + auto union_subquery = make_uniq(std::move(union_select)); + union_subquery->alias = edge_binding; + if 
(from_clause) { + auto from_join = make_uniq(JoinRefType::CROSS); + from_join->left = std::move(from_clause); + from_join->right = std::move(union_subquery); + from_clause = std::move(from_join); + } else { + from_clause = std::move(union_subquery); + } + // (a) src.key = edge.src + auto src_left_expr = + CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, prev_binding, edge_binding); + // (b) dst.key = edge.dst + auto dst_left_expr = + CreateMatchJoinExpression(edge_table->destination_pk, edge_table->destination_fk, next_binding, edge_binding); + // (a) AND (b) + auto combined_left_expr = make_uniq(ExpressionType::CONJUNCTION_AND, + std::move(src_left_expr), std::move(dst_left_expr)); + + conditions.push_back(std::move(combined_left_expr)); +} + +void PGQMatchFunction::EdgeTypeLeft(const shared_ptr &edge_table, const string &next_table_name, + const string &prev_table_name, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions) { + CheckEdgeTableConstraints(next_table_name, prev_table_name, edge_table); + conditions.push_back( + CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, next_binding, edge_binding)); + conditions.push_back( + CreateMatchJoinExpression(edge_table->destination_pk, edge_table->destination_fk, prev_binding, edge_binding)); +} + +void PGQMatchFunction::EdgeTypeRight(const shared_ptr &edge_table, const string &next_table_name, + const string &prev_table_name, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions) { + CheckEdgeTableConstraints(prev_table_name, next_table_name, edge_table); + conditions.push_back( + CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, prev_binding, edge_binding)); + conditions.push_back( + CreateMatchJoinExpression(edge_table->destination_pk, edge_table->destination_fk, next_binding, edge_binding)); +} + +void 
PGQMatchFunction::EdgeTypeLeftRight(const shared_ptr &edge_table, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions, + case_insensitive_map_t> &alias_map, + int32_t &extra_alias_counter) { + auto src_left_expr = + CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, next_binding, edge_binding); + auto dst_left_expr = + CreateMatchJoinExpression(edge_table->destination_pk, edge_table->destination_fk, prev_binding, edge_binding); + + auto combined_left_expr = make_uniq(ExpressionType::CONJUNCTION_AND, + std::move(src_left_expr), std::move(dst_left_expr)); + + const auto additional_edge_alias = edge_binding + std::to_string(extra_alias_counter); + extra_alias_counter++; + + alias_map[additional_edge_alias] = edge_table; + + auto src_right_expr = + CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, prev_binding, additional_edge_alias); + auto dst_right_expr = CreateMatchJoinExpression(edge_table->destination_pk, edge_table->destination_fk, + next_binding, additional_edge_alias); + auto combined_right_expr = make_uniq(ExpressionType::CONJUNCTION_AND, + std::move(src_right_expr), std::move(dst_right_expr)); + + auto combined_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(combined_left_expr), std::move(combined_right_expr)); + conditions.push_back(std::move(combined_expr)); +} + +PathElement *PGQMatchFunction::HandleNestedSubPath(unique_ptr &path_reference, + vector> &conditions, + idx_t element_idx) { + auto subpath = reinterpret_cast(path_reference.get()); + return GetPathElement(subpath->path_list[element_idx]); +} + +unique_ptr PGQMatchFunction::CreateWhereClause(vector> &conditions) { + unique_ptr where_clause; + for (auto &condition : conditions) { + if (where_clause) { + where_clause = make_uniq(ExpressionType::CONJUNCTION_AND, std::move(where_clause), + std::move(condition)); + } else { + where_clause = std::move(condition); + } + } + return 
where_clause; +} + +unique_ptr +PGQMatchFunction::GenerateShortestPathCTE(CreatePropertyGraphInfo &pg_table, SubPath *edge_subpath, + PathElement *previous_vertex_element, PathElement *next_vertex_element, + vector> &path_finding_conditions) { + auto cte_info = make_uniq(); + auto select_statement = make_uniq(); + auto select_node = make_uniq(); + + auto edge_element = GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, pg_table); + + auto src_row_id = make_uniq("rowid", previous_vertex_element->variable_binding); + auto dst_row_id = make_uniq("rowid", next_vertex_element->variable_binding); + auto csr_id = make_uniq(Value::INTEGER(0)); + + vector> pathfinding_children; + pathfinding_children.push_back(std::move(csr_id)); + pathfinding_children.push_back(std::move(GetCountTable( + edge_table->source_pg_table, previous_vertex_element->variable_binding, edge_table->source_pk[0]))); + pathfinding_children.push_back(std::move(src_row_id)); + pathfinding_children.push_back(std::move(dst_row_id)); + + auto shortest_path_function = make_uniq("shortestpath", std::move(pathfinding_children)); + shortest_path_function->alias = "path"; + select_node->select_list.push_back(std::move(shortest_path_function)); + auto src_rowid_outer_select = make_uniq("rowid", previous_vertex_element->variable_binding); + src_rowid_outer_select->alias = "src_rowid"; + select_node->select_list.push_back(std::move(src_rowid_outer_select)); + auto dst_rowid_outer_select = make_uniq("rowid", next_vertex_element->variable_binding); + dst_rowid_outer_select->alias = "dst_rowid"; + select_node->select_list.push_back(std::move(dst_rowid_outer_select)); + + auto src_tableref = edge_table->source_pg_table->CreateBaseTableRef(); + src_tableref->alias = previous_vertex_element->variable_binding; + auto dst_tableref = edge_table->destination_pg_table->CreateBaseTableRef(); + dst_tableref->alias = next_vertex_element->variable_binding; + auto 
first_cross_join_ref = make_uniq(JoinRefType::CROSS); + first_cross_join_ref->left = std::move(src_tableref); + first_cross_join_ref->right = std::move(dst_tableref); + auto temp_cte_select_subquery = CreateCountCTESubquery(); + + auto second_cross_join_ref = make_uniq(JoinRefType::CROSS); + second_cross_join_ref->left = std::move(first_cross_join_ref); + second_cross_join_ref->right = std::move(temp_cte_select_subquery); + + select_node->from_table = std::move(second_cross_join_ref); + vector> count_children; + count_children.push_back(make_uniq("temp", "cte1")); + auto count_function = make_uniq("count", std::move(count_children)); + + path_finding_conditions.push_back(AddPathQuantifierCondition( + previous_vertex_element->variable_binding, next_vertex_element->variable_binding, edge_table, edge_subpath)); + + select_node->where_clause = CreateWhereClause(path_finding_conditions); + + select_statement->node = std::move(select_node); + cte_info->query = std::move(select_statement); + return cte_info; +} + +unique_ptr PGQMatchFunction::CreatePathFindingFunction( + vector> &path_list, CreatePropertyGraphInfo &pg_table, const string &path_variable, + unique_ptr &final_select_node, vector> &conditions) { + // This method will return a SubqueryRef of a list of rowids + // For every vertex and edge element, we add the rowid to the list using + // list_append, or list_prepend The difficulty is that there may be a + // (un)bounded path pattern at some point in the query This is computed using + // the shortestpath() UDF and returns a list. This list will be part of the + // full list of element rowids, using list_concat. 
For now we will only + // support returning rowids + + unique_ptr final_list; + vector> path_finding_conditions; + auto previous_vertex_element = GetPathElement(path_list[0]); + SubPath *previous_vertex_subpath = nullptr; // NOLINT + if (!previous_vertex_element) { + // We hit a vertex element with a WHERE, but we only care about the rowid + // here + // In the future this might be a recursive path pattern + previous_vertex_subpath = reinterpret_cast(path_list[0].get()); + previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); + } + + for (idx_t idx_i = 1; idx_i < path_list.size(); idx_i = idx_i + 2) { + auto next_vertex_element = GetPathElement(path_list[idx_i + 1]); + SubPath *next_vertex_subpath = nullptr; // NOLINT + if (!next_vertex_element) { + next_vertex_subpath = reinterpret_cast(path_list[idx_i + 1].get()); + next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); + } + + auto edge_element = GetPathElement(path_list[idx_i]); + if (!edge_element) { + auto edge_subpath = reinterpret_cast(path_list[idx_i].get()); + if (edge_subpath->upper > 1) { + // (un)bounded shortest path + // Add the shortest path UDF as a CTE + if (previous_vertex_subpath) { + path_finding_conditions.push_back(std::move(previous_vertex_subpath->where_clause)); + } + if (next_vertex_subpath) { + path_finding_conditions.push_back(std::move(next_vertex_subpath->where_clause)); + } + if (final_select_node->cte_map.map.find("cte1") == final_select_node->cte_map.map.end()) { + edge_element = reinterpret_cast(edge_subpath->path_list[0].get()); + if (edge_element->match_type == PGQMatchType::MATCH_EDGE_RIGHT) { + final_select_node->cte_map.map["cte1"] = CreateDirectedCSRCTE( + FindGraphTable(edge_element->label, pg_table), previous_vertex_element->variable_binding, + edge_element->variable_binding, next_vertex_element->variable_binding); + } else if (edge_element->match_type == PGQMatchType::MATCH_EDGE_ANY) { + final_select_node->cte_map.map["cte1"] = 
+ CreateUndirectedCSRCTE(FindGraphTable(edge_element->label, pg_table), final_select_node); + } else { + throw NotImplementedException("Cannot do shortest path for edge type %s", + edge_element->match_type == PGQMatchType::MATCH_EDGE_LEFT + ? "MATCH_EDGE_LEFT" + : "MATCH_EDGE_LEFT_RIGHT"); + } + } + string shortest_path_cte_name = "shortest_path_cte"; + if (final_select_node->cte_map.map.find(shortest_path_cte_name) == + final_select_node->cte_map.map.end()) { + final_select_node->cte_map.map[shortest_path_cte_name] = GenerateShortestPathCTE( + pg_table, edge_subpath, previous_vertex_element, next_vertex_element, path_finding_conditions); + auto cte_shortest_path_ref = make_uniq(); + cte_shortest_path_ref->table_name = shortest_path_cte_name; + if (!final_select_node->from_table) { + final_select_node->from_table = std::move(cte_shortest_path_ref); + } else { + auto join_ref = make_uniq(JoinRefType::CROSS); + join_ref->left = std::move(final_select_node->from_table); + join_ref->right = std::move(cte_shortest_path_ref); + final_select_node->from_table = std::move(join_ref); + } + + conditions.push_back(make_uniq( + ExpressionType::COMPARE_EQUAL, + make_uniq("src_rowid", shortest_path_cte_name), + make_uniq("rowid", previous_vertex_element->variable_binding))); + conditions.push_back(make_uniq( + ExpressionType::COMPARE_EQUAL, + make_uniq("dst_rowid", shortest_path_cte_name), + make_uniq("rowid", next_vertex_element->variable_binding))); + } + auto shortest_path_ref = make_uniq("path", shortest_path_cte_name); + if (!final_list) { + final_list = std::move(shortest_path_ref); + } else { + auto pop_front_shortest_path_children = vector>(); + pop_front_shortest_path_children.push_back(std::move(shortest_path_ref)); + auto pop_front = + make_uniq("array_pop_front", std::move(pop_front_shortest_path_children)); + + auto final_list_children = vector>(); + final_list_children.push_back(std::move(final_list)); + final_list_children.push_back(std::move(pop_front)); + 
final_list = make_uniq("list_concat", std::move(final_list_children)); + } + // Set next vertex to be previous + previous_vertex_element = next_vertex_element; + continue; + } + if (previous_vertex_subpath) { + conditions.push_back(std::move(previous_vertex_subpath->where_clause)); + } + if (next_vertex_subpath) { + conditions.push_back(std::move(next_vertex_subpath->where_clause)); + } + edge_element = GetPathElement(edge_subpath->path_list[0]); + } + auto previous_rowid = make_uniq("rowid", previous_vertex_element->variable_binding); + auto edge_rowid = make_uniq("rowid", edge_element->variable_binding); + auto next_rowid = make_uniq("rowid", next_vertex_element->variable_binding); + auto starting_list_children = vector>(); + + if (!final_list) { + starting_list_children.push_back(std::move(previous_rowid)); + starting_list_children.push_back(std::move(edge_rowid)); + starting_list_children.push_back(std::move(next_rowid)); + final_list = make_uniq("list_value", std::move(starting_list_children)); + } else { + starting_list_children.push_back(std::move(edge_rowid)); + starting_list_children.push_back(std::move(next_rowid)); + auto next_elements_list = make_uniq("list_value", std::move(starting_list_children)); + auto final_list_children = vector>(); + final_list_children.push_back(std::move(final_list)); + final_list_children.push_back(std::move(next_elements_list)); + final_list = make_uniq("list_concat", std::move(final_list_children)); + } + previous_vertex_element = next_vertex_element; + previous_vertex_subpath = next_vertex_subpath; + } + + return final_list; +} + +void PGQMatchFunction::AddEdgeJoins(const shared_ptr &edge_table, + const shared_ptr &previous_vertex_table, + const shared_ptr &next_vertex_table, PGQMatchType edge_type, + const string &edge_binding, const string &prev_binding, const string &next_binding, + vector> &conditions, + case_insensitive_map_t> &alias_map, + int32_t &extra_alias_counter, unique_ptr &from_clause) { + if (edge_type != 
PGQMatchType::MATCH_EDGE_ANY) { + alias_map[edge_binding] = edge_table; + } + switch (edge_type) { + case PGQMatchType::MATCH_EDGE_ANY: { + EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding, conditions, from_clause); + break; + } + case PGQMatchType::MATCH_EDGE_LEFT: + EdgeTypeLeft(edge_table, next_vertex_table->table_name, previous_vertex_table->table_name, edge_binding, + prev_binding, next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_RIGHT: + EdgeTypeRight(edge_table, next_vertex_table->table_name, previous_vertex_table->table_name, edge_binding, + prev_binding, next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { + EdgeTypeLeftRight(edge_table, edge_binding, prev_binding, next_binding, conditions, alias_map, + extra_alias_counter); + break; + } + default: + throw InternalException("Unknown match type found"); + } +} + +unique_ptr +PGQMatchFunction::AddPathQuantifierCondition(const string &prev_binding, const string &next_binding, + const shared_ptr &edge_table, const SubPath *subpath) { + + auto src_row_id = make_uniq("rowid", prev_binding); + auto dst_row_id = make_uniq("rowid", next_binding); + auto csr_id = make_uniq(Value::INTEGER(0)); + + vector> pathfinding_children; + pathfinding_children.push_back(std::move(csr_id)); + pathfinding_children.push_back( + std::move(GetCountTable(edge_table->source_pg_table, prev_binding, edge_table->source_pk[0]))); + pathfinding_children.push_back(std::move(src_row_id)); + pathfinding_children.push_back(std::move(dst_row_id)); + + // Push bounds directly into the UDF to handle cases where shortest path + // is outside bounds but a longer path within bounds exists (issue #67) + auto lower_limit = make_uniq(Value::INTEGER(static_cast(subpath->lower))); + auto upper_limit = make_uniq(Value::INTEGER(static_cast(subpath->upper))); + pathfinding_children.push_back(std::move(lower_limit)); + pathfinding_children.push_back(std::move(upper_limit)); + + auto 
reachability_function = + make_uniq("iterativelengthbounded", std::move(pathfinding_children)); + return std::move(reachability_function); +} + +void PGQMatchFunction::AddPathFinding(unique_ptr &select_node, + vector> &conditions, const string &prev_binding, + const string &edge_binding, const string &next_binding, + const shared_ptr &edge_table, + CreatePropertyGraphInfo &pg_table, SubPath *subpath, PGQMatchType edge_type) { + //! START + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x + if (select_node->cte_map.map.find("cte1") == select_node->cte_map.map.end()) { + if (edge_type == PGQMatchType::MATCH_EDGE_RIGHT) { + select_node->cte_map.map["cte1"] = + CreateDirectedCSRCTE(edge_table, prev_binding, edge_binding, next_binding); + } else if (edge_type == PGQMatchType::MATCH_EDGE_ANY) { + select_node->cte_map.map["cte1"] = CreateUndirectedCSRCTE(edge_table, select_node); + } else { + throw NotImplementedException("Cannot do shortest path for edge type %s", + edge_type == PGQMatchType::MATCH_EDGE_LEFT ? "MATCH_EDGE_LEFT" + : "MATCH_EDGE_LEFT_RIGHT"); + } + } + if (select_node->cte_map.map.find("shortest_path_cte") != select_node->cte_map.map.end()) { + return; + } + auto temp_cte_select_subquery = CreateCountCTESubquery(); + if (select_node->from_table) { + // create a cross join since there is already something in the + // from clause + auto from_join = make_uniq(JoinRefType::CROSS); + from_join->left = std::move(select_node->from_table); + from_join->right = std::move(temp_cte_select_subquery); + select_node->from_table = std::move(from_join); + } else { + select_node->from_table = std::move(temp_cte_select_subquery); + } + //! END + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x + + //! START + //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) + //! from dst c, a.rowid, b.rowid) between lower and upper + conditions.push_back(AddPathQuantifierCondition(prev_binding, next_binding, edge_table, subpath)); + //! END + //! 
WHERE __x.temp + iterativelength(, (SELECT count(s.id) + //! from src s, a.rowid, b.rowid) between lower and upper +} + +void PGQMatchFunction::CheckNamedSubpath(SubPath &subpath, MatchExpression &original_ref, + CreatePropertyGraphInfo &pg_table, unique_ptr &final_select_node, + vector> &conditions) { + for (idx_t idx_i = 0; idx_i < original_ref.column_list.size(); idx_i++) { + auto parsed_ref = dynamic_cast(original_ref.column_list[idx_i].get()); + if (parsed_ref == nullptr) { + continue; + } + auto column_ref = dynamic_cast(parsed_ref->children[0].get()); + if (column_ref == nullptr) { + continue; + } + + if (column_ref->column_names[0] != subpath.path_variable) { + continue; + } + // Trying to check parsed_ref->alias directly leads to a segfault + string column_alias = parsed_ref->alias; + if (parsed_ref->function_name == "element_id") { + // Check subpath name matches the column referenced in the function --> + // element_id(named_subpath) + auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table, subpath.path_variable, + final_select_node, conditions); + + if (column_alias.empty()) { + shortest_path_function->alias = "element_id(" + subpath.path_variable + ")"; + } else { + shortest_path_function->alias = column_alias; + } + original_ref.column_list.erase(original_ref.column_list.begin() + static_cast(idx_i)); + original_ref.column_list.insert(original_ref.column_list.begin() + static_cast(idx_i), + std::move(shortest_path_function)); + } else if (parsed_ref->function_name == "path_length") { + auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table, subpath.path_variable, + final_select_node, conditions); + auto path_len_children = vector>(); + path_len_children.push_back(std::move(shortest_path_function)); + auto path_len = make_uniq("len", std::move(path_len_children)); + auto constant_two = make_uniq(Value::INTEGER(2)); + vector> div_children; + div_children.push_back(std::move(path_len)); + 
div_children.push_back(std::move(constant_two)); + auto path_length_function = make_uniq("//", std::move(div_children)); + path_length_function->alias = + column_alias.empty() ? "path_length(" + subpath.path_variable + ")" : column_alias; + original_ref.column_list.erase(original_ref.column_list.begin() + static_cast(idx_i)); + original_ref.column_list.insert(original_ref.column_list.begin() + static_cast(idx_i), + std::move(path_length_function)); + } else if (parsed_ref->function_name == "vertices" || parsed_ref->function_name == "edges") { + auto list_slice_children = vector>(); + auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table, subpath.path_variable, + final_select_node, conditions); + list_slice_children.push_back(std::move(shortest_path_function)); + + if (parsed_ref->function_name == "vertices") { + list_slice_children.push_back(make_uniq(Value::INTEGER(1))); + } else { + list_slice_children.push_back(make_uniq(Value::INTEGER(2))); + } + auto slice_end = make_uniq(Value::INTEGER(-1)); + auto slice_step = make_uniq(Value::INTEGER(2)); + + list_slice_children.push_back(std::move(slice_end)); + list_slice_children.push_back(std::move(slice_step)); + auto list_slice = make_uniq("list_slice", std::move(list_slice_children)); + if (parsed_ref->function_name == "vertices") { + list_slice->alias = column_alias.empty() ? "vertices(" + subpath.path_variable + ")" : column_alias; + } else { + list_slice->alias = column_alias.empty() ? 
"edges(" + subpath.path_variable + ")" : column_alias; + } + original_ref.column_list.erase(original_ref.column_list.begin() + static_cast(idx_i)); + original_ref.column_list.insert(original_ref.column_list.begin() + static_cast(idx_i), + std::move(list_slice)); + } + } +} + +void PGQMatchFunction::ProcessPathList(vector> &path_list, + vector> &conditions, + unique_ptr &final_select_node, + case_insensitive_map_t> &alias_map, + CreatePropertyGraphInfo &pg_table, int32_t &extra_alias_counter, + MatchExpression &original_ref) { + PathElement *previous_vertex_element = GetPathElement(path_list[0]); + if (!previous_vertex_element) { + const auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); + if (previous_vertex_subpath->where_clause) { + conditions.push_back(std::move(previous_vertex_subpath->where_clause)); + } + if (!previous_vertex_subpath->path_variable.empty() && previous_vertex_subpath->path_list.size() > 1) { + CheckNamedSubpath(*previous_vertex_subpath, original_ref, pg_table, final_select_node, conditions); + } + if (previous_vertex_subpath->path_list.size() == 1) { + previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); + } else { + // Add the shortest path if the name is found in the column_list + ProcessPathList(previous_vertex_subpath->path_list, conditions, final_select_node, alias_map, pg_table, + extra_alias_counter, original_ref); + return; + } + } + auto previous_vertex_table = FindGraphTable(previous_vertex_element->label, pg_table); + CheckInheritance(previous_vertex_table, previous_vertex_element, conditions); + alias_map[previous_vertex_element->variable_binding] = previous_vertex_table; + + for (idx_t idx_j = 1; idx_j < path_list.size(); idx_j = idx_j + 2) { + PathElement *next_vertex_element = GetPathElement(path_list[idx_j + 1]); + if (!next_vertex_element) { + auto next_vertex_subpath = reinterpret_cast(path_list[idx_j + 1].get()); + if (next_vertex_subpath->path_list.size() > 1) { + throw 
NotImplementedException("Recursive patterns are not yet supported."); + } + if (next_vertex_subpath->where_clause) { + conditions.push_back(std::move(next_vertex_subpath->where_clause)); + } + next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); + } + if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || + previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { + throw BinderException("Vertex and edge patterns must be alternated."); + } + auto next_vertex_table = FindGraphTable(next_vertex_element->label, pg_table); + CheckInheritance(next_vertex_table, next_vertex_element, conditions); + alias_map[next_vertex_element->variable_binding] = next_vertex_table; + + PathElement *edge_element = GetPathElement(path_list[idx_j]); + if (!edge_element) { + // We are dealing with a subpath + auto edge_subpath = reinterpret_cast(path_list[idx_j].get()); + if (edge_subpath->where_clause) { + conditions.push_back(std::move(edge_subpath->where_clause)); + } + if (edge_subpath->path_list.size() > 1) { + throw NotImplementedException("Subpath on an edge is not yet supported."); + } + edge_element = GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, pg_table); + + if (edge_subpath->upper > 1) { + // Add the path-finding + AddPathFinding(final_select_node, conditions, previous_vertex_element->variable_binding, + edge_element->variable_binding, next_vertex_element->variable_binding, edge_table, + pg_table, edge_subpath, edge_element->match_type); + } else { + AddEdgeJoins(edge_table, previous_vertex_table, next_vertex_table, edge_element->match_type, + edge_element->variable_binding, previous_vertex_element->variable_binding, + next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter, + final_select_node->from_table); + } + } else { + // The edge element is a path element without WHERE or path-finding. 
+ auto edge_table = FindGraphTable(edge_element->label, pg_table); + CheckInheritance(edge_table, edge_element, conditions); + // check aliases + AddEdgeJoins(edge_table, previous_vertex_table, next_vertex_table, edge_element->match_type, + edge_element->variable_binding, previous_vertex_element->variable_binding, + next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter, + final_select_node->from_table); + // Check the edge type + // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id + // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id + // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) + // FROM (src, dst, * from b UNION ALL dst, src, * from b) + // If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND + // (b.dst = a.id AND b.src + //= c.id) + } + previous_vertex_element = next_vertex_element; + previous_vertex_table = next_vertex_table; + } +} + +void PGQMatchFunction::PopulateGraphTableAliasMap( + const CreatePropertyGraphInfo &pg_table, const unique_ptr &path_reference, + case_insensitive_map_t> &alias_to_vertex_and_edge_tables) { + PathElement *path_elem = GetPathElement(path_reference); + + // Populate binding from PathElement. + if (path_elem != nullptr) { + auto iter = pg_table.label_map.find(path_elem->label); + if (iter == pg_table.label_map.end()) { + throw BinderException("The label %s is not registered in property graph %s", path_elem->label, + pg_table.property_graph_name); + } + alias_to_vertex_and_edge_tables[path_elem->variable_binding] = iter->second; + return; + } + + // Recursively populate binding from SubPath. 
+ SubPath *sub_path = GetSubPath(path_reference); + D_ASSERT(sub_path != nullptr); + const auto &path_list = sub_path->path_list; + for (const auto &cur_path : path_list) { + PopulateGraphTableAliasMap(pg_table, cur_path, alias_to_vertex_and_edge_tables); + } +} + +case_insensitive_map_t> +PGQMatchFunction::PopulateGraphTableAliasMap(const CreatePropertyGraphInfo &pg_table, + const MatchExpression &match_expr) { + case_insensitive_map_t> alias_to_vertex_and_edge_tables; + for (idx_t idx_i = 0; idx_i < match_expr.path_patterns.size(); idx_i++) { + const auto &path_list = match_expr.path_patterns[idx_i]->path_elements; + for (const auto &cur_path : path_list) { + PopulateGraphTableAliasMap(pg_table, cur_path, alias_to_vertex_and_edge_tables); + } + } + return alias_to_vertex_and_edge_tables; +} + +void PGQMatchFunction::CheckColumnBinding( + const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref, + const case_insensitive_map_t> &alias_to_vertex_and_edge_tables) { + // All fully-qualified column names for vertex tables and edge tables. + const auto all_fq_col_names = GetFullyQualifiedColFromPg(pg_table, alias_to_vertex_and_edge_tables); + + for (auto &expression : ref.column_list) { + // TODO(hjiang): `ColumnRefExpression` alone is not enough, we could have + // more complicated expression. + // + // See issue for reference: + // https://github.com/cwida/duckpgq-extension/issues/198 + auto *column_ref = dynamic_cast(expression.get()); + if (column_ref == nullptr) { + continue; + } + // 'shortest_path_cte' is a special table populated by pgq. + if (column_ref->column_names[0] == "shortest_path_cte") { + continue; + } + // 'rowid' is a column duckdb binds automatically. 
+ if (column_ref->column_names.back() == "rowid") { + continue; + } + if (column_ref->column_names.size() == 1) { + bool single_alias = false; + for (const auto &alias : alias_to_vertex_and_edge_tables) { + if (alias.first == column_ref->column_names[0]) { + single_alias = true; + break; + } + } + if (single_alias) { + continue; + } + } + const auto cur_fq_col_name = StringUtil::Join(column_ref->column_names, /*separator=*/"."); + if (all_fq_col_names.find(cur_fq_col_name) == all_fq_col_names.end()) { + throw BinderException("Property %s is never registered!", cur_fq_col_name); + } + } +} + +unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, TableFunctionBindInput &bind_input) { + auto duckpgq_state = GetDuckPGQState(context); + + auto match_index = bind_input.inputs[0].GetValue(); + auto *ref = dynamic_cast(duckpgq_state->transform_expression[match_index].get()); + auto *pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); + + vector> conditions; + + auto final_select_node = make_uniq(); + case_insensitive_map_t> alias_map; + + int32_t extra_alias_counter = 0; + for (idx_t idx_i = 0; idx_i < ref->path_patterns.size(); idx_i++) { + auto &path_pattern = ref->path_patterns[idx_i]; + // Check if the element is PathElement or a Subpath with potentially many + // items + ProcessPathList(path_pattern->path_elements, conditions, final_select_node, alias_map, *pg_table, + extra_alias_counter, *ref); + } + + // Go through all aliases encountered + for (auto &table_alias_entry : alias_map) { + auto table_ref = table_alias_entry.second->CreateBaseTableRef(); + table_ref->alias = table_alias_entry.first; + if (final_select_node->from_table) { + auto new_root = make_uniq(JoinRefType::CROSS); + new_root->left = std::move(final_select_node->from_table); + new_root->right = std::move(table_ref); + final_select_node->from_table = std::move(new_root); + } else { + final_select_node->from_table = std::move(table_ref); + } + } + + if (ref->where_clause) { + 
conditions.push_back(std::move(ref->where_clause)); + } + + // Maps from table alias to table, including vertex and edge tables. + auto alias_to_vertex_and_edge_tables = PopulateGraphTableAliasMap(*pg_table, *ref); + CheckColumnBinding(*pg_table, *ref, alias_to_vertex_and_edge_tables); + + std::vector> final_column_list; + + for (auto &expression : ref->column_list) { + unordered_set named_subpaths; + + // Handle ColumnRefExpression. + auto *column_ref = dynamic_cast(expression.get()); + if (column_ref != nullptr) { + if (named_subpaths.count(column_ref->column_names[0]) && column_ref->column_names.size() == 1) { + final_column_list.emplace_back(make_uniq("path", column_ref->column_names[0])); + } else { + final_column_list.push_back(std::move(expression)); + } + continue; + } + + // Handle FunctionExpression. + auto *function_ref = dynamic_cast(expression.get()); + if (function_ref != nullptr) { + if (function_ref->function_name == "path_length") { + column_ref = dynamic_cast(function_ref->children[0].get()); + if (column_ref == nullptr) { + continue; + } + if (named_subpaths.count(column_ref->column_names[0]) && column_ref->column_names.size() == 1) { + auto path_ref = make_uniq("path", column_ref->column_names[0]); + vector> path_children; + path_children.push_back(std::move(path_ref)); + auto path_len = make_uniq("len", std::move(path_children)); + auto constant_two = make_uniq(Value::INTEGER(2)); + vector> div_children; + div_children.push_back(std::move(path_len)); + div_children.push_back(std::move(constant_two)); + auto div_expression = make_uniq("//", std::move(div_children)); + div_expression->alias = "path_length_" + column_ref->column_names[0]; + final_column_list.emplace_back(std::move(div_expression)); + } + } else { + final_column_list.push_back(std::move(expression)); + } + + continue; + } + + // Handle StarExpression. 
+ auto *star_expression = dynamic_cast(expression.get()); + if (star_expression != nullptr) { + if (!star_expression->relation_name.empty()) { + auto tbl_iter = alias_to_vertex_and_edge_tables.find(star_expression->relation_name); + if (tbl_iter == alias_to_vertex_and_edge_tables.end()) { + continue; + } + } + + auto selected_col_exprs = + star_expression->relation_name.empty() + ? GetColRefExprFromPg(alias_to_vertex_and_edge_tables) + : GetColRefExprFromPg(alias_to_vertex_and_edge_tables, star_expression->relation_name); + + // Fallback to star expression if cannot figure out the columns to query. + if (selected_col_exprs.empty()) { + final_column_list.emplace_back(std::move(expression)); + continue; + } + + final_column_list.reserve(final_column_list.size() + selected_col_exprs.size()); + for (auto &expr : selected_col_exprs) { + final_column_list.emplace_back(std::move(expr)); + } + continue; + } + + // By default, directly handle expression without further processing. + final_column_list.emplace_back(std::move(expression)); + } + + final_select_node->where_clause = CreateWhereClause(conditions); + final_select_node->select_list = std::move(final_column_list); + + auto subquery = make_uniq(); + subquery->node = std::move(final_select_node); + auto result = make_uniq(std::move(subquery), ref->alias); + return std::move(result); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreTableFunctions::RegisterMatchTableFunction(ExtensionLoader &loader) { + loader.RegisterFunction(PGQMatchFunction()); +} + +} // namespace duckdb diff --git a/src/core/functions/table/pagerank.cpp b/src/core/functions/table/pagerank.cpp new file mode 100644 index 00000000..65dcc3dd --- /dev/null +++ b/src/core/functions/table/pagerank.cpp @@ -0,0 +1,40 @@ +#include "duckpgq/core/functions/table/pagerank.hpp" +#include 
"duckdb/function/table_function.hpp" +#include "duckdb/parser/tableref/subqueryref.hpp" + +#include +#include +#include "duckdb/parser/tableref/basetableref.hpp" + +namespace duckdb { + +// Main binding function +unique_ptr PageRankFunction::PageRankBindReplace(ClientContext &context, TableFunctionBindInput &input) { + auto pg_name = StringUtil::Lower(StringValue::Get(input.inputs[0])); + auto node_table = StringUtil::Lower(StringValue::Get(input.inputs[1])); + auto edge_table = StringUtil::Lower(StringValue::Get(input.inputs[2])); + + auto duckpgq_state = GetDuckPGQState(context); + auto pg_info = GetPropertyGraphInfo(duckpgq_state, pg_name); + auto edge_pg_entry = ValidateSourceNodeAndEdgeTable(pg_info, node_table, edge_table); + + auto select_node = CreateSelectNode(edge_pg_entry, "pagerank", "pagerank"); + + select_node->cte_map.map["csr_cte"] = CreateDirectedCSRCTE(edge_pg_entry, "src", "edge", "dst"); + + auto subquery = make_uniq(); + subquery->node = std::move(select_node); + + auto result = make_uniq(std::move(subquery)); + result->alias = "pagerank"; + return std::move(result); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreTableFunctions::RegisterPageRankTableFunction(ExtensionLoader &loader) { + loader.RegisterFunction(PageRankFunction()); +} + +} // namespace duckdb diff --git a/src/core/functions/table/pgq_scan.cpp b/src/core/functions/table/pgq_scan.cpp new file mode 100644 index 00000000..2844e1e8 --- /dev/null +++ b/src/core/functions/table/pgq_scan.cpp @@ -0,0 +1,297 @@ +#include "duckpgq/core/functions/table/pgq_scan.hpp" +#include "duckdb/common/types.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/parser/parsed_data/create_property_graph_info.hpp" +#include "duckdb/parser/parsed_data/create_table_function_info.hpp" +#include "duckdb/parser/property_graph_table.hpp" 
+#include "duckpgq/core/utils/compressed_sparse_row.hpp" +#include "duckpgq/core/utils/duckpgq_utils.hpp" +#include +#include +#include + +namespace duckdb { + +static void ScanCSREFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto state = &data_p.global_state->Cast(); + + if (state->finished) { + output.SetCardinality(0); + return; + } + + auto duckpgq_state = GetDuckPGQState(context); + auto csr_id = data_p.bind_data->Cast().csr_id; + CSR *csr = duckpgq_state->GetCSR(csr_id); + + idx_t vector_size = state->csr_e_offset + DEFAULT_STANDARD_VECTOR_SIZE <= csr->e.size() + ? DEFAULT_STANDARD_VECTOR_SIZE + : csr->e.size() - state->csr_e_offset; + + output.SetCardinality(vector_size); + output.data[0].SetVectorType(VectorType::FLAT_VECTOR); + for (idx_t idx_i = 0; idx_i < vector_size; idx_i++) { + output.data[0].SetValue(idx_i, Value(csr->e[state->csr_e_offset + idx_i])); + } + + if (state->csr_e_offset + vector_size >= csr->e.size()) { + state->finished = true; + } else { + state->csr_e_offset += vector_size; + } +} + +static void ScanCSRPtrFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto &gstate = data_p.global_state->Cast(); + if (gstate.finished) { + output.SetCardinality(0); + return; + } + + gstate.finished = true; + + auto duckpgq_state = GetDuckPGQState(context); + auto csr_id = data_p.bind_data->Cast().csr_id; + CSR *csr = duckpgq_state->GetCSR(csr_id); + output.SetCardinality(5); + output.data[0].SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(output.data[0]); + // now set the result vector + // the first element is the address of the vertex array + result_data[0] = reinterpret_cast(csr->v); + // the second element is the address of the edge array + result_data[1] = reinterpret_cast(&csr->e); + // here we check the type of the weight array + // and set the third and fifth element + // the third element is the address of the weight array + // the fifth 
element is the type of the weight array + // 0 if the weights are integres, 1 if they are doubles, and 2 for unweighted + if (csr->w.empty()) { + result_data[2] = reinterpret_cast(&csr->w); + result_data[4] = static_cast(0); + } else if (csr->w_double.empty()) { + result_data[2] = reinterpret_cast(&csr->w_double); + result_data[4] = static_cast(1); + } else { + result_data[2] = static_cast(0); + result_data[4] = static_cast(2); + } + // we also need the number of elements in the vertex array, since its C-array + // not a vector. + result_data[3] = static_cast(csr->vsize); +} + +static void ScanCSRVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto state = &data_p.global_state->Cast(); + + if (state->finished) { + output.SetCardinality(0); + return; + } + + auto duckpgq_state = GetDuckPGQState(context); + auto csr_id = data_p.bind_data->Cast().csr_id; + CSR *csr = duckpgq_state->GetCSR(csr_id); + + idx_t vector_size = state->csr_v_offset + DEFAULT_STANDARD_VECTOR_SIZE <= csr->vsize + ? 
DEFAULT_STANDARD_VECTOR_SIZE + : csr->vsize - state->csr_v_offset; + + output.SetCardinality(vector_size); + output.data[0].SetVectorType(VectorType::FLAT_VECTOR); + for (idx_t idx_i = 0; idx_i < vector_size; idx_i++) { + output.data[0].SetValue(idx_i, Value(csr->v[state->csr_v_offset + idx_i])); + } + + if (state->csr_v_offset + vector_size >= csr->vsize) { + state->finished = true; + } else { + state->csr_v_offset += vector_size; + } +} + +static void ScanCSRWFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto state = &data_p.global_state->Cast(); + + if (state->finished) { + output.SetCardinality(0); + return; + } + + auto duckpgq_state = GetDuckPGQState(context); + auto csr_scanw_data = data_p.bind_data->Cast(); + auto csr_id = csr_scanw_data.csr_id; + CSR *csr = duckpgq_state->GetCSR(csr_id); + + size_t w_size = 0; + if (csr_scanw_data.is_double) { + w_size = csr->w_double.size(); + } else { + w_size = csr->w.size(); + } + + idx_t vector_size = state->csr_w_offset + DEFAULT_STANDARD_VECTOR_SIZE <= w_size ? 
DEFAULT_STANDARD_VECTOR_SIZE + : w_size - state->csr_w_offset; + + output.SetCardinality(vector_size); + output.data[0].SetVectorType(VectorType::FLAT_VECTOR); + if (csr_scanw_data.is_double) { + for (idx_t idx_i = 0; idx_i < vector_size; idx_i++) { + output.data[0].SetValue(idx_i, Value(csr->w_double[state->csr_w_offset + idx_i])); + } + } else { + for (idx_t idx_i = 0; idx_i < vector_size; idx_i++) { + output.data[0].SetValue(idx_i, Value(csr->w[state->csr_w_offset + idx_i])); + } + } + + if (state->csr_w_offset + vector_size >= w_size) { + state->finished = true; + } else { + state->csr_w_offset += vector_size; + } +} + +static void ScanPGVTableFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto &gstate = data_p.global_state->Cast(); + + if (gstate.finished) { + output.SetCardinality(0); + return; + } + + gstate.finished = true; + + auto duckpgq_state = GetDuckPGQState(context); + auto pg_name = data_p.bind_data->Cast().pg_name; + auto pg = duckpgq_state->GetPropertyGraph(pg_name); + + output.data[0].SetVectorType(VectorType::FLAT_VECTOR); + auto vtables = FlatVector::GetData(output.data[0]); + idx_t size = 0; + for (auto &ele : pg->vertex_tables) { + vtables[size] = string_t(ele->table_name.c_str(), ele->table_name.size()); + size++; + } + output.SetCardinality(size); +} + +static void ScanPGETableFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto &gstate = data_p.global_state->Cast(); + + if (gstate.finished) { + output.SetCardinality(0); + return; + } + + gstate.finished = true; + + auto duckpgq_state = GetDuckPGQState(context); + auto pg_name = data_p.bind_data->Cast().pg_name; + auto pg = duckpgq_state->GetPropertyGraph(pg_name); + + output.data[0].SetVectorType(VectorType::FLAT_VECTOR); + auto etables = FlatVector::GetData(output.data[0]); + idx_t size = 0; + for (auto &ele : pg->edge_tables) { + etables[size] = string_t(ele->table_name.c_str(), ele->table_name.size()); + size++; + 
} + output.SetCardinality(size); +} + +shared_ptr find_table_entry(const vector> &vec, string &table_name) { + for (auto &&entry : vec) { + if (entry->table_name == table_name) { + return entry; + } + } + throw BinderException("Table name %s does not exist", table_name); +} + +static void ScanPGVColFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto &gstate = data_p.global_state->Cast(); + + if (gstate.finished) { + output.SetCardinality(0); + return; + } + + gstate.finished = true; + + auto duckpgq_state = GetDuckPGQState(context); + auto scan_v_col_data = data_p.bind_data->Cast(); + auto pg_name = scan_v_col_data.pg_name; + auto table_name = scan_v_col_data.table_name; + auto pg = duckpgq_state->GetPropertyGraph(pg_name); + + auto table_entry = find_table_entry(pg->vertex_tables, table_name); + + output.data[0].SetVectorType(VectorType::FLAT_VECTOR); + auto colsdata = FlatVector::GetData(output.data[0]); + idx_t size = 0; + for (auto &ele : table_entry->column_names) { + colsdata[size] = string_t(ele.c_str(), ele.size()); + size++; + } + output.SetCardinality(size); +} + +static void ScanPGEColFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto &gstate = data_p.global_state->Cast(); + + if (gstate.finished) { + output.SetCardinality(0); + return; + } + + gstate.finished = true; + + auto duckpgq_state = GetDuckPGQState(context); + auto pg_scan_e_col_data = data_p.bind_data->Cast(); + auto pg_name = pg_scan_e_col_data.pg_name; + auto table_name = pg_scan_e_col_data.table_name; + auto pg = duckpgq_state->GetPropertyGraph(pg_name); + + auto table_entry = find_table_entry(pg->edge_tables, table_name); + + output.data[0].SetVectorType(VectorType::FLAT_VECTOR); + auto colsdata = FlatVector::GetData(output.data[0]); + idx_t size = 0; + for (auto &ele : table_entry->column_names) { + colsdata[size] = string_t(ele.c_str(), ele.size()); + size++; + } + output.SetCardinality(size); +} + 
+//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreTableFunctions::RegisterScanTableFunctions(ExtensionLoader &loader) { + loader.RegisterFunction(TableFunction("get_csr_e", {LogicalType::INTEGER}, ScanCSREFunction, + CSRScanEData::ScanCSREBind, CSRScanState::Init)); + + loader.RegisterFunction(TableFunction("get_csr_v", {LogicalType::INTEGER}, ScanCSRVFunction, + CSRScanVData::ScanCSRVBind, CSRScanState::Init)); + + loader.RegisterFunction(TableFunction("get_csr_w", {LogicalType::INTEGER}, ScanCSRWFunction, + CSRScanWData::ScanCSRWBind, CSRScanState::Init)); + + loader.RegisterFunction(TableFunction("get_pg_vtablenames", {LogicalType::VARCHAR}, ScanPGVTableFunction, + PGScanVTableData::ScanPGVTableBind, CSRScanState::Init)); + + loader.RegisterFunction(TableFunction("get_pg_vcolnames", {LogicalType::VARCHAR, LogicalType::VARCHAR}, + ScanPGVColFunction, PGScanVColData::ScanPGVColBind, CSRScanState::Init)); + + loader.RegisterFunction(TableFunction("get_csr_ptr", {LogicalType::INTEGER}, ScanCSRPtrFunction, + CSRScanPtrData::ScanCSRPtrBind, CSRScanState::Init)); + + loader.RegisterFunction(TableFunction("get_pg_etablenames", {LogicalType::VARCHAR}, ScanPGETableFunction, + PGScanETableData::ScanPGETableBind, CSRScanState::Init)); + + loader.RegisterFunction(TableFunction("get_pg_ecolnames", {LogicalType::VARCHAR, LogicalType::VARCHAR}, + ScanPGEColFunction, PGScanEColData::ScanPGEColBind, CSRScanState::Init)); +} + +} // namespace duckdb diff --git a/src/core/functions/table/summarize_property_graph.cpp b/src/core/functions/table/summarize_property_graph.cpp new file mode 100644 index 00000000..213cbd7a --- /dev/null +++ b/src/core/functions/table/summarize_property_graph.cpp @@ -0,0 +1,334 @@ +#include "duckpgq/core/functions/table/summarize_property_graph.hpp" +#include "duckdb/parser/expression/comparison_expression.hpp" 
+#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/function_expression.hpp" +#include "duckdb/parser/expression/operator_expression.hpp" +#include "duckdb/parser/expression/star_expression.hpp" +#include "duckdb/parser/expression/subquery_expression.hpp" +#include "duckdb/parser/group_by_node.hpp" +#include "duckdb/parser/query_node/select_node.hpp" +#include "duckdb/parser/statement/select_statement.hpp" +#include "duckdb/parser/tableref/showref.hpp" +#include "duckdb/parser/tableref/subqueryref.hpp" +#include +#include + +namespace duckdb { + +unique_ptr GetTableNameConstantExpression(const string &table_name, const string &alias) { + auto table_name_column = make_uniq(Value(table_name)); + table_name_column->alias = alias; + return std::move(table_name_column); +} + +unique_ptr GetFunctionExpression(const string &aggregate_function, const string &alias, + bool is_in_degree, const Value &value = nullptr) { + vector> max_children; + max_children.push_back(make_uniq(is_in_degree ? "in_degree" : "out_degree")); + if (!value.IsNull()) { + max_children.push_back(make_uniq(value)); + } + auto agg_function = make_uniq(aggregate_function, std::move(max_children)); + agg_function->alias = alias + (is_in_degree ? 
"_in_degree" : "_out_degree"); + return std::move(agg_function); +} + +unique_ptr GetConstantNullExpressionWithAlias(const string &alias) { + auto result = make_uniq(Value()); + result->alias = alias; + return std::move(result); +} + +unique_ptr IsVertexTableConstantExpression(bool is_vertex_table, const string &alias) { + auto result = make_uniq(Value(is_vertex_table)); + result->alias = alias; + return std::move(result); +} +unique_ptr GetTableCount(const string &alias) { + vector> children; + auto aggregate_function = make_uniq("count_star", std::move(children)); + aggregate_function->alias = alias; + return std::move(aggregate_function); +} + +unique_ptr +SummarizePropertyGraphFunction::GetDistinctCount(const shared_ptr &pg_table, const string &alias, + bool is_source) { + auto result = make_uniq(); + result->subquery_type = SubqueryType::SCALAR; + auto select_statement = make_uniq(); + auto select_node = make_uniq(); + select_node->from_table = + make_uniq(TableDescription(pg_table->catalog_name, pg_table->schema_name, pg_table->table_name)); + vector> count_children; + auto column_to_count = is_source ? pg_table->source_fk[0] : pg_table->destination_fk[0]; + count_children.push_back(make_uniq(column_to_count, pg_table->table_name)); + auto count_expression = make_uniq("count", std::move(count_children)); + count_expression->distinct = true; + select_node->select_list.push_back(std::move(count_expression)); + select_statement->node = std::move(select_node); + result->subquery = std::move(select_statement); + result->alias = alias; + return std::move(result); +} + +unique_ptr SummarizePropertyGraphFunction::GetIsolatedNodes(shared_ptr &pg_table, + const string &alias, bool is_source) { + auto result = make_uniq(); + result->subquery_type = SubqueryType::SCALAR; + auto select_statement = make_uniq(); + auto select_node = make_uniq(); + vector> count_children; + string table_reference = is_source ? 
pg_table->source_reference : pg_table->destination_reference; + string table_schema = is_source ? pg_table->source_schema : pg_table->destination_schema; + string table_catalog = is_source ? pg_table->source_catalog : pg_table->destination_catalog; + string pk_reference = is_source ? pg_table->source_pk[0] : pg_table->destination_pk[0]; + string fk_reference = is_source ? pg_table->source_fk[0] : pg_table->destination_fk[0]; + count_children.push_back(make_uniq(pk_reference, table_reference)); + select_node->select_list.push_back(make_uniq("count", std::move(count_children))); + + auto join_ref = make_uniq(); + join_ref->type = JoinType::LEFT; + auto source_table_ref = make_uniq(TableDescription(table_catalog, table_schema, table_reference)); + auto edge_table_ref = + make_uniq(TableDescription(pg_table->catalog_name, pg_table->schema_name, pg_table->table_name)); + join_ref->left = std::move(source_table_ref); + join_ref->right = std::move(edge_table_ref); + + join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, make_uniq(pk_reference, table_reference), + make_uniq(fk_reference, pg_table->table_name)); + + select_node->from_table = std::move(join_ref); + + vector> operator_children; + operator_children.push_back(make_uniq(fk_reference, pg_table->table_name)); + auto operator_expression = + make_uniq(ExpressionType::OPERATOR_IS_NULL, std::move(operator_children)); + select_node->where_clause = std::move(operator_expression); + + select_statement->node = std::move(select_node); + result->subquery = std::move(select_statement); + result->alias = alias; + return std::move(result); +} + +unique_ptr +SummarizePropertyGraphFunction::CreateGroupBySubquery(const shared_ptr &pg_table, bool is_in_degree, + const string °ree_column) { + auto select_node = make_uniq(); + select_node->select_list.push_back(make_uniq(degree_column)); + vector> children; + auto count_star_expression = make_uniq("count_star", std::move(children)); + count_star_expression->alias = 
is_in_degree ? "in_degree" : "out_degree"; + select_node->select_list.push_back(std::move(count_star_expression)); + select_node->from_table = + make_uniq(TableDescription(pg_table->catalog_name, pg_table->schema_name, pg_table->table_name)); + GroupByNode grouping_node; + grouping_node.group_expressions.push_back(make_uniq(degree_column)); + grouping_node.grouping_sets.push_back({0}); + select_node->groups = std::move(grouping_node); + auto select_statement = make_uniq(); + select_statement->node = std::move(select_node); + return make_uniq(std::move(select_statement)); +} + +unique_ptr +SummarizePropertyGraphFunction::CreateDegreeStatisticsCTE(const shared_ptr &pg_table, + const string °ree_column, bool is_in_degree) { + auto cte_info = make_uniq(); + auto select_statement = make_uniq(); + auto select_node = make_uniq(); + select_node->select_list.push_back(GetFunctionExpression("avg", "avg", is_in_degree)); + select_node->select_list.push_back(GetFunctionExpression("min", "min", is_in_degree)); + select_node->select_list.push_back(GetFunctionExpression("max", "max", is_in_degree)); + select_node->select_list.push_back( + GetFunctionExpression("approx_quantile", "q25", is_in_degree, Value::FLOAT(0.25))); + select_node->select_list.push_back( + GetFunctionExpression("approx_quantile", "q50", is_in_degree, Value::FLOAT(0.5))); + select_node->select_list.push_back( + GetFunctionExpression("approx_quantile", "q75", is_in_degree, Value::FLOAT(0.75))); + select_node->from_table = CreateGroupBySubquery(pg_table, is_in_degree, degree_column); + select_statement->node = std::move(select_node); + cte_info->query = std::move(select_statement); + // todo(dtenwolde) make this CTE materialized + // cte_info->materialized = CTEMaterialize::CTE_MATERIALIZE_ALWAYS; + return cte_info; +} + +unique_ptr SummarizePropertyGraphFunction::GetDegreeStatistics(const string &aggregate_function, + bool is_in_degree) { + auto result = make_uniq(); + result->subquery_type = 
SubqueryType::SCALAR; + auto select_statement = make_uniq(); + auto select_node = make_uniq(); + select_node->select_list.push_back( + make_uniq(aggregate_function + "_" + (is_in_degree ? "in_degree" : "out_degree"))); + auto cte_table_ref = make_uniq(); + cte_table_ref->table_name = is_in_degree ? "in_degrees" : "out_degrees"; + select_node->from_table = std::move(cte_table_ref); + select_statement->node = std::move(select_node); + result->subquery = std::move(select_statement); + result->alias = aggregate_function + "_" + (is_in_degree ? "in_degree" : "out_degree"); + return std::move(result); +} + +unique_ptr +SummarizePropertyGraphFunction::CreateVertexTableCTE(const shared_ptr &vertex_table) { + auto cte_info = make_uniq(); + auto select_statement = make_uniq(); + auto select_node = make_uniq(); + select_node->select_list.push_back(GetTableNameConstantExpression(vertex_table->table_name, "table_name")); + select_node->select_list.push_back(IsVertexTableConstantExpression(true, "is_vertex_table")); + select_node->select_list.push_back( + GetConstantNullExpressionWithAlias("source_table")); // source table name (NULL for vertex tables) + select_node->select_list.push_back( + GetConstantNullExpressionWithAlias("destination_table")); // destination table name (NULL for vertex tables) + select_node->select_list.push_back(GetTableCount("vertex_count")); + select_node->select_list.push_back( + GetConstantNullExpressionWithAlias("edge_count")); // edge_count (NULL for vertex tables) + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("unique_source_count")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("unique_destination_count")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("isolated_sources")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("isolated_destinations")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("avg_in_degree")); + 
select_node->select_list.push_back(GetConstantNullExpressionWithAlias("min_in_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("max_in_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("q25_in_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("q50_in_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("q75_in_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("avg_out_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("min_out_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("max_out_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("q25_out_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("q50_out_degree")); + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("q75_out_degree")); + select_node->from_table = make_uniq( + TableDescription(vertex_table->catalog_name, vertex_table->schema_name, vertex_table->table_name)); + select_statement->node = std::move(select_node); + cte_info->query = std::move(select_statement); + return cte_info; +} + +unique_ptr +SummarizePropertyGraphFunction::CreateEdgeTableCTE(shared_ptr &edge_table) { + auto cte_info = make_uniq(); + auto select_statement = make_uniq(); + auto select_node = make_uniq(); + select_node->cte_map.map.insert("in_degrees", + CreateDegreeStatisticsCTE(edge_table, edge_table->destination_fk[0], true)); + select_node->cte_map.map.insert("out_degrees", + CreateDegreeStatisticsCTE(edge_table, edge_table->source_fk[0], false)); + + select_node->select_list.push_back(GetTableNameConstantExpression(edge_table->table_name, "table_name")); + select_node->select_list.push_back(IsVertexTableConstantExpression(false, "is_vertex_table")); + select_node->select_list.push_back( + 
GetTableNameConstantExpression(edge_table->source_reference, + "source_table")); // source table name (populated for edge tables) + select_node->select_list.push_back( + GetTableNameConstantExpression(edge_table->destination_reference, + "destination_table")); // destination table name (populated for edge tables) + select_node->select_list.push_back(GetConstantNullExpressionWithAlias("vertex_count")); + select_node->select_list.push_back(GetTableCount("edge_count")); + + select_node->select_list.push_back(GetDistinctCount(edge_table, "unique_source_count", true)); + select_node->select_list.push_back(GetDistinctCount(edge_table, "unique_destination_count", false)); + + select_node->select_list.push_back(GetIsolatedNodes(edge_table, "isolated_sources", true)); + select_node->select_list.push_back(GetIsolatedNodes(edge_table, "isolated_destinations", false)); + + select_node->select_list.push_back(GetDegreeStatistics("avg", true)); + select_node->select_list.push_back(GetDegreeStatistics("min", true)); + select_node->select_list.push_back(GetDegreeStatistics("max", true)); + select_node->select_list.push_back(GetDegreeStatistics("q25", true)); + select_node->select_list.push_back(GetDegreeStatistics("q50", true)); + select_node->select_list.push_back(GetDegreeStatistics("q75", true)); + + select_node->select_list.push_back(GetDegreeStatistics("avg", false)); + select_node->select_list.push_back(GetDegreeStatistics("min", false)); + select_node->select_list.push_back(GetDegreeStatistics("max", false)); + select_node->select_list.push_back(GetDegreeStatistics("q25", false)); + select_node->select_list.push_back(GetDegreeStatistics("q50", false)); + select_node->select_list.push_back(GetDegreeStatistics("q75", false)); + + select_node->from_table = make_uniq( + TableDescription(edge_table->catalog_name, edge_table->schema_name, edge_table->table_name)); + select_statement->node = std::move(select_node); + cte_info->query = std::move(select_statement); + return cte_info; +} + 
+unique_ptr +SummarizePropertyGraphFunction::HandleSingleVertexTable(const shared_ptr &vertex_table, + const string &stat_table_alias) { + auto select_node = make_uniq(); + select_node->cte_map.map.insert(stat_table_alias, CreateVertexTableCTE(vertex_table)); + auto base_table_ref = make_uniq(); + base_table_ref->table_name = stat_table_alias; + select_node->from_table = std::move(base_table_ref); + select_node->select_list.push_back(make_uniq()); + auto select_stmt = make_uniq(); + select_stmt->node = std::move(select_node); + auto subquery = make_uniq(std::move(select_stmt)); + return std::move(subquery); +} + +void AddToUnionNode(unique_ptr &final_union_node, unique_ptr &inner_select_node) { + final_union_node->children.push_back(std::move(inner_select_node)); +} + +unique_ptr CreateInnerSelectStatNode(const string &stat_table_alias) { + auto inner_select_node = make_uniq(); + inner_select_node->select_list.push_back(make_uniq()); + auto base_table_ref = make_uniq(); + base_table_ref->table_name = stat_table_alias; + inner_select_node->from_table = std::move(base_table_ref); + return inner_select_node; +} + +unique_ptr +SummarizePropertyGraphFunction::SummarizePropertyGraphBindReplace(ClientContext &context, + TableFunctionBindInput &bind_input) { + auto duckpgq_state = GetDuckPGQState(context); + + string property_graph = bind_input.inputs[0].GetValue(); + auto pg_info = duckpgq_state->GetPropertyGraph(property_graph); + + if (pg_info->vertex_tables.size() == 1 && pg_info->edge_tables.empty()) { + // Special case where we don't want to create a union across the different + // tables + string stat_table_alias = pg_info->vertex_tables[0]->table_name + "_stats"; + return HandleSingleVertexTable(pg_info->vertex_tables[0], stat_table_alias); + } + + auto final_union_node = make_uniq(); + final_union_node->setop_type = SetOperationType::UNION; + final_union_node->setop_all = true; + for (auto &vertex_table : pg_info->vertex_tables) { + string stat_table_alias = 
vertex_table->table_name + "_stats"; + auto inner_select_node = CreateInnerSelectStatNode(stat_table_alias); + inner_select_node->cte_map.map.insert(stat_table_alias, CreateVertexTableCTE(vertex_table)); + AddToUnionNode(final_union_node, inner_select_node); + } + for (auto &edge_table : pg_info->edge_tables) { + string stat_table_alias = edge_table->source_reference + "_" + edge_table->table_name + "_" + + edge_table->destination_reference + "_stats"; + auto inner_select_node = CreateInnerSelectStatNode(stat_table_alias); + inner_select_node->cte_map.map.insert(stat_table_alias, CreateEdgeTableCTE(edge_table)); + AddToUnionNode(final_union_node, inner_select_node); + } + + auto select_stmt = make_uniq(); + select_stmt->node = std::move(final_union_node); + auto subquery = make_uniq(std::move(select_stmt)); + return std::move(subquery); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreTableFunctions::RegisterSummarizePropertyGraphTableFunction(ExtensionLoader &loader) { + loader.RegisterFunction(SummarizePropertyGraphFunction()); +} + +} // namespace duckdb diff --git a/src/core/functions/table/weakly_connected_component.cpp b/src/core/functions/table/weakly_connected_component.cpp new file mode 100644 index 00000000..0464b397 --- /dev/null +++ b/src/core/functions/table/weakly_connected_component.cpp @@ -0,0 +1,42 @@ +#include "duckpgq/core/functions/table/weakly_connected_component.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/parser/tableref/subqueryref.hpp" + +#include +#include +#include "duckdb/parser/tableref/basetableref.hpp" + +namespace duckdb { + +// Main binding function +unique_ptr +WeaklyConnectedComponentFunction::WeaklyConnectedComponentBindReplace(ClientContext &context, + TableFunctionBindInput &input) { + auto pg_name = StringUtil::Lower(StringValue::Get(input.inputs[0])); 
+ auto node_table = StringUtil::Lower(StringValue::Get(input.inputs[1])); + auto edge_table = StringUtil::Lower(StringValue::Get(input.inputs[2])); + + auto duckpgq_state = GetDuckPGQState(context); + auto pg_info = GetPropertyGraphInfo(duckpgq_state, pg_name); + auto edge_pg_entry = ValidateSourceNodeAndEdgeTable(pg_info, node_table, edge_table); + + auto select_node = CreateSelectNode(edge_pg_entry, "weakly_connected_component", "componentId"); + + select_node->cte_map.map["csr_cte"] = CreateUndirectedCSRCTE(edge_pg_entry, select_node); + + auto subquery = make_uniq(); + subquery->node = std::move(select_node); + + auto result = make_uniq(std::move(subquery)); + result->alias = "wcc"; + return std::move(result); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CoreTableFunctions::RegisterWeaklyConnectedComponentTableFunction(ExtensionLoader &loader) { + loader.RegisterFunction(WeaklyConnectedComponentFunction()); +} + +} // namespace duckdb diff --git a/src/core/module.cpp b/src/core/module.cpp new file mode 100644 index 00000000..f36ef865 --- /dev/null +++ b/src/core/module.cpp @@ -0,0 +1,20 @@ + +#include "duckpgq/core/module.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/functions/scalar.hpp" +#include "duckpgq/core/functions/table.hpp" +#include "duckpgq/core/operator/duckpgq_operator.hpp" +#include "duckpgq/core/parser/duckpgq_parser.hpp" +#include "duckpgq/core/pragma/duckpgq_pragma.hpp" + +namespace duckdb { + +void CoreModule::Register(ExtensionLoader &loader) { + CoreTableFunctions::Register(loader); + CoreScalarFunctions::Register(loader); + CorePGQParser::Register(loader); + CorePGQPragma::Register(loader); + CorePGQOperator::Register(loader); +} + +} // namespace duckdb diff --git a/src/core/operator/CMakeLists.txt b/src/core/operator/CMakeLists.txt new file mode 100644 index 
00000000..f8cd83dd --- /dev/null +++ b/src/core/operator/CMakeLists.txt @@ -0,0 +1,3 @@ +set(EXTENSION_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/duckpgq_bind.cpp ${EXTENSION_SOURCES} + PARENT_SCOPE) diff --git a/src/core/operator/duckpgq_bind.cpp b/src/core/operator/duckpgq_bind.cpp new file mode 100644 index 00000000..aa88c368 --- /dev/null +++ b/src/core/operator/duckpgq_bind.cpp @@ -0,0 +1,34 @@ +#include "duckpgq/core/operator/duckpgq_bind.hpp" +#include "duckpgq/common.hpp" + +#include +#include "duckpgq/core/operator/duckpgq_operator.hpp" +#include + +namespace duckdb { + +BoundStatement duckpgq_bind(ClientContext &context, Binder &binder, OperatorExtensionInfo *info, + SQLStatement &statement) { + auto duckpgq_state = context.registered_state->Get("duckpgq"); + if (!duckpgq_state) { + throw; // Throw the original error that got us here if DuckPGQ is not loaded + } + + auto duckpgq_binder = Binder::CreateBinder(context, &binder); + auto duckpgq_parse_data = dynamic_cast(duckpgq_state->parse_data.get()); + if (duckpgq_parse_data) { + return duckpgq_binder->Bind(*(duckpgq_parse_data->statement)); + } + throw; +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void CorePGQOperator::RegisterPGQBindOperator(ExtensionLoader &loader) { + auto &db = loader.GetDatabaseInstance(); + auto &config = DBConfig::GetConfig(db); + config.operator_extensions.push_back(make_uniq()); +} + +} // namespace duckdb diff --git a/src/core/parser/CMakeLists.txt b/src/core/parser/CMakeLists.txt new file mode 100644 index 00000000..16a9e9a7 --- /dev/null +++ b/src/core/parser/CMakeLists.txt @@ -0,0 +1,3 @@ +set(EXTENSION_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/duckpgq_parser.cpp ${EXTENSION_SOURCES} + PARENT_SCOPE) diff --git a/src/core/parser/duckpgq_parser.cpp b/src/core/parser/duckpgq_parser.cpp new file mode 100644 index 00000000..932deb66 --- 
/dev/null +++ b/src/core/parser/duckpgq_parser.cpp @@ -0,0 +1,202 @@ + +#include "duckpgq/core/parser/duckpgq_parser.hpp" + +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/tableref/showref.hpp" +#include "duckdb/parser/tableref/table_function_ref.hpp" +#include +#include +#include +#include +#include +#include +#include + +#include "duckdb/parser/query_node/cte_node.hpp" +#include "duckdb/parser/tableref/subqueryref.hpp" +#include +#include + +#include +#include + +#include "duckpgq/core/utils/duckpgq_utils.hpp" + +namespace duckdb { + +ParserExtensionParseResult duckpgq_parse(ParserExtensionInfo *info, const std::string &query) { + Parser parser; + parser.ParseQuery((query[0] == '-') ? query.substr(1, query.length()) : query); + if (parser.statements.size() != 1) { + throw Exception(ExceptionType::PARSER, "More than one statement detected, please only give one."); + } + return ParserExtensionParseResult( + make_uniq_base(std::move(parser.statements[0]))); +} + +void duckpgq_find_match_function(TableRef *table_ref, DuckPGQState &duckpgq_state) { + // TODO(dtenwolde) add support for other style of tableRef (e.g. 
PivotRef) + if (auto table_function_ref = dynamic_cast(table_ref)) { + // Handle TableFunctionRef case + auto function = dynamic_cast(table_function_ref->function.get()); + if (function->function_name != "duckpgq_match") { + return; + } + table_function_ref->alias = function->children[0]->Cast().alias; + int32_t match_index = duckpgq_state.match_index++; + duckpgq_state.transform_expression[match_index] = std::move(function->children[0]); + function->children.pop_back(); + auto function_identifier = make_uniq(Value::CreateValue(match_index)); + function->children.push_back(std::move(function_identifier)); + } else if (auto join_ref = dynamic_cast(table_ref)) { + // Handle JoinRef case + duckpgq_find_match_function(join_ref->left.get(), duckpgq_state); + duckpgq_find_match_function(join_ref->right.get(), duckpgq_state); + } else if (auto subquery_ref = dynamic_cast(table_ref)) { + // Handle SubqueryRef case + auto subquery = subquery_ref->subquery.get(); + duckpgq_find_select_statement(subquery, duckpgq_state); + } +} + +ParserExtensionPlanResult duckpgq_find_select_statement(SQLStatement *statement, DuckPGQState &duckpgq_state) { + const auto select_statement = dynamic_cast(statement); + auto node = dynamic_cast(select_statement->node.get()); + CTENode *cte_node = nullptr; + + // Check if node is not a SelectNode + if (!node) { + // Attempt to cast to CTENode + cte_node = dynamic_cast(select_statement->node.get()); + if (cte_node) { + // Get the child node as a SelectNode if cte_node is valid + node = dynamic_cast(cte_node->child.get()); + } + } + + // Check if node is a ShowRef + if (node) { + const auto describe_node = dynamic_cast(node->from_table.get()); + if (describe_node) { + ParserExtensionPlanResult result; + result.requires_valid_transaction = true; + result.return_type = StatementReturnType::QUERY_RESULT; + if (describe_node->show_type == ShowType::SUMMARY) { + result.function = SummarizePropertyGraphFunction(); + 
result.parameters.push_back(Value(describe_node->table_name)); + return result; + } + if (describe_node->show_type == ShowType::DESCRIBE) { + result.function = DescribePropertyGraphFunction(); + return result; + } + throw BinderException("Unknown show type %s found.", describe_node->show_type); + } + } + + CommonTableExpressionMap *cte_map = nullptr; + if (node) { + cte_map = &node->cte_map; + } else if (cte_node) { + cte_map = &cte_node->cte_map; + } + + if (!cte_map) { + return {}; + } + + for (auto const &kv_pair : cte_map->map) { + auto const &cte = kv_pair.second; + + auto *cte_select_statement = dynamic_cast(cte->query.get()); + if (!cte_select_statement) { + continue; + } + + auto *select_node = dynamic_cast(cte_select_statement->node.get()); + if (!select_node) { + continue; // The SelectStatement has no SelectNode, skip. + } + + // If we get here, we know select_node is valid. + duckpgq_find_match_function(select_node->from_table.get(), duckpgq_state); + } + if (node) { + duckpgq_find_match_function(node->from_table.get(), duckpgq_state); + } else { + throw Exception(ExceptionType::INTERNAL, "node is a nullptr."); + } + return {}; +} + +ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, DuckPGQState &duckpgq_state) { + if (statement->type == StatementType::SELECT_STATEMENT) { + auto result = duckpgq_find_select_statement(statement, duckpgq_state); + if (result.function.bind == nullptr) { + throw Exception(ExceptionType::BINDER, "use duckpgq_bind instead"); + } + return result; + } + if (statement->type == StatementType::CREATE_STATEMENT) { + const auto &create_statement = statement->Cast(); + const auto create_property_graph = dynamic_cast(create_statement.info.get()); + if (create_property_graph) { + ParserExtensionPlanResult result; + result.function = CreatePropertyGraphFunction(); + result.requires_valid_transaction = true; + result.return_type = StatementReturnType::QUERY_RESULT; + return result; + } + const auto create_table 
= reinterpret_cast(create_statement.info.get()); + duckpgq_handle_statement(create_table->query.get(), duckpgq_state); + } + if (statement->type == StatementType::DROP_STATEMENT) { + ParserExtensionPlanResult result; + result.function = DropPropertyGraphFunction(); + result.requires_valid_transaction = true; + result.return_type = StatementReturnType::QUERY_RESULT; + return result; + } + if (statement->type == StatementType::EXPLAIN_STATEMENT) { + auto &explain_statement = statement->Cast(); + duckpgq_handle_statement(explain_statement.stmt.get(), duckpgq_state); + } + if (statement->type == StatementType::COPY_STATEMENT) { + const auto ©_statement = statement->Cast(); + const auto select_node = dynamic_cast(copy_statement.info->select_statement.get()); + duckpgq_find_match_function(select_node->from_table.get(), duckpgq_state); + throw Exception(ExceptionType::BINDER, "use duckpgq_bind instead"); + } + if (statement->type == StatementType::INSERT_STATEMENT) { + const auto &insert_statement = statement->Cast(); + duckpgq_handle_statement(insert_statement.select_statement.get(), duckpgq_state); + } + + throw Exception(ExceptionType::NOT_IMPLEMENTED, + StatementTypeToString(statement->type) + "has not been implemented yet for DuckPGQ queries"); +} + +ParserExtensionPlanResult duckpgq_plan(ParserExtensionInfo *, ClientContext &context, + unique_ptr parse_data) { + auto duckpgq_state = GetDuckPGQState(context); + duckpgq_state->parse_data = std::move(parse_data); + auto duckpgq_parse_data = dynamic_cast(duckpgq_state->parse_data.get()); + + if (!duckpgq_parse_data) { + throw Exception(ExceptionType::BINDER, "No DuckPGQ parse data found"); + } + + auto statement = duckpgq_parse_data->statement.get(); + return duckpgq_handle_statement(statement, *duckpgq_state); +} + +//------------------------------------------------------------------------------ +// Register functions +//------------------------------------------------------------------------------ +void 
CorePGQParser::RegisterPGQParserExtension(ExtensionLoader &loader) { + auto &db = loader.GetDatabaseInstance(); + auto &config = DBConfig::GetConfig(db); + config.parser_extensions.push_back(DuckPGQParserExtension()); +} + +} // namespace duckdb diff --git a/src/core/pragma/CMakeLists.txt b/src/core/pragma/CMakeLists.txt new file mode 100644 index 00000000..ed35bf65 --- /dev/null +++ b/src/core/pragma/CMakeLists.txt @@ -0,0 +1,4 @@ +set(EXTENSION_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/create_vertex_table.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/show_property_graphs.cpp ${EXTENSION_SOURCES} + PARENT_SCOPE) diff --git a/src/core/pragma/create_vertex_table.cpp b/src/core/pragma/create_vertex_table.cpp new file mode 100644 index 00000000..9a7c8613 --- /dev/null +++ b/src/core/pragma/create_vertex_table.cpp @@ -0,0 +1,40 @@ +#include "duckdb/function/pragma_function.hpp" +#include + +namespace duckdb { + +static string PragmaCreateVertexTable(ClientContext &context, const FunctionParameters ¶meters) { + if (parameters.values.size() != 5) { + throw InvalidInputException("PRAGMA create_vertex_table requires exactly five parameters: edge_table, " + "source_column, destination_column, vertex_table_name, id_column_name"); + } + + string edge_table = parameters.values[0].GetValue(); + string source_column = parameters.values[1].GetValue(); + string destination_column = parameters.values[2].GetValue(); + string vertex_table_name = parameters.values[3].GetValue(); + string id_column_name = parameters.values[4].GetValue(); + + auto result_query = "CREATE TABLE " + vertex_table_name + " AS " + "SELECT DISTINCT " + id_column_name + " FROM " + + "(SELECT " + source_column + " AS " + id_column_name + " FROM " + edge_table + " UNION ALL " + + "SELECT " + destination_column + " AS " + id_column_name + " FROM " + edge_table + ")"; + return result_query; +} + +void CorePGQPragma::RegisterCreateVertexTable(ExtensionLoader &loader) { + // Define the pragma function + auto pragma_func = 
PragmaFunction::PragmaCall("create_vertex_table", // Name of the pragma + PragmaCreateVertexTable, // Query substitution function + { + LogicalType::VARCHAR, // Edge table + LogicalType::VARCHAR, // Source column + LogicalType::VARCHAR, // Destination column + LogicalType::VARCHAR, // Vertex table name + LogicalType::VARCHAR // ID column name + }); + + // Register the pragma function + loader.RegisterFunction(pragma_func); +} + +} // namespace duckdb diff --git a/src/core/pragma/show_property_graphs.cpp b/src/core/pragma/show_property_graphs.cpp new file mode 100644 index 00000000..a516e38b --- /dev/null +++ b/src/core/pragma/show_property_graphs.cpp @@ -0,0 +1,21 @@ +#include "duckdb/function/pragma_function.hpp" +#include + +namespace duckdb { + +static string PragmaShowPropertyGraphs(ClientContext &context, const FunctionParameters &parameters) { + return "SELECT DISTINCT property_graph from __duckpgq_internal"; +} + +void CorePGQPragma::RegisterShowPropertyGraphs(ExtensionLoader &loader) { + // Define the pragma function + auto pragma_func = PragmaFunction::PragmaCall("show_property_graphs", // Name of the pragma + PragmaShowPropertyGraphs, // Query substitution function + {} // No parameter types: this pragma takes no arguments + ); + + // Register the pragma function + loader.RegisterFunction(pragma_func); +} + +} // namespace duckdb diff --git a/src/core/utils/CMakeLists.txt b/src/core/utils/CMakeLists.txt new file mode 100644 index 00000000..03fd0a1b --- /dev/null +++ b/src/core/utils/CMakeLists.txt @@ -0,0 +1,5 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/compressed_sparse_row.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/duckpgq_bitmap.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/duckpgq_utils.cpp + PARENT_SCOPE) diff --git a/src/core/utils/compressed_sparse_row.cpp b/src/core/utils/compressed_sparse_row.cpp new file mode 100644 index 00000000..cca0f81b --- /dev/null +++ b/src/core/utils/compressed_sparse_row.cpp @@ -0,0 +1,609 @@ +#include 
"duckpgq/core/utils/compressed_sparse_row.hpp" +#include "duckdb/common/string.hpp" +#include "duckdb/execution/expression_executor.hpp" +#include "duckdb/parser/expression/comparison_expression.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/star_expression.hpp" +#include "duckdb/parser/tableref/basetableref.hpp" +#include "duckdb/parser/tableref/subqueryref.hpp" + +#include + +namespace duckdb { + +string CSR::ToString() const { + std::ostringstream result; + + if (initialized_v) { + result << "v (Node Offsets):\n"; + for (size_t i = 0; i < vsize; i++) { + result << " Node " << i << ": Offset " << v[i].load() << "\n"; + } + } else { + result << "v: V has not been initialized\n"; + } + + result << "\n"; + + if (initialized_e) { + result << "e (Edges):\n"; + for (size_t i = 0; i < vsize - 2; i++) { + result << " Node " << i << " connects to: "; + for (size_t j = v[i].load(); j < v[i + 1].load(); j++) { + result << e[j] << " "; + } + result << "\n"; + } + } else { + result << "e: E has not been initialized\n"; + } + + result << "\n"; + + if (initialized_w) { + result << "w (Weights):\n"; + for (size_t i = 0; i < vsize - 1; i++) { + result << " Node " << i << " weights: "; + for (size_t j = v[i].load(); j < v[i + 1].load(); j++) { + result << w[j] << " "; + } + result << "\n"; + } + } else { + result << "w: W has not been initialized\n"; + } + + return result.str(); +} + +CSRFunctionData::CSRFunctionData(ClientContext &context, int32_t id, const LogicalType &weight_type) + : context(context), id(id), weight_type(weight_type) { +} + +unique_ptr CSRFunctionData::Copy() const { + return make_uniq(context, id, weight_type); +} + +bool CSRFunctionData::Equals(const FunctionData &other_p) const { + auto &other = dynamic_cast(other_p); + return id == other.id && weight_type == other.weight_type; +} + +unique_ptr CSRFunctionData::CSRVertexBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { 
+ if (!arguments[0]->IsFoldable()) { + throw InvalidInputException("Id must be constant."); + } + + Value id = ExpressionExecutor::EvaluateScalar(context, *arguments[0]); + if (arguments.size() == 4) { + auto logical_type = LogicalType::SQLNULL; + return make_uniq(context, id.GetValue(), logical_type); + } + return make_uniq(context, id.GetValue(), arguments[3]->return_type); +} + +unique_ptr CSRFunctionData::CSREdgeBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + if (!arguments[0]->IsFoldable()) { + throw InvalidInputException("Id must be constant."); + } + Value id = ExpressionExecutor::EvaluateScalar(context, *arguments[0]); + if (arguments.size() == 8) { + return make_uniq(context, id.GetValue(), arguments[7]->return_type); + } + auto logical_type = LogicalType::SQLNULL; + return make_uniq(context, id.GetValue(), logical_type); +} + +unique_ptr CSRFunctionData::CSRBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + if (!arguments[0]->IsFoldable()) { + throw InvalidInputException("Id must be constant."); + } + Value id = ExpressionExecutor::EvaluateScalar(context, *arguments[0]); + return make_uniq(context, id.GetValue(), LogicalType::BOOLEAN); +} + +// Helper function to create a JoinRef +unique_ptr CreateJoin(const string &fk_column, const string &pk_column, + const shared_ptr &fk_table, + const shared_ptr &pk_table) { + auto join = make_uniq(JoinRefType::REGULAR); + join->left = fk_table->CreateBaseTableRef(); + join->right = pk_table->CreateBaseTableRef(); + join->condition = make_uniq(ExpressionType::COMPARE_EQUAL, + make_uniq(fk_column, fk_table->table_name), + make_uniq(pk_column, pk_table->table_name)); + return join; +} + +// Helper function to setup SelectNode +void SetupSelectNode(unique_ptr &select_node, const shared_ptr &edge_table, + bool reverse) { + select_node = make_uniq(); + + select_node->select_list.emplace_back(CreateColumnRefExpression("rowid", 
edge_table->source_reference, "dense_id")); + + if (!reverse) { + select_node->select_list.emplace_back( + CreateColumnRefExpression(edge_table->source_fk[0], edge_table->table_name, "outgoing_edges")); + select_node->select_list.emplace_back( + CreateColumnRefExpression(edge_table->destination_fk[0], edge_table->table_name, "incoming_edges")); + select_node->from_table = + CreateJoin(edge_table->source_fk[0], edge_table->source_pk[0], edge_table, edge_table->source_pg_table); + } else { + select_node->select_list.emplace_back( + CreateColumnRefExpression(edge_table->destination_fk[0], edge_table->table_name, "outgoing_edges")); + select_node->select_list.emplace_back( + CreateColumnRefExpression(edge_table->source_fk[0], edge_table->table_name, "incoming_edges")); + select_node->from_table = CreateJoin(edge_table->destination_fk[0], edge_table->source_pk[0], edge_table, + edge_table->source_pg_table); + } +} + +// Function to create a subquery expression for counting table entries +unique_ptr GetCountTable(const shared_ptr &table, const string &table_alias, + const string &primary_key) { + auto select_count = make_uniq(); + auto select_inner = make_uniq(); + auto ref = table->CreateBaseTableRef(table_alias); + select_inner->from_table = std::move(ref); + + vector> children; + children.push_back(make_uniq(primary_key, table_alias)); + + auto count_function = make_uniq("count", std::move(children)); + select_inner->select_list.push_back(std::move(count_function)); + select_count->node = std::move(select_inner); + + auto result = make_uniq(); + result->subquery = std::move(select_count); + result->subquery_type = SubqueryType::SCALAR; + return result; +} + +unique_ptr GetJoinRef(const shared_ptr &edge_table, const string &edge_binding, + const string &prev_binding, const string &next_binding) { + auto first_join_ref = make_uniq(JoinRefType::REGULAR); + first_join_ref->type = JoinType::INNER; + + auto second_join_ref = make_uniq(JoinRefType::REGULAR); + 
second_join_ref->type = JoinType::INNER; + + second_join_ref->left = edge_table->CreateBaseTableRef(edge_binding); + second_join_ref->right = edge_table->source_pg_table->CreateBaseTableRef(prev_binding); + auto t_from_ref = make_uniq(edge_table->source_fk[0], edge_binding); + auto src_cid_ref = make_uniq(edge_table->source_pk[0], prev_binding); + second_join_ref->condition = + make_uniq(ExpressionType::COMPARE_EQUAL, std::move(t_from_ref), std::move(src_cid_ref)); + first_join_ref->left = std::move(second_join_ref); + first_join_ref->right = edge_table->destination_pg_table->CreateBaseTableRef(next_binding); + + auto t_to_ref = make_uniq(edge_table->destination_fk[0], edge_binding); + auto dst_cid_ref = make_uniq(edge_table->destination_pk[0], next_binding); + first_join_ref->condition = + make_uniq(ExpressionType::COMPARE_EQUAL, std::move(t_to_ref), std::move(dst_cid_ref)); + return first_join_ref; +} + +unique_ptr CreateDirectedCSRVertexSubquery(const shared_ptr &edge_table, + const string &prev_binding) { + auto count_create_vertex_expr = GetCountTable(edge_table->source_pg_table, prev_binding, edge_table->source_pk[0]); + + vector> csr_vertex_children; + csr_vertex_children.push_back(make_uniq(Value::INTEGER(0))); + csr_vertex_children.push_back(std::move(count_create_vertex_expr)); + csr_vertex_children.push_back(make_uniq("dense_id", "sub")); + csr_vertex_children.push_back(make_uniq("cnt", "sub")); + auto create_vertex_function = make_uniq("create_csr_vertex", std::move(csr_vertex_children)); + + vector> sum_children; + sum_children.push_back(std::move(create_vertex_function)); + auto sum_function = make_uniq("sum", std::move(sum_children)); + + auto inner_select_statement = make_uniq(); + auto inner_select_node = make_uniq(); + + inner_select_node->select_list.emplace_back(CreateColumnRefExpression("rowid", prev_binding, "dense_id")); + auto edge_src_colref = make_uniq(edge_table->source_fk[0], edge_table->table_name); + vector> count_children; + 
count_children.push_back(std::move(edge_src_colref)); + auto count_function = make_uniq("count", std::move(count_children)); + count_function->alias = "cnt"; + inner_select_node->select_list.emplace_back(std::move(count_function)); + + auto left_join_ref = make_uniq(JoinRefType::REGULAR); + left_join_ref->type = JoinType::LEFT; + left_join_ref->left = edge_table->source_pg_table->CreateBaseTableRef(prev_binding); + left_join_ref->right = edge_table->CreateBaseTableRef(edge_table->table_name_alias); + + auto join_condition = make_uniq( + ExpressionType::COMPARE_EQUAL, make_uniq(edge_table->source_fk[0], edge_table->table_name), + make_uniq(edge_table->source_pk[0], prev_binding)); + left_join_ref->condition = std::move(join_condition); + inner_select_node->from_table = std::move(left_join_ref); + + auto dense_id_colref = make_uniq("dense_id"); + inner_select_node->groups.group_expressions.push_back(std::move(dense_id_colref)); + GroupingSet grouping_set = {0}; + inner_select_node->groups.grouping_sets.push_back(grouping_set); + + inner_select_statement->node = std::move(inner_select_node); + + auto inner_from_subquery = make_uniq(std::move(inner_select_statement), "sub"); + + auto cast_select_node = make_uniq(); + cast_select_node->from_table = std::move(inner_from_subquery); + cast_select_node->select_list.push_back(std::move(sum_function)); + + auto cast_select_stmt = make_uniq(); + cast_select_stmt->node = std::move(cast_select_node); + + auto cast_subquery_expr = make_uniq(); + cast_subquery_expr->subquery = std::move(cast_select_stmt); + cast_subquery_expr->subquery_type = SubqueryType::SCALAR; + + return cast_subquery_expr; +} + +// Helper function to create CSR Vertex Subquery +unique_ptr CreateUndirectedCSRVertexSubquery(const shared_ptr &edge_table, + const string &binding) { + auto count_create_vertex_expr = GetCountTable(edge_table->source_pg_table, binding, edge_table->source_pk[0]); + + vector> csr_vertex_children; + 
csr_vertex_children.push_back(make_uniq(Value::INTEGER(0))); + csr_vertex_children.push_back(std::move(count_create_vertex_expr)); + csr_vertex_children.push_back(make_uniq("dense_id", "sub")); + csr_vertex_children.push_back(make_uniq("cnt", "sub")); + + auto create_vertex_function = make_uniq("create_csr_vertex", std::move(csr_vertex_children)); + vector> sum_children; + sum_children.push_back(std::move(create_vertex_function)); + auto sum_function = make_uniq("sum", std::move(sum_children)); + + vector> multiply_csr_vertex_children; + auto two_constant = make_uniq(Value::INTEGER(2)); + multiply_csr_vertex_children.push_back(std::move(two_constant)); + multiply_csr_vertex_children.push_back(std::move(sum_function)); + + auto multiply_function = make_uniq("multiply", std::move(multiply_csr_vertex_children)); + + auto inner_select_statement = make_uniq(); + auto inner_select_node = make_uniq(); + + auto dense_id_ref = make_uniq("dense_id"); + + auto count_create_inner_expr = make_uniq(); + count_create_inner_expr->subquery_type = SubqueryType::SCALAR; + auto outgoing_edges_ref = make_uniq("outgoing_edges"); + vector> inner_count_children; + inner_count_children.push_back(std::move(outgoing_edges_ref)); + auto inner_count_function = make_uniq("count", std::move(inner_count_children)); + inner_count_function->alias = "cnt"; + + inner_select_node->select_list.push_back(std::move(dense_id_ref)); + inner_select_node->select_list.push_back(std::move(inner_count_function)); + + auto dense_id_colref = make_uniq("dense_id"); + inner_select_node->groups.group_expressions.push_back(std::move(dense_id_colref)); + GroupingSet grouping_set = {0}; + inner_select_node->groups.grouping_sets.push_back(grouping_set); + + unique_ptr unique_edges_select_node, unique_edges_select_node_reverse; + + SetupSelectNode(unique_edges_select_node, edge_table, false); + SetupSelectNode(unique_edges_select_node_reverse, edge_table, true); + + auto union_all_node = make_uniq(); + 
union_all_node->setop_type = SetOperationType::UNION_BY_NAME; + union_all_node->children.push_back(std::move(unique_edges_select_node)); + union_all_node->children.push_back(std::move(unique_edges_select_node_reverse)); + + auto subquery_select_statement = make_uniq(); + subquery_select_statement->node = std::move(union_all_node); + auto unique_edges_subquery = make_uniq(std::move(subquery_select_statement), "unique_edges"); + + inner_select_node->from_table = std::move(unique_edges_subquery); + inner_select_statement->node = std::move(inner_select_node); + + auto inner_from_subquery = make_uniq(std::move(inner_select_statement), "sub"); + + auto cast_select_node = make_uniq(); + cast_select_node->from_table = std::move(inner_from_subquery); + cast_select_node->select_list.push_back(std::move(multiply_function)); + + auto cast_select_stmt = make_uniq(); + cast_select_stmt->node = std::move(cast_select_node); + + auto cast_subquery_expr = make_uniq(); + cast_subquery_expr->subquery = std::move(cast_select_stmt); + cast_subquery_expr->subquery_type = SubqueryType::SCALAR; + + return cast_subquery_expr; +} + +// Helper function to create outer select edges node +unique_ptr CreateOuterSelectEdgesNode() { + auto outer_select_edges_node = make_uniq(); + outer_select_edges_node->select_list.push_back(make_uniq("src")); + outer_select_edges_node->select_list.push_back(make_uniq("dst")); + + vector> any_value_children; + any_value_children.push_back(make_uniq("edges")); + auto any_value_function = make_uniq("any_value", std::move(any_value_children)); + any_value_function->alias = "edge"; + outer_select_edges_node->select_list.push_back(std::move(any_value_function)); + + outer_select_edges_node->groups.group_expressions.push_back(make_uniq("src")); + outer_select_edges_node->groups.group_expressions.push_back(make_uniq("dst")); + GroupingSet outer_grouping_set = {0, 1}; + outer_select_edges_node->groups.grouping_sets.push_back(outer_grouping_set); + + return 
outer_select_edges_node; +} + +// Helper function to create outer select node +unique_ptr CreateOuterSelectNode(unique_ptr create_csr_edge_function) { + auto outer_select_node = make_uniq(); + create_csr_edge_function->alias = "temp"; + outer_select_node->select_list.push_back(std::move(create_csr_edge_function)); + return outer_select_node; +} + +// Function to create the CTE for the edges +unique_ptr MakeEdgesCTE(const shared_ptr &edge_table) { + std::vector> select_expression; + auto src_col_ref = make_uniq("rowid", "src_table"); + src_col_ref->alias = "src"; + + select_expression.emplace_back(std::move(src_col_ref)); + + auto dst_col_ref = make_uniq("rowid", "dst_table"); + dst_col_ref->alias = "dst"; + select_expression.emplace_back(std::move(dst_col_ref)); + + auto edge_col_ref = make_uniq("rowid", edge_table->table_name); + edge_col_ref->alias = "edges"; + select_expression.emplace_back(std::move(edge_col_ref)); + + auto select_node = make_uniq(); + select_node->select_list = std::move(select_expression); + + auto join_ref = make_uniq(JoinRefType::REGULAR); + auto first_join_ref = make_uniq(JoinRefType::REGULAR); + first_join_ref->type = JoinType::INNER; + first_join_ref->left = edge_table->CreateBaseTableRef(); + first_join_ref->right = edge_table->source_pg_table->CreateBaseTableRef("src_table"); + + auto edge_from_ref = make_uniq(edge_table->source_fk[0], edge_table->table_name); + auto src_cid_ref = make_uniq(edge_table->source_pk[0], "src_table"); + first_join_ref->condition = make_uniq(ExpressionType::COMPARE_EQUAL, std::move(edge_from_ref), + std::move(src_cid_ref)); + + auto second_join_ref = make_uniq(JoinRefType::REGULAR); + second_join_ref->type = JoinType::INNER; + second_join_ref->left = std::move(first_join_ref); + second_join_ref->right = edge_table->destination_pg_table->CreateBaseTableRef("dst_table"); + + auto edge_to_ref = make_uniq(edge_table->destination_fk[0], edge_table->table_name); + auto dst_cid_ref = 
make_uniq(edge_table->destination_pk[0], "dst_table"); + second_join_ref->condition = + make_uniq(ExpressionType::COMPARE_EQUAL, std::move(edge_to_ref), std::move(dst_cid_ref)); + + select_node->from_table = std::move(second_join_ref); + + auto select_statement = make_uniq(); + select_statement->node = std::move(select_node); + + auto result = make_uniq(); + result->query = std::move(select_statement); + return result; +} + +// Function to create the CTE for the Undirected CSR +unique_ptr CreateUndirectedCSRCTE(const shared_ptr &edge_table, + const unique_ptr &select_node) { + if (select_node->cte_map.map.find("edges_cte") == select_node->cte_map.map.end()) { + select_node->cte_map.map["edges_cte"] = MakeEdgesCTE(edge_table); + } + + auto csr_edge_id_constant = make_uniq(Value::INTEGER(0)); + auto count_create_edge_select = + GetCountTable(edge_table->source_pg_table, edge_table->source_reference, edge_table->source_pk[0]); + + auto count_edges_subquery = GetCountUndirectedEdgeTable(); + + auto cast_subquery_expr = CreateUndirectedCSRVertexSubquery(edge_table, edge_table->source_reference); + + auto src_rowid_colref = make_uniq("src"); + auto dst_rowid_colref = make_uniq("dst"); + auto edge_rowid_colref = make_uniq("edge"); + + auto cast_expression = make_uniq(LogicalType::BIGINT, std::move(cast_subquery_expr)); + + vector> csr_edge_children; + csr_edge_children.push_back(std::move(csr_edge_id_constant)); + csr_edge_children.push_back(std::move(count_create_edge_select)); + csr_edge_children.push_back(std::move(cast_expression)); + csr_edge_children.push_back(std::move(count_edges_subquery)); + csr_edge_children.push_back(std::move(src_rowid_colref)); + csr_edge_children.push_back(std::move(dst_rowid_colref)); + csr_edge_children.push_back(std::move(edge_rowid_colref)); + + auto create_csr_edge_function = make_uniq("create_csr_edge", std::move(csr_edge_children)); + auto outer_select_node = CreateOuterSelectNode(std::move(create_csr_edge_function)); + + auto 
outer_select_edges_node = CreateOuterSelectEdgesNode(); + + auto outer_union_all_node = make_uniq(); + outer_union_all_node->setop_all = true; + outer_union_all_node->setop_type = SetOperationType::UNION; + + auto src_dst_select_node = make_uniq(); + + src_dst_select_node->from_table = std::move(CreateBaseTableRef("edges_cte")); + src_dst_select_node->select_list.push_back(make_uniq("src")); + src_dst_select_node->select_list.push_back(make_uniq("dst")); + src_dst_select_node->select_list.push_back(make_uniq("edges")); + + auto dst_src_select_node = make_uniq(); + dst_src_select_node->from_table = std::move(CreateBaseTableRef("edges_cte")); + dst_src_select_node->select_list.push_back(make_uniq("dst")); + dst_src_select_node->select_list.push_back(make_uniq("src")); + dst_src_select_node->select_list.push_back(make_uniq("edges")); + + outer_union_all_node->children.push_back(std::move(src_dst_select_node)); + outer_union_all_node->children.push_back(std::move(dst_src_select_node)); + + auto outer_union_select_statement = make_uniq(); + outer_union_select_statement->node = std::move(outer_union_all_node); + outer_select_edges_node->from_table = make_uniq(std::move(outer_union_select_statement)); + + auto outer_select_edges_select_statement = make_uniq(); + outer_select_edges_select_statement->node = std::move(outer_select_edges_node); + outer_select_node->from_table = make_uniq(std::move(outer_select_edges_select_statement)); + + auto outer_select_statement = make_uniq(); + outer_select_statement->node = std::move(outer_select_node); + auto info = make_uniq(); + info->query = std::move(outer_select_statement); + return info; +} + +unique_ptr GetCountUndirectedEdgeTable() { + auto count_edges_select_statement = make_uniq(); + auto count_edges_select_node = make_uniq(); + vector> count_children; + auto count_function = make_uniq("count", std::move(count_children)); + vector> multiply_children; + auto constant_two = make_uniq(Value::BIGINT(2)); + 
multiply_children.push_back(std::move(constant_two)); + multiply_children.push_back(std::move(count_function)); + auto multiply_function = make_uniq("multiply", std::move(multiply_children)); + count_edges_select_node->select_list.emplace_back(std::move(multiply_function)); + + auto inner_select_statement = make_uniq(); + + auto src_dst_select_node = make_uniq(); + src_dst_select_node->select_list.emplace_back(CreateColumnRefExpression("src")); + src_dst_select_node->select_list.emplace_back(CreateColumnRefExpression("dst")); + + src_dst_select_node->from_table = std::move(CreateBaseTableRef("edges_cte")); + + auto dst_src_select_node = make_uniq(); + dst_src_select_node->select_list.emplace_back(CreateColumnRefExpression("dst", "", "src")); + dst_src_select_node->select_list.emplace_back(CreateColumnRefExpression("src", "", "dst")); + dst_src_select_node->from_table = CreateBaseTableRef("edges_cte"); + + auto union_by_name_node = make_uniq(); + union_by_name_node->setop_all = false; + union_by_name_node->setop_type = SetOperationType::UNION_BY_NAME; + union_by_name_node->children.push_back(std::move(src_dst_select_node)); + union_by_name_node->children.push_back(std::move(dst_src_select_node)); + inner_select_statement->node = std::move(union_by_name_node); + auto inner_from_subquery = make_uniq(std::move(inner_select_statement)); + count_edges_select_node->from_table = std::move(inner_from_subquery); + count_edges_select_statement->node = std::move(count_edges_select_node); + auto result = make_uniq(); + result->subquery = std::move(count_edges_select_statement); + result->subquery_type = SubqueryType::SCALAR; + return result; +} + +unique_ptr GetCountEdgeTable(const shared_ptr &edge_table) { + auto result = make_uniq(); + auto outer_select_statement = make_uniq(); + auto outer_select_node = make_uniq(); + vector> count_children; + outer_select_node->select_list.push_back(make_uniq("count", std::move(count_children))); + auto inner_select_node = make_uniq(); + + 
auto first_join = make_uniq(JoinRefType::REGULAR); + first_join->left = edge_table->CreateBaseTableRef(); + first_join->right = edge_table->source_pg_table->CreateBaseTableRef("src"); + first_join->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, make_uniq(edge_table->source_fk[0], edge_table->table_name), + make_uniq(edge_table->source_pk[0], "src")); + auto second_join = make_uniq(JoinRefType::REGULAR); + second_join->left = std::move(first_join); + second_join->right = edge_table->destination_pg_table->CreateBaseTableRef("dst"); + second_join->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, + make_uniq(edge_table->destination_fk[0], edge_table->table_name), + make_uniq(edge_table->destination_pk[0], "dst")); + outer_select_node->from_table = std::move(second_join); + outer_select_statement->node = std::move(outer_select_node); + result->subquery = std::move(outer_select_statement); + result->subquery_type = SubqueryType::SCALAR; + return result; +} + +// Function to create the CTE for the Directed CSR +unique_ptr CreateDirectedCSRCTE(const shared_ptr &edge_table, + const string &prev_binding, const string &edge_binding, + const string &next_binding) { + auto csr_edge_id_constant = make_uniq(Value::INTEGER(0)); + auto count_create_edge_select = GetCountTable(edge_table->source_pg_table, prev_binding, edge_table->source_pk[0]); + + auto cast_subquery_expr = CreateDirectedCSRVertexSubquery(edge_table, prev_binding); + auto count_edge_table = GetCountEdgeTable(edge_table); // Count the number of edges + + auto src_rowid_colref = make_uniq("rowid", prev_binding); + auto dst_rowid_colref = make_uniq("rowid", next_binding); + auto edge_rowid_colref = make_uniq("rowid", edge_binding); + + auto cast_expression = make_uniq(LogicalType::BIGINT, std::move(cast_subquery_expr)); + + vector> csr_edge_children; + csr_edge_children.push_back(std::move(csr_edge_id_constant)); + csr_edge_children.push_back(std::move(count_create_edge_select)); + 
csr_edge_children.push_back(std::move(cast_expression)); + csr_edge_children.push_back(std::move(count_edge_table)); + csr_edge_children.push_back(std::move(src_rowid_colref)); + csr_edge_children.push_back(std::move(dst_rowid_colref)); + csr_edge_children.push_back(std::move(edge_rowid_colref)); + + auto create_csr_edge_function = make_uniq("create_csr_edge", std::move(csr_edge_children)); + auto outer_select_node = CreateOuterSelectNode(std::move(create_csr_edge_function)); + + outer_select_node->from_table = GetJoinRef(edge_table, edge_binding, prev_binding, next_binding); + + auto outer_select_statement = make_uniq(); + outer_select_statement->node = std::move(outer_select_node); + + auto info = make_uniq(); + info->query = std::move(outer_select_statement); + return info; +} + +// Function to create a subquery for counting with CTE +unique_ptr CreateCountCTESubquery() { + auto temp_cte_select_node = make_uniq(); + + auto cte_table_ref = make_uniq(); + cte_table_ref->table_name = "csr_cte"; + temp_cte_select_node->from_table = std::move(cte_table_ref); + + vector> children; + children.push_back(make_uniq("temp", "csr_cte")); + + auto count_function = make_uniq("count", std::move(children)); + auto zero = make_uniq(Value::INTEGER((int32_t)0)); + + vector> multiply_children; + multiply_children.push_back(std::move(zero)); + multiply_children.push_back(std::move(count_function)); + + auto multiply_function = make_uniq("multiply", std::move(multiply_children)); + multiply_function->alias = "temp"; + temp_cte_select_node->select_list.push_back(std::move(multiply_function)); + + auto temp_cte_select_statement = make_uniq(); + temp_cte_select_statement->node = std::move(temp_cte_select_node); + + return make_uniq(std::move(temp_cte_select_statement), "__x"); +} + +} // namespace duckdb diff --git a/src/core/utils/duckpgq_bitmap.cpp b/src/core/utils/duckpgq_bitmap.cpp new file mode 100644 index 00000000..0ed177d3 --- /dev/null +++ b/src/core/utils/duckpgq_bitmap.cpp @@ 
-0,0 +1,21 @@ +#include "duckpgq/core/utils/duckpgq_bitmap.hpp" + +namespace duckdb { + +DuckPGQBitmap::DuckPGQBitmap(size_t size) { + bitmap.resize((size + 63) / 64, 0); +} + +void DuckPGQBitmap::set(size_t index) { + bitmap[index / 64] |= (1ULL << (index % 64)); +} + +bool DuckPGQBitmap::test(size_t index) const { + return (bitmap[index / 64] & (1ULL << (index % 64))) != 0; +} + +void DuckPGQBitmap::reset() { + fill(bitmap.begin(), bitmap.end(), 0); +} + +} // namespace duckdb diff --git a/src/core/utils/duckpgq_utils.cpp b/src/core/utils/duckpgq_utils.cpp new file mode 100644 index 00000000..87610ba1 --- /dev/null +++ b/src/core/utils/duckpgq_utils.cpp @@ -0,0 +1,121 @@ +#include "duckpgq/core/utils/duckpgq_utils.hpp" +#include "duckpgq/common.hpp" +#include "duckdb/parser/statement/copy_statement.hpp" + +#include "duckpgq/core/functions/table/describe_property_graph.hpp" +#include "duckpgq/core/functions/table/drop_property_graph.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/tableref/joinref.hpp" +#include "duckdb/parser/tableref/basetableref.hpp" +#include "duckdb/parser/expression/columnref_expression.hpp" +#include "duckdb/parser/tableref/subqueryref.hpp" + +namespace duckdb { + +// Function to get DuckPGQState from ClientContext +shared_ptr GetDuckPGQState(ClientContext &context, bool throw_not_found_error) { + auto lookup = context.registered_state->Get("duckpgq"); + if (lookup) { + return lookup; + } + if (throw_not_found_error) { + throw Exception(ExceptionType::INVALID, "Registered DuckPGQ state not found"); + } + shared_ptr state = make_shared_ptr(); + context.registered_state->Insert("duckpgq", state); + state->InitializeInternalTable(context); + auto connection = make_shared_ptr(*context.db); + state->RetrievePropertyGraphs(connection); + return state; +} + +// Function to get PropertyGraphInfo from DuckPGQState +CreatePropertyGraphInfo *GetPropertyGraphInfo(const shared_ptr &duckpgq_state, const string 
&pg_name) { + auto property_graph = duckpgq_state->registered_property_graphs.find(pg_name); + if (property_graph == duckpgq_state->registered_property_graphs.end()) { + throw Exception(ExceptionType::INVALID, "Property graph " + pg_name + " not found"); + } + return dynamic_cast(property_graph->second.get()); +} + +// Function to validate the source node and edge table +shared_ptr ValidateSourceNodeAndEdgeTable(CreatePropertyGraphInfo *pg_info, + const std::string &node_label, + const std::string &edge_label) { + auto source_node_pg_entry = pg_info->GetTableByLabel(node_label, true, true); + if (!source_node_pg_entry->is_vertex_table) { + throw Exception(ExceptionType::INVALID, node_label + " is an edge table, expected a vertex table"); + } + auto edge_pg_entry = pg_info->GetTableByLabel(edge_label, true, false); + if (edge_pg_entry->is_vertex_table) { + throw Exception(ExceptionType::INVALID, edge_label + " is a vertex table, expected an edge table"); + } + if (!edge_pg_entry->IsSourceTable(source_node_pg_entry->table_name)) { + throw Exception(ExceptionType::INVALID, + "Vertex table " + node_label + " is not a source of edge table " + edge_label); + } + return edge_pg_entry; +} + +// Function to create the SELECT node +unique_ptr CreateSelectNode(const shared_ptr &edge_pg_entry, + const string &function_name, const string &function_alias) { + auto select_node = make_uniq(); + std::vector> select_expression; + + select_expression.emplace_back( + make_uniq(edge_pg_entry->source_pk[0], edge_pg_entry->source_reference)); + + auto cte_col_ref = make_uniq("temp", "__x"); + + vector> function_children; + function_children.push_back(make_uniq(Value::INTEGER(0))); + function_children.push_back(make_uniq("rowid", edge_pg_entry->source_reference)); + auto function = make_uniq(function_name, std::move(function_children)); + + std::vector> addition_children; + addition_children.emplace_back(std::move(cte_col_ref)); + addition_children.emplace_back(std::move(function)); + + 
auto addition_function = make_uniq("add", std::move(addition_children)); + addition_function->alias = function_alias; + select_expression.emplace_back(std::move(addition_function)); + select_node->select_list = std::move(select_expression); + + auto src_base_ref = edge_pg_entry->source_pg_table->CreateBaseTableRef(); + + auto temp_cte_select_subquery = CreateCountCTESubquery(); + + auto cross_join_ref = make_uniq(JoinRefType::CROSS); + cross_join_ref->left = std::move(src_base_ref); + cross_join_ref->right = std::move(temp_cte_select_subquery); + + select_node->from_table = std::move(cross_join_ref); + + return select_node; +} + +unique_ptr CreateBaseTableRef(const string &table_name, const string &alias) { + auto base_table_ref = make_uniq(); + base_table_ref->table_name = table_name; + if (!alias.empty()) { + base_table_ref->alias = alias; + } + return base_table_ref; +} + +unique_ptr CreateColumnRefExpression(const string &column_name, const string &table_name, + const string &alias) { + unique_ptr column_ref; + if (table_name.empty()) { + column_ref = make_uniq(column_name); + } else { + column_ref = make_uniq(column_name, table_name); + } + if (!alias.empty()) { + column_ref->alias = alias; + } + return column_ref; +} + +} // namespace duckdb diff --git a/src/duckpgq_extension.cpp b/src/duckpgq_extension.cpp new file mode 100644 index 00000000..3ffc47db --- /dev/null +++ b/src/duckpgq_extension.cpp @@ -0,0 +1,30 @@ +#define DUCKDB_EXTENSION_MAIN + +#include "duckpgq_extension.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/core/module.hpp" +#include +#include "duckdb/main/connection_manager.hpp" + +namespace duckdb { + +static void LoadInternal(ExtensionLoader &loader) { + CoreModule::Register(loader); +} + +void DuckpgqExtension::Load(ExtensionLoader &loader) { + LoadInternal(loader); +} + +std::string DuckpgqExtension::Name() { + return "duckpgq"; +} + +} // namespace duckdb + +extern "C" { + +DUCKDB_CPP_EXTENSION_ENTRY(duckpgq, loader) { + 
duckdb::LoadInternal(loader); +} +} diff --git a/src/duckpgq_state.cpp b/src/duckpgq_state.cpp new file mode 100644 index 00000000..7cfe1f70 --- /dev/null +++ b/src/duckpgq_state.cpp @@ -0,0 +1,188 @@ +#include "duckpgq_state.hpp" + +namespace duckdb { + +void DuckPGQState::InitializeInternalTable(ClientContext &context) { + auto connection = make_shared_ptr(*context.db); + auto query = connection->Query("CREATE TABLE IF NOT EXISTS __duckpgq_internal (" + "property_graph varchar, " + "table_name varchar, " + "label varchar, " + "is_vertex_table boolean, " + "source_table varchar, " + "source_pk varchar[], " + "source_fk varchar[], " + "destination_table varchar, " + "destination_pk varchar[], " + "destination_fk varchar[], " + "discriminator varchar, " + "sub_labels varchar[], " + "catalog varchar, " + "schema varchar," + "source_catalog varchar, " + "source_schema varchar, " + "destination_catalog varchar, " + "destination_schema varchar, " + "properties varchar[], " + "column_aliases varchar[]" + ")"); + if (query->HasError()) { + throw TransactionException(query->GetError()); + } +} + +void DuckPGQState::RetrievePropertyGraphs(const shared_ptr &connection) { + // Retrieve and process vertex property graphs + auto vertex_property_graphs = connection->Query("SELECT * FROM __duckpgq_internal WHERE is_vertex_table"); + ProcessPropertyGraphs(vertex_property_graphs, true); + + // Retrieve and process edge property graphs + auto edge_property_graphs = connection->Query("SELECT * FROM __duckpgq_internal WHERE NOT is_vertex_table"); + ProcessPropertyGraphs(edge_property_graphs, false); +} + +void DuckPGQState::ProcessPropertyGraphs(unique_ptr &property_graphs, bool is_vertex) { + if (!property_graphs || property_graphs->type != QueryResultType::MATERIALIZED_RESULT) { + throw std::runtime_error("Failed to fetch property graphs or invalid result type."); + } + + auto &materialized_result = property_graphs->Cast(); + auto row_count = materialized_result.RowCount(); + if 
(row_count == 0) { + return; // No results + } + + auto chunk = materialized_result.Fetch(); + for (idx_t i = 0; i < row_count; i++) { + auto table = make_shared_ptr(); + + // Extract and validate common properties + table->table_name = chunk->GetValue(1, i).GetValue(); + table->main_label = chunk->GetValue(2, i).GetValue(); + table->is_vertex_table = chunk->GetValue(3, i).GetValue(); + + // Handle discriminator and sub-labels + const auto &discriminator = chunk->GetValue(10, i).GetValue(); + if (discriminator != "NULL") { + table->discriminator = discriminator; + auto sublabels = ListValue::GetChildren(chunk->GetValue(11, i)); + for (const auto &sublabel : sublabels) { + table->sub_labels.push_back(sublabel.GetValue()); + } + } + + // Extract catalog and schema names + if (chunk->ColumnCount() > 12) { + table->catalog_name = chunk->GetValue(12, i).GetValue(); + table->schema_name = chunk->GetValue(13, i).GetValue(); + } else { + table->catalog_name = ""; + table->schema_name = DEFAULT_SCHEMA; + } + if (chunk->ColumnCount() > 14) { + table->source_catalog = chunk->GetValue(14, i).GetValue(); + table->source_schema = chunk->GetValue(15, i).GetValue(); + table->destination_catalog = chunk->GetValue(16, i).GetValue(); + table->destination_schema = chunk->GetValue(17, i).GetValue(); + } else { + table->source_catalog = ""; + table->schema_name = DEFAULT_SCHEMA; + table->destination_catalog = ""; + table->destination_schema = DEFAULT_SCHEMA; + } + if (chunk->ColumnCount() > 18) { + // read properties + auto properties = ListValue::GetChildren(chunk->GetValue(18, i)); + for (const auto &property : properties) { + table->column_names.push_back(property.GetValue()); + } + auto column_aliases = ListValue::GetChildren(chunk->GetValue(19, i)); + for (const auto &alias : column_aliases) { + table->column_aliases.push_back(alias.GetValue()); + } + } else { + table->all_columns = true; + } + + // Additional edge-specific handling + if (!is_vertex) { + 
PopulateEdgeSpecificFields(chunk, i, *table); + } + + RegisterPropertyGraph(table, chunk->GetValue(0, i).GetValue(), is_vertex); + } +} + +void DuckPGQState::PopulateEdgeSpecificFields(unique_ptr &chunk, idx_t row_idx, PropertyGraphTable &table) { + table.source_reference = chunk->GetValue(4, row_idx).GetValue(); + ExtractListValues(chunk->GetValue(5, row_idx), table.source_pk); + ExtractListValues(chunk->GetValue(6, row_idx), table.source_fk); + table.destination_reference = chunk->GetValue(7, row_idx).GetValue(); + ExtractListValues(chunk->GetValue(8, row_idx), table.destination_pk); + ExtractListValues(chunk->GetValue(9, row_idx), table.destination_fk); +} + +void DuckPGQState::ExtractListValues(const Value &list_value, vector &output) { + auto children = ListValue::GetChildren(list_value); + output.reserve(output.size() + children.size()); + for (const auto &child : children) { + output.push_back(child.GetValue()); + } +} + +void DuckPGQState::RegisterPropertyGraph(const shared_ptr &table, const string &graph_name, + bool is_vertex) { + // Ensure the property graph exists in the registry + if (registered_property_graphs.find(graph_name) == registered_property_graphs.end()) { + registered_property_graphs[graph_name] = make_uniq(graph_name); + } + + auto &pg_info = registered_property_graphs[graph_name]->Cast(); + pg_info.label_map[table->main_label] = table; + + if (!table->discriminator.empty()) { + for (const auto &label : table->sub_labels) { + pg_info.label_map[label] = table; + } + } + + if (is_vertex) { + pg_info.vertex_tables.push_back(table); + } else { + table->source_pg_table = + pg_info.GetTableByName(table->source_catalog, table->source_schema, table->source_reference); + D_ASSERT(table->source_pg_table); + table->destination_pg_table = + pg_info.GetTableByName(table->destination_catalog, table->destination_schema, table->destination_reference); + D_ASSERT(table->destination_pg_table); + pg_info.edge_tables.push_back(table); + } +} + +void 
DuckPGQState::QueryEnd() { + parse_data.reset(); + transform_expression.clear(); + match_index = 0; // Reset the index + for (const auto &csr_id : csr_to_delete) { + csr_list.erase(csr_id); + } + csr_to_delete.clear(); +} + +CreatePropertyGraphInfo *DuckPGQState::GetPropertyGraph(const string &pg_name) { + auto pg_table_entry = registered_property_graphs.find(pg_name); + if (pg_table_entry == registered_property_graphs.end()) { + throw BinderException("Property graph %s does not exist", pg_name); + } + return reinterpret_cast(pg_table_entry->second.get()); +} + +CSR *DuckPGQState::GetCSR(int32_t id) { + auto csr_entry = csr_list.find(id); + if (csr_entry == csr_list.end()) { + throw ConstraintException("CSR not found with ID %d", id); + } + return csr_entry->second.get(); +} + +} // namespace duckdb diff --git a/src/include/boilerplate_extension.hpp b/src/include/boilerplate_extension.hpp deleted file mode 100644 index 3a3af8c6..00000000 --- a/src/include/boilerplate_extension.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include "duckdb.hpp" - -namespace duckdb { - -class BoilerplateExtension : public Extension { -public: - void Load(DuckDB &db) override; - std::string Name() override; -}; - -} // namespace duckdb diff --git a/src/include/duckpgq/common.hpp b/src/include/duckpgq/common.hpp new file mode 100644 index 00000000..4b39f7b8 --- /dev/null +++ b/src/include/duckpgq/common.hpp @@ -0,0 +1,5 @@ +#pragma once + +#include "duckdb.hpp" +#include "duckdb/common/helper.hpp" +// TODO doc util diff --git a/src/include/duckpgq/core/functions/function_data/cheapest_path_length_function_data.hpp b/src/include/duckpgq/core/functions/function_data/cheapest_path_length_function_data.hpp new file mode 100644 index 00000000..e8963098 --- /dev/null +++ b/src/include/duckpgq/core/functions/function_data/cheapest_path_length_function_data.hpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// 
duckpgq/core/functions/function_data/cheapest_path_length_function_data.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckpgq/common.hpp" +#include "duckdb/main/client_context.hpp" + +namespace duckdb { + +struct CheapestPathLengthFunctionData final : FunctionData { + ClientContext &context; + int32_t csr_id; + + CheapestPathLengthFunctionData(ClientContext &context, int32_t csr_id) : context(context), csr_id(csr_id) { + } + static unique_ptr CheapestPathLengthBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments); + + unique_ptr Copy() const override; + bool Equals(const FunctionData &other_p) const override; +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/function_data/iterative_length_function_data.hpp b/src/include/duckpgq/core/functions/function_data/iterative_length_function_data.hpp new file mode 100644 index 00000000..2395e2d0 --- /dev/null +++ b/src/include/duckpgq/core/functions/function_data/iterative_length_function_data.hpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/core/functions/function_data/iterative_length_function_data.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckdb/main/client_context.hpp" +#include "duckpgq/common.hpp" + +namespace duckdb { + +struct IterativeLengthFunctionData final : FunctionData { + ClientContext &context; + int32_t csr_id; + + IterativeLengthFunctionData(ClientContext &context, int32_t csr_id) : context(context), csr_id(csr_id) { + } + static unique_ptr IterativeLengthBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments); + + unique_ptr Copy() const override; + bool Equals(const FunctionData &other_p) const override; +}; + +} // namespace duckdb diff --git 
a/src/include/duckpgq/core/functions/function_data/local_clustering_coefficient_function_data.hpp b/src/include/duckpgq/core/functions/function_data/local_clustering_coefficient_function_data.hpp new file mode 100644 index 00000000..50bf3e8e --- /dev/null +++ b/src/include/duckpgq/core/functions/function_data/local_clustering_coefficient_function_data.hpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/core/functions/function_data/local_clustering_coefficient_function_data.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckdb/main/client_context.hpp" +#include "duckpgq/common.hpp" + +namespace duckdb { + +struct LocalClusteringCoefficientFunctionData final : FunctionData { + ClientContext &context; + int32_t csr_id; + + LocalClusteringCoefficientFunctionData(ClientContext &context, int32_t csr_id); + static unique_ptr LocalClusteringCoefficientBind(ClientContext &context, + ScalarFunction &bound_function, + vector> &arguments); + + unique_ptr Copy() const override; + bool Equals(const FunctionData &other_p) const override; +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/function_data/pagerank_function_data.hpp b/src/include/duckpgq/core/functions/function_data/pagerank_function_data.hpp new file mode 100644 index 00000000..6f185b1c --- /dev/null +++ b/src/include/duckpgq/core/functions/function_data/pagerank_function_data.hpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/core/functions/function_data/pagerank_function_data.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckdb/main/client_context.hpp" +#include "duckpgq/common.hpp" + +namespace duckdb { + +struct PageRankFunctionData final : FunctionData { + ClientContext &context; + 
int32_t csr_id; + vector rank; + vector temp_rank; + double_t damping_factor; + double_t convergence_threshold; + int64_t iteration_count; + std::mutex state_lock; // Lock for state + bool state_initialized; + bool converged; + + PageRankFunctionData(ClientContext &context, int32_t csr_id); + PageRankFunctionData(ClientContext &context, int32_t csr_id, const vector &componentId); + static unique_ptr PageRankBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments); + + unique_ptr Copy() const override; + bool Equals(const FunctionData &other_p) const override; +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/function_data/weakly_connected_component_function_data.hpp b/src/include/duckpgq/core/functions/function_data/weakly_connected_component_function_data.hpp new file mode 100644 index 00000000..b43d3018 --- /dev/null +++ b/src/include/duckpgq/core/functions/function_data/weakly_connected_component_function_data.hpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/core/functions/function_data/weakly_connected_component_function_data.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckdb/main/client_context.hpp" +#include "duckpgq/common.hpp" + +namespace duckdb { + +struct WeaklyConnectedComponentFunctionData final : FunctionData { + ClientContext &context; + int32_t csr_id; + std::mutex wcc_lock; + std::mutex initialize_lock; + bool state_converged; + bool state_initialized; + vector forest; + + WeaklyConnectedComponentFunctionData(ClientContext &context, int32_t csr_id); + + static unique_ptr WeaklyConnectedComponentBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments); + + unique_ptr Copy() const override; + bool Equals(const FunctionData &other_p) const override; +}; + +} // namespace duckdb diff --git 
a/src/include/duckpgq/core/functions/scalar.hpp b/src/include/duckpgq/core/functions/scalar.hpp new file mode 100644 index 00000000..7499b1a0 --- /dev/null +++ b/src/include/duckpgq/core/functions/scalar.hpp @@ -0,0 +1,39 @@ +#pragma once +#include "duckpgq/common.hpp" + +namespace duckdb { + +struct CoreScalarFunctions { + static void Register(ExtensionLoader &loader) { + RegisterCheapestPathLengthScalarFunction(loader); + RegisterCSRCreationScalarFunctions(loader); + RegisterCSRDeletionScalarFunction(loader); + RegisterGetCSRWTypeScalarFunction(loader); + RegisterIterativeLengthScalarFunction(loader); + RegisterIterativeLengthBoundedScalarFunction(loader); + RegisterIterativeLength2ScalarFunction(loader); + RegisterIterativeLengthBidirectionalScalarFunction(loader); + RegisterLocalClusteringCoefficientScalarFunction(loader); + RegisterReachabilityScalarFunction(loader); + RegisterShortestPathScalarFunction(loader); + RegisterWeaklyConnectedComponentScalarFunction(loader); + RegisterPageRankScalarFunction(loader); + } + +private: + static void RegisterCheapestPathLengthScalarFunction(ExtensionLoader &loader); + static void RegisterCSRCreationScalarFunctions(ExtensionLoader &loader); + static void RegisterCSRDeletionScalarFunction(ExtensionLoader &loader); + static void RegisterGetCSRWTypeScalarFunction(ExtensionLoader &loader); + static void RegisterIterativeLengthScalarFunction(ExtensionLoader &loader); + static void RegisterIterativeLengthBoundedScalarFunction(ExtensionLoader &loader); + static void RegisterIterativeLength2ScalarFunction(ExtensionLoader &loader); + static void RegisterIterativeLengthBidirectionalScalarFunction(ExtensionLoader &loader); + static void RegisterLocalClusteringCoefficientScalarFunction(ExtensionLoader &loader); + static void RegisterReachabilityScalarFunction(ExtensionLoader &loader); + static void RegisterShortestPathScalarFunction(ExtensionLoader &loader); + static void RegisterWeaklyConnectedComponentScalarFunction(ExtensionLoader 
&loader); + static void RegisterPageRankScalarFunction(ExtensionLoader &loader); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/table.hpp b/src/include/duckpgq/core/functions/table.hpp new file mode 100644 index 00000000..3a487713 --- /dev/null +++ b/src/include/duckpgq/core/functions/table.hpp @@ -0,0 +1,31 @@ +#pragma once +#include "duckpgq/common.hpp" + +namespace duckdb { + +struct CoreTableFunctions { + static void Register(ExtensionLoader &loader) { + RegisterCreatePropertyGraphTableFunction(loader); + RegisterMatchTableFunction(loader); + RegisterDropPropertyGraphTableFunction(loader); + RegisterDescribePropertyGraphTableFunction(loader); + RegisterLocalClusteringCoefficientTableFunction(loader); + RegisterScanTableFunctions(loader); + RegisterSummarizePropertyGraphTableFunction(loader); + RegisterWeaklyConnectedComponentTableFunction(loader); + RegisterPageRankTableFunction(loader); + } + +private: + static void RegisterCreatePropertyGraphTableFunction(ExtensionLoader &loader); + static void RegisterMatchTableFunction(ExtensionLoader &loader); + static void RegisterDropPropertyGraphTableFunction(ExtensionLoader &loader); + static void RegisterDescribePropertyGraphTableFunction(ExtensionLoader &loader); + static void RegisterLocalClusteringCoefficientTableFunction(ExtensionLoader &loader); + static void RegisterScanTableFunctions(ExtensionLoader &loader); + static void RegisterWeaklyConnectedComponentTableFunction(ExtensionLoader &loader); + static void RegisterPageRankTableFunction(ExtensionLoader &loader); + static void RegisterSummarizePropertyGraphTableFunction(ExtensionLoader &loader); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/table/create_property_graph.hpp b/src/include/duckpgq/core/functions/table/create_property_graph.hpp new file mode 100644 index 00000000..a6ddbd7a --- /dev/null +++ b/src/include/duckpgq/core/functions/table/create_property_graph.hpp @@ -0,0 +1,68 @@ 
+//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/functions/tablefunctions/create_property_graph.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckpgq/common.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/parser/parsed_data/create_property_graph_info.hpp" +#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" + +namespace duckdb { + +class CreatePropertyGraphFunction : public TableFunction { +public: + CreatePropertyGraphFunction() { + name = "create_property_graph"; + bind = CreatePropertyGraphBind; + init_global = CreatePropertyGraphInit; + function = CreatePropertyGraphFunc; + } + + struct CreatePropertyGraphBindData : public TableFunctionData { + explicit CreatePropertyGraphBindData(CreatePropertyGraphInfo *pg_info) : create_pg_info(pg_info) { + } + + CreatePropertyGraphInfo *create_pg_info; + }; + + struct CreatePropertyGraphGlobalData : public GlobalTableFunctionState { + CreatePropertyGraphGlobalData() = default; + }; + + static void CheckPropertyGraphTableLabels(const shared_ptr &pg_table, + optional_ptr &table); + + static void CheckPropertyGraphTableColumns(const shared_ptr &pg_table, + optional_ptr &table); + + static reference GetTableCatalogEntry(ClientContext &context, + shared_ptr &pg_table); + + static unique_ptr CreatePropertyGraphBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names); + + static void ValidateVertexTableRegistration(shared_ptr &pg_table, + const case_insensitive_set_t &v_table_names); + + static void ValidatePrimaryKeyInTable(ClientContext &context, shared_ptr &pg_table, + const vector &pk_columns); + + static void ValidateKeys(shared_ptr &edge_table, const string &reference, + const string &key_type, vector &pk_columns, vector &fk_columns, + const vector> &table_constraints); + + static void 
ValidateForeignKeyColumns(shared_ptr &edge_table, const vector &fk_columns, + optional_ptr &table); + + static unique_ptr CreatePropertyGraphInit(ClientContext &context, + TableFunctionInitInput &input); + + static void CreatePropertyGraphFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/table/describe_property_graph.hpp b/src/include/duckpgq/core/functions/table/describe_property_graph.hpp new file mode 100644 index 00000000..85ae8191 --- /dev/null +++ b/src/include/duckpgq/core/functions/table/describe_property_graph.hpp @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/functions/tablefunctions/describe_property_graph.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckpgq/common.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/parser/parsed_data/create_property_graph_info.hpp" +#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" + +namespace duckdb { + +class DescribePropertyGraphFunction : public TableFunction { +public: + DescribePropertyGraphFunction() { + name = "describe_property_graph"; + bind = DescribePropertyGraphBind; + init_global = DescribePropertyGraphInit; + function = DescribePropertyGraphFunc; + } + + struct DescribePropertyGraphBindData : public TableFunctionData { + explicit DescribePropertyGraphBindData(CreatePropertyGraphInfo *pg_info) : describe_pg_info(pg_info) { + } + CreatePropertyGraphInfo *describe_pg_info; + }; + + struct DescribePropertyGraphGlobalData : public GlobalTableFunctionState { + DescribePropertyGraphGlobalData() = default; + bool done = false; + }; + + static unique_ptr DescribePropertyGraphBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names); + + static unique_ptr 
DescribePropertyGraphInit(ClientContext &context, + TableFunctionInitInput &input); + + static void DescribePropertyGraphFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/table/drop_property_graph.hpp b/src/include/duckpgq/core/functions/table/drop_property_graph.hpp new file mode 100644 index 00000000..e4274111 --- /dev/null +++ b/src/include/duckpgq/core/functions/table/drop_property_graph.hpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/functions/tablefunctions/drop_property_graph.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckpgq/common.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/parser/statement/drop_statement.hpp" + +#include + +namespace duckdb { + +class DropPropertyGraphFunction : public TableFunction { +public: + DropPropertyGraphFunction() { + name = "drop_property_graph"; + bind = DropPropertyGraphBind; + init_global = DropPropertyGraphInit; + function = DropPropertyGraphFunc; + } + + struct DropPropertyGraphBindData : TableFunctionData { + explicit DropPropertyGraphBindData(DropPropertyGraphInfo *pg_info) : drop_pg_info(pg_info) { + } + + DropPropertyGraphInfo *drop_pg_info; + }; + + struct DropPropertyGraphGlobalData : GlobalTableFunctionState { + DropPropertyGraphGlobalData() = default; + }; + + static unique_ptr DropPropertyGraphBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names); + + static unique_ptr DropPropertyGraphInit(ClientContext &context, + TableFunctionInitInput &input); + + static void DropPropertyGraphFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/table/local_clustering_coefficient.hpp 
b/src/include/duckpgq/core/functions/table/local_clustering_coefficient.hpp new file mode 100644 index 00000000..487a6c62 --- /dev/null +++ b/src/include/duckpgq/core/functions/table/local_clustering_coefficient.hpp @@ -0,0 +1,50 @@ +#pragma once + +#include "duckpgq/common.hpp" +#include "duckdb/function/table_function.hpp" + +namespace duckdb { + +class LocalClusteringCoefficientFunction : public TableFunction { +public: + LocalClusteringCoefficientFunction() { + name = "local_clustering_coefficient"; + arguments = {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}; + bind_replace = LocalClusteringCoefficientBindReplace; + } + + static unique_ptr LocalClusteringCoefficientBindReplace(ClientContext &context, + TableFunctionBindInput &input); +}; + +struct LocalClusteringCoefficientData : TableFunctionData { + static unique_ptr LocalClusteringCoefficientBind(ClientContext &context, + TableFunctionBindInput &input, + vector &return_types, + vector &names) { + auto result = make_uniq(); + result->pg_name = StringValue::Get(input.inputs[0]); + result->node_table = StringValue::Get(input.inputs[1]); + result->edge_table = StringValue::Get(input.inputs[2]); + return_types.emplace_back(LogicalType::BIGINT); + return_types.emplace_back(LogicalType::FLOAT); + names.emplace_back("rowid"); + names.emplace_back("local_clustering_coefficient"); + return std::move(result); + } + + string pg_name; + string node_table; + string edge_table; +}; + +struct LocalClusteringCoefficientScanState : GlobalTableFunctionState { + static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { + auto result = make_uniq(); + return std::move(result); + } + + bool finished = false; +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/table/match.hpp b/src/include/duckpgq/core/functions/table/match.hpp new file mode 100644 index 00000000..2f5e5256 --- /dev/null +++ b/src/include/duckpgq/core/functions/table/match.hpp @@ -0,0 +1,143 @@ 
+//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/core/functions/table/match.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckpgq/common.hpp" +#include + +#include "duckdb/function/table_function.hpp" +#include "duckdb/parser/parsed_data/create_property_graph_info.hpp" +#include "duckdb/parser/path_element.hpp" +#include "duckdb/parser/path_pattern.hpp" +#include "duckdb/parser/subpath_element.hpp" +#include "duckdb/parser/property_graph_table.hpp" + +#include + +namespace duckdb { + +struct PGQMatchFunction : public TableFunction { +public: + PGQMatchFunction() { + name = "duckpgq_match"; + arguments.push_back(LogicalType::INTEGER); + bind_replace = MatchBindReplace; + } + + struct MatchBindData : public TableFunctionData { + bool done = false; + }; + + static shared_ptr FindGraphTable(const string &label, CreatePropertyGraphInfo &pg_table); + + static void CheckInheritance(const shared_ptr &tableref, PathElement *element, + vector> &conditions); + + static void CheckEdgeTableConstraints(const string &src_reference, const string &dst_reference, + const shared_ptr &edge_table); + + static unique_ptr CreateMatchJoinExpression(vector vertex_keys, vector edge_keys, + const string &vertex_alias, const string &edge_alias); + + // Populate all vertex and edge tables and their alias into + // [alias_to_vertex_and_edge_tables], from paths information from + // [path_reference]. 
+ static void + PopulateGraphTableAliasMap(const CreatePropertyGraphInfo &pg_table, const unique_ptr &path_reference, + case_insensitive_map_t> &alias_to_vertex_and_edge_tables); + + static case_insensitive_map_t> + PopulateGraphTableAliasMap(const CreatePropertyGraphInfo &pg_table, const MatchExpression &match_expr); + + static PathElement *GetPathElement(const unique_ptr &path_reference); + + static SubPath *GetSubPath(const unique_ptr &path_reference); + + static unique_ptr GetJoinRef(const shared_ptr &edge_table, const string &edge_binding, + const string &prev_binding, const string &next_binding); + + static unique_ptr CreateCountCTESubquery(); + + static unique_ptr CreateWhereClause(vector> &conditions); + + static void EdgeTypeAny(const shared_ptr &edge_table, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions, unique_ptr &from_clause); + + static void EdgeTypeLeft(const shared_ptr &edge_table, const string &next_table_name, + const string &prev_table_name, const string &edge_binding, const string &prev_binding, + const string &next_binding, vector> &conditions); + + static void EdgeTypeRight(const shared_ptr &edge_table, const string &next_table_name, + const string &prev_table_name, const string &edge_binding, const string &prev_binding, + const string &next_binding, vector> &conditions); + + static void EdgeTypeLeftRight(const shared_ptr &edge_table, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions, + case_insensitive_map_t> &alias_map, + int32_t &extra_alias_counter); + + static PathElement *HandleNestedSubPath(unique_ptr &path_reference, + vector> &conditions, idx_t element_idx); + + static unique_ptr AddPathQuantifierCondition(const string &prev_binding, + const string &next_binding, + const shared_ptr &edge_table, + const SubPath *subpath); + + static unique_ptr MatchBindReplace(ClientContext &context, TableFunctionBindInput &input); + + 
static unique_ptr GenerateSubpathPatternSubquery(unique_ptr &path_pattern, + CreatePropertyGraphInfo *pg_table, + vector> &column_list, + unordered_set &named_subpaths); + + static unique_ptr + GenerateShortestPathCTE(CreatePropertyGraphInfo &pg_table, SubPath *edge_subpath, PathElement *path_element, + PathElement *next_vertex_element, + vector> &path_finding_conditions); + + static unique_ptr CreatePathFindingFunction(vector> &path_list, + CreatePropertyGraphInfo &pg_table, + const string &path_variable, + unique_ptr &final_select_node, + vector> &conditions); + + static void AddPathFinding(unique_ptr &select_node, vector> &conditions, + const string &prev_binding, const string &edge_binding, const string &next_binding, + const shared_ptr &edge_table, CreatePropertyGraphInfo &pg_table, + SubPath *subpath, PGQMatchType edge_type); + + static void AddEdgeJoins(const shared_ptr &edge_table, + const shared_ptr &previous_vertex_table, + const shared_ptr &next_vertex_table, PGQMatchType edge_type, + const string &edge_binding, const string &prev_binding, const string &next_binding, + vector> &conditions, + case_insensitive_map_t> &alias_map, + int32_t &extra_alias_counter, unique_ptr &from_clause); + + static void ProcessPathList(vector> &path_pattern, + vector> &conditions, unique_ptr &select_node, + case_insensitive_map_t> &alias_map, + CreatePropertyGraphInfo &pg_table, int32_t &extra_alias_counter, + MatchExpression &original_ref); + + static void CheckNamedSubpath(SubPath &subpath, MatchExpression &original_ref, CreatePropertyGraphInfo &pg_table, + unique_ptr &final_select_node, + vector> &conditions); + + // Check whether columns to query are valid against the property graph, throws + // BinderException if error. 
+ static void + CheckColumnBinding(const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref, + const case_insensitive_map_t> &alias_to_vertex_and_edge_tables); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/table/pagerank.hpp b/src/include/duckpgq/core/functions/table/pagerank.hpp new file mode 100644 index 00000000..6d7b84f7 --- /dev/null +++ b/src/include/duckpgq/core/functions/table/pagerank.hpp @@ -0,0 +1,52 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/core/functions/table/pagerank.hpp +// +// +//===----------------------------------------------------------------------===// + +#include "duckpgq/common.hpp" + +namespace duckdb { + +class PageRankFunction : public TableFunction { +public: + PageRankFunction() { + name = "pagerank"; + arguments = {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}; + bind_replace = PageRankBindReplace; + } + + static unique_ptr PageRankBindReplace(ClientContext &context, TableFunctionBindInput &input); +}; + +struct PageRankData : TableFunctionData { + static unique_ptr PageRankBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->pg_name = StringValue::Get(input.inputs[0]); + result->node_table = StringValue::Get(input.inputs[1]); + result->edge_table = StringValue::Get(input.inputs[2]); + return_types.emplace_back(LogicalType::BIGINT); + return_types.emplace_back(LogicalType::BIGINT); + names.emplace_back("rowid"); + names.emplace_back("pagerank"); + return std::move(result); + } + + string pg_name; + string node_table; + string edge_table; +}; + +struct PageRankScanState : GlobalTableFunctionState { + static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { + auto result = make_uniq(); + return std::move(result); + } + + bool finished = false; +}; + +} // namespace duckdb diff --git 
a/src/include/duckpgq/core/functions/table/pgq_scan.hpp b/src/include/duckpgq/core/functions/table/pgq_scan.hpp new file mode 100644 index 00000000..19d70268 --- /dev/null +++ b/src/include/duckpgq/core/functions/table/pgq_scan.hpp @@ -0,0 +1,187 @@ +/* we treat csr -> v, csr -> e and csr -> w as tables. + * The col names and tables names are also treated as tables. + * And later, we define some table function to get graph-related data. + * + * This header defines all the structs and classes used later. + */ + +#pragma once +#include "duckdb/common/types/value.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckpgq/core/utils/compressed_sparse_row.hpp" + +#include + +#include "duckpgq/core/utils/duckpgq_utils.hpp" + +namespace duckdb { + +struct CSRScanVData : TableFunctionData { +public: + static unique_ptr ScanCSRVBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->csr_id = input.inputs[0].GetValue(); + return_types.emplace_back(LogicalType::BIGINT); + names.emplace_back("csrv"); + return std::move(result); + } + +public: + int32_t csr_id; +}; + +struct CSRScanPtrData : public TableFunctionData { +public: + static unique_ptr ScanCSRPtrBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->csr_id = input.inputs[0].GetValue(); + return_types.emplace_back(LogicalType::UBIGINT); + names.emplace_back("ptr"); + return std::move(result); + } + +public: + int32_t csr_id; +}; + +struct CSRScanEData : public TableFunctionData { +public: + static unique_ptr ScanCSREBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->csr_id = input.inputs[0].GetValue(); + return_types.emplace_back(LogicalType::BIGINT); + names.emplace_back("csre"); + return std::move(result); + } + +public: + int32_t csr_id; +}; + 
+struct CSRScanWData : public TableFunctionData { +public: + static unique_ptr ScanCSRWBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->csr_id = input.inputs[0].GetValue(); + + auto duckpgq_state = GetDuckPGQState(context); + + CSR *csr = duckpgq_state->GetCSR(result->csr_id); + + if (!csr->w.empty()) { + result->is_double = false; + return_types.emplace_back(LogicalType::BIGINT); + } else { + result->is_double = true; + return_types.emplace_back(LogicalType::DOUBLE); + } + names.emplace_back("csrw"); + return std::move(result); + } + +public: + int32_t csr_id; + bool is_double; +}; + +struct CSRScanWDoubleData : public TableFunctionData { +public: + static unique_ptr ScanCSRWDoubleBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->csr_id = input.inputs[0].GetValue(); + return_types.emplace_back(LogicalType::DOUBLE); + names.emplace_back("csrw"); + return std::move(result); + } + +public: + int32_t csr_id; +}; + +struct PGScanVTableData : public TableFunctionData { +public: + static unique_ptr ScanPGVTableBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->pg_name = StringValue::Get(input.inputs[0]); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("vtables"); + return std::move(result); + } + +public: + string pg_name; +}; + +struct PGScanVColData : public TableFunctionData { +public: + static unique_ptr ScanPGVColBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->pg_name = StringValue::Get(input.inputs[0]); + result->table_name = StringValue::Get(input.inputs[1]); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("colnames"); + return std::move(result); + } + +public: 
+ string pg_name; + string table_name; +}; + +struct PGScanETableData : public TableFunctionData { +public: + static unique_ptr ScanPGETableBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->pg_name = StringValue::Get(input.inputs[0]); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("etables"); + return std::move(result); + } + +public: + string pg_name; +}; + +struct PGScanEColData : public TableFunctionData { +public: + static unique_ptr ScanPGEColBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + result->pg_name = StringValue::Get(input.inputs[0]); + result->table_name = StringValue::Get(input.inputs[1]); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("colnames"); + return std::move(result); + } + +public: + string pg_name; + string table_name; +}; + +struct CSRScanState : public GlobalTableFunctionState { +public: + static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { + auto result = make_uniq(); + result->csr_v_offset = 0; + result->csr_e_offset = 0; + result->csr_w_offset = 0; + return std::move(result); + } + +public: + bool finished = false; + idx_t csr_v_offset; + idx_t csr_e_offset; + idx_t csr_w_offset; +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/table/summarize_property_graph.hpp b/src/include/duckpgq/core/functions/table/summarize_property_graph.hpp new file mode 100644 index 00000000..d43b9581 --- /dev/null +++ b/src/include/duckpgq/core/functions/table/summarize_property_graph.hpp @@ -0,0 +1,59 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/functions/tablefunctions/summarize_property_graph.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include 
"duckdb/function/table_function.hpp" +#include "duckdb/parser/parsed_data/create_property_graph_info.hpp" +#include "duckpgq/common.hpp" + +namespace duckdb { + +class SummarizePropertyGraphFunction : public TableFunction { +public: + SummarizePropertyGraphFunction() { + name = "summarize_property_graph"; + arguments.push_back(LogicalType::VARCHAR); + bind_replace = SummarizePropertyGraphBindReplace; + } + + struct SummarizePropertyGraphBindData : public TableFunctionData { + explicit SummarizePropertyGraphBindData(CreatePropertyGraphInfo *pg_info) : summarize_pg_info(pg_info) { + } + CreatePropertyGraphInfo *summarize_pg_info; + }; + + struct SummarizePropertyGraphGlobalData : public GlobalTableFunctionState { + SummarizePropertyGraphGlobalData() = default; + bool done = false; + }; + + static unique_ptr SummarizePropertyGraphInit(ClientContext &context, + TableFunctionInitInput &input); + + static unique_ptr CreateGroupBySubquery(const shared_ptr &pg_table, + bool is_in_degree, const string °ree_column); + static unique_ptr GetDegreeStatistics(const string &aggregate_function, bool is_in_degree); + static unique_ptr + CreateDegreeStatisticsCTE(const shared_ptr &pg_table, const string °ree_column, + bool is_in_degree); + static unique_ptr GetIsolatedNodes(shared_ptr &pg_table, const string &alias, + bool is_source); + static unique_ptr GetDistinctCount(const shared_ptr &pg_table, + const string &alias, bool is_source); + + static unique_ptr + CreateVertexTableCTE(const shared_ptr &vertex_table); + static unique_ptr CreateEdgeTableCTE(shared_ptr &edge_table); + + static unique_ptr HandleSingleVertexTable(const shared_ptr &vertex_table, + const string &stat_table_alias); + static unique_ptr SummarizePropertyGraphBindReplace(ClientContext &context, + TableFunctionBindInput &input); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/functions/table/weakly_connected_component.hpp b/src/include/duckpgq/core/functions/table/weakly_connected_component.hpp 
new file mode 100644 index 00000000..18b0c544 --- /dev/null +++ b/src/include/duckpgq/core/functions/table/weakly_connected_component.hpp @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/core/functions/table/weakly_connected_component.hpp +// +// +//===----------------------------------------------------------------------===// + +#include "duckpgq/common.hpp" + +namespace duckdb { + +class WeaklyConnectedComponentFunction : public TableFunction { +public: + WeaklyConnectedComponentFunction() { + name = "weakly_connected_component"; + arguments = {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}; + bind_replace = WeaklyConnectedComponentBindReplace; + } + + static unique_ptr WeaklyConnectedComponentBindReplace(ClientContext &context, + TableFunctionBindInput &input); +}; + +struct WeaklyConnectedComponentData : TableFunctionData { + static unique_ptr WeaklyConnectedComponentBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, + vector &names) { + auto result = make_uniq(); + result->pg_name = StringValue::Get(input.inputs[0]); + result->node_table = StringValue::Get(input.inputs[1]); + result->edge_table = StringValue::Get(input.inputs[2]); + return_types.emplace_back(LogicalType::BIGINT); + return_types.emplace_back(LogicalType::BIGINT); + names.emplace_back("rowid"); + names.emplace_back("componentId"); + return std::move(result); + } + + string pg_name; + string node_table; + string edge_table; +}; + +struct WeaklyConnectedComponentScanState : GlobalTableFunctionState { + static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { + auto result = make_uniq(); + return std::move(result); + } + + bool finished = false; +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/module.hpp b/src/include/duckpgq/core/module.hpp new file mode 100644 index 00000000..b351e41f --- /dev/null +++ 
b/src/include/duckpgq/core/module.hpp @@ -0,0 +1,11 @@ +#pragma once +#include "duckpgq/common.hpp" + +namespace duckdb { + +struct CoreModule { +public: + static void Register(ExtensionLoader &loader); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/operator/duckpgq_bind.hpp b/src/include/duckpgq/core/operator/duckpgq_bind.hpp new file mode 100644 index 00000000..617126cf --- /dev/null +++ b/src/include/duckpgq/core/operator/duckpgq_bind.hpp @@ -0,0 +1,24 @@ +#pragma once + +#include "duckpgq/common.hpp" + +namespace duckdb { + +BoundStatement duckpgq_bind(ClientContext &context, Binder &binder, OperatorExtensionInfo *info, + SQLStatement &statement); + +struct DuckPGQOperatorExtension : public OperatorExtension { + DuckPGQOperatorExtension() : OperatorExtension() { + Bind = duckpgq_bind; + } + + std::string GetName() override { + return "duckpgq_bind"; + } + + unique_ptr Deserialize(Deserializer &deserializer) override { + throw InternalException("DuckPGQ operator should not be serialized"); + } +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/operator/duckpgq_operator.hpp b/src/include/duckpgq/core/operator/duckpgq_operator.hpp new file mode 100644 index 00000000..a4322c09 --- /dev/null +++ b/src/include/duckpgq/core/operator/duckpgq_operator.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include "duckpgq/common.hpp" + +namespace duckdb { + +struct CorePGQOperator { + static void Register(ExtensionLoader &loader) { + RegisterPGQBindOperator(loader); + } + +private: + static void RegisterPGQBindOperator(ExtensionLoader &loader); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/parser/duckpgq_parser.hpp b/src/include/duckpgq/core/parser/duckpgq_parser.hpp new file mode 100644 index 00000000..e8809a75 --- /dev/null +++ b/src/include/duckpgq/core/parser/duckpgq_parser.hpp @@ -0,0 +1,56 @@ +#pragma once +#include "duckpgq/common.hpp" + +#include + +namespace duckdb { + +struct CorePGQParser { + static void 
Register(ExtensionLoader &loader) { + RegisterPGQParserExtension(loader); + } + +private: + static void RegisterPGQParserExtension(ExtensionLoader &loader); +}; + +struct DuckPGQParserExtensionInfo : ParserExtensionInfo { + DuckPGQParserExtensionInfo() : ParserExtensionInfo() {}; + ~DuckPGQParserExtensionInfo() override = default; +}; + +ParserExtensionParseResult duckpgq_parse(ParserExtensionInfo *info, const std::string &query); + +ParserExtensionPlanResult duckpgq_plan(ParserExtensionInfo *info, ClientContext &, + unique_ptr); + +ParserExtensionPlanResult duckpgq_find_select_statement(SQLStatement *statement, DuckPGQState &duckpgq_state); + +ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, DuckPGQState &duckpgq_state); + +void duckpgq_find_match_function(TableRef *table_ref, DuckPGQState &duckpgq_state); + +struct DuckPGQParserExtension : ParserExtension { + DuckPGQParserExtension() : ParserExtension() { + parse_function = duckpgq_parse; + plan_function = duckpgq_plan; + parser_info = make_shared_ptr(); + } +}; + +struct DuckPGQParseData : ParserExtensionParseData { + unique_ptr statement; + + unique_ptr Copy() const override { + return make_uniq_base(statement->Copy()); + } + + string ToString() const override { + return statement->ToString(); + }; + + explicit DuckPGQParseData(unique_ptr statement) : statement(std::move(statement)) { + } +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/pragma/duckpgq_pragma.hpp b/src/include/duckpgq/core/pragma/duckpgq_pragma.hpp new file mode 100644 index 00000000..a4b568f2 --- /dev/null +++ b/src/include/duckpgq/core/pragma/duckpgq_pragma.hpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckpgq/include/core/pragma/show_property_graphs.hpp +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckpgq/common.hpp" + +namespace duckdb { + +//! 
Class to register the PRAGMA create_inbox function +class CorePGQPragma { +public: + //! Register the PRAGMA function + static void Register(ExtensionLoader &loader) { + RegisterShowPropertyGraphs(loader); + RegisterCreateVertexTable(loader); + } + +private: + static void RegisterShowPropertyGraphs(ExtensionLoader &loader); + static void RegisterCreateVertexTable(ExtensionLoader &loader); +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/utils/compressed_sparse_row.hpp b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp new file mode 100644 index 00000000..00307f9e --- /dev/null +++ b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp @@ -0,0 +1,90 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// duckpgq/core/utils/compressed_sparse_row.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckdb/function/function.hpp" + +#include "duckdb/parser/expression/cast_expression.hpp" +#include "duckdb/parser/expression/function_expression.hpp" +#include "duckdb/parser/expression/subquery_expression.hpp" +#include "duckdb/parser/query_node/set_operation_node.hpp" + +#include "duckdb/parser/expression/columnref_expression.hpp" +#include "duckdb/parser/property_graph_table.hpp" +#include "duckdb/parser/query_node/select_node.hpp" +#include "duckdb/parser/tableref/joinref.hpp" +#include "duckpgq/common.hpp" + +namespace duckdb { + +class CSR { +public: + CSR() = default; + ~CSR() { + delete[] v; + } + + atomic *v {}; + + vector e; + vector edge_ids; + + vector w; + vector w_double; + + bool initialized_v = false; + bool initialized_e = false; + bool initialized_w = false; + + size_t vsize {}; + + string ToString() const; +}; + +struct CSRFunctionData : FunctionData { + CSRFunctionData(ClientContext &context, int32_t id, const LogicalType &weight_type); + unique_ptr Copy() const override; + bool Equals(const FunctionData 
&other_p) const override; + static unique_ptr CSRVertexBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments); + static unique_ptr CSREdgeBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments); + static unique_ptr CSRBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments); + + ClientContext &context; + const int32_t id; + const LogicalType weight_type; +}; + +// CSR BindReplace functions +unique_ptr CreateUndirectedCSRCTE(const shared_ptr &edge_table, + const unique_ptr &select_node); +unique_ptr CreateDirectedCSRCTE(const shared_ptr &edge_table, + const string &prev_binding, const string &edge_binding, + const string &next_binding); + +// Helper functions +unique_ptr MakeEdgesCTE(const shared_ptr &edge_table); +unique_ptr CreateDirectedCSRVertexSubquery(const shared_ptr &edge_table, + const string &binding); +unique_ptr CreateUndirectedCSRVertexSubquery(const shared_ptr &edge_table, + const string &binding); +unique_ptr CreateOuterSelectEdgesNode(); +unique_ptr CreateOuterSelectNode(unique_ptr create_csr_edge_function); +unique_ptr GetJoinRef(const shared_ptr &edge_table, const string &edge_binding, + const string &prev_binding, const string &next_binding); +unique_ptr GetCountTable(const shared_ptr &table, const string &table_alias, + const string &primary_key); +void SetupSelectNode(unique_ptr &select_node, const shared_ptr &edge_table, + bool reverse = false); +unique_ptr CreateCountCTESubquery(); +unique_ptr GetCountUndirectedEdgeTable(); +unique_ptr GetCountEdgeTable(const shared_ptr &edge_table); + +} // namespace duckdb diff --git a/src/include/duckpgq/core/utils/duckpgq_bitmap.hpp b/src/include/duckpgq/core/utils/duckpgq_bitmap.hpp new file mode 100644 index 00000000..26c87439 --- /dev/null +++ b/src/include/duckpgq/core/utils/duckpgq_bitmap.hpp @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// DuckPGQ +// +// 
duckpgq/core/utils/duckpgq_bitmap.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "duckpgq/common.hpp" +#include "duckdb/common/vector.hpp" + +namespace duckdb { + +class DuckPGQBitmap { +public: + explicit DuckPGQBitmap(size_t size); + void set(size_t index); + bool test(size_t index) const; + void reset(); + +private: + vector bitmap; +}; + +} // namespace duckdb diff --git a/src/include/duckpgq/core/utils/duckpgq_utils.hpp b/src/include/duckpgq/core/utils/duckpgq_utils.hpp new file mode 100644 index 00000000..4fcebc3d --- /dev/null +++ b/src/include/duckpgq/core/utils/duckpgq_utils.hpp @@ -0,0 +1,21 @@ +#pragma once +#include "duckpgq_state.hpp" + +namespace duckdb { + +#define LANE_LIMIT 512 +#define VISIT_SIZE_DIVISOR 2 + +// Function to get DuckPGQState from ClientContext +shared_ptr GetDuckPGQState(ClientContext &context, bool throw_error_not_found = false); +CreatePropertyGraphInfo *GetPropertyGraphInfo(const shared_ptr &duckpgq_state, const string &pg_name); +shared_ptr ValidateSourceNodeAndEdgeTable(CreatePropertyGraphInfo *pg_info, + const std::string &node_table, + const std::string &edge_table); +unique_ptr CreateSelectNode(const shared_ptr &edge_pg_entry, + const string &function_name, const string &function_alias); +unique_ptr CreateBaseTableRef(const string &table_name, const string &alias = ""); +unique_ptr CreateColumnRefExpression(const string &column_name, const string &table_name = "", + const string &alias = ""); + +} // namespace duckdb diff --git a/src/include/duckpgq_extension.hpp b/src/include/duckpgq_extension.hpp new file mode 100644 index 00000000..49a91557 --- /dev/null +++ b/src/include/duckpgq_extension.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include "duckpgq/common.hpp" + +namespace duckdb { + +class DuckpgqExtension : public Extension { +public: + void Load(ExtensionLoader &db) override; + std::string Name() override; +}; + +} // namespace duckdb diff --git 
a/src/include/duckpgq_extension_callback.hpp b/src/include/duckpgq_extension_callback.hpp new file mode 100644 index 00000000..e7de3d5f --- /dev/null +++ b/src/include/duckpgq_extension_callback.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include "duckpgq/common.hpp" +#include "duckdb/planner/extension_callback.hpp" +#include + +namespace duckdb { +// class DuckpgqExtensionCallback : public ExtensionCallback { +// void OnConnectionOpened(ClientContext &context) override { +// context.registered_state->Insert( +// "duckpgq", make_shared_ptr(context.shared_from_this())); +// } +// }; +} // namespace duckdb diff --git a/src/include/duckpgq_state.hpp b/src/include/duckpgq_state.hpp new file mode 100644 index 00000000..04d44ab2 --- /dev/null +++ b/src/include/duckpgq_state.hpp @@ -0,0 +1,39 @@ +#pragma once + +#include "duckpgq/common.hpp" +#include "duckdb/common/case_insensitive_map.hpp" + +#include + +namespace duckdb { + +class DuckPGQState : public ClientContextState { +public: + explicit DuckPGQState() {}; + + static void InitializeInternalTable(ClientContext &context); + void QueryEnd() override; + CreatePropertyGraphInfo *GetPropertyGraph(const string &pg_name); + CSR *GetCSR(int32_t id); + + void RetrievePropertyGraphs(const shared_ptr &context); + void ProcessPropertyGraphs(unique_ptr &property_graphs, bool is_vertex); + void PopulateEdgeSpecificFields(unique_ptr &chunk, idx_t row_idx, PropertyGraphTable &table); + static void ExtractListValues(const Value &list_value, vector &output); + void RegisterPropertyGraph(const shared_ptr &table, const string &graph_name, bool is_vertex); + +public: + unique_ptr parse_data; + unordered_map> transform_expression; + int32_t match_index = 0; + + //! Property graphs that are registered + case_insensitive_map_t> registered_property_graphs; + + //! 
Used to build the CSR data structures required for path-finding queries + std::unordered_map> csr_list; + std::mutex csr_lock; + std::unordered_set csr_to_delete; +}; + +} // namespace duckdb diff --git a/test/README.md b/test/README.md index 657378a6..fb5e514f 100644 --- a/test/README.md +++ b/test/README.md @@ -1,14 +1,11 @@ -# Testing the boilerplate extension -This directory contains all the tests for the boilerplate extension. The `sql` directory holds tests that are written as [SQLLogicTests](https://duckdb.org/dev/sqllogictest/intro.html). DuckDB aims to have most its tests in this format as SQL statements, so for the boilerplate extension, this should probably be the goal too. However, client specific testing is also available. +# Testing this extension +This directory contains all the tests for this extension. The `sql` directory holds tests that are written as [SQLLogicTests](https://duckdb.org/dev/sqllogictest/intro.html). DuckDB aims to have most its tests in this format as SQL statements, so for the quack extension, this should probably be the goal too. The root makefile contains targets to build and run all of these tests. To run the SQLLogicTests: ```bash make test ``` - -To run the python tests: -```sql -make test_python -``` - -For other client tests check the makefile in the root of this repository. 
\ No newline at end of file +or +```bash +make test_debug +``` \ No newline at end of file diff --git a/test/nodejs/boilerplate_test.js b/test/nodejs/boilerplate_test.js deleted file mode 100644 index bf754f8b..00000000 --- a/test/nodejs/boilerplate_test.js +++ /dev/null @@ -1,20 +0,0 @@ -var duckdb = require('../../duckdb/tools/nodejs'); -var assert = require('assert'); - -describe(`boilerplate extension`, () => { - let db; - let conn; - before((done) => { - db = new duckdb.Database(':memory:'); - conn = new duckdb.Connection(db); - done(); - }); - - it('function should return expected constant', function (done) { - db.all("SELECT boilerplate() as value;", function (err, res) { - if (err) throw err; - assert.deepEqual(res, [{value: "I'm a boilerplate!"}]); - done(); - }); - }); -}); \ No newline at end of file diff --git a/test/nodejs/duckpgq_test.js b/test/nodejs/duckpgq_test.js new file mode 100644 index 00000000..4a059a47 --- /dev/null +++ b/test/nodejs/duckpgq_test.js @@ -0,0 +1,31 @@ +var duckdb = require('../../duckdb/tools/nodejs'); +var assert = require('assert'); + +describe(`duckpgq extension`, () => { + let db; + let conn; + before((done) => { + db = new duckdb.Database(':memory:', {"allow_unsigned_extensions":"true"}); + conn = new duckdb.Connection(db); + conn.exec(`LOAD '${process.env.DUCKPGQ_EXTENSION_BINARY_PATH}';`, function (err) { + if (err) throw err; + done(); + }); + }); + + it('duckpgq function should return expected string', function (done) { + db.all("SELECT duckpgq('Sam') as value;", function (err, res) { + if (err) throw err; + assert.deepEqual(res, [{value: "Duckpgq Sam 🐥"}]); + done(); + }); + }); + + it('duckpgq_openssl_version function should return expected string', function (done) { + db.all("SELECT duckpgq_openssl_version('Michael') as value;", function (err, res) { + if (err) throw err; + assert(res[0].value.startsWith('Duckpgq Michael, my linked OpenSSL version is OpenSSL')); + done(); + }); + }); +}); \ No newline at end of 
file diff --git a/test/python/boilerplate_test.py b/test/python/boilerplate_test.py deleted file mode 100644 index d404617d..00000000 --- a/test/python/boilerplate_test.py +++ /dev/null @@ -1,7 +0,0 @@ -import duckdb - -def test_boilerplate(): - conn = duckdb.connect(''); - conn.execute('SELECT boilerplate() as value;'); - res = conn.fetchall() - assert(res[0][0] == "I'm a boilerplate!"); \ No newline at end of file diff --git a/test/python/duckpgq_test.py b/test/python/duckpgq_test.py new file mode 100644 index 00000000..66869791 --- /dev/null +++ b/test/python/duckpgq_test.py @@ -0,0 +1,29 @@ +import duckdb +import os +import pytest + + +# Get a fresh connection to DuckDB with the duckpgq extension binary loaded +@pytest.fixture +def duckdb_conn(): + extension_binary = os.getenv('DUCKPGQ_EXTENSION_BINARY_PATH') + if extension_binary == '': + raise Exception('Please make sure the `DUCKPGQ_EXTENSION_BINARY_PATH` is set to run the python tests') + conn = duckdb.connect('', config={'allow_unsigned_extensions': 'true'}) + conn.execute(f"load '{extension_binary}'") + return conn + + +def test_duckpgq(duckdb_conn): + duckdb_conn.execute("SELECT duckpgq('Sam') as value;") + res = duckdb_conn.fetchall() + assert res[0][0] == "Duckpgq Sam 🐥" + + +def test_property_graph(duckdb_conn): + duckdb_conn.execute("CREATE TABLE foo(i bigint)") + duckdb_conn.execute("INSERT INTO foo(i) VALUES (1)") + duckdb_conn.execute("-CREATE PROPERTY GRAPH t VERTEX TABLES (foo);") + duckdb_conn.execute("-FROM GRAPH_TABLE(t MATCH (f:foo))") + res = duckdb_conn.fetchall() + assert res[0][0] == 1 diff --git a/test/sql/211_using_other_schemas.test b/test/sql/211_using_other_schemas.test new file mode 100644 index 00000000..f14f5585 --- /dev/null +++ b/test/sql/211_using_other_schemas.test @@ -0,0 +1,334 @@ +# name: test/sql/211_using_other_schemas.test +# group: [sql] + +require duckpgq + +statement ok +CREATE SCHEMA test_schema; + +statement ok +SET search_path = test_schema; + +statement ok 
+CREATE TABLE Person(id BIGINT PRIMARY KEY, name TEXT); + +statement ok +CREATE TABLE Company(id BIGINT PRIMARY KEY, name TEXT); + +statement ok +CREATE TABLE WorksAt(person_id BIGINT, company_id BIGINT); + +statement ok +-CREATE PROPERTY GRAPH work_graph + VERTEX TABLES ( + Person, + Company + ) + EDGE TABLES ( + WorksAt SOURCE KEY (person_id) REFERENCES Person (id) + DESTINATION KEY (company_id) REFERENCES Company (id) + LABEL works_at + ); + +statement ok +-CREATE PROPERTY GRAPH enriched_graph + VERTEX TABLES ( + Person PROPERTIES (id, name), + Company PROPERTIES (id, name) + ) + EDGE TABLES ( + WorksAt SOURCE KEY (person_id) REFERENCES Person (id) + DESTINATION KEY (company_id) REFERENCES Company (id) + PROPERTIES (person_id, company_id) + LABEL works_at + ); + +statement ok +CREATE TABLE Friendship(person1_id BIGINT, person2_id BIGINT, since DATE); + +statement ok +-CREATE PROPERTY GRAPH social_graph + VERTEX TABLES ( + Person + ) + EDGE TABLES ( + Friendship SOURCE KEY (person1_id) REFERENCES Person (id) + DESTINATION KEY (person2_id) REFERENCES Person (id) + PROPERTIES (since) + LABEL friends_with + ); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH external_graph + VERTEX TABLES ( + test_schema.Person, + test_schema.Company + ) + EDGE TABLES ( + test_schema.WorksAt SOURCE KEY (person_id) REFERENCES test_schema.Person (id) + DESTINATION KEY (company_id) REFERENCES test_schema.Company (id) + LABEL works_at + ); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH memory_graph + VERTEX TABLES ( + memory.test_schema.Person, + memory.test_schema.Company + ) + EDGE TABLES ( + memory.test_schema.WorksAt SOURCE KEY (person_id) REFERENCES memory.test_schema.Person (id) + DESTINATION KEY (company_id) REFERENCES memory.test_schema.Company (id) + LABEL works_at + ); + +statement error +-CREATE PROPERTY GRAPH invalid_graph + VERTEX TABLES ( + nonexistent_schema.Person + ) + EDGE TABLES ( + nonexistent_schema.WorksAt SOURCE KEY (person_id) REFERENCES 
nonexistent_schema.Person (id) + DESTINATION KEY (company_id) REFERENCES nonexistent_schema.Company (id) + LABEL works_at + ); +---- +Invalid Error: Table 'nonexistent_schema.Company' not found in the property graph invalid_graph. + +statement ok +CREATE SCHEMA schema1; + +statement ok +CREATE SCHEMA schema2; + +statement ok +CREATE SCHEMA memory_schema; + +statement ok +SET search_path = schema1; + +statement ok +CREATE TABLE schema1.Person(id BIGINT PRIMARY KEY, name TEXT); INSERT INTO schema1.Person VALUES (1, 'Alice'); + +statement ok +CREATE TABLE schema2.Company(id BIGINT PRIMARY KEY, name TEXT); INSERT INTO schema2.Company VALUES (2, 'Bob B.V.'); + +statement ok +CREATE TABLE schema2.WorksAt(person_id BIGINT, company_id BIGINT); + +statement ok +CREATE TABLE Friendship(person1_id BIGINT, person2_id BIGINT, since DATE); + +statement ok +-CREATE PROPERTY GRAPH cross_schema_graph + VERTEX TABLES ( + schema1.Person, + schema2.Company + ) + EDGE TABLES ( + schema2.WorksAt SOURCE KEY (person_id) REFERENCES schema1.Person (id) + DESTINATION KEY (company_id) REFERENCES schema2.Company (id) + LABEL works_at + ); + +statement ok +-CREATE PROPERTY GRAPH memory_inclusive_graph + VERTEX TABLES ( + schema1.Person + ) + EDGE TABLES ( + Friendship SOURCE KEY (person1_id) REFERENCES schema1.Person (id) + DESTINATION KEY (person2_id) REFERENCES schema1.Person (id) + PROPERTIES (since) + LABEL friends_with + ); + +statement ok +-CREATE PROPERTY GRAPH fully_qualified_graph + VERTEX TABLES ( + schema1.Person, + schema2.Company + ) + EDGE TABLES ( + schema2.WorksAt SOURCE KEY (person_id) REFERENCES schema1.Person (id) + DESTINATION KEY (company_id) REFERENCES schema2.Company (id) + LABEL works_at, + Friendship SOURCE KEY (person1_id) REFERENCES schema1.Person (id) + DESTINATION KEY (person2_id) REFERENCES schema1.Person (id) + LABEL friends_with + ); + +statement ok +SET search_path = schema2; + +statement error +-CREATE PROPERTY GRAPH search_path_graph + VERTEX TABLES ( + 
Person, -- Should resolve to schema1.Person + Company + ) + EDGE TABLES ( + WorksAt SOURCE KEY (person_id) REFERENCES Person (id) + DESTINATION KEY (company_id) REFERENCES Company (id) + LABEL works_at + ); +---- +Invalid Error: Table with name Person does not exist + +statement ok +-CREATE PROPERTY GRAPH search_path_graph + VERTEX TABLES ( + schema1.Person, -- Should resolve to schema1.Person + Company + ) + EDGE TABLES ( + WorksAt SOURCE KEY (person_id) REFERENCES schema1.Person (id) + DESTINATION KEY (company_id) REFERENCES Company (id) + LABEL works_at + ); + +statement error +CREATE PROPERTY GRAPH invalid_schema_graph + VERTEX TABLES ( + nonexistent_schema.Person + ) + EDGE TABLES ( + nonexistent_schema.WorksAt SOURCE KEY (person_id) REFERENCES nonexistent_schema.Person (id) + DESTINATION KEY (company_id) REFERENCES nonexistent_schema.Company (id) + LABEL works_at + ); +---- +Invalid Error: Table 'nonexistent_schema.Company' not found in the property graph invalid_schema_graph. + +statement error +-CREATE PROPERTY GRAPH conflicting_schemas + VERTEX TABLES ( + schema1.Person, + schema2.Person -- Duplicate table name in different schema + ) + EDGE TABLES ( + schema2.WorksAt SOURCE KEY (person_id) REFERENCES schema1.Person (id) + DESTINATION KEY (company_id) REFERENCES schema2.Company (id) + LABEL works_at + ); +---- +Constraint Error: Label person is not unique, make sure all labels are unique + +statement error +CREATE PROPERTY GRAPH wrong_schema_reference + VERTEX TABLES ( + schema1.Person + ) + EDGE TABLES ( + schema2.WorksAt SOURCE KEY (person_id) REFERENCES schema1.Person (id) + DESTINATION KEY (company_id) REFERENCES schema1.Company (id) -- Incorrect reference + LABEL works_at + ); +---- +Invalid Error: Table 'schema1.Company' not found in the property graph wrong_schema_reference. 
+ + + +statement ok +create table schema2.v ( + id BIGINT primary key, + name varchar +); + +statement ok +INSERT INTO schema2.v VALUES (1, 'a'); + +statement error +create table schema1.e ( + id bigint primary key, + src BIGINT REFERENCES v(id), + dst BIGINT REFERENCES v(id) +); +---- +Binder Error: Creating foreign keys across different schemas or catalogs is not supported + +statement ok +create table schema2.e ( + id bigint primary key, + src BIGINT REFERENCES schema2.v(id), + dst BIGINT REFERENCES schema2.v(id) +); + +statement error +-create property graph g +vertex tables (schema2.v) +edge tables (schema2.e source schema2.v destination schema2.v); +---- +Invalid Error: Multiple primary key - foreign key relationships detected between e and v. Please explicitly define the primary key and foreign key columns using `SOURCE KEY REFERENCES v ` + +statement ok +create table schema2.w ( + id BIGINT primary key, + name varchar +); + +statement ok +INSERT INTO schema2.w VALUES (2, 'b'); + +statement ok +create table schema2.e2 ( + id bigint primary key, + src BIGINT REFERENCES schema2.v(id), + dst BIGINT REFERENCES schema2.w(id) +); + +statement ok +-create property graph g +vertex tables (v,w) +edge tables (e2 source v destination w); + +statement ok +create schema myschema; set search_path = myschema; + +statement ok +CREATE TABLE Person (id bigint); CREATE TABLE Person_knows_person (person1id bigint, person2id bigint); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH snb + VERTEX TABLES ( + Person + ) + EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL knows + ); + + + + +statement ok +-FROM GRAPH_TABLE (snb MATCH (a:Person)-[k:knows]->(b:Person)); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH snb + VERTEX TABLES ( + myschema.Person + ) + EDGE TABLES ( + myschema.Person_knows_person SOURCE KEY (Person1Id) REFERENCES myschema.Person (id) + DESTINATION KEY (Person2Id) 
REFERENCES myschema.Person (id) + LABEL knows + ); + +statement ok +-FROM GRAPH_TABLE (snb MATCH (a:Person)-[k:knows]->(b:Person)); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH snb + VERTEX TABLES ( + memory.myschema.Person + ) + EDGE TABLES ( + memory.myschema.Person_knows_person SOURCE KEY (Person1Id) REFERENCES memory.myschema.Person (id) + DESTINATION KEY (Person2Id) REFERENCES memory.myschema.Person (id) + LABEL knows + ); + +statement ok +-FROM GRAPH_TABLE (snb MATCH (a:Person)-[k:knows]->(b:Person)); diff --git a/test/sql/altering_table.test b/test/sql/altering_table.test new file mode 100644 index 00000000..79842246 --- /dev/null +++ b/test/sql/altering_table.test @@ -0,0 +1,67 @@ +# name: test/sql/altering_table.test +# description: Testing altering a table after creating a property graph over it +# group: [sql] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); + +statement ok +CREATE TABLE School(name VARCHAR, Id BIGINT, Kind VARCHAR); + +statement ok +CREATE TABLE StudyAt(personId BIGINT, schoolId BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +INSERT INTO School VALUES ('VU', 0, 'University'), ('UVA', 1, 'University'); + +statement ok +INSERT INTO StudyAt VALUES (0, 0), (1, 0), (2, 1), (3, 1), (4, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School LABEL SCHOOL + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows, + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( 
id ) + LABEL StudyAt + ); + +statement ok +-FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[s:StudyAt]->(b:School) + WHERE a.name = 'Daniel' + COLUMNS (a.id) + ) study; + +statement ok +ALTER TABLE student RENAME id TO jd; + +statement error +-FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[s:StudyAt]->(b:School) + WHERE a.name = 'Daniel' + COLUMNS (a.jd) + ) study; +---- diff --git a/test/sql/boilerplate.test b/test/sql/boilerplate.test deleted file mode 100644 index 7f3ee99e..00000000 --- a/test/sql/boilerplate.test +++ /dev/null @@ -1,18 +0,0 @@ -# name: test/sql/boilerplate.test -# description: test boiler plate extension -# group: [boilerplate] - -# Before we load the extension, this will fail -statement error -SELECT boilerplate(); ----- -Catalog Error: Scalar Function with name boilerplate does not exist! - -# Require statement will ensure this test is run with this extension loaded -require boilerplate - -# Confirm the extension works -query I -SELECT boilerplate(); ----- -I'm a boilerplate! 
\ No newline at end of file diff --git a/test/sql/copy_to_duckpgq.test b/test/sql/copy_to_duckpgq.test new file mode 100644 index 00000000..bf5ef0f3 --- /dev/null +++ b/test/sql/copy_to_duckpgq.test @@ -0,0 +1,86 @@ +# name: test/sql/copy_to_duckpgq.test +# description: Testing the COPY TO query with a PGQ pattern +# group: [sql] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003' + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + Forum LABEL Forum, + Organisation LABEL Organisation IN typemask(company, university), + Place LABEL Place, + Tag LABEL Tag, + TagClass LABEL TagClass, + Country LABEL Country, + City LABEL City, + Message LABEL Message + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + Forum_hasMember_Person SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasMember, + Forum_hasTag_Tag SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL Forum_hasTag, + Person_hasInterest_Tag SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL hasInterest, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation, + Person_likes_Message SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (id) REFERENCES Message (id) + LABEL likes_Message, + Message_hasTag_Tag SOURCE KEY (id) REFERENCES Message (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL message_hasTag, + Message_hasAuthor_Person SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasAuthor, + Message_replyOf_Message SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (ParentMessageId) REFERENCES Message (id) + LABEL 
replyOf + ); + +# IS1 +statement ok +-COPY (FROM GRAPH_TABLE (snb + MATCH (a is person where a.id = 17592186044461) + COLUMNS(a.firstName, a.lastName, a.birthday, a.locationIP, a.browserUsed, a.LocationCityId, a.gender) +) tmp) TO '__TEST_DIR__/is1.csv' (HEADER FALSE); + +query IIIIIII +SELECT * FROM '__TEST_DIR__/is1.csv'; +---- +Ali Abouba 1987-05-29 41.203.147.168 Internet Explorer 1264 male + +statement ok +-CREATE TABLE result as (FROM GRAPH_TABLE (snb + MATCH (a is person where a.id = 17592186044461) + COLUMNS(a.firstName, a.lastName, a.birthday, a.locationIP, a.browserUsed, a.LocationCityId, a.gender, a.creationDate) + ) tmp); + +query IIIIIIII +SELECT * FROM result; +---- +Ali Abouba 1987-05-29 41.203.147.168 Internet Explorer 1264 male 2011-05-12 02:46:47.595+00 + +statement ok +-INSERT INTO result (FROM GRAPH_TABLE (snb + MATCH (a is person where a.id = 17592186044461) + COLUMNS(a.firstName, a.lastName, a.birthday, a.locationIP, a.browserUsed, a.LocationCityId, a.gender, a.creationDate) + ) tmp) + +query IIIIIIII +SELECT * FROM result; +---- +Ali Abouba 1987-05-29 41.203.147.168 Internet Explorer 1264 male 2011-05-12 02:46:47.595+00 +Ali Abouba 1987-05-29 41.203.147.168 Internet Explorer 1264 male 2011-05-12 02:46:47.595+00 diff --git a/test/sql/create_pg/209_property_undefined.test b/test/sql/create_pg/209_property_undefined.test new file mode 100644 index 00000000..a1c6b4d8 --- /dev/null +++ b/test/sql/create_pg/209_property_undefined.test @@ -0,0 +1,48 @@ +# name: test/sql/create_pg/209_property_undefined.test +# group: [create_pg] + +require duckpgq + +statement ok con1 +create table person (id bigint, firstName varchar, lastName varchar); + +statement ok con1 +create table person_knows_person(person1id bigint, person2id bigint); + +statement ok con1 +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person + ) +EDGE TABLES ( + Person_knows_person + SOURCE KEY ( person1id ) REFERENCES Person ( id ) + DESTINATION KEY ( person2id ) REFERENCES Person ( id ) 
+ LABEL Knows + ); + +statement ok con1 +-FROM GRAPH_TABLE(snb + MATCH (a:Person WHERE a.firstName = 'Jan')-[k:Knows]->(b:Person) + COLUMNS (b.firstName) +); + +statement ok con2 +-FROM GRAPH_TABLE(snb + MATCH (a:Person WHERE a.firstName = 'Bob')-[k:Knows]->(b:Person) + COLUMNS (b.firstName) +); + +statement ok con2 +-FROM GRAPH_TABLE(snb + MATCH (a:Person WHERE a.firstName = 'Bob')-[k:Knows]->(b:Person) + COLUMNS (b.FIRSTNAME) +); + +statement error con2 +-FROM GRAPH_TABLE(snb + MATCH (a:Person WHERE a.firstName = 'Bob')-[k:Knows]->(b:Person) + COLUMNS (b.nonregisteredproperty) +); +---- +Binder Error: Property b.nonregisteredproperty is never registered! diff --git a/test/sql/create_pg/all_properties.test b/test/sql/create_pg/all_properties.test new file mode 100644 index 00000000..29acf0fa --- /dev/null +++ b/test/sql/create_pg/all_properties.test @@ -0,0 +1,116 @@ +# name: test/sql/create_pg/all_properties.test +# description: Testing the creation of property graphs with all properties +# group: [create_pg] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (1,2, 14), (1,3, 15), (2,3, 16); + +# all properties +statement ok +-CREATE PROPERTY GRAPH pg_all_properties +VERTEX TABLES ( + Student LABEL Person, + School LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows + ); + + +# only an explicit subset of properties (id on vertices, src on edges) is registered +statement ok +-CREATE PROPERTY GRAPH pg_only_id +VERTEX TABLES ( + Student PROPERTIES (id) LABEL Person + ) +EDGE 
TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES (src) + LABEL Knows + ); + +query II +-FROM GRAPH_TABLE (pg_only_id MATCH (p:Person)-[k:Knows]->(p2:Person) COLUMNS (p.id as id, p2.id as friend_id)); +---- +0 1 +0 2 +0 3 +1 2 +1 3 +2 3 + +statement error +-FROM GRAPH_TABLE (pg_only_id MATCH (p:Person)-[k:Knows]->(p2:Person) COLUMNS (p.id as id, p2.name as friend_name)); +---- +Binder Error: Property p2.name is never registered! + + +query III +-FROM GRAPH_TABLE (pg_only_id MATCH p = any shortest (p:Person)-[k:Knows]->*(p2:Person) COLUMNS (p.id as id, p2.id as friend_id, vertices(p))); +---- +0 0 [0] +0 1 [0, 1] +0 2 [0, 2] +0 3 [0, 3] +1 1 [1] +1 2 [1, 2] +1 3 [1, 3] +2 2 [2] +2 3 [2, 3] +3 3 [3] + + +statement error +-FROM GRAPH_TABLE (pg_only_id MATCH (p:Person)-[k:Knows]->(p2:Person) COLUMNS (dst)); +---- +Binder Error: Property dst is never registered! + +query I +-FROM GRAPH_TABLE (pg_only_id MATCH (p:Person)-[k:Knows]->(p2:Person) COLUMNS (src)); +---- +0 +0 +0 +1 +1 +2 + +query I +-FROM GRAPH_TABLE (pg_only_id MATCH (p:Person)-[k:Knows]->(p2:Person) COLUMNS (k.*)); +---- +0 +0 +0 +1 +1 +2 + +query II +-FROM GRAPH_TABLE (pg_only_id MATCH (p:Person)-[k:Knows]->(p2:Person) COLUMNS (p.*, k.*)); +---- +0 0 +0 0 +0 0 +1 1 +1 1 +2 2 diff --git a/test/sql/create_pg/attach_pg.test b/test/sql/create_pg/attach_pg.test new file mode 100644 index 00000000..b040544a --- /dev/null +++ b/test/sql/create_pg/attach_pg.test @@ -0,0 +1,181 @@ +# name: test/sql/create_pg/attach_pg.test +# description: Testing create property graph with an attached database +# group: [create_pg] + +require duckpgq + +statement ok con1 +attach 'duckdb/data/bluesky/bluesky.duckdb'; + +query I con1 +select count(*) from bluesky.follows; +---- +19566 + +statement error +-create or replace property graph stations vertex tables (stations.stations) +---- +Invalid Error: Table with name stations does not exist + +statement error 
+-create or replace property graph stations vertex tables (nonexistingschema.account) + edge tables (nonexistingschema.follows source key (source) references nonexistingschema.account (did) + destination key (destination) references nonexistingschema.account (did)) +---- +Invalid Error: Table with name account does not exist + +statement error +-create or replace property graph stations vertex tables (bluesky.account) + edge tables (nonexistingschema.follows source key (source) references bluesky.account (did) + destination key (destination) references bluesky.account (did)) +---- +Invalid Error: Table with name follows does not exist + +statement error +-create or replace property graph stations vertex tables (bluesky.account) + edge tables (bluesky.follows source key (source) references nonexistingschema.account (did) + destination key (destination) references nonexistingschema.account (did)) +---- +Invalid Error: Table 'nonexistingschema.account' not found in the property graph stations. 
+ +statement error +-create or replace property graph stations vertex tables (bluesky.tabledoesnotexist) +---- +Invalid Error: Table with name tabledoesnotexist does not exist + +statement ok con1 +-CREATE OR REPLACE PROPERTY GRAPH bluesky + VERTEX TABLES (bluesky.account LABEL account) + EDGE TABLES (bluesky.follows SOURCE KEY (source) REFERENCES bluesky.account (did) + DESTINATION KEY (destination) REFERENCES bluesky.account (did) + LABEL follows); + +statement ok con1 +SELECT * FROM bluesky.account; + + +statement ok con1 +-FROM GRAPH_TABLE (bluesky MATCH (a:account)); + +query I +select count(*) from bluesky.account; +---- +8921 + +statement ok con2 +-FROM GRAPH_TABLE (bluesky MATCH (a:account)); + +statement ok con2 +-FROM GRAPH_TABLE (bluesky MATCH (a:account)-[f:follows]->(b:account)); + +statement ok con1 +-FROM GRAPH_TABLE (bluesky MATCH (a:account)-[f:follows]->(b:account)); + +query II con1 +-FROM GRAPH_TABLE (bluesky MATCH (a:account)-[f:follows]->(b:account) COLUMNS (a.did as a_id, b.did as b_id)) ORDER BY a_id, b_id LIMIT 10; +---- +did:plc:23df55poeztue4terk3s5ain did:plc:274qq3cgl4vrrofdg77balfw +did:plc:23df55poeztue4terk3s5ain did:plc:2ktpgfwt7cc2osldzh6uyww5 +did:plc:23df55poeztue4terk3s5ain did:plc:2p5eadzea3yb2ghwtzrlaebo +did:plc:23df55poeztue4terk3s5ain did:plc:37drmtazrclxzxezzk4ijuk7 +did:plc:23df55poeztue4terk3s5ain did:plc:42qpqlgojbezm3gt2nxdfikk +did:plc:23df55poeztue4terk3s5ain did:plc:45g7v5rojoj4tpspb7fg6dvp +did:plc:23df55poeztue4terk3s5ain did:plc:4kv7ldgzot7q4w4y65kpsbwo +did:plc:23df55poeztue4terk3s5ain did:plc:4nt6nwx353a3xxo5mzjiy4ha +did:plc:23df55poeztue4terk3s5ain did:plc:6k63663icgdybm5evgszxjn2 +did:plc:23df55poeztue4terk3s5ain did:plc:7562tstpez4aexd75ttshl3z + +query II con1 +-FROM GRAPH_TABLE (bluesky MATCH (a:account where a.did='did:plc:7qqkrwwec4qeujs6hthlgpbe')-[f:follows]->{1,3}(b:account) COLUMNS (a.did as a_id, b.did as b_id)) ORDER BY a_id, b_id LIMIT 10; +---- +did:plc:7qqkrwwec4qeujs6hthlgpbe 
did:plc:224f4aj5p5vgk7tpcc4lltgx +did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:225ihm6x4pkgdpbmvfpxyicf +did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22btehtv4y5dogqne5nuu2jx +did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22exfzbkuj3dlzj3ukyy4g5y +did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22m65anrpfstjo5ymgyl2vwu +did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22mof5hzsrituokdxsnoi7qi +did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22nbqn2zjp2pobu6cwquhjuu +did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22qt6jzmwtgyxzft57kvwut6 +did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22u3xlfdxqxyzva2fsljotcy +did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:23df55poeztue4terk3s5ain + +query III con1 +-FROM GRAPH_TABLE (bluesky MATCH p = ANY SHORTEST (a:account where a.did='did:plc:7qqkrwwec4qeujs6hthlgpbe')-[f:follows]->*(b:account) + COLUMNS (element_id(p) as path, a.did as a_id, b.did as b_id)) +ORDER BY a_id, b_id +LIMIT 10; +---- +[15, 13, 9, 347, 276, 11536, 5145] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:224f4aj5p5vgk7tpcc4lltgx +[15, 8, 1, 27, 65, 2565, 1530] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:225ihm6x4pkgdpbmvfpxyicf +[15, 8, 1, 44, 31, 1058, 863] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22btehtv4y5dogqne5nuu2jx +[15, 3, 0, 22, 18, 620, 484] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22exfzbkuj3dlzj3ukyy4g5y +[15, 8, 1, 64, 68, 2709, 1594] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22m65anrpfstjo5ymgyl2vwu +[15, 7, 6, 260, 206, 8273, 4031] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22mof5hzsrituokdxsnoi7qi +[15, 6, 11, 424, 316, 13257, 5646] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22nbqn2zjp2pobu6cwquhjuu +[15, 8, 1, 39, 27, 882, 730] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22qt6jzmwtgyxzft57kvwut6 +[15, 2, 4, 140, 104, 4215, 2188] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:22u3xlfdxqxyzva2fsljotcy +[15, 3, 0, 24, 17, 585, 461, 19521, 8887] did:plc:7qqkrwwec4qeujs6hthlgpbe did:plc:23czq4ad6kgrjdgke52yapso + + +query II con1 +from local_clustering_coefficient(bluesky, account, follows) limit 10; +---- 
+did:plc:rsfoi33e4iya2rd7jw52nfmo 0.11904762 +did:plc:btsulrw4wcqdai23fkl5qwm5 0.12508735 +did:plc:z72i7hdynmk6r22z27h6tvur 0.028301887 +did:plc:twcatus5xoa7jaeysmrhzcpv 0.15726179 +did:plc:wptnzi6wyzqltbenxapqa5qd 0.07191316 +did:plc:edglm4muiyzty2snc55ysuqx 0.117788464 +did:plc:sc6ieuzeygvm55vv2bjubkgt 0.102591224 +did:plc:g5gf5ho5yn3n5anprzf45hvm 0.103670634 +did:plc:cwcykfexwbxozuxhpuud63qa 0.35714287 +did:plc:c5ccfcya6zez3rhry6gluup4 0.07952872 + +query I con1 +select count(*) as c from weakly_connected_component(bluesky, account, follows) group by componentId order by componentid; +---- +8921 + +query II con1 +from pagerank(bluesky, account, follows) limit 10; +---- +did:plc:rsfoi33e4iya2rd7jw52nfmo 0.0001775254576762574 +did:plc:btsulrw4wcqdai23fkl5qwm5 0.0002686321909810804 +did:plc:z72i7hdynmk6r22z27h6tvur 0.0008689709539525615 +did:plc:twcatus5xoa7jaeysmrhzcpv 0.00018038910592903322 +did:plc:wptnzi6wyzqltbenxapqa5qd 0.00019702956517977273 +did:plc:edglm4muiyzty2snc55ysuqx 0.000183535102914441 +did:plc:sc6ieuzeygvm55vv2bjubkgt 0.00016956149650616158 +did:plc:g5gf5ho5yn3n5anprzf45hvm 0.00019340188585017924 +did:plc:cwcykfexwbxozuxhpuud63qa 0.0001771935670038598 +did:plc:c5ccfcya6zez3rhry6gluup4 0.00047818698168931504 + +statement error con1 +select count(*) from follows; +---- +Catalog Error: Table with name follows does not exist! + +statement error con1 +from pagerank(bluesky, bluesky.account, follows) limit 10; +---- +Invalid Error: Label 'bluesky.account' not found. Did you mean the vertex label 'account'? + +statement ok +-CREATE PROPERTY GRAPH pg VERTEX TABLES (bluesky.account PROPERTIES (displayName)); + +# Query on unspecified columns in the graph throws error. +statement error +-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (acc.displayName, acc.handle)); +---- +Binder Error: Property acc.handle is never registered! + +# Columns to query is only allowed to be or
., which we cannot prefix catalog or schema. +statement error +-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (bluesky.main.acc.displayName)); +---- +Binder Error: Property bluesky.main.acc.displayName is never registered! + +statement error +-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (main.acc.displayName)); +---- +Binder Error: Property main.acc.displayName is never registered! diff --git a/test/sql/create_pg/create_if_not_exists.test b/test/sql/create_pg/create_if_not_exists.test new file mode 100644 index 00000000..d7d74317 --- /dev/null +++ b/test/sql/create_pg/create_if_not_exists.test @@ -0,0 +1,69 @@ +# name: test/sql/create_pg/create_if_not_exists.test +# group: [create_pg] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (1,2, 14), (1,3, 15), (2,3, 16); + +statement ok +-CREATE PROPERTY GRAPH IF NOT EXISTS pg_all_properties +VERTEX TABLES ( + Student, + School LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows + ) + +query I +select count(*) from __duckpgq_internal where is_vertex_table; +---- +2 + +statement ok +-CREATE PROPERTY GRAPH IF NOT EXISTS pg_all_properties +VERTEX TABLES ( + Student + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows + ) + +query I +select count(*) from __duckpgq_internal where is_vertex_table; +---- +2 + +statement ok +-CREATE PROPERTY GRAPH IF NOT EXISTS snb +VERTEX TABLES ( + Student + ) +EDGE 
TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows + ) + +query I +select distinct property_graph from __duckpgq_internal order by property_graph desc; +---- +snb +pg_all_properties diff --git a/test/sql/create_pg/create_or_replace_pg.test b/test/sql/create_pg/create_or_replace_pg.test new file mode 100644 index 00000000..dfd60e36 --- /dev/null +++ b/test/sql/create_pg/create_or_replace_pg.test @@ -0,0 +1,43 @@ +# name: test/sql/create_pg/create_or_replace_pg.test +# description: Testing create or replace property graph syntax +# group: [create_pg] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows + ); + +# Fails because University is not registered +statement error +-FROM GRAPH_TABLE(snb MATCH (a:Person)-[w:workAt_Organisation]->(u:University)) limit 10; +---- +Binder Error: The label university is not registered in property graph snb + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + Organisation LABEL Organisation IN typemask(company, university) + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation + ); + +statement ok +-FROM GRAPH_TABLE(snb MATCH (a:Person)-[w:workAt_Organisation]->(u:University)) limit 10; diff --git a/test/sql/create_pg/create_pg_multiple_connections.test b/test/sql/create_pg/create_pg_multiple_connections.test new file mode 100644 index 00000000..fea6dd99 --- /dev/null +++ 
b/test/sql/create_pg/create_pg_multiple_connections.test @@ -0,0 +1,79 @@ +# name: test/sql/create_pg/create_pg_multiple_connections.test +# description: Testing the creation of property graphs across multiple connections +# group: [create_pg] + +require duckpgq + +statement ok con1 +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok con1 +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); + +statement ok con1 +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok con1 +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'); + +statement ok con1 +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (1,2, 14), (1,3, 15), (2,3, 16); + +# all properties +statement ok con1 +-CREATE PROPERTY GRAPH pg_all_properties +VERTEX TABLES ( + Student, + School LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows + ) + +statement ok con2 +select * from local_clustering_coefficient(pg_all_properties, student, knows); + +statement ok con2 +-from graph_table (pg_all_properties match (a:student)) + +statement ok con1 +-from graph_table (pg_all_properties match (a:student)) + +statement ok con3 +-from graph_table (pg_all_properties match (a:student)) + +statement ok con1 +-DROP PROPERTY GRAPH pg_all_properties + +statement error con3 +-from graph_table (pg_all_properties match (a:student)) +---- +Binder Error: Property graph pg_all_properties does not exist + +statement error con4 +-from graph_table (pg_all_properties match (a:student)) +---- +Binder Error: Property graph pg_all_properties does not exist + +statement error con2 +-from graph_table (pg_all_properties match (a:student)) +---- +Binder Error: Property graph pg_all_properties does not exist + +statement ok con1 +-CREATE PROPERTY GRAPH pg_all_properties +VERTEX TABLES ( + Student, + School 
LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows + ) + +# connection 2 already exists, but pg has been dropped and recreated +statement ok con2 +-from graph_table (pg_all_properties match (a:student)) \ No newline at end of file diff --git a/test/sql/create_pg/create_pg_on_view.test b/test/sql/create_pg/create_pg_on_view.test new file mode 100644 index 00000000..e72b4dbe --- /dev/null +++ b/test/sql/create_pg/create_pg_on_view.test @@ -0,0 +1,41 @@ +# name: test/sql/create_pg/create_pg_on_view.test +# group: [create_pg] + +require duckpgq + +statement ok +CREATE TABLE vdata(id TEXT PRIMARY KEY); + +statement ok +CREATE VIEW v AS SELECT * FROM vdata; + +statement ok +CREATE TABLE w(id TEXT PRIMARY KEY); + +statement ok +CREATE TABLE v_w(v_id TEXT, w_id TEXT); + +statement ok +create view v_w_view as select * from v_w; + +statement error +-CREATE PROPERTY GRAPH g1 + VERTEX TABLES (v, w) + EDGE TABLES ( + v_w SOURCE KEY (v_id) REFERENCES v (id) + DESTINATION KEY (w_id) REFERENCES w (id) + LABEL hasW + ); +---- +Invalid Error: Found a view with name v. Creating property graph tables over views is currently not supported. + +statement error +-CREATE PROPERTY GRAPH g1 + VERTEX TABLES (vdata, w) + EDGE TABLES ( + v_w_view SOURCE KEY (v_id) REFERENCES vdata (id) + DESTINATION KEY (w_id) REFERENCES w (id) + LABEL hasW + ); +---- +Invalid Error: Found a view with name v_w_view. Creating property graph tables over views is currently not supported. 
diff --git a/test/sql/create_pg/create_pg_with_pk_fk.test b/test/sql/create_pg/create_pg_with_pk_fk.test new file mode 100644 index 00000000..579dcf93 --- /dev/null +++ b/test/sql/create_pg/create_pg_with_pk_fk.test @@ -0,0 +1,297 @@ +# name: test/sql/create_pg/create_pg_with_pk_fk.test +# description: Testing create with predefined primary and foreign keys +# group: [create_pg] + +require duckpgq + +statement ok +create table v ( + id BIGINT primary key, + name varchar +); + +statement ok +INSERT INTO v VALUES (1, 'a'); + +statement ok +create table e ( + id bigint primary key, + src BIGINT REFERENCES v(id), + dst BIGINT REFERENCES v(id) +); + +statement error +-create property graph g +vertex tables (v) +edge tables (e source v destination v); +---- +Invalid Error: Multiple primary key - foreign key relationships detected between e and v. Please explicitly define the primary key and foreign key columns using `SOURCE KEY REFERENCES v ` + +statement ok +create table w ( + id BIGINT primary key, + name varchar +); + +statement ok +INSERT INTO w VALUES (2, 'b'); + +statement ok +create table e2 ( + id bigint primary key, + src BIGINT REFERENCES v(id), + dst BIGINT REFERENCES w(id) +); + +statement ok +INSERT INTO e2 VALUES (1, 1, 2); + +statement error +-create property graph g +vertex tables (v) +edge tables (e2 source v destination w); +---- +Invalid Error: Table 'w' not found in the property graph g. 
+ +statement ok +-create property graph g +vertex tables (v, w) +edge tables (e2 source v destination w); + +query II +-FROM GRAPH_TABLE (g MATCH (v:v)-[e2:e2]->(w:w) COLUMNS (v.id, w.id)); +---- +1 2 + +statement ok +CREATE TABLE a ( + id BIGINT, + name VARCHAR +); + +statement error +CREATE TABLE b ( + name VARCHAR, + src BIGINT REFERENCES a(id), + dst BIGINT, +); +---- +Binder Error: Failed to create foreign key: there is no primary key or unique constraint for referenced table "a" + +statement ok +CREATE TABLE b ( + name VARCHAR, + src BIGINT, + dst BIGINT, +); + +statement error +-CREATE PROPERTY GRAPH g2 VERTEX TABLES (a) EDGE TABLES (b SOURCE a DESTINATION a); +---- +Invalid Error: No primary key - foreign key relationship found in b with SOURCE table a + +statement ok +CREATE TABLE a_pk ( + id BIGINT primary key, + name VARCHAR +); + +statement ok +CREATE TABLE b_pk ( + name VARCHAR, + src BIGINT REFERENCES a_pk(id), + dst BIGINT, +); + +statement ok +-CREATE PROPERTY GRAPH g2 VERTEX TABLES (a_pk) EDGE TABLES (b_pk SOURCE a_pk DESTINATION a_pk); + +statement ok +CREATE TABLE x ( + id BIGINT PRIMARY KEY, + name VARCHAR +); + +statement ok +CREATE TABLE y ( + id BIGINT PRIMARY KEY, + src BIGINT REFERENCES x(id), + dst BIGINT REFERENCES x(id) +); + +statement error +-CREATE PROPERTY GRAPH g3 +VERTEX TABLES (x) +EDGE TABLES (y SOURCE x DESTINATION x); +---- +Invalid Error: Multiple primary key - foreign key relationships detected between y and x. Please explicitly define the primary key and foreign key columns using `SOURCE KEY REFERENCES x ` + +statement error +-CREATE PROPERTY GRAPH g3 +VERTEX TABLES (x) +EDGE TABLES (y SOURCE KEY (src) REFERENCES x (id) DESTINATION x); +---- +Invalid Error: Multiple primary key - foreign key relationships detected between y and x. 
Please explicitly define the primary key and foreign key columns using `DESTINATION KEY REFERENCES x ` + +statement ok +-CREATE PROPERTY GRAPH g3_explicit +VERTEX TABLES (x) +EDGE TABLES (y SOURCE KEY (src) REFERENCES x (id) + DESTINATION KEY (dst) REFERENCES x (id)); + +query I +-FROM GRAPH_TABLE (g3_explicit MATCH (x:x)-[y:y]->(x:x) COLUMNS (x.id)); +---- + +statement ok +CREATE TABLE m ( + id BIGINT PRIMARY KEY, + name VARCHAR +); + +statement ok +CREATE TABLE n ( + id BIGINT PRIMARY KEY, + src BIGINT, + dst BIGINT +); + +statement error +-CREATE PROPERTY GRAPH g4 +VERTEX TABLES (m) +EDGE TABLES (n SOURCE m DESTINATION m); +---- +Invalid Error: The primary key for the SOURCE table m is not defined in the edge table n + +statement error +-CREATE PROPERTY GRAPH g4 +VERTEX TABLES (m) +EDGE TABLES (n SOURCE KEY (src) REFERENCES m DESTINATION m); +---- +Parser Error: syntax error at or near "DESTINATION" + +statement ok +CREATE TABLE p ( + id BIGINT PRIMARY KEY, + name VARCHAR +); + +statement ok +CREATE TABLE q ( + id BIGINT PRIMARY KEY, + src BIGINT, + dst BIGINT REFERENCES p(id) +); + +statement error +CREATE PROPERTY GRAPH g5 +VERTEX TABLES (p) +EDGE TABLES (q DESTINATION p); +---- +Parser Error: syntax error at or near "DESTINATION" + +statement ok +CREATE TABLE u ( + id BIGINT PRIMARY KEY, + name VARCHAR +); + +statement ok +CREATE TABLE v2 ( + id BIGINT PRIMARY KEY, + src BIGINT REFERENCES u(id), + dst BIGINT +); + +statement ok +CREATE TABLE w2 ( + id BIGINT PRIMARY KEY, + src BIGINT, + dst BIGINT REFERENCES u(id) +); + +statement error +CREATE PROPERTY GRAPH g6 +VERTEX TABLES (u) +EDGE TABLES (v2 SOURCE u, w2 DESTINATION u); +---- +Parser Error: syntax error at or near "," + +statement ok +CREATE TABLE vertex_b ( + id BIGINT PRIMARY KEY, + name VARCHAR +); + +statement ok +-CREATE PROPERTY GRAPH g11 +VERTEX TABLES (vertex_b); + +statement ok +CREATE TABLE node_a ( + id BIGINT PRIMARY KEY, + name VARCHAR +); + +statement ok +CREATE TABLE edge_a ( + src BIGINT 
REFERENCES node_a(id), + dst BIGINT REFERENCES node_a(id) +); + +statement error +-CREATE PROPERTY GRAPH g10 +VERTEX TABLES (node_a) +EDGE TABLES (edge_a); +---- +Parser Error: syntax error at or near ")" + +statement error +-CREATE PROPERTY GRAPH g10 +VERTEX TABLES (node_a) +EDGE TABLES (edge_a SOURCE node_a DESTINATION KEY (dst) REFERENCES node_a (id)); +---- +Invalid Error: Multiple primary key - foreign key relationships detected between edge_a and node_a. Please explicitly define the primary key and foreign key columns using `SOURCE KEY REFERENCES node_a ` + +statement ok +-CREATE PROPERTY GRAPH g10 +VERTEX TABLES (node_a) +EDGE TABLES (edge_a SOURCE KEY (src) REFERENCES node_a (id) DESTINATION KEY (dst) REFERENCES node_a (id)); + +statement ok +CREATE OR REPLACE TABLE a ( + id BIGINT PRIMARY KEY, + name VARCHAR +); + +statement ok +CREATE OR REPLACE TABLE b ( + id BIGINT PRIMARY KEY, + description VARCHAR +); + +statement ok +CREATE TABLE edge_ab ( + id BIGINT PRIMARY KEY, + src BIGINT REFERENCES a(id), + dst BIGINT REFERENCES b(id) +); + +statement ok +-CREATE PROPERTY GRAPH g_relationship +VERTEX TABLES (a, b) +EDGE TABLES (edge_ab SOURCE a DESTINATION b); + +statement ok +INSERT INTO a VALUES (1, 'Vertex A'); + +statement ok +INSERT INTO b VALUES (2, 'Vertex B'); + +statement ok +INSERT INTO edge_ab VALUES (1, 1, 2); + +query II +-FROM GRAPH_TABLE (g_relationship MATCH (a:a)-[edge_ab:edge_ab]->(b:b) COLUMNS (a.id, b.id)); +---- +1 2 \ No newline at end of file diff --git a/test/sql/create_pg/create_property_graph.test b/test/sql/create_pg/create_property_graph.test new file mode 100644 index 00000000..53339685 --- /dev/null +++ b/test/sql/create_pg/create_property_graph.test @@ -0,0 +1,215 @@ +# name: test/sql/create_pg/create_property_graph.test +# description: Testing the creation of property graphs +# group: [create_pg] + +require duckpgq + +#statement ok +#pragma enable_verification + +statement error +-CREATE PROPERTY GRAPH pg4 +VERTEX TABLES 
(tabledoesnotexist); +---- +Invalid Error: Table with name tabledoesnotexist does not exist + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement error +-CREATE PROPERTY GRAPH pg4 +VERTEX TABLES (Student) +EDGE TABLES (edgetabledoesnotexist SOURCE KEY (id) REFERENCES Student (id) + DESTINATION KEY (id) REFERENCES Student (id) + ); +---- +Invalid Error: Table with name edgetabledoesnotexist does not exist + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (1,2, 14), (1,3, 15), (2,3, 16); + +# Vertex table with alias +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School as school_alias PROPERTIES ( school_id, school_name ) LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ) + +query I +-SELECT count(id) +FROM + GRAPH_TABLE (PG + MATCH p = (s1:Person)-[k:Knows]->(s2:Person WHERE s2.name='Daniel') + COLUMNS (s1.id)); +---- +0 + +query I +-SELECT count(id) +FROM + GRAPH_TABLE (PG + MATCH p = (s1:Person)-[k:Knows]->(s2:Person WHERE s2.name='Peter') + COLUMNS (s1.id)); +---- +3 + +# Error as property graph pg already exists +statement error +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School PROPERTIES ( school_id, school_name) LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ) +---- +Invalid Error: Property graph 
table with name pg already exists + +# Alias for the vertex table +statement ok +-CREATE PROPERTY GRAPH pg1 +VERTEX TABLES ( + Student as Student_alias PROPERTIES ( id as id_alias, name ) LABEL Person + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ) + +# Alias for the vertex table +statement ok +-CREATE PROPERTY GRAPH pg2 +VERTEX TABLES ( + Student as Student_alias PROPERTIES ( id as id_alias, name ) LABEL Person + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student_alias ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ) + +# Missing the keyword IN between Person and Person_alias +statement error +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id as id, name ) LABEL Person Person_alias Person2_alias, + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ) +---- +Parser Error: syntax error at or near "Person_alias" + +# Duplicate labels +statement error +-CREATE PROPERTY GRAPH pg2 +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School as school_alias PROPERTIES ( school_id, school_name) LABEL School IN School_kind (University, Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ) +---- +Constraint Error: Label university is not unique, make sure all labels are unique + +# Should fail as ( * ) is not allowed in the PROPERTIES list, in this case, PROPERTIES ( * ) should be omitted +statement error +-CREATE PROPERTY GRAPH pg3 +VERTEX TABLES ( + Student PROPERTIES ( * ) LABEL Person, + School PROPERTIES ( * ) LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) 
REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ) +---- +Parser Error: syntax error at or near "*" + + +# Should fail since the edge table references vertex tables that do not exist +statement error +-CREATE PROPERTY GRAPH pg3 +VERTEX TABLES ( + School LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ) +---- +Invalid Error: Table 'Student' not found in the property graph pg3. + +# Should fail since the edge table references vertex tables that do not exist +statement error +-CREATE PROPERTY GRAPH pg3 +VERTEX TABLES ( + School LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES School ( school_id ) + DESTINATION KEY ( dst ) REFERENCES Student__ ( id ) + PROPERTIES ( createDate ) LABEL Knows + ); +---- +Invalid Error: Table 'Student__' not found in the property graph pg3. 
+ + +# Check duplicate labels +statement error +-CREATE PROPERTY GRAPH pg4 +VERTEX TABLES ( + School LABEL School IN School_kind (Hogeschool, Hogeschool), + Student + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ); +---- +Constraint Error: Label hogeschool is not unique, make sure all labels are unique + + +# Check duplicate labels with case insenstitivity +statement error +-CREATE PROPERTY GRAPH pg4 +VERTEX TABLES ( + School LABEL School IN School_kind (HOGESCHOOL, University), + Student LABEL Hogeschool + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( createDate ) LABEL Knows + ); +---- +Constraint Error: Label hogeschool is not unique, make sure all labels are unique + + diff --git a/test/sql/create_pg/describe_pg.test b/test/sql/create_pg/describe_pg.test new file mode 100644 index 00000000..6dec42fc --- /dev/null +++ b/test/sql/create_pg/describe_pg.test @@ -0,0 +1,113 @@ +# name: test/sql/create_pg/describe_pg.test +# description: Testing the describe property graph syntax +# group: [create_pg] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows + ); + +query IIIIIIIIIIIIII +-DESCRIBE PROPERTY GRAPH snb; +---- +snb Person person 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL (empty) +snb Person_knows_person knows 0 Person [id] [Person1Id] Person [id] [Person2Id] NULL NULL NULL (empty) + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + Forum LABEL Forum, + Organisation LABEL Organisation IN typemask(company, university), + Place LABEL Place, + Tag LABEL 
Tag, + TagClass LABEL TagClass, + Country LABEL Country, + City LABEL City, + Message LABEL Message + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + Forum_hasMember_Person SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasMember, + Forum_hasTag_Tag SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL Forum_hasTag, + Person_hasInterest_Tag SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL hasInterest, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation, + Person_likes_Message SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (id) REFERENCES Message (id) + LABEL likes_Message, + Message_hasTag_Tag SOURCE KEY (id) REFERENCES Message (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL message_hasTag, + Message_hasAuthor_Person SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasAuthor, + Message_replyOf_Message SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (ParentMessageId) REFERENCES Message (id) + LABEL replyOf + ); + +query IIIIIIIIIIIIII +-DESCRIBE PROPERTY GRAPH snb; +---- +snb Message message 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL (empty) +snb City city 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL (empty) +snb Country country 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL (empty) +snb TagClass tagclass 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL (empty) +snb Tag tag 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL (empty) +snb Place place 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL (empty) +snb Organisation organisation 1 NULL NULL NULL NULL NULL NULL typemask [company, university] NULL 
(empty) +snb Forum forum 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL (empty) +snb Person person 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL (empty) +snb Message_replyOf_Message replyof 0 Message [id] [messageId] Message [id] [ParentMessageId] NULL NULL NULL (empty) +snb Message_hasAuthor_Person hasauthor 0 Message [id] [messageId] Person [id] [PersonId] NULL NULL NULL (empty) +snb Message_hasTag_Tag message_hastag 0 Message [id] [id] Tag [id] [TagId] NULL NULL NULL (empty) +snb Person_likes_Message likes_message 0 Person [id] [PersonId] Message [id] [id] NULL NULL NULL (empty) +snb person_workAt_Organisation workat_organisation 0 Person [id] [PersonId] Organisation [id] [OrganisationId] NULL NULL NULL (empty) +snb Person_hasInterest_Tag hasinterest 0 Person [id] [PersonId] Tag [id] [TagId] NULL NULL NULL (empty) +snb Forum_hasTag_Tag forum_hastag 0 Forum [id] [ForumId] Tag [id] [TagId] NULL NULL NULL (empty) +snb Forum_hasMember_Person hasmember 0 Forum [id] [ForumId] Person [id] [PersonId] NULL NULL NULL (empty) +snb Person_knows_person knows 0 Person [id] [Person1Id] Person [id] [Person2Id] NULL NULL NULL (empty) + + +statement error +-DESCRIBE PROPERTY GRAPH pgdoesnotexist; +---- +Invalid Error: Property graph pgdoesnotexist does not exist. 
+ + +statement ok con1 +attach 'duckdb/data/bluesky/bluesky.duckdb'; + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH bluesky + VERTEX TABLES (bluesky.account LABEL account) + EDGE TABLES (bluesky.follows SOURCE KEY (source) REFERENCES bluesky.account (did) + DESTINATION KEY (destination) REFERENCES bluesky.account (did) + LABEL follows); + +query IIIIIIIIIIIIII +-DESCRIBE PROPERTY GRAPH bluesky; +---- +bluesky account account 1 NULL NULL NULL NULL NULL NULL NULL NULL bluesky (empty) +bluesky follows follows 0 account [did] [source] account [did] [destination] NULL NULL bluesky (empty) \ No newline at end of file diff --git a/test/sql/create_pg/drop_property_graph.test b/test/sql/create_pg/drop_property_graph.test new file mode 100644 index 00000000..da652eed --- /dev/null +++ b/test/sql/create_pg/drop_property_graph.test @@ -0,0 +1,109 @@ +# name: test/sql/create_pg/drop_property_graph.test +# description: Testing the drop property graph syntax +# group: [create_pg] + +require duckpgq + +#statement ok +#pragma enable_verification + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); + +statement ok +CREATE TABLE School(name VARCHAR, Id BIGINT, Kind VARCHAR); + +statement ok +CREATE TABLE StudyAt(personId BIGINT, schoolId BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +INSERT INTO School VALUES ('VU', 0, 'University'), ('UVA', 1, 'University'); + +statement ok +INSERT INTO StudyAt VALUES (0, 0), (1, 0), (2, 1), (3, 1), (4, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School LABEL SCHOOL + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) 
REFERENCES Student ( id ) + LABEL Knows, + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( id ) + LABEL StudyAt + ); + +statement ok +-DROP PROPERTY GRAPH pg; + +statement error +-DROP PROPERTY GRAPH pg; +---- +Binder Error: Property graph pg does not exist + +statement error +-DROP PROPERTY GRAPH pgdoesntexist; +---- +Binder Error: Property graph pgdoesntexist does not exist + +statement error +-SELECT study.id +FROM GRAPH_TABLE (pg + MATCH + (a:Person) + COLUMNS (a.id) + ) study; +---- +Binder Error: Property graph pg does not exist + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School LABEL SCHOOL + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows, + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( id ) + LABEL StudyAt + ); + + +statement ok +-SELECT * +FROM GRAPH_TABLE (pg + MATCH + (a:Person) + COLUMNS (a.id) + ) study; + + +# should drop the property graph +statement ok +-DROP PROPERTY GRAPH if exists pg; + +# should not give an error as the property graph is already dropped +statement ok +-DROP PROPERTY GRAPH if exists pg; + +# should not give an error as the property graph is already dropped +statement error +-DROP PROPERTY GRAPH pg; +---- +Binder Error: Property graph pg does not exist diff --git a/test/sql/create_pg/except_properties.test b/test/sql/create_pg/except_properties.test new file mode 100644 index 00000000..cab89821 --- /dev/null +++ b/test/sql/create_pg/except_properties.test @@ -0,0 +1,49 @@ +# name: test/sql/create_pg/except_properties.test +# description: Testing the except properties +# group: [create_pg] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src 
BIGINT, dst BIGINT, createDate BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (1,2, 14), (1,3, 15), (2,3, 16); + +# ARE ALL COLUMNS EXCEPT +statement ok +-CREATE PROPERTY GRAPH pg_are_all_except_properties +VERTEX TABLES ( + Student LABEL Person, + School PROPERTIES ARE ALL COLUMNS EXCEPT (school_id) LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows + ) + +# ALL COLUMNS EXCEPT +statement ok +-CREATE PROPERTY GRAPH pg_all_except_properties +VERTEX TABLES ( + Student LABEL Person, + School PROPERTIES ALL COLUMNS EXCEPT (school_id) LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows + ) diff --git a/test/sql/create_pg/no_properties.test b/test/sql/create_pg/no_properties.test new file mode 100644 index 00000000..1d31bce3 --- /dev/null +++ b/test/sql/create_pg/no_properties.test @@ -0,0 +1,52 @@ +# name: test/sql/create_pg/no_properties.test +# description: Testing property graphs with no properties +# group: [create_pg] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (1,2, 14), (1,3, 15), (2,3, 16); + + +# No properties keyword +statement 
ok +-CREATE PROPERTY GRAPH pg_no_properties +VERTEX TABLES ( + Student LABEL Person, + School NO PROPERTIES LABEL School IN School_kind (Hogeschool, University) + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows + ) + +statement ok +-CREATE PROPERTY GRAPH g VERTEX TABLES (Student PROPERTIES (id)); + +# Query on unspecified columns in the graph throws error. +statement error +-SELECT * FROM GRAPH_TABLE (g MATCH (s:Student) COLUMNS (s.id, s.name)); +---- +Binder Error: Property s.name is never registered! + +# Columns to query is only allowed to be or
., which we cannot prefix catalog or schema. +statement error +-SELECT * FROM GRAPH_TABLE (g MATCH (s:Student) COLUMNS (main.s.id)); +---- +Binder Error: Property main.s.id is never registered! diff --git a/test/sql/create_pg/optional_edge_table_clause.test b/test/sql/create_pg/optional_edge_table_clause.test new file mode 100644 index 00000000..74bdecc3 --- /dev/null +++ b/test/sql/create_pg/optional_edge_table_clause.test @@ -0,0 +1,27 @@ +# name: test/sql/create_pg/optional_edge_table_clause.test +# description: Testing the optional edge table property graphs +# group: [create_pg] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES (Message, person); + + +statement ok +-FROM GRAPH_TABLE (snb + MATCH (m:Message) + COLUMNS (*) + ) tmp + +statement error +-FROM GRAPH_TABLE (snb + MATCH (p:Person)-[k:Knows]->(p2:Person) + COLUMNS (*) + ) tmp +---- +Binder Error: The label knows is not registered in property graph snb diff --git a/test/sql/csr_segfault.test b/test/sql/csr_segfault.test new file mode 100644 index 00000000..a74022dd --- /dev/null +++ b/test/sql/csr_segfault.test @@ -0,0 +1,72 @@ +# name: test/sql/csr_segfault.test +# group: [sql] + +require duckpgq + +statement ok +create or replace table student(id bigint); insert into student from range(0, 5000); + +statement ok +CREATE or replace TABLE know(src BIGINT, dst BIGINT); insert into know select s.id as src, s2.id as dst from student s positional join student s2; + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + ); + +statement ok +SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM 
Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +query I +SELECT count(csrv) FROM get_csr_v(0); +---- +5002 + +query I +SELECT count(csre) FROM get_csr_e(0); +---- +5000 + +statement ok +COPY (SELECT csrv FROM get_csr_v(0)) TO '__TEST_DIR__/v.csv'; + +statement ok +COPY (SELECT csre FROM get_csr_e(0)) TO '__TEST_DIR__/e.csv'; + +query I +SELECT count(*) FROM read_csv('__TEST_DIR__/v.csv'); +---- +5002 + +query I +SELECT count(*) FROM read_csv('__TEST_DIR__/e.csv'); +---- +5000 \ No newline at end of file diff --git a/test/sql/duckdb_columns.test b/test/sql/duckdb_columns.test new file mode 100644 index 00000000..650caeaa --- /dev/null +++ b/test/sql/duckdb_columns.test @@ -0,0 +1,17 @@ +# name: test/sql/duckdb_columns.test +# description: Testing that normal duckdb queries do not interfere +# group: [sql] + +require duckpgq + +statement ok +from duckdb_columns; + +statement ok +from duckdb_constraints(); + +statement ok +select * from information_schema.columns; + +statement ok +select * from information_schema.tables; diff --git a/test/sql/explain_duckpgq.test b/test/sql/explain_duckpgq.test new file mode 100644 index 00000000..72a86828 --- /dev/null +++ b/test/sql/explain_duckpgq.test @@ -0,0 +1,69 @@ +# name: test/sql/explain_duckpgq.test +# description: Testing the EXPLAIN statements +# group: [sql] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + Forum LABEL Forum, + Organisation LABEL Organisation IN typemask(company, university), + Place LABEL Place, + Tag LABEL Tag, + TagClass LABEL TagClass, + Country LABEL Country, + City LABEL City, + Message LABEL Message + ) +EDGE TABLES ( + 
Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + Forum_hasMember_Person SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasMember, + Forum_hasTag_Tag SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL Forum_hasTag, + Person_hasInterest_Tag SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL hasInterest, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation, + Person_likes_Message SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (id) REFERENCES Message (id) + LABEL likes_Message, + Message_hasTag_Tag SOURCE KEY (id) REFERENCES Message (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL message_hasTag, + Message_hasAuthor_Person SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasAuthor, + Message_replyOf_Message SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (ParentMessageId) REFERENCES Message (id) + LABEL replyOf + ); + +#IC 2 +statement ok +-EXPLAIN FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 17592186044461)-[k:knows]-(b:Person)<-[au:hasAuthor]-(m:message WHERE m.creationDate < '2010-10-16') + COLUMNS (a.id, a.firstName, a.lastName, m.id as messageId, coalesce(m.imageFile, m.content), m.creationDate) + ) tmp + ORDER BY creationDate DESC, Messageid ASC + LIMIT 20; + +#IC 2 +statement ok +-EXPLAIN ANALYZE FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 17592186044461)-[k:knows]-(b:Person)<-[au:hasAuthor]-(m:message WHERE m.creationDate < '2010-10-16') + COLUMNS (a.id, a.firstName, a.lastName, m.id as messageId, coalesce(m.imageFile, m.content), m.creationDate) + ) tmp + ORDER BY creationDate DESC, Messageid ASC + 
LIMIT 20; diff --git a/test/sql/get_csr_ptr.test b/test/sql/get_csr_ptr.test new file mode 100644 index 00000000..5d98f223 --- /dev/null +++ b/test/sql/get_csr_ptr.test @@ -0,0 +1,65 @@ +# name: test/sql/get_csr_ptr.test +# description: Test getting the CSR pointer +# group: [sql] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, id BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17), (2, 4, 18); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( id ) LABEL Knows + ); + +statement ok +SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +statement ok +SELECT * FROM get_csr_ptr(0); + +statement error +SELECT * FROM get_csr_ptr(10); +---- +Constraint Error: CSR not found with ID 10 diff --git a/test/sql/label_optional.test b/test/sql/label_optional.test new file mode 100644 index 00000000..b6e9cf1d --- /dev/null +++ b/test/sql/label_optional.test @@ -0,0 +1,109 @@ +# name: 
test/sql/label_optional.test +# description: Testing the optional label for property graph creation +# group: [sql] + +require duckpgq + +# Test with a different number of vertices and edges +statement ok +CREATE TABLE VariedStudent(id BIGINT, name VARCHAR);INSERT INTO VariedStudent VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'), (3, 'Dave'), (4, 'Eve'), (5, 'Frank'); + +statement ok +CREATE TABLE VariedKnow(src BIGINT, dst BIGINT);INSERT INTO VariedKnow VALUES (0,1), (0,2), (0,3), (1,2), (2,3), (3,4), (4,5); + +query II +SELECT * FROM (WITH edges_cte AS (SELECT src_table.rowid AS src, dst_table.rowid AS dst, VariedKnow.rowid AS edges FROM VariedKnow INNER JOIN VariedStudent AS src_table ON ((VariedKnow.src = src_table.id)) INNER JOIN VariedStudent AS dst_table ON ((VariedKnow.dst = dst_table.id))), csr_cte AS (SELECT create_csr_edge(0, (SELECT count(VariedStudent.id) FROM VariedStudent AS VariedStudent), CAST((SELECT multiply(2, sum(create_csr_vertex(0, (SELECT count(VariedStudent.id) FROM VariedStudent AS VariedStudent), sub.dense_id, sub.cnt))) FROM (SELECT dense_id, count(outgoing_edges) AS cnt FROM ((SELECT VariedStudent.rowid AS dense_id, VariedKnow.src AS outgoing_edges, VariedKnow.dst AS incoming_edges FROM VariedKnow INNER JOIN VariedStudent ON ((VariedKnow.src = VariedStudent.id))) UNION BY NAME (SELECT VariedStudent.rowid AS dense_id, VariedKnow.dst AS outgoing_edges, VariedKnow.src AS incoming_edges FROM VariedKnow INNER JOIN VariedStudent ON ((VariedKnow.dst = VariedStudent.id)))) AS unique_edges GROUP BY dense_id) AS sub) AS BIGINT), (SELECT multiply(2, count()) FROM ((SELECT src, dst FROM edges_cte) UNION BY NAME (SELECT dst AS src, src AS dst FROM edges_cte))), src, dst, edge) AS "temp" FROM (SELECT src, dst, any_value(edges) AS edge FROM ((SELECT src, dst, edges FROM edges_cte) UNION ALL (SELECT dst, src, edges FROM edges_cte)) GROUP BY src, dst))SELECT VariedStudent.id, "add"(__x."temp", local_clustering_coefficient(0, VariedStudent.rowid)) AS 
local_clustering_coefficient FROM VariedStudent CROSS JOIN (SELECT multiply(0, count(csr_cte."temp")) AS "temp" FROM csr_cte) AS __x) AS lcc; +---- +0 0.6666667 +1 1.0 +2 0.6666667 +3 0.33333334 +4 0.0 +5 0.0 + +statement ok +-CREATE PROPERTY GRAPH varied_pg_label_a +VERTEX TABLES ( + VariedStudent label a + ) +EDGE TABLES ( + VariedKnow SOURCE KEY ( src ) REFERENCES VariedStudent ( id ) + DESTINATION KEY ( dst ) REFERENCES VariedStudent ( id ) + ); + + +query II +select id, local_clustering_coefficient from local_clustering_coefficient(varied_pg_label_a, a, variedknow); +---- +0 0.6666667 +1 1.0 +2 0.6666667 +3 0.33333334 +4 0.0 +5 0.0 + +statement ok +select * from pagerank(varied_pg_label_a, a, variedknow); + +statement error +select id, local_clustering_coefficient from local_clustering_coefficient(varied_pg_label_a, variedStudent, variedknow); +---- +Invalid Error: Label 'variedstudent' not found. Did you mean the vertex label 'a'? + + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person, + Organisation IN typemask(company, university) + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation + ); + +query III +-FROM GRAPH_TABLE (snb + MATCH (p:Person)-[w:workAt_Organisation]->(u:University) + COLUMNS (p.id as p_id, u.id as u_id, u.type) + ) tmp + ORDER BY p_id, u_id + limit 10; +---- +14 4593 University +16 5809 University +32 5047 University +2199023255557 1953 University +2199023255573 5263 University +2199023255594 1597 University +4398046511139 4929 University +6597069766702 5038 University +8796093022234 3008 University +8796093022244 3008 University + + +query II +-FROM GRAPH_TABLE (snb + MATCH 
(p:Person)-[k:knows]->(p2:Person) + COLUMNS (p.id as p_id, p2.id as p2_id) + ) tmp + order by p_id, p2_id + limit 10; +---- +14 10995116277782 +14 24189255811081 +14 26388279066668 +16 2199023255594 +16 26388279066655 +16 28587302322180 +16 28587302322204 +32 2199023255594 +32 13194139533352 +32 17592186044461 diff --git a/test/sql/multiple_graph_table.test b/test/sql/multiple_graph_table.test new file mode 100644 index 00000000..08e0c179 --- /dev/null +++ b/test/sql/multiple_graph_table.test @@ -0,0 +1,177 @@ +# name: test/sql/multiple_graph_table.test +# description: Testing multiple graph tables in a single query +# group: [sql] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +CREATE TABLE School(name VARCHAR, Id BIGINT, Kind VARCHAR);INSERT INTO School VALUES ('VU', 0, 'University'), ('UVA', 1, 'University'); + +statement ok +CREATE TABLE StudyAt(personId BIGINT, schoolId BIGINT);INSERT INTO StudyAt VALUES (0, 0), (1, 0), (2, 1), (3, 1), (4, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student, + School + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ), + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( id ) + ); + +query II +-select a.id, b.id FROM GRAPH_TABLE(pg MATCH (a:student)) a, GRAPH_TABLE(pg MATCH (b:student)) b; +---- +0 0 +1 0 +2 0 +3 0 +4 0 +0 1 +1 1 +2 1 +3 1 +4 1 +0 2 +1 2 +2 2 +3 2 +4 2 +0 3 +1 3 +2 3 +3 3 +4 3 +0 4 +1 4 +2 4 +3 4 +4 4 + +query II +-select unnamed_subquery.id, unnamed_subquery2.id FROM GRAPH_TABLE(pg MATCH (a:student)), GRAPH_TABLE(pg MATCH 
(b:student)); +---- +0 0 +1 0 +2 0 +3 0 +4 0 +0 1 +1 1 +2 1 +3 1 +4 1 +0 2 +1 2 +2 2 +3 2 +4 2 +0 3 +1 3 +2 3 +3 3 +4 3 +0 4 +1 4 +2 4 +3 4 +4 4 + + +query IIIIII +-select a.id, a.name, unnamed_subquery.b_id, unnamed_subquery.b_name, unnamed_subquery.C_id, unnamed_subquery.c_name +FROM GRAPH_TABLE(pg MATCH (a:student)) a, + GRAPH_TABLE(pg + MATCH (b:student)-[r:know]->(c:student) + COLUMNS (b.id as b_id, b.name as b_name, c.name as c_name, c.id as c_id) + ); +---- +0 Daniel 0 Daniel 1 Tavneet +0 Daniel 0 Daniel 2 Gabor +0 Daniel 0 Daniel 3 Peter +0 Daniel 3 Peter 0 Daniel +0 Daniel 1 Tavneet 2 Gabor +0 Daniel 1 Tavneet 3 Peter +0 Daniel 2 Gabor 3 Peter +0 Daniel 4 David 3 Peter +1 Tavneet 0 Daniel 1 Tavneet +1 Tavneet 0 Daniel 2 Gabor +1 Tavneet 0 Daniel 3 Peter +1 Tavneet 3 Peter 0 Daniel +1 Tavneet 1 Tavneet 2 Gabor +1 Tavneet 1 Tavneet 3 Peter +1 Tavneet 2 Gabor 3 Peter +1 Tavneet 4 David 3 Peter +2 Gabor 0 Daniel 1 Tavneet +2 Gabor 0 Daniel 2 Gabor +2 Gabor 0 Daniel 3 Peter +2 Gabor 3 Peter 0 Daniel +2 Gabor 1 Tavneet 2 Gabor +2 Gabor 1 Tavneet 3 Peter +2 Gabor 2 Gabor 3 Peter +2 Gabor 4 David 3 Peter +3 Peter 0 Daniel 1 Tavneet +3 Peter 0 Daniel 2 Gabor +3 Peter 0 Daniel 3 Peter +3 Peter 3 Peter 0 Daniel +3 Peter 1 Tavneet 2 Gabor +3 Peter 1 Tavneet 3 Peter +3 Peter 2 Gabor 3 Peter +3 Peter 4 David 3 Peter +4 David 0 Daniel 1 Tavneet +4 David 0 Daniel 2 Gabor +4 David 0 Daniel 3 Peter +4 David 3 Peter 0 Daniel +4 David 1 Tavneet 2 Gabor +4 David 1 Tavneet 3 Peter +4 David 2 Gabor 3 Peter +4 David 4 David 3 Peter + +query II +-select unnamed_subquery.id, unnamed_subquery2.id +FROM GRAPH_TABLE(pg MATCH (a:student)), (select 1 as id); +---- +0 1 +1 1 +2 1 +3 1 +4 1 + +statement ok +CREATE TABLE cities ( +name VARCHAR, +lat DECIMAL, +lon DECIMAL +); + +statement ok +CREATE TABLE cities_are_adjacent ( +city1name VARCHAR, +city2name VARCHAR +); + + +statement ok +-CREATE PROPERTY GRAPH citymap +VERTEX TABLES ( +cities PROPERTIES (name,lat,lon) LABEL city +) +EDGE 
TABLES ( +cities_are_adjacent SOURCE KEY ( city1name ) REFERENCES cities ( name ) +DESTINATION KEY ( city2name ) REFERENCES cities ( name ) +LABEL adjacent +); + +statement ok +-select * from GRAPH_TABLE (citymap MATCH (s:city)-[r:adjacent]->(t:city)) g1, GRAPH_TABLE (citymap MATCH (s:city)-[r:adjacent]->(t:city)) g2; + diff --git a/test/sql/nested_subquery.test b/test/sql/nested_subquery.test new file mode 100644 index 00000000..4a2a8873 --- /dev/null +++ b/test/sql/nested_subquery.test @@ -0,0 +1,97 @@ +# name: test/sql/nested_subquery.test +# group: [sql] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +CREATE TABLE School(name VARCHAR, Id BIGINT, Kind VARCHAR);INSERT INTO School VALUES ('VU', 0, 'University'), ('UVA', 1, 'University'); + +statement ok +CREATE TABLE StudyAt(personId BIGINT, schoolId BIGINT);INSERT INTO StudyAt VALUES (0, 0), (1, 0), (2, 1), (3, 1), (4, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School LABEL SCHOOL + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ), + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( id ) + ); + + +statement ok +-select * from (select id, id_1 from graph_table ( pg match (p:person)-[k:know]->(p2:person) columns (p.id, p2.id))); + +statement ok +-SELECT id, friend_id +FROM ( + SELECT id, friend_id + FROM GRAPH_TABLE ( + pg MATCH (p:Person)-[k:know]->(p2:Person) COLUMNS (p.id as id, p2.id as friend_id) + ) graph +); + +statement ok +-SELECT id, friend_id +FROM ( + SELECT id, 
friend_id + FROM GRAPH_TABLE ( + pg MATCH (p:Person)-[k:know]->(p2:Person) COLUMNS (p.id as id, p2.id as friend_id) + ) graph + WHERE id > 1 +); + +statement ok +-SELECT Student.name, friend_id +FROM Student +JOIN ( + SELECT student_id, friend_id + FROM GRAPH_TABLE ( + pg MATCH (p:Person)-[k:know]->(p2:Person) COLUMNS (p.id as student_id, p2.id as friend_id) + ) graph +) AS subquery +ON Student.id = subquery.student_id; + +statement ok +-SELECT id, nested_friend_id +FROM ( + SELECT id, friend_id AS nested_friend_id + FROM ( + SELECT id, friend_id + FROM GRAPH_TABLE ( + pg MATCH (p:Person)-[k:know]->(p2:Person) COLUMNS (p.id as id, p2.id as friend_id) + ) + ) +); + +statement ok +-SELECT id, friend_count +FROM ( + SELECT id, COUNT(friend_id) AS friend_count + FROM GRAPH_TABLE ( + pg MATCH (p:Person)-[k:know]->(p2:Person) COLUMNS (p.id as id, p2.id as friend_id) + ) + GROUP BY id +); + +statement ok +-WITH Friendships AS ( + SELECT person_id, friend_id + FROM ( + SELECT person_id, friend_id + FROM GRAPH_TABLE ( + pg MATCH (p:Person)-[k:know]->(p2:Person) COLUMNS (p.id as person_id, p2.id as friend_id) + ) + ) AS Subquery +) +SELECT * FROM Friendships; \ No newline at end of file diff --git a/test/sql/non_existing_table.test b/test/sql/non_existing_table.test new file mode 100644 index 00000000..668b5921 --- /dev/null +++ b/test/sql/non_existing_table.test @@ -0,0 +1,55 @@ +# name: test/sql/non_existing_table.test +# description: Testing creating a property graph on non-existing tables +# group: [sql] + +# https://github.com/cwida/duckpgq-extension/issues/95 + +require duckpgq + +# https://github.com/cwida/duckpgq-extension/issues/96 +statement error +select * from table_that_does_not_exist; +---- +Catalog Error: Table with name table_that_does_not_exist does not exist! + +statement ok +CREATE TABLE test (a INTEGER); + +statement error +SELECT b from test; +---- +Binder Error: Referenced column "b" not found in FROM clause! 
+ +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person, + Organisation IN typemask(company, university) + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation + ); + +statement error +-FROM GRAPH_TABLE (snb + MATCH (a:Kind) + COLUMNS (*) + ); +---- +Binder Error: The label kind is not registered in property graph snb + +statement error +-FROM GRAPH_TABLE (abc + MATCH (a:Kind) + COLUMNS (*) + ); +---- +Binder Error: Property graph abc does not exist diff --git a/test/sql/optional_columns.test b/test/sql/optional_columns.test new file mode 100644 index 00000000..eb684f4c --- /dev/null +++ b/test/sql/optional_columns.test @@ -0,0 +1,89 @@ +# name: test/sql/optional_columns.test +# description: Testing the optional columns syntax improvement +# group: [sql] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003' + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + Organisation LABEL Organisation IN typemask(company, university) + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation + ); + +query IIIIIIIIIII +-FROM GRAPH_TABLE (snb MATCH (p:Person)) limit 1; +---- +1166 1984-03-11 Firefox 2010-01-03 23:10:31.499+00 Hossein14@hotmail.com Hossein male 14 Forouhar 77.245.239.11 fa;ku;en + +query I +-FROM GRAPH_TABLE (snb MATCH (p:Person) COLUMNS (p.id)) limit 10; +---- +14 +16 +32 +2199023255557 +2199023255573 +2199023255594 
+4398046511139 +6597069766702 +8796093022234 +8796093022237 + +query I +-SELECT p_id FROM GRAPH_TABLE (snb MATCH (p:Person) COLUMNS (p.id as p_id,)) limit 10; +---- +14 +16 +32 +2199023255557 +2199023255573 +2199023255594 +4398046511139 +6597069766702 +8796093022234 +8796093022237 + +query I +-FROM GRAPH_TABLE (snb MATCH (p:Person) COLUMNS (p.id as p_id)) limit 10; +---- +14 +16 +32 +2199023255557 +2199023255573 +2199023255594 +4398046511139 +6597069766702 +8796093022234 +8796093022237 + +query II +-FROM GRAPH_TABLE (snb MATCH (p:Person) COLUMNS (p.id, p.firstname as first_name)) limit 10; +---- +14 Hossein +16 Jan +32 Miguel +2199023255557 Eric +2199023255573 Arbaaz +2199023255594 Ali +4398046511139 Ayesha +6597069766702 Alejandro +8796093022234 Rahul +8796093022237 Lei + +query I +-SELECT count(*) FROM GRAPH_TABLE (snb MATCH (p:Person)) GROUP BY ALL limit 10; +---- +50 diff --git a/test/sql/path_finding/bounded_shortest_path.test b/test/sql/path_finding/bounded_shortest_path.test new file mode 100644 index 00000000..2c3cf13d --- /dev/null +++ b/test/sql/path_finding/bounded_shortest_path.test @@ -0,0 +1,34 @@ +# name: test/sql/path_finding/bounded_shortest_path.test +# description: Ensure bounded shortest path returns paths within bounds even if shorter paths exist +# group: [path_finding] + +require duckpgq + +statement ok +CREATE TABLE Point(id BIGINT); INSERT INTO Point VALUES (0), (1), (2), (3); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT); INSERT INTO know VALUES (0, 1), (0, 2), (2, 3), (3, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Point PROPERTIES ( id ) LABEL Pnt + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Point ( id ) + DESTINATION KEY ( dst ) REFERENCES Point ( id ) + LABEL Knows + ); + +# Shortest path 0->1 has length 1, but query bounds are {2,3} +# Path 0->2->3->1 has length 3, which is within bounds +# Should return the path 0->2->3->1, not empty! 
+query I +-FROM GRAPH_TABLE (pg + MATCH + p = ANY SHORTEST (a:Pnt WHERE a.id = 0)-[k:knows]->{2,3}(b:Pnt WHERE b.id = 1) + COLUMNS (element_id(p)) + ) x; +---- +[0, 0, 1] diff --git a/test/sql/path_finding/complex_matching.test b/test/sql/path_finding/complex_matching.test new file mode 100644 index 00000000..924a4794 --- /dev/null +++ b/test/sql/path_finding/complex_matching.test @@ -0,0 +1,374 @@ +# name: test/sql/path_finding/complex_matching.test +# description: Testing more complex pattern matching +# group: [path_finding] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + Forum LABEL Forum, + Organisation LABEL Organisation IN typemask(company, university), + Place LABEL Place, + Tag LABEL Tag, + TagClass LABEL TagClass, + Country LABEL Country, + City LABEL City, + Message LABEL Message + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + Forum_hasMember_Person SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasMember, + Forum_hasTag_Tag SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL Forum_hasTag, + Person_hasInterest_Tag SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL hasInterest, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation, + Person_likes_Message SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (id) REFERENCES Message (id) + LABEL likes_Message, + Message_hasTag_Tag SOURCE KEY (id) REFERENCES Message (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL message_hasTag, + Message_hasAuthor_Person SOURCE KEY (messageId) REFERENCES Message (id) + 
DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasAuthor, + Message_replyOf_Message SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (ParentMessageId) REFERENCES Message (id) + LABEL replyOf + ); + +query IIIIIII +-FROM GRAPH_TABLE (snb + MATCH o = ANY SHORTEST (p4:Person where p4.rowid = 0)-[w3:knows]->(p:Person)-[w:knows]->{1,3}(p2:Person)-[w2:knows]->(p3:Person) + COLUMNS (p3.id as p3_id, element_id(o), path_length(o), vertices(o), edges(o), p4.id as p4_id, p.id as p_id) + ) tmp +ORDER BY p3_id, p4_id, p_id; +---- +28587302322180 [0, 0, 13, 42, 29, 68, 33] 3 [0, 13, 29, 33] [0, 42, 68] 14 10995116277782 +28587302322180 [0, 1, 26, 63, 32, 75, 33] 3 [0, 26, 32, 33] [1, 63, 75] 14 24189255811081 +28587302322196 [0, 0, 13, 43, 31, 71, 35] 3 [0, 13, 31, 35] [0, 43, 71] 14 10995116277782 +28587302322196 [0, 1, 26, 62, 31, 71, 35] 3 [0, 26, 31, 35] [1, 62, 71] 14 24189255811081 +28587302322204 [0, 0, 13, 44, 33, 77, 36] 3 [0, 13, 33, 36] [0, 44, 77] 14 10995116277782 +28587302322204 [0, 0, 13, 42, 29, 69, 36] 3 [0, 13, 29, 36] [0, 42, 69] 14 10995116277782 +28587302322204 [0, 1, 26, 64, 33, 77, 36] 3 [0, 26, 33, 36] [1, 64, 77] 14 24189255811081 +28587302322204 [0, 1, 26, 63, 32, 76, 36] 3 [0, 26, 32, 36] [1, 63, 76] 14 24189255811081 +28587302322204 [0, 2, 32, 75, 33, 77, 36] 3 [0, 32, 33, 36] [2, 75, 77] 14 26388279066668 +28587302322223 [0, 0, 13, 44, 33, 78, 38] 3 [0, 13, 33, 38] [0, 44, 78] 14 10995116277782 +28587302322223 [0, 1, 26, 64, 33, 78, 38] 3 [0, 26, 33, 38] [1, 64, 78] 14 24189255811081 +28587302322223 [0, 2, 32, 75, 33, 78, 38] 3 [0, 32, 33, 38] [2, 75, 78] 14 26388279066668 +30786325577731 [0, 0, 13, 44, 33, 79, 39] 3 [0, 13, 33, 39] [0, 44, 79] 14 10995116277782 +30786325577731 [0, 1, 26, 64, 33, 79, 39] 3 [0, 26, 33, 39] [1, 64, 79] 14 24189255811081 +30786325577731 [0, 2, 32, 75, 33, 79, 39] 3 [0, 32, 33, 39] [2, 75, 79] 14 26388279066668 +30786325577740 [0, 0, 13, 43, 31, 72, 40] 3 [0, 13, 31, 40] [0, 43, 72] 14 
10995116277782 +30786325577740 [0, 1, 26, 62, 31, 72, 40] 3 [0, 26, 31, 40] [1, 62, 72] 14 24189255811081 +32985348833329 [0, 0, 13, 44, 33, 80, 43] 3 [0, 13, 33, 43] [0, 44, 80] 14 10995116277782 +32985348833329 [0, 1, 26, 64, 33, 80, 43] 3 [0, 26, 33, 43] [1, 64, 80] 14 24189255811081 +32985348833329 [0, 2, 32, 75, 33, 80, 43] 3 [0, 32, 33, 43] [2, 75, 80] 14 26388279066668 +35184372088850 [0, 0, 13, 43, 31, 73, 45] 3 [0, 13, 31, 45] [0, 43, 73] 14 10995116277782 +35184372088850 [0, 0, 13, 45, 36, 82, 45] 3 [0, 13, 36, 45] [0, 45, 82] 14 10995116277782 +35184372088850 [0, 1, 26, 63, 32, 76, 36, 82, 45] 4 [0, 26, 32, 36, 45] [1, 63, 76, 82] 14 24189255811081 +35184372088850 [0, 1, 26, 62, 31, 73, 45] 3 [0, 26, 31, 45] [1, 62, 73] 14 24189255811081 +35184372088850 [0, 2, 32, 76, 36, 82, 45] 3 [0, 32, 36, 45] [2, 76, 82] 14 26388279066668 +35184372088856 [0, 0, 13, 43, 31, 74, 46] 3 [0, 13, 31, 46] [0, 43, 74] 14 10995116277782 +35184372088856 [0, 1, 26, 62, 31, 74, 46] 3 [0, 26, 31, 46] [1, 62, 74] 14 24189255811081 + +statement error +-FROM GRAPH_TABLE (snb + MATCH o = ANY SHORTEST (p:Person)-[w:knows]->(p2:Person)-[w2:knows]->(p3:Person) + COLUMNS (o) + ) tmp + limit 10; +---- +Binder Error: Property o is never registered! + + +# https://github.com/cwida/duckpgq-extension/issues/68 +statement error +-FROM GRAPH_TABLE (snb + MATCH o = ANY SHORTEST (p:Person)-[w:knows]->(p2:Person)-[w2:knows]->(p3:Person) + COLUMNS (element_id(a)) + ) tmp + limit 10; +---- +Catalog Error: Scalar Function with name element_id does not exist! +Did you mean "element_at"? 
+ +query III +-FROM GRAPH_TABLE (snb + MATCH o = ANY SHORTEST (p:Person)-[w:knows]->{1,3}(p2:Person)-[i:hasInterest]->(t:Tag) + COLUMNS (p.id as p_id, p2.id as p2_id, t.id as t_id) + ) tmp + ORDER BY p_id, p2_id, t_id + limit 10; +---- +14 10995116277782 470 +14 10995116277782 588 +14 10995116277782 598 +14 10995116277782 798 +14 10995116277782 805 +14 10995116277782 974 +14 10995116277782 1031 +14 10995116277782 1174 +14 10995116277782 1183 +14 10995116277782 1527 + +query IIIII +WITH CTE1 AS (SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Person a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Person a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.person1id) as cnt + FROM Person a + LEFT JOIN Person_knows_Person k ON k.person1id = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count(*) from Person_knows_Person k JOIN Person a on a.id = k.person1id JOIN Person b on b.id = k.person2id), + a.rowid, + b.rowid, + k.rowid) as temp + FROM Person_knows_Person k + JOIN Person a on a.id = k.person1id + JOIN Person b on b.id = k.person2id) +SELECT shortestpath(0, (select count(*) from Person), a.rowid, b.rowid) as path, + a.firstname as a_name, + b.rowid as b_rowid, + b.id as b_id, + t.id as t_id +FROM Person a, + Person b, + person_hasInterest_Tag i, + Tag t, + (select count(cte1.temp) as temp from cte1) __x +WHERE a.id = 28587302322180 + and b.id = i.PersonId + and t.id = i.TagId + and __x.temp * 0 + iterativelength(0, (select count(*) from Person), a.rowid, b.rowid) between 1 and 3 +ORDER BY b_id, t_id +---- +[33, 77, 36] Bryn 36 28587302322204 6 +[33, 77, 36] Bryn 36 28587302322204 588 +[33, 77, 36] Bryn 36 28587302322204 1021 +[33, 77, 36] Bryn 36 28587302322204 1767 +[33, 77, 36] Bryn 36 28587302322204 1940 +[33, 77, 36] Bryn 36 28587302322204 1995 +[33, 77, 36] Bryn 36 28587302322204 2018 +[33, 77, 36] Bryn 36 28587302322204 5174 +[33, 77, 36] Bryn 36 28587302322204 6413 +[33, 77, 36] 
Bryn 36 28587302322204 7328 +[33, 77, 36] Bryn 36 28587302322204 9170 +[33, 77, 36] Bryn 36 28587302322204 11695 +[33, 77, 36] Bryn 36 28587302322204 12002 +[33, 78, 38] Bryn 38 28587302322223 775 +[33, 78, 38] Bryn 38 28587302322223 1938 +[33, 79, 39] Bryn 39 30786325577731 196 +[33, 79, 39] Bryn 39 30786325577731 1031 +[33, 80, 43] Bryn 43 32985348833329 3 +[33, 80, 43] Bryn 43 32985348833329 139 +[33, 80, 43] Bryn 43 32985348833329 470 +[33, 80, 43] Bryn 43 32985348833329 580 +[33, 80, 43] Bryn 43 32985348833329 1985 +[33, 80, 43] Bryn 43 32985348833329 2058 +[33, 80, 43] Bryn 43 32985348833329 2777 +[33, 80, 43] Bryn 43 32985348833329 2836 +[33, 80, 43] Bryn 43 32985348833329 5114 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 804 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 973 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1170 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1185 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1206 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1749 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1908 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1954 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2003 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2786 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2816 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2969 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2985 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 4865 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 6399 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 6815 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 7025 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 7142 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 7689 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 9929 + +query IIII +-FROM GRAPH_TABLE (snb + MATCH (a:Person)-[w:knows]->{1,3}(b:Person)-[i:hasInterest]->(t:Tag) + WHERE a.id = 28587302322180 + COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) + ) tmp +ORDER BY b_id, t_id +limit 52; +---- +Bryn 36 28587302322204 6 +Bryn 36 
28587302322204 588 +Bryn 36 28587302322204 1021 +Bryn 36 28587302322204 1767 +Bryn 36 28587302322204 1940 +Bryn 36 28587302322204 1995 +Bryn 36 28587302322204 2018 +Bryn 36 28587302322204 5174 +Bryn 36 28587302322204 6413 +Bryn 36 28587302322204 7328 +Bryn 36 28587302322204 9170 +Bryn 36 28587302322204 11695 +Bryn 36 28587302322204 12002 +Bryn 38 28587302322223 775 +Bryn 38 28587302322223 1938 +Bryn 39 30786325577731 196 +Bryn 39 30786325577731 1031 +Bryn 43 32985348833329 3 +Bryn 43 32985348833329 139 +Bryn 43 32985348833329 470 +Bryn 43 32985348833329 580 +Bryn 43 32985348833329 1985 +Bryn 43 32985348833329 2058 +Bryn 43 32985348833329 2777 +Bryn 43 32985348833329 2836 +Bryn 43 32985348833329 5114 +Bryn 45 35184372088850 804 +Bryn 45 35184372088850 973 +Bryn 45 35184372088850 1170 +Bryn 45 35184372088850 1185 +Bryn 45 35184372088850 1206 +Bryn 45 35184372088850 1749 +Bryn 45 35184372088850 1908 +Bryn 45 35184372088850 1954 +Bryn 45 35184372088850 2003 +Bryn 45 35184372088850 2786 +Bryn 45 35184372088850 2816 +Bryn 45 35184372088850 2969 +Bryn 45 35184372088850 2985 +Bryn 45 35184372088850 4865 +Bryn 45 35184372088850 6399 +Bryn 45 35184372088850 6815 +Bryn 45 35184372088850 7025 +Bryn 45 35184372088850 7142 +Bryn 45 35184372088850 7689 +Bryn 45 35184372088850 9929 + +query IIII +-FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 28587302322180)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) + COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) + ) tmp +ORDER BY b_id, t_id +---- +Bryn 36 28587302322204 6 +Bryn 36 28587302322204 588 +Bryn 36 28587302322204 1021 +Bryn 36 28587302322204 1767 +Bryn 36 28587302322204 1940 +Bryn 36 28587302322204 1995 +Bryn 36 28587302322204 2018 +Bryn 36 28587302322204 5174 +Bryn 36 28587302322204 6413 +Bryn 36 28587302322204 7328 +Bryn 36 28587302322204 9170 +Bryn 36 28587302322204 11695 +Bryn 36 28587302322204 12002 +Bryn 38 28587302322223 775 +Bryn 38 28587302322223 1938 +Bryn 39 30786325577731 
196 +Bryn 39 30786325577731 1031 +Bryn 43 32985348833329 3 +Bryn 43 32985348833329 139 +Bryn 43 32985348833329 470 +Bryn 43 32985348833329 580 +Bryn 43 32985348833329 1985 +Bryn 43 32985348833329 2058 +Bryn 43 32985348833329 2777 +Bryn 43 32985348833329 2836 +Bryn 43 32985348833329 5114 +Bryn 45 35184372088850 804 +Bryn 45 35184372088850 973 +Bryn 45 35184372088850 1170 +Bryn 45 35184372088850 1185 +Bryn 45 35184372088850 1206 +Bryn 45 35184372088850 1749 +Bryn 45 35184372088850 1908 +Bryn 45 35184372088850 1954 +Bryn 45 35184372088850 2003 +Bryn 45 35184372088850 2786 +Bryn 45 35184372088850 2816 +Bryn 45 35184372088850 2969 +Bryn 45 35184372088850 2985 +Bryn 45 35184372088850 4865 +Bryn 45 35184372088850 6399 +Bryn 45 35184372088850 6815 +Bryn 45 35184372088850 7025 +Bryn 45 35184372088850 7142 +Bryn 45 35184372088850 7689 +Bryn 45 35184372088850 9929 + +query IIII +-FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 28587302322180)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag WHERE t.id = 6) + COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) + ) tmp +ORDER BY b_id, t_id +---- +Bryn 36 28587302322204 6 + +statement error +-FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 28587302322180){3}) + COLUMNS (a.firstname as a_name) + ) tmp +---- +Parser Error: syntax error at or near "{" + +query III +-FROM GRAPH_TABLE (snb + MATCH p = (a:Person where a.id = 16)-[k:knows]->{1,3}(b:Person) + COLUMNS (element_id(p), a.id, b.id) + ) tmp; +---- +[1, 3, 5] 16 2199023255594 +[1, 3, 5, 16, 10] 16 8796093022244 +[1, 3, 5, 17, 12] 16 10995116277761 +[1, 3, 5, 18, 16] 16 13194139533342 +[1, 3, 5, 19, 17] 16 13194139533352 +[1, 3, 5, 16, 10, 32, 18] 16 13194139533355 +[1, 3, 5, 20, 19] 16 15393162788877 +[1, 3, 5, 17, 12, 39, 20] 16 17592186044443 +[1, 3, 5, 21, 21] 16 17592186044461 +[1, 3, 5, 19, 17, 48, 23] 16 19791209299987 +[1, 3, 5, 22, 26] 16 24189255811081 +[1, 3, 5, 22, 26, 61, 27] 16 24189255811109 +[1, 3, 5, 19, 17, 49, 29] 16 
26388279066641 +[1, 4, 30] 16 26388279066655 +[1, 3, 5, 23, 31] 16 26388279066658 +[1, 3, 5, 24, 32] 16 26388279066668 +[1, 5, 33] 16 28587302322180 +[1, 3, 5, 26, 35] 16 28587302322196 +[1, 6, 36] 16 28587302322204 +[1, 5, 33, 78, 38] 16 28587302322223 +[1, 5, 33, 79, 39] 16 30786325577731 +[1, 3, 5, 27, 40] 16 30786325577740 +[1, 5, 33, 80, 43] 16 32985348833329 +[1, 3, 5, 22, 26, 66, 44] 16 35184372088834 +[1, 3, 5, 28, 45] 16 35184372088850 +[1, 3, 5, 23, 31, 74, 46] 16 35184372088856 diff --git a/test/sql/path_finding/edgeless_graph.test b/test/sql/path_finding/edgeless_graph.test new file mode 100644 index 00000000..42abad3e --- /dev/null +++ b/test/sql/path_finding/edgeless_graph.test @@ -0,0 +1,34 @@ +# name: test/sql/path_finding/edgeless_graph.test +# group: [path_finding] + +require duckpgq + +statement ok +CREATE TABLE nodes (id INTEGER); + +statement ok +CREATE TABLE edges (src INTEGER, dst INTEGER); + +statement ok +INSERT INTO nodes VALUES (1), (2), (3); + +statement ok +-CREATE PROPERTY GRAPH testgraph + VERTEX TABLES ( + nodes LABEL N + ) + EDGE TABLES ( + edges SOURCE KEY (src) REFERENCES nodes (id) + DESTINATION KEY (dst) REFERENCES nodes (id) + LABEL E +); + +query IIIII +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]-> * (n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- +1 1 [0] [] 0 +2 2 [1] [] 0 +3 3 [2] [] 0 \ No newline at end of file diff --git a/test/sql/path_finding/kleene_star.test b/test/sql/path_finding/kleene_star.test new file mode 100644 index 00000000..28a39cde --- /dev/null +++ b/test/sql/path_finding/kleene_star.test @@ -0,0 +1,119 @@ +# name: test/sql/path_finding/kleene_star.test +# group: [path_finding] + +require duckpgq + +statement ok +CREATE TABLE nodes (id INTEGER); + +statement ok +CREATE TABLE edges (src INTEGER, dst INTEGER); + +statement ok +INSERT INTO nodes VALUES (1), (2), (3); + +statement ok +-CREATE PROPERTY GRAPH testgraph + VERTEX TABLES ( + 
nodes LABEL N + ) + EDGE TABLES ( + edges SOURCE KEY (src) REFERENCES nodes (id) + DESTINATION KEY (dst) REFERENCES nodes (id) + LABEL E +); + +query IIIII +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]->*(n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- +1 1 [0] [] 0 +2 2 [1] [] 0 +3 3 [2] [] 0 + +query IIIII +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]->+(n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- + +query IIIII +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]->{1,3}(n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- + +query IIIII +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]->{0,3}(n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- +1 1 [0] [] 0 +2 2 [1] [] 0 +3 3 [2] [] 0 + +query IIIII +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]->{,3}(n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- +1 1 [0] [] 0 +2 2 [1] [] 0 +3 3 [2] [] 0 + + +statement error +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)*<-[e:E]->(n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- +Parser Error: syntax error at or near "*<" + +statement error +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)*-[e:E]->(n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- +Parser Error: syntax error at or near "*" + +statement error +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]->{3,1}(n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- +Constraint Error: Lower bound greater than upper bound + +query IIIII +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]->{,}(n2:N) + COLUMNS (n1.id, 
n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- +1 1 [0] [] 0 +2 2 [1] [] 0 +3 3 [2] [] 0 + +query IIIII +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]->{1,1}(n2:N) + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- + +statement error +-FROM GRAPH_TABLE(testgraph + MATCH p = ANY SHORTEST (n1:N)-[e:E]->*(n2:N + COLUMNS (n1.id, n2.id, element_id(p), edges(p) AS path_edges, path_length(p)) +); +---- +Parser Error: syntax error at or near "COLUMNS" + diff --git a/test/sql/path_finding/non-unique-vertices.test b/test/sql/path_finding/non-unique-vertices.test new file mode 100644 index 00000000..d6606d54 --- /dev/null +++ b/test/sql/path_finding/non-unique-vertices.test @@ -0,0 +1,86 @@ +# name: test/sql/path_finding/non-unique-vertices.test +# group: [path_finding] + +require duckpgq + +statement ok +CREATE TABLE v (x VARCHAR);INSERT INTO v VALUES ('a'), ('b'), ('b'); + +statement ok +CREATE TABLE e (x1 VARCHAR, x2 VARCHAR);INSERT INTO e VALUES ('a', 'b'); + +statement ok +-CREATE PROPERTY GRAPH g +VERTEX TABLES ( + v +) +EDGE TABLES ( + e + SOURCE KEY (x1) REFERENCES v (x) + DESTINATION KEY (x2) REFERENCES v (x) +); + +# v-[e]->(v) has no error: +# Output has duplicate `x` records with the value `b` returned as expected. 
They can be distinguished by rowid in vertices() +statement ok +-FROM GRAPH_TABLE(g + MATCH p =(v1:v)-[e:e]->(v2:v) + COLUMNS (vertices(p), v2.x) +); + +# ANY SHORTEST v-[e]->(v) has no error: +# Output again has duplicate `x` records are returned as expected +statement ok +-FROM GRAPH_TABLE(g + MATCH p = ANY SHORTEST (v1:v)-[e:e]->(v2:v) + COLUMNS (path_length(p), vertices(p), v2.x) +); + +## ANY SHORTEST v-[e]-> +(v) fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" +statement error +-FROM GRAPH_TABLE(g + MATCH p = ANY SHORTEST (v1:v)-[e:e]-> +(v2:v) + COLUMNS (path_length(p), vertices(p), v2.x) +); +---- +Constraint Error: Non-existent/non-unique vertices detected. Make sure all vertices referred by edge tables exist and are unique for path-finding queries. + +# ANY SHORTEST v-[e]->{1,2}(v) also fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" +statement error +-FROM GRAPH_TABLE(g + MATCH p = ANY SHORTEST (v1:v)-[e:e]->{1,2}(v2:v) + COLUMNS (path_length(p), vertices(p), v2.x) +); +---- +Constraint Error: Non-existent/non-unique vertices detected. Make sure all vertices referred by edge tables exist and are unique for path-finding queries. + +statement ok +CREATE TABLE v2 (x VARCHAR);INSERT INTO v2 VALUES ('a'), ('b'), ('c'), ('c'), ('b'); + +statement ok +CREATE TABLE e2 (x1 VARCHAR, x2 VARCHAR);INSERT INTO e2 VALUES ('a', 'b'), ('b', 'c'); + +statement ok +-CREATE PROPERTY GRAPH g2 +VERTEX TABLES ( + v2 +) +EDGE TABLES ( + e2 + SOURCE KEY (x1) REFERENCES v2 (x) + DESTINATION KEY (x2) REFERENCES v2 (x) +); + +# ANY SHORTEST v-[e]->{1,2}(v) also fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" +statement error +-FROM GRAPH_TABLE(g2 + MATCH p = ANY SHORTEST (v1:v2)-[e:e2]->{1,2}(v2:v2) + COLUMNS (path_length(p), vertices(p), v2.x) +); +---- +Constraint Error: Non-existent/non-unique vertices detected. 
Make sure all vertices referred by edge tables exist and are unique for path-finding queries. + +statement error +from weakly_connected_component(g2, v2, e2); +---- +Constraint Error: Non-existent/non-unique vertices detected. Make sure all vertices referred by edge tables exist and are unique for path-finding queries. \ No newline at end of file diff --git a/test/sql/path_finding/parser_arrow_kleene.test b/test/sql/path_finding/parser_arrow_kleene.test new file mode 100644 index 00000000..4c3d8dab --- /dev/null +++ b/test/sql/path_finding/parser_arrow_kleene.test @@ -0,0 +1,122 @@ +# name: test/sql/path_finding/parser_arrow_kleene.test +# group: [path_finding] + +require duckpgq + +statement ok +CREATE TABLE nodes (id INTEGER); INSERT INTO nodes VALUES (1), (2), (3); + +statement ok +CREATE TABLE edges (src INTEGER, dst INTEGER); + +statement ok +-CREATE PROPERTY GRAPH testgraph + VERTEX TABLES ( + nodes LABEL N + ) + EDGE TABLES ( + edges SOURCE KEY (src) REFERENCES nodes (id) + DESTINATION KEY (dst) REFERENCES nodes (id) + LABEL E +); + +statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E]->*(n2:N) COLUMNS (n1.*, n2.*)); + +statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E]-> *(n2:N) COLUMNS (n1.*, n2.*)); + +statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E]- > *(n2:N) COLUMNS (n1.*, n2.*)); + +statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E] - > *(n2:N) COLUMNS (n1.*, n2.*)); + +statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E] -> *(n2:N) COLUMNS (n1.*, n2.*)); + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E]< -> *(n2:N) COLUMNS (n1.*, n2.*)); +---- +Parser Error: syntax error at or near "<" + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E] /-> *(n2:N)); +---- +Parser Error: syntax error at or near "/->" + +# Not yet supported +statement error +-FROM GRAPH_TABLE (testgraph MATCH 
ANY SHORTEST (n1:N)-> *(n2:N)); +---- +Constraint Error: All patterns must bind to a label + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[]-> *(n2:N)); +---- +Constraint Error: All patterns must bind to a label + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[]- > *(n2:N)); +---- +Constraint Error: All patterns must bind to a label + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e2:E] > *(n2:N)); +---- +Parser Error: syntax error at or near ">" + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E]*(n2:N)); +---- +Parser Error: syntax error at or near "*" + +statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E]- > *(n2:N) COLUMNS (n1.*, n2.*)); + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)-[e:E]<- *(n2:N)); +---- +Parser Error: syntax error at or near "<" + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)<-> *(n2:N)); +---- +Constraint Error: All patterns must bind to a label + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)< - > *(n2:N)); +---- +Constraint Error: All patterns must bind to a label + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)<- *(n2:N)); +---- +Constraint Error: All patterns must bind to a label + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)< - *(n2:N)); +---- +Constraint Error: All patterns must bind to a label + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)< -*(n2:N)); +---- +Constraint Error: All patterns must bind to a label + +statement error +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)< -?(n2:N)); +---- +Constraint Error: All patterns must bind to a label + +statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)<-[e1:E]-?(n2:N)); + +statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)<-[e1:E] - ?(n2:N)); + 
+statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)<-[e1:E] -> ?(n2:N)); + +statement ok +-FROM GRAPH_TABLE (testgraph MATCH ANY SHORTEST (n1:N)<-[e1:E] - >?(n2:N)); \ No newline at end of file diff --git a/test/sql/path_finding/path-finding-cte.test b/test/sql/path_finding/path-finding-cte.test new file mode 100644 index 00000000..6f044379 --- /dev/null +++ b/test/sql/path_finding/path-finding-cte.test @@ -0,0 +1,68 @@ +# name: test/sql/path_finding/path-finding-cte.test +# description: Testing the optimization to move the shortest path function to a materialized CTE +# group: [path_finding] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +CREATE TABLE School(name VARCHAR, Id BIGINT, Kind VARCHAR); INSERT INTO School VALUES ('VU', 0, 'University'), ('UVA', 1, 'University'); + +statement ok +CREATE TABLE StudyAt(personId BIGINT, schoolId BIGINT); INSERT INTO StudyAt VALUES (0, 0), (1, 0), (2, 1), (3, 1), (4, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School LABEL SCHOOL + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows, + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( id ) + LABEL StudyAt + ); + +statement ok +-WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN 
Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count(*) from know k join student a on a.id = k.src join student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst +), shortest_path_p AS MATERIALIZED ( + SELECT shortestpath(0, (select count(*) from student), a.rowid, b.rowid) as path, a.rowid as src_rowid, b.rowid as dst_rowid + FROM student a, student b, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE a.name = 'Daniel' and __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid) between 1 and 3) +SELECT path, a.name, b.name, + len(path) // 2, + path[1:-:2], + path[2:-:2] +FROM shortest_path_p, student a, student b where a.name = 'Daniel' and a.rowid = src_rowid and b.rowid = dst_rowid; diff --git a/test/sql/path_finding/shortest_path.test b/test/sql/path_finding/shortest_path.test new file mode 100644 index 00000000..c65b567b --- /dev/null +++ b/test/sql/path_finding/shortest_path.test @@ -0,0 +1,137 @@ +# name: test/sql/path_finding/shortest_path.test +# description: Testing the shortest path matching +# group: [path_finding] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +CREATE TABLE School(name VARCHAR, Id BIGINT, Kind VARCHAR); INSERT INTO School VALUES ('VU', 0, 'University'), ('UVA', 1, 'University'); + +statement ok +CREATE TABLE StudyAt(personId BIGINT, schoolId BIGINT); INSERT INTO StudyAt VALUES (0, 0), (1, 0), (2, 1), (3, 1), (4, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( 
id, name ) LABEL Person, + School LABEL SCHOOL + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows, + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( id ) + LABEL StudyAt + ); + +query II +-FROM GRAPH_TABLE (pg + MATCH + ANY SHORTEST (a:Person)-[s:StudyAt]->(b:School) + WHERE a.name = 'Daniel' + COLUMNS (a.name as name, b.name as school) + ) study; +---- +Daniel VU + +# Note: Daniel->Daniel with path [0] is included because iterativelengthbounded +# finds a cycle within bounds, but shortestpath returns the length-0 path. +# TODO: Implement shortestpathbounded to fix this behavior (issue #67 follow-up) +query III +-FROM GRAPH_TABLE (pg + MATCH + p = ANY SHORTEST (a:Person WHERE a.name = 'Daniel')-[k:knows]->{1,3}(b:Person) + COLUMNS (element_id(p), a.name as name, b.name as b_name) + ) study + ORDER BY name, b_name; +---- +[0] Daniel Daniel +[0, 1, 2] Daniel Gabor +[0, 2, 3] Daniel Peter +[0, 0, 1] Daniel Tavneet + +# TODO: Self-loops with length 0 are included due to iterativelengthbounded/shortestpath mismatch +query IIII +-FROM GRAPH_TABLE (pg + MATCH + p = ANY SHORTEST (a:Person)-[k:knows]->{1,3}(b:Person) + COLUMNS (path_length(p), element_id(p), a.name as name, b.name as b_name) + ) study + order by study.name, study.b_name; +---- +0 [0] Daniel Daniel +1 [0, 1, 2] Daniel Gabor +1 [0, 2, 3] Daniel Peter +1 [0, 0, 1] Daniel Tavneet +2 [4, 7, 3, 3, 0] David Daniel +3 [4, 7, 3, 3, 0, 1, 2] David Gabor +1 [4, 7, 3] David Peter +3 [4, 7, 3, 3, 0, 0, 1] David Tavneet +2 [2, 6, 3, 3, 0] Gabor Daniel +0 [2] Gabor Gabor +1 [2, 6, 3] Gabor Peter +3 [2, 6, 3, 3, 0, 0, 1] Gabor Tavneet +1 [3, 3, 0] Peter Daniel +2 [3, 3, 0, 1, 2] Peter Gabor +0 [3] Peter Peter +2 [3, 3, 0, 0, 1] Peter Tavneet +2 [1, 5, 3, 3, 0] Tavneet Daniel +1 [1, 4, 2] Tavneet Gabor +1 [1, 5, 3] Tavneet Peter +0 [1] Tavneet Tavneet + + +statement error +-FROM 
GRAPH_TABLE (pg + MATCH + p = ANY SHORTEST (a:Person)-[k:knows]->{1,3}(b:Person) + WHERE a.name = 'Daniel' + COLUMNS (p, a.name as name, b.name as b_name) + ) study; +---- +Binder Error: Property p is never registered! + + +query III +WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count(*) from know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst +) SELECT shortestpath(0, (select count(*) from student), a.rowid, b.rowid) as path, a.name as a_name, b.name as b_name + FROM student a, student b, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE a.name = 'Daniel' and __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid) between 1 and 3 +---- +[0, 0, 1] Daniel Tavneet +[0, 1, 2] Daniel Gabor +[0, 2, 3] Daniel Peter diff --git a/test/sql/path_finding/subpath_match.test b/test/sql/path_finding/subpath_match.test new file mode 100644 index 00000000..ae1b7401 --- /dev/null +++ b/test/sql/path_finding/subpath_match.test @@ -0,0 +1,125 @@ +# name: test/sql/path_finding/subpath_match.test +# description: Testing the subpath matching +# group: [path_finding] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, id BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT 
INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17), (2, 4, 18); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( id ) LABEL Knows + ); + +query II +-SELECT study.a_id, study.name +FROM GRAPH_TABLE (pg + MATCH + (a:Person WHERE a.id = 0) + COLUMNS (a.id as a_id, a.name) + ) study +---- +0 Daniel + +query II +-SELECT study.a_id, study.b_id +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[k:Knows WHERE k.id = 10]->(b:Person) + COLUMNS (a.id as a_id, b.id as b_id) + ) study +---- +0 1 + +#query II +#WITH cte1 AS ( +# SELECT CREATE_CSR_EDGE( +# 0, +# (SELECT count(a.id) FROM Student a), +# CAST ( +# (SELECT sum(CREATE_CSR_VERTEX( +# 0, +# (SELECT count(a.id) FROM Student a), +# sub.dense_id, +# sub.cnt) +# ) +# FROM ( +# SELECT a.rowid as dense_id, count(k.src) as cnt +# FROM Student a +# LEFT JOIN Know k ON k.src = a.id +# GROUP BY a.rowid) sub +# ) +# AS BIGINT), +# a.rowid, +# c.rowid, +# k.rowid) as temp +# FROM Know k +# JOIN student a on a.id = k.src +# JOIN student c on c.id = k.dst +#) SELECT __p.a_name, __p.b_name +#FROM (SELECT count(temp) * 0 AS temp FROM cte1) x, (SELECT a.name as a_name, a.rowid as __src, b.name as b_name, b.rowid as __dst FROM student a, student b WHERE a.name = 'Peter') __p +#WHERE x.temp + iterativelength(0, (SELECT count(c.id) FROM student c), __p.__src, __p.__dst) BETWEEN 0 and 10000 +#---- +#Peter Daniel +#Peter Tavneet +#Peter Gabor +#Peter Peter +#Peter David + +statement error +-SELECT study.a_name, study.b_name +FROM GRAPH_TABLE (pg + MATCH + (a:Person WHERE a.name = 'Peter')-[k:Knows]-> *(b:Person) + COLUMNS (a.name as a_name, b.name as b_name) + ) study +---- +Constraint Error: ALL unbounded with path mode WALK is not possible as this could lead to infinite results. 
Consider specifying an upper bound or path mode other than WALK + + +query II +-SELECT study.a_name, study.b_name +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[k:Knows]->{1,2}(b:Person) + WHERE a.name = 'Peter' + COLUMNS (a.name as a_name, b.name as b_name) + ) study +---- +Peter Daniel +Peter Tavneet +Peter Gabor +Peter Peter + +statement error +-SELECT study.a_name, study.b_name +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[k:Knows]-> +(b:Person) + WHERE a.name = 'Peter' + COLUMNS (a.name as a_name, b.name as b_name) + ) study +---- +Constraint Error: ALL unbounded with path mode WALK is not possible as this could lead to infinite results. Consider specifying an upper bound or path mode other than WALK diff --git a/test/sql/path_finding/top_k.test b/test/sql/path_finding/top_k.test new file mode 100644 index 00000000..764e7ea5 --- /dev/null +++ b/test/sql/path_finding/top_k.test @@ -0,0 +1,57 @@ +# name: test/sql/path_finding/top_k.test +# description: Testing top-k functionality, not implemented yet. 
+# group: [path_finding] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student LABEL person + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + label knows + ); + +statement error +-FROM GRAPH_TABLE (pg + MATCH + p = ANY SHORTEST 5 WALK (a:Person)-[k:knows]-> *(b:Person) + WHERE a.name = 'Daniel' + COLUMNS (p, a.name as name, b.name as school) + ) study; +---- +Parser Error: syntax error at or near "5" + + +statement error +-FROM GRAPH_TABLE (pg + MATCH + p = SHORTEST 5 (a:Person)-[k:knows]-> *(b:Person) + WHERE a.name = 'Daniel'); +---- +Not implemented Error: TopK has not been implemented yet. + +statement error +-FROM GRAPH_TABLE (pg + MATCH + p = SHORTEST 5 WALK (a:Person)-[k:knows]-> *(b:Person) + WHERE a.name = 'Daniel'); +---- +Not implemented Error: TopK has not been implemented yet. 
+ +statement error +-FROM GRAPH_TABLE (pg + MATCH + p = ANY SHORTEST 5 WALK (a:Person)-[k:knows]-> *(b:Person) + WHERE a.name = 'Daniel'); +---- +Parser Error: syntax error at or near "5" diff --git a/test/sql/path_finding/undirected_paths.test b/test/sql/path_finding/undirected_paths.test new file mode 100644 index 00000000..23766f9e --- /dev/null +++ b/test/sql/path_finding/undirected_paths.test @@ -0,0 +1,156 @@ +# name: test/sql/path_finding/undirected_paths.test +# description: Testing undirected path-finding +# group: [path_finding] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, id BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17), (2, 4, 18); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student + ) +EDGE TABLES ( + know SOURCE KEY (src) REFERENCES Student (id) + DESTINATION KEY (dst) REFERENCES Student (id) + ); + +query III +-FROM GRAPH_TABLE (pg + MATCH + o = ANY SHORTEST (a:Student WHERE a.id = 0)-[e:know]- *(b:Student) + COLUMNS (a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- +0 0 0 +0 1 1 +0 2 1 +0 3 1 +0 4 2 + +query III +-FROM GRAPH_TABLE (pg + MATCH + o = ANY SHORTEST (a:Student WHERE a.id = 4)-[e:know]- *(b:Student) + COLUMNS (a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- +4 0 2 +4 1 2 +4 2 1 +4 3 1 +4 4 0 + +statement error +-FROM GRAPH_TABLE (pg + MATCH + o = ANY SHORTEST (a:Student WHERE a.id = 4)<-[e:know]- *(b:Student) + COLUMNS (a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- +Cannot do shortest path for edge type MATCH_EDGE_LEFT + +statement error +-FROM GRAPH_TABLE (pg + MATCH + o = ANY SHORTEST (a:Student WHERE a.id = 4)<-[e:know]-> *(b:Student) + COLUMNS 
(a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- +Cannot do shortest path for edge type MATCH_EDGE_LEFT_RIGHT + +query II +-FROM GRAPH_TABLE (pg + MATCH + (a:Student WHERE a.id = 4)-[e:know]-{0,1}(b:Student) + COLUMNS (a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- +4 2 +4 3 + +query III +-FROM GRAPH_TABLE (pg + MATCH + o = ANY SHORTEST (a:Student WHERE a.id = 999)-[e:know]- *(b:Student) + COLUMNS (a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- + +query III +-FROM GRAPH_TABLE (pg + MATCH + o = ANY SHORTEST (a:Student)-[e:know]- *(b:Student) + COLUMNS (a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- +0 0 0 +0 1 1 +0 2 1 +0 3 1 +0 4 2 +1 0 1 +1 1 0 +1 2 1 +1 3 1 +1 4 2 +2 0 1 +2 1 1 +2 2 0 +2 3 1 +2 4 1 +3 0 1 +3 1 1 +3 2 1 +3 3 0 +3 4 1 +4 0 2 +4 1 2 +4 2 1 +4 3 1 +4 4 0 + +query III +-FROM GRAPH_TABLE (pg + MATCH + o = ANY SHORTEST (a:Student WHERE a.id = 3)-[e:know]- *(b:Student WHERE b.id = 3) + COLUMNS (a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- +3 3 0 + +query III +-FROM GRAPH_TABLE (pg + MATCH + o = ANY SHORTEST (a:Student WHERE a.id = 0)-[e:know]- *(b:Student WHERE b.id = 5) + COLUMNS (a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- + +query III +-FROM GRAPH_TABLE (pg + MATCH + o = ANY SHORTEST (a:Student WHERE a.id = 0)-[e:know]-{0,2}(b:Student) + COLUMNS (a.id as a_id, b.id as b_id, path_length(o)) + ) study + ORDER BY a_id, b_id; +---- +0 0 0 +0 1 1 +0 2 1 +0 3 1 +0 4 2 diff --git a/test/sql/pattern_matching/basic_match.test b/test/sql/pattern_matching/basic_match.test new file mode 100644 index 00000000..0eca8977 --- /dev/null +++ b/test/sql/pattern_matching/basic_match.test @@ -0,0 +1,335 @@ +# name: test/sql/pattern_matching/basic_match.test +# description: Testing basic pattern matching +# group: [pattern_matching] + +#statement ok 
+#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +CREATE TABLE School(name VARCHAR, Id BIGINT, Kind VARCHAR);INSERT INTO School VALUES ('VU', 0, 'University'), ('UVA', 1, 'University'); + +statement ok +CREATE TABLE StudyAt(personId BIGINT, schoolId BIGINT);INSERT INTO StudyAt VALUES (0, 0), (1, 0), (2, 1), (3, 1), (4, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School LABEL SCHOOL + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows, + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( id ) + LABEL StudyAt + ); + +query II +-SELECT study.name, study.school +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[s:StudyAt]->(b:School) + WHERE a.name = 'Daniel' + COLUMNS (a.name as name, b.name as school) + ) study; +---- +Daniel VU + +query III +-SELECT study.name, study.school, study.know_name +FROM GRAPH_TABLE (pg + MATCH + (a2:Person)-[s2:StudyAt]->(b:School)<-[s:StudyAt]-(a:Person) + WHERE a.name = 'Daniel' and a.id <> a2.id + COLUMNS (a.name as name, b.name as school, a2.name as know_name) + ) study; +---- +Daniel VU Tavneet + + +query I +-SELECT study.id +FROM GRAPH_TABLE (pg + MATCH + (a:Person) + COLUMNS (a.id) + ) study; +---- +0 +1 +2 +3 +4 + +query II +-SELECT study.a_id, study.name +FROM GRAPH_TABLE (pg + MATCH + (a:Person) + WHERE a.id = 0 + COLUMNS (a.id as a_id, a.name) + ) study; +---- +0 Daniel + + +query II +-SELECT study.a_name, study.b_name +FROM GRAPH_TABLE (pg + MATCH + 
(a:Person)-[k:Knows]->(b:Person) + WHERE a.name = 'Daniel' + COLUMNS (a.name as a_name, b.name as b_name) + ) study; +---- +Daniel Tavneet +Daniel Gabor +Daniel Peter + +query II +-SELECT study.a_name, study.b_name +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[k:Knows]->(b:Person) + WHERE a.name = 'Daniel' and b.name = 'Gabor' + COLUMNS (a.name as a_name, b.name as b_name) + ) study; +---- +Daniel Gabor + +query II +-SELECT study.a_name, study.b_name +FROM GRAPH_TABLE (pg + MATCH + (a:Person)<-[k:Knows]-(b:Person) + WHERE a.name = 'Peter' + COLUMNS (a.name as a_name, b.name as b_name) + ) study; +---- +Peter Daniel +Peter Tavneet +Peter Gabor +Peter David + +query II +-SELECT study.a_name, study.b_name +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[k:Knows]-(b:Person) + WHERE a.name = 'Peter' + COLUMNS (a.name as a_name, b.name as b_name) + ) study + ORDER BY a_name, b_name; +---- +Peter Daniel +Peter Daniel +Peter David +Peter Gabor +Peter Tavneet + +query II +-SELECT study.a_name, study.b_name +FROM GRAPH_TABLE (pg + MATCH + (a:Person)<-[k:Knows]->(b:Person) + WHERE a.name = 'Peter' + COLUMNS (a.name as a_name, b.name as b_name) + ) study; +---- +Peter Daniel + +query II +-SELECT study.a_name, count(study.b_name) +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[k:Knows]->(b:Person) + COLUMNS (a.name as a_name, b.name as b_name) + ) study + GROUP BY study.a_name + ORDER BY count(study.b_name) DESC, study.a_name; +---- +Daniel 3 +Tavneet 2 +David 1 +Gabor 1 +Peter 1 + +query III +-SELECT study.a_name, study.b_name, study.c_name +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[k:Knows]->(b:Person)-[k2:Knows]->(c:Person)-[k3:Knows]->(a:Person) + COLUMNS (a.name as a_name, b.name as b_name, c.name as c_name) + ) study +ORDER BY study.a_name, study.b_name, study.c_name; +---- +Daniel Gabor Peter +Daniel Tavneet Peter +Gabor Peter Daniel +Peter Daniel Gabor +Peter Daniel Tavneet +Tavneet Peter Daniel + +statement error +-SELECT study.a_name, study.b_name, study.c_name +FROM 
GRAPH_TABLE (pg + MATCH + (:Person)-[k:Knows]->(b:Person) + COLUMNS (a.name as a_name, b.name as b_name) + ) study; +---- + +statement error +-SELECT study.a_name, study.b_name, study.c_name +FROM GRAPH_TABLE (pg + MATCH + (a)-[k:Knows]->(b:Person) + COLUMNS (a.ncame as a_name, b.name as b_name) + ) study; +---- + +statement error +-SELECT study.a_name, study.b_name, study.c_name +FROM GRAPH_TABLE (pg + MATCH + ()-[k:Knows]->(b:Person) + COLUMNS (a.name as a_name, b.name as b_name) + ) study; +---- + + +query II +-SELECT * +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[k:Knows]->(b:Person) + COLUMNS (a.name as a_name, b.name as b_name) + ) study; +---- +Daniel Tavneet +Daniel Gabor +Daniel Peter +Peter Daniel +Tavneet Gabor +Tavneet Peter +Gabor Peter +David Peter + +query II +-SELECT * +FROM GRAPH_TABLE (pg + MATCH + (a:PERSON)-[k:knows]->(B:Person) + COLUMNS (a.name as a_name, b.name as b_name) + ) study; +---- +Daniel Tavneet +Daniel Gabor +Daniel Peter +Peter Daniel +Tavneet Gabor +Tavneet Peter +Gabor Peter +David Peter + + +query I +-SELECT * +FROM GRAPH_TABLE (pg + MATCH + (a:PERSON)-[k:knows]->(b:person) + COLUMNS (avg(a.id)) + ) study; +---- +1.375 + + +query II +-FROM GRAPH_TABLE (pg + MATCH + (a:PERSON)-[k:knows]->(b:person) + COLUMNS (a.*) + ) study +ORDER BY study.id, study.name; +---- +0 Daniel +0 Daniel +0 Daniel +1 Tavneet +1 Tavneet +2 Gabor +3 Peter +4 David + +query II +-SELECT * +FROM GRAPH_TABLE (pg + MATCH + (a:PERSON)-[k:knows]->(b:person) + COLUMNS (a.*) + ) study +ORDER BY study.id; +---- +0 Daniel +0 Daniel +0 Daniel +1 Tavneet +1 Tavneet +2 Gabor +3 Peter +4 David + +query II +-FROM GRAPH_TABLE (pg + MATCH + (a:PERSON)-[k:knows]->(b:person) + COLUMNS (a.*) + ) p +ORDER BY p.id, p.name; +---- +0 Daniel +0 Daniel +0 Daniel +1 Tavneet +1 Tavneet +2 Gabor +3 Peter +4 David + +query III +-FROM GRAPH_TABLE (pg + MATCH + (a:PERSON)-[k:knows]->(b:person) + COLUMNS (a, k, b) + ) study; +---- +{'id': 0, 'name': Daniel} {'src': 0, 'dst': 1, 
'createDate': 10} {'id': 1, 'name': Tavneet} +{'id': 0, 'name': Daniel} {'src': 0, 'dst': 2, 'createDate': 11} {'id': 2, 'name': Gabor} +{'id': 0, 'name': Daniel} {'src': 0, 'dst': 3, 'createDate': 12} {'id': 3, 'name': Peter} +{'id': 3, 'name': Peter} {'src': 3, 'dst': 0, 'createDate': 13} {'id': 0, 'name': Daniel} +{'id': 1, 'name': Tavneet} {'src': 1, 'dst': 2, 'createDate': 14} {'id': 2, 'name': Gabor} +{'id': 1, 'name': Tavneet} {'src': 1, 'dst': 3, 'createDate': 15} {'id': 3, 'name': Peter} +{'id': 2, 'name': Gabor} {'src': 2, 'dst': 3, 'createDate': 16} {'id': 3, 'name': Peter} +{'id': 4, 'name': David} {'src': 4, 'dst': 3, 'createDate': 17} {'id': 3, 'name': Peter} + +statement error +-FROM GRAPH_TABLE (pg + MATCH + (a:PERSON)-[k:knows]->(b:person) + COLUMNS (doesnotexist, k, b) + ) study; +---- +Binder Error: Property doesnotexist is never registered! + +statement ok +-DROP PROPERTY GRAPH pg; diff --git a/test/sql/pattern_matching/inheritance_support.test b/test/sql/pattern_matching/inheritance_support.test new file mode 100644 index 00000000..e3278e9a --- /dev/null +++ b/test/sql/pattern_matching/inheritance_support.test @@ -0,0 +1,229 @@ +# name: test/sql/pattern_matching/inheritance_support.test +# description: Testing inheritance support +# group: [pattern_matching] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Person(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE Organisation(name VARCHAR, id BIGINT, mask BIGINT); + +statement ok +CREATE TABLE Company(name VARCHAR, id BIGINT, mask VARCHAR); + +statement ok +CREATE TABLE University(name VARCHAR, id BIGINT, mask VARCHAR); + +statement ok +CREATE TABLE worksAt(personId BIGINT, organisationId BIGINT); + +statement ok +INSERT INTO Person VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT INTO worksAt VALUES (0,1), (0,2), (0,3), (1,2), (1,3), (2,3), (3,0), (4,3); + +statement ok +INSERT INTO 
University VALUES ('VU', 0, 1), ('UvA', 1, 1); + +statement ok +INSERT INTO Company VALUES ('EY', 2, 2), ('CWI', 3, 2); + +statement ok +INSERT INTO Organisation (SELECT * from university union select * from company); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Person LABEL Person, + Organisation LABEL Organisation IN mask(university, company) + ) +EDGE TABLES ( + worksAt SOURCE KEY ( personId ) REFERENCES Person ( id ) + DESTINATION KEY ( organisationId ) REFERENCES Organisation ( id ) + LABEL worksAt + ); + +query IIII +-SELECT * +FROM GRAPH_TABLE(pg + MATCH (p:Person)-[w:worksAt]->(u:organisation) + COLUMNS (p.id, p.name, u.id, u.name) + ) result +---- +0 Daniel 1 UvA +0 Daniel 2 EY +0 Daniel 3 CWI +1 Tavneet 2 EY +1 Tavneet 3 CWI +2 Gabor 3 CWI +3 Peter 0 VU +4 David 3 CWI + +query IIIIIII +-SELECT * +FROM GRAPH_TABLE(pg + MATCH (p:Person)-[w:worksAt]->(u:organisation) + COLUMNS (p.*, u.*, w.*) + ) result +---- +0 Daniel 1 1 UvA 1 0 +0 Daniel 2 2 EY 2 0 +0 Daniel 3 2 CWI 3 0 +1 Tavneet 2 2 EY 2 1 +1 Tavneet 3 2 CWI 3 1 +2 Gabor 3 2 CWI 3 2 +3 Peter 0 1 VU 0 3 +4 David 3 2 CWI 3 4 + +query IIIII +-SELECT * +FROM GRAPH_TABLE(pg + MATCH (p:Person)-[w:worksAt]->(u:ORGANISATION) + COLUMNS (p.id, p.name, u.id, u.name, u.mask) + ) result +---- +0 Daniel 1 UvA 1 +0 Daniel 2 EY 2 +0 Daniel 3 CWI 2 +1 Tavneet 2 EY 2 +1 Tavneet 3 CWI 2 +2 Gabor 3 CWI 2 +3 Peter 0 VU 1 +4 David 3 CWI 2 + +query IIIII +-FROM GRAPH_TABLE(pg + MATCH (p:Person)-[w:worksAt]->(u:university) + COLUMNS (p.id, p.name, u.id, u.name, u.mask) + ) result +---- +0 Daniel 1 UvA 1 +3 Peter 0 VU 1 + +query IIIII +-SELECT * +FROM GRAPH_TABLE(pg + MATCH (p:Person)-[w:worksAt]->(u:company) + COLUMNS (p.id, p.name, u.id, u.name, u.mask) + ) result +---- +0 Daniel 3 CWI 2 +1 Tavneet 3 CWI 2 +2 Gabor 3 CWI 2 +4 David 3 CWI 2 +0 Daniel 2 EY 2 +1 Tavneet 2 EY 2 + +# Should work with different capitalization +query IIII +-SELECT * +FROM GRAPH_TABLE(pg + MATCH (p:Person)-[w:worksAt]->(u:COMPANY) + 
COLUMNS (p.id, p.name, u.id, u.name) + ) result +---- +0 Daniel 3 CWI +1 Tavneet 3 CWI +2 Gabor 3 CWI +4 David 3 CWI +0 Daniel 2 EY +1 Tavneet 2 EY + +query IIII +-SELECT * +FROM GRAPH_TABLE(pg + MATCH (p:organisation)<-[w:WORKSAT]-(u:person) + COLUMNS (p.id, p.name, u.id, u.name) + ) result +---- +1 UvA 0 Daniel +2 EY 0 Daniel +3 CWI 0 Daniel +2 EY 1 Tavneet +3 CWI 1 Tavneet +3 CWI 2 Gabor +0 VU 3 Peter +3 CWI 4 David + +query IIII +-SELECT * +FROM GRAPH_TABLE(pg + MATCH (u:university)<-[w:worksAt]-(p:person) + COLUMNS (p.id, p.name, u.name, u.mask) + ) result +---- +0 Daniel UvA 1 +3 Peter VU 1 + +statement ok +-drop property graph pg; + +statement ok +drop table person; + +statement ok +CREATE TABLE College(id int, college varchar); + +statement ok +INSERT INTO College VALUES(1,'stanford'),(2,'harvard'); + +statement ok +CREATE TABLE Person (id int, name varchar, birthDate date, msk bigint); + +statement ok +INSERT INTO Person VALUES (1,'Ana','2000-10-01', 1), (2,'Bo','2000-01-10', 3), + (2,'Ed','2001-10-10', 1), (2,'Jo','2001-01-01', 1) + +statement ok +CREATE TABLE Enrol(studentID int, collegeID int, classYear int); + +statement ok +INSERT INTO Enrol VALUES (1,1,2021), (1,2,2023), + (2,2,2023), (4,1,2022); + +statement ok +CREATE TABLE Know(src int, dst int, createDate date, msgCount int); + +statement ok +INSERT INTO Know VALUES (1,2,'2023-09-01',2), (1,4,'2022-10-15',10), + (4,3,'2015-03-08',20), (3,2,'2022-10-30',10); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Person PROPERTIES (id, name, birthDate) LABEL Person IN msk (student, TA), + College PROPERTIES (id, college) LABEL College ) +EDGE TABLES ( + know SOURCE KEY(src) REFERENCES Person(id) + DESTINATION KEY(dst) REFERENCES Person(id) + PROPERTIES (createDate, msgCount) LABEL know, + enrol SOURCE KEY(studentID) REFERENCES Person(id) + DESTINATION KEY(collegeID) REFERENCES College(id) + PROPERTIES (classYear) LABEL studiesAt ); + +query III +-FROM GRAPH_TABLE (pg MATCH (a:Student) 
COLUMNS(*)) tmp; +---- +1 Ana 2000-10-01 +2 Bo 2000-01-10 +2 Ed 2001-10-10 +2 Jo 2001-01-01 + +query III +-FROM GRAPH_TABLE (pg MATCH (a:Person) COLUMNS(*)) tmp; +---- +1 Ana 2000-10-01 +2 Bo 2000-01-10 +2 Ed 2001-10-10 +2 Jo 2001-01-01 + +query III +-FROM GRAPH_TABLE (pg MATCH (a:TA) COLUMNS(*)) tmp; +---- +2 Bo 2000-01-10 diff --git a/test/sql/pattern_matching/path_modes.test b/test/sql/pattern_matching/path_modes.test new file mode 100644 index 00000000..9543c621 --- /dev/null +++ b/test/sql/pattern_matching/path_modes.test @@ -0,0 +1,118 @@ +# name: test/sql/pattern_matching/path_modes.test +# description: Testing the path modes, most have not been implemented yet +# group: [pattern_matching] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); + +statement ok +CREATE TABLE School(name VARCHAR, Id BIGINT, Kind VARCHAR); + +statement ok +CREATE TABLE StudyAt(personId BIGINT, schoolId BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +INSERT INTO School VALUES ('VU', 0, 'University'), ('UVA', 1, 'University'); + +statement ok +INSERT INTO StudyAt VALUES (0, 0), (1, 0), (2, 1), (3, 1), (4, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School LABEL SCHOOL + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows, + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( id ) + LABEL StudyAt + ); + +#statement ok +#-FROM GRAPH_TABLE (pg +# MATCH +# p = ANY SHORTEST WALK PATH (a:Person)-[k:knows]-> 
*(b:Person) +# WHERE a.name = 'Daniel' +# COLUMNS (p, a.name as name, b.name as school) +# ) study; + +#statement ok +#-FROM GRAPH_TABLE (pg +# MATCH +# p = ANY SHORTEST (a:Person)-[k:knows]-> *(b:Person) +# WHERE a.name = 'Daniel' +# COLUMNS (p, a.name as name, b.name as school) +# ) study; + +statement error +-FROM GRAPH_TABLE (pg + MATCH + p = ALL SHORTEST (a:Person)-[k:knows]-> *(b:Person) + WHERE a.name = 'Daniel' + COLUMNS (p, a.name as name, b.name as school) + ) study; +---- +Not implemented Error: ALL SHORTEST has not been implemented yet. + +statement error +-FROM GRAPH_TABLE (pg + MATCH + p = ANY SHORTEST TRAIL (a:Person)-[k:knows]-> *(b:Person) + WHERE a.name = 'Daniel' + COLUMNS (p, a.name as name, b.name as school) + ) study; +---- +Not implemented Error: Path modes other than WALK have not been implemented yet. + +statement error +-FROM GRAPH_TABLE (pg + MATCH + p = ANY SHORTEST ACYCLIC (a:Person)-[k:knows]-> *(b:Person) + WHERE a.name = 'Daniel' + COLUMNS (p, a.name as name, b.name as school) + ) study; +---- +Not implemented Error: Path modes other than WALK have not been implemented yet. + +## https://github.com/cwida/duckpgq-extension/issues/46 +##statement error +##-FROM GRAPH_TABLE (pg +## MATCH +## p = ANY SHORTEST 5 WALK (a:Person)-[k:knows]-> *(b:Person) +## WHERE a.name = 'Daniel' +## COLUMNS (p, a.name as name, b.name as school) +## ) study; +##---- +##Not implemented Error: TopK has not been implemented yet. + + +statement error +-FROM GRAPH_TABLE (pg + MATCH + p = ANY SHORTEST SIMPLE (a:Person)-[k:knows]-> *(b:Person) + WHERE a.name = 'Daniel' + COLUMNS (p, a.name as name, b.name as school) + ) study; +---- +Not implemented Error: Path modes other than WALK have not been implemented yet. 
+ + + diff --git a/test/sql/pattern_matching/undirected_edges.test b/test/sql/pattern_matching/undirected_edges.test new file mode 100644 index 00000000..3de2f1bd --- /dev/null +++ b/test/sql/pattern_matching/undirected_edges.test @@ -0,0 +1,68 @@ +# name: test/sql/pattern_matching/undirected_edges.test +# description: Testing the undirected edge path pattern matching +# group: [pattern_matching] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17), (4, 0, 18); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES (Student) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + ); + +query II +SELECT a.name AS person, b.name AS friend + FROM ((SELECT know.src, know.dst FROM know) UNION ALL (SELECT know.dst, know.src FROM know)) AS k , Student AS b , Student AS a + WHERE ((a.id = k.src) AND (b.id = k.dst) AND (a.name = 'Daniel')) +ORDER BY person, friend; +---- +Daniel David +Daniel Gabor +Daniel Peter +Daniel Peter +Daniel Tavneet + +# Daniel has 3 outgoing edges and 2 incoming edges, so there should be 5 tuples +query II +-SELECT person, friend +FROM GRAPH_TABLE (pg + MATCH + (a:Student)-[k:know]-(b:Student) + WHERE a.name = 'Daniel' + COLUMNS (a.name as person, b.name as friend) + ) +ORDER BY person, friend; +---- +Daniel David +Daniel Gabor +Daniel Peter +Daniel Peter +Daniel Tavneet + +# Daniel has 3 outgoing edges and 2 incoming edges, so there should be 5 tuples +query III +-FROM GRAPH_TABLE (pg + MATCH + (a:Student)-[k:know]-(b:Student) + WHERE a.name = 'Daniel' + COLUMNS (a.name as person, b.name as friend, k.createDate as date) + 
) +ORDER BY person, friend, date; +---- +Daniel David 18 +Daniel Gabor 11 +Daniel Peter 12 +Daniel Peter 13 +Daniel Tavneet 10 + diff --git a/test/sql/pgq_keywords.test b/test/sql/pgq_keywords.test new file mode 100644 index 00000000..7c0b2fb4 --- /dev/null +++ b/test/sql/pgq_keywords.test @@ -0,0 +1,67 @@ +# name: test/sql/pgq_keywords.test +# description: Testing PGQ reserved keywords in other queries +# group: [sql] + +#statement ok +#pragma enable_verification + +require duckpgq + +statement ok +select 1 as path; + +statement ok +select 1 as group; + +statement ok +SELECT database_oid AS seq, database_name AS name, path AS file FROM duckdb_databases() WHERE NOT internal ORDER BY 1 + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); + +statement ok +CREATE TABLE School(name VARCHAR, Id BIGINT, Kind VARCHAR); + +statement ok +CREATE TABLE StudyAt(personId BIGINT, schoolId BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +INSERT INTO School VALUES ('VU', 0, 'University'), ('UVA', 1, 'University'); + +statement ok +INSERT INTO StudyAt VALUES (0, 0), (1, 0), (2, 1), (3, 1), (4, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person, + School LABEL SCHOOL + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + LABEL Knows, + studyAt SOURCE KEY ( personId ) REFERENCES Student ( id ) + DESTINATION KEY ( SchoolId ) REFERENCES School ( id ) + LABEL StudyAt + ); + +query II +-SELECT study.name, study.school +FROM GRAPH_TABLE (pg + MATCH + (a:Person)-[s:StudyAt]->(b:School) + WHERE a.name = 'Daniel' + COLUMNS (a.name as name, b.name as school) 
+ ) study; +---- +Daniel VU diff --git a/test/sql/pragma/create_vertex_table.test b/test/sql/pragma/create_vertex_table.test new file mode 100644 index 00000000..300d9596 --- /dev/null +++ b/test/sql/pragma/create_vertex_table.test @@ -0,0 +1,66 @@ +# name: test/sql/pragma/create_vertex_table.test +# description: Testing the pragma create_vertex_table +# group: [pragma] + +require duckpgq + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +pragma create_vertex_table(know, src, dst, v, id); + +statement ok +select * from v; + +query I +select count(*) from v; +---- +5 + +statement error +pragma create_vertex_table(know, src, dst, group, id); +---- +Parser Error: syntax error at or near "group" + +statement error +pragma create_vertex_table(know, nonexistingcolumn, dst, v2, id); +---- +Binder Error: Referenced column "nonexistingcolumn" not found in FROM clause! + +statement error +pragma create_vertex_table(know, src, nonexistingcolumn, v3, id); +---- +Binder Error: Referenced column "nonexistingcolumn" not found in FROM clause! + +statement error +pragma create_vertex_table(nonexistingtable, src, dst, v3, id); +---- +Catalog Error: Table with name nonexistingtable does not exist! + +statement error +pragma create_vertex_table(know, src, dst, v, id); +---- +Catalog Error: Table with name "v" already exists! + +statement error +pragma create_vertex_table(know, src, dst, v); +---- +Binder Error: No function matches the given name and argument types 'create_vertex_table(VARCHAR, VARCHAR, VARCHAR, VARCHAR)'. You might need to add explicit type casts. 
+ +statement ok +create table person_knows_person(creationDate, Person1id, Person2id) as from 'duckdb/data/SNB0.003/person_knows_person.csv'; + +query I +select count(*) from person_knows_person; +---- +83 + +statement ok +pragma create_vertex_table(person_knows_person, person1id, person2id, v4, id); + +query I +select count(*) from v4; +---- +39 + diff --git a/test/sql/pragma/show_property_graphs.test b/test/sql/pragma/show_property_graphs.test new file mode 100644 index 00000000..bc92e2c1 --- /dev/null +++ b/test/sql/pragma/show_property_graphs.test @@ -0,0 +1,63 @@ +# name: test/sql/pragma/show_property_graphs.test +# group: [pragma] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + Forum LABEL Forum, + Organisation LABEL Organisation IN typemask(company, university), + Place LABEL Place, + Tag LABEL Tag, + TagClass LABEL TagClass, + Country LABEL Country, + City LABEL City, + Message LABEL Message + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + Forum_hasMember_Person SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasMember, + Forum_hasTag_Tag SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL Forum_hasTag, + Person_hasInterest_Tag SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL hasInterest, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation, + Person_likes_Message SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (id) REFERENCES Message (id) + LABEL likes_Message, + Message_hasTag_Tag SOURCE KEY (id) REFERENCES Message (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) 
+ LABEL message_hasTag, + Message_hasAuthor_Person SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasAuthor, + Message_replyOf_Message SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (ParentMessageId) REFERENCES Message (id) + LABEL replyOf + ); + +query I +pragma show_property_graphs; +---- +snb + +statement ok +-drop property graph snb; + +query I +pragma show_property_graphs; +---- + diff --git a/test/sql/scalar/delete_csr.test b/test/sql/scalar/delete_csr.test new file mode 100644 index 00000000..dff695e7 --- /dev/null +++ b/test/sql/scalar/delete_csr.test @@ -0,0 +1,103 @@ +# name: test/sql/scalar/delete_csr.test +# description: Testing the delete csr UDF +# group: [scalar] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, id BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17), (2, 4, 18); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( id ) LABEL Knows + ); + +statement ok +SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + 
a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +statement ok +SELECT CREATE_CSR_EDGE( + 1, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 1, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +query I +SELECT delete_csr(0) as flag; +---- +true + +query I +SELECT delete_csr(1) as flag; +---- +true + +query I +SELECT delete_csr(0) as flag; +---- +false + +query I +SELECT delete_csr(3) as flag; +---- +false diff --git a/test/sql/scalar/get_csr_w_type.test b/test/sql/scalar/get_csr_w_type.test new file mode 100644 index 00000000..87e87752 --- /dev/null +++ b/test/sql/scalar/get_csr_w_type.test @@ -0,0 +1,155 @@ +# name: test/sql/scalar/get_csr_w_type.test +# description: Testing getting the CSR W type UDF +# group: [scalar] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, id BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17), (2, 4, 18); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( id ) LABEL 
Knows + ); + +statement ok +SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +statement ok +SELECT CREATE_CSR_EDGE( + 1, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 1, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +query I +SELECT csr_get_w_type(0); +---- +0 + +statement ok +SELECT CREATE_CSR_EDGE( + 1, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 1, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid, 12) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +query I +SELECT csr_get_w_type(1); +---- +1 + +statement ok +SELECT CREATE_CSR_EDGE( + 2, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 2, + (SELECT count(a.id) FROM Student a), + 
sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid, 1.2) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +query I +SELECT csr_get_w_type(2); +---- +2 + +statement error +SELECT csr_get_w_type(3); +---- + diff --git a/test/sql/scalar/getpgschema.test b/test/sql/scalar/getpgschema.test new file mode 100644 index 00000000..51e3dd8b --- /dev/null +++ b/test/sql/scalar/getpgschema.test @@ -0,0 +1,204 @@ +# name: test/sql/scalar/getpgschema.test +# description: Testing getting the various UDFs to get CSR statistics +# group: [scalar] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, id BIGINT); + +statement ok +CREATE TABLE School(school_name VARCHAR, school_id BIGINT, school_kind BIGINT); + +statement ok +INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17), (2, 4, 18); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student PROPERTIES ( id, name ) LABEL Person + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + PROPERTIES ( id ) LABEL Knows + ); + +statement ok +SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN 
student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +statement ok +SELECT CREATE_CSR_EDGE( + 1, + (SELECT count(a.id) FROM Student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 1, + (SELECT count(a.id) FROM Student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Student a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + +query I +SELECT * from get_csr_e(0); +---- +1 +2 +3 +2 +3 +3 +4 +0 +3 + +query I +SELECT * from get_csr_v(0); +---- +0 +3 +5 +7 +8 +9 +9 + +query I +SELECT * from get_pg_vtablenames('pg'); +---- +Student + +query I +SELECT * from get_pg_etablenames('pg'); +---- +know + +query I +SELECT * from get_pg_vcolnames('pg', 'Student'); +---- +id +name + +query I +SELECT * from get_pg_ecolnames('pg', 'know'); +---- +id + +query I +SELECT delete_csr(0) as flag; +---- +true + +statement ok +create or replace table snb_pairs as ( + select src, dst + from (select a.rowid as src from student a), + (select b.rowid as dst from student b) + using sample reservoir(5 rows) repeatable (300) +); + +statement ok +WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM student a + LEFT JOIN know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count(*) from know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM know k + JOIN student a on a.id = k.src + JOIN 
student c on c.id = k.dst + ) SELECT src as source, dst as destination, iterativelength(0, (select count(*) from student), snb_pairs.src, snb_pairs.dst) as path + FROM snb_pairs, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE __x.temp * 0 = 0; + +statement ok +SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM student a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM student a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM student a + LEFT JOIN know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + (select count(*) from know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), + a.rowid, + c.rowid, + k.rowid) as temp + FROM know k + JOIN student a on a.id = k.src + JOIN student c on c.id = k.dst; + + +statement ok +SELECT csrv FROM get_csr_v(0) + +statement ok +SELECT csre FROM get_csr_e(0) \ No newline at end of file diff --git a/test/sql/scalar/local_clustering_coefficient.test b/test/sql/scalar/local_clustering_coefficient.test new file mode 100644 index 00000000..65ac30b6 --- /dev/null +++ b/test/sql/scalar/local_clustering_coefficient.test @@ -0,0 +1,257 @@ +# name: test/sql/scalar/local_clustering_coefficient.test +# description: Testing the local clustering coefficient calculations +# group: [scalar] + +require duckpgq + +# Test with a graph having no edges +statement ok +CREATE TABLE IsolatedStudent(id BIGINT, name VARCHAR);INSERT INTO IsolatedStudent VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'); + +statement ok +CREATE TABLE NoEdgeKnow(src BIGINT, dst BIGINT); + +statement ok +-CREATE PROPERTY GRAPH no_edge_pg +VERTEX TABLES ( + IsolatedStudent + ) +EDGE TABLES ( + NoEdgeKnow SOURCE KEY ( src ) REFERENCES IsolatedStudent ( id ) + DESTINATION KEY ( dst ) REFERENCES IsolatedStudent ( id ) + ); + +statement error +select id, local_clustering_coefficient from local_clustering_coefficient(no_edge_pg, isolatedstudent, noedgeknow); +---- 
+Constraint Error: CSR not found. Is the graph populated? + + +# Test with an empty graph +statement ok +CREATE TABLE EmptyStudent(id BIGINT, name VARCHAR); + +statement ok +CREATE TABLE EmptyKnow(src BIGINT, dst BIGINT); + +statement ok +-CREATE PROPERTY GRAPH empty_pg +VERTEX TABLES ( + EmptyStudent + ) +EDGE TABLES ( + EmptyKnow SOURCE KEY ( src ) REFERENCES EmptyStudent ( id ) + DESTINATION KEY ( dst ) REFERENCES EmptyStudent ( id ) + ); + +query II +select id, local_clustering_coefficient from local_clustering_coefficient(empty_pg, emptystudent, emptyknow); +---- + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +CREATE TABLE Foo(id BIGINT);INSERT INTO Foo VALUES (0), (1), (2), (3), (4); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student, + Foo + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + ); + +query II +select id, local_clustering_coefficient from local_clustering_coefficient(pg, student, know); +---- +0 1.0 +1 1.0 +2 1.0 +3 0.5 +4 0.0 + +query III +select a.id, a.name, local_clustering_coefficient from local_clustering_coefficient(pg, student, know), student a where a.id = lcc.id; +---- +0 Daniel 1.0 +1 Tavneet 1.0 +2 Gabor 1.0 +3 Peter 0.5 +4 David 0.0 + +statement error +select local_clustering_coefficient from local_clustering_coefficient(pgdoesnotexist, student, know), student a where a.id = lcc.id; +---- +Invalid Error: Property graph pgdoesnotexist not found + +statement error +select local_clustering_coefficient from local_clustering_coefficient(pg, a, know), student a where a.id = lcc.id; +---- +Invalid Error: Label 'a' not found. 
Did you mean the vertex label 'foo'? + +statement error +select local_clustering_coefficient from local_clustering_coefficient(pg, student, b), student a where a.id = lcc.id; +---- +Invalid Error: Label 'b' not found. Did you mean the edge label 'know'? + +statement error +select local_clustering_coefficient from local_clustering_coefficient(pg, foo, student), student a where a.id = lcc.id; +---- +Invalid Error: Exact label 'student' found, but it is not a edge table. + +statement error +select local_clustering_coefficient from local_clustering_coefficient(pg, student, foo), student a where a.id = lcc.id; +---- +Invalid Error: Exact label 'foo' found, but it is not a edge table. + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows); + +query II +select id, local_clustering_coefficient from local_clustering_coefficient(snb, person, knows); +---- +14 0.33333334 +16 0.5 +32 0.8333333 +2199023255557 0.6666667 +2199023255573 1.0 +2199023255594 0.1904762 +4398046511139 0.0 +6597069766702 0.0 +8796093022234 0.0 +8796093022237 0.6666667 +8796093022244 0.0 +8796093022249 0.4 +10995116277761 0.3 +10995116277782 0.23809524 +10995116277783 0.0 +10995116277808 0.0 +13194139533342 1.0 +13194139533352 0.31111112 +13194139533355 0.2 +15393162788877 0.0 +17592186044443 0.0 +17592186044461 1.0 +19791209299968 1.0 +19791209299987 0.0 +21990232555526 0.0 +21990232555527 0.0 +24189255811081 0.125 +24189255811109 1.0 +26388279066632 0.0 +26388279066641 0.8333333 +26388279066655 0.33333334 +26388279066658 0.21794872 +26388279066668 0.5 +28587302322180 0.16666667 +28587302322191 0.0 +28587302322196 0.8333333 +28587302322204 0.2857143 +28587302322209 0.0 +28587302322223 0.0 +30786325577731 0.0 +30786325577740 1.0 +32985348833291 0.0 +32985348833318 0.0 
+32985348833329 0.0 +35184372088834 0.0 +35184372088850 0.6666667 +35184372088856 0.33333334 +35184372088871 0.0 +37383395344394 0.0 +37383395344409 0.0 + + +# Test with a graph having self-loops +statement ok +CREATE TABLE SelfLoopStudent(id BIGINT, name VARCHAR);INSERT INTO SelfLoopStudent VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'); + +statement ok +CREATE TABLE SelfLoopKnow(src BIGINT, dst BIGINT);INSERT INTO SelfLoopKnow VALUES (0,0), (1,1), (2,2); + +statement ok +-CREATE PROPERTY GRAPH self_loop_pg +VERTEX TABLES ( + SelfLoopStudent + ) +EDGE TABLES ( + SelfLoopKnow SOURCE KEY ( src ) REFERENCES SelfLoopStudent ( id ) + DESTINATION KEY ( dst ) REFERENCES SelfLoopStudent ( id ) + ); + +query II +select id, local_clustering_coefficient from local_clustering_coefficient(self_loop_pg, selfloopstudent, selfloopknow); +---- +0 0.0 +1 0.0 +2 0.0 + +# Test with a disconnected graph +statement ok +CREATE TABLE DisconnectedStudent(id BIGINT, name VARCHAR);INSERT INTO DisconnectedStudent VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'), (3, 'Dave'), (4, 'Eve'); + +statement ok +CREATE TABLE DisconnectedKnow(src BIGINT, dst BIGINT);INSERT INTO DisconnectedKnow VALUES (0,1), (2,3); + +statement ok +-CREATE PROPERTY GRAPH disconnected_pg +VERTEX TABLES ( + DisconnectedStudent + ) +EDGE TABLES ( + DisconnectedKnow SOURCE KEY ( src ) REFERENCES DisconnectedStudent ( id ) + DESTINATION KEY ( dst ) REFERENCES DisconnectedStudent ( id ) + ); + +query II +select id, local_clustering_coefficient from local_clustering_coefficient(disconnected_pg, disconnectedstudent, disconnectedknow); +---- +0 0.0 +1 0.0 +2 0.0 +3 0.0 +4 0.0 + +# Test with a different number of vertices and edges +statement ok +CREATE TABLE VariedStudent(id BIGINT, name VARCHAR);INSERT INTO VariedStudent VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'), (3, 'Dave'), (4, 'Eve'), (5, 'Frank'); + +statement ok +CREATE TABLE VariedKnow(src BIGINT, dst BIGINT);INSERT INTO VariedKnow VALUES (0,1), (0,2), (0,3), 
(1,2), (2,3), (3,4), (4,5); + +statement ok +-CREATE PROPERTY GRAPH varied_pg +VERTEX TABLES ( + VariedStudent + ) +EDGE TABLES ( + VariedKnow SOURCE KEY ( src ) REFERENCES VariedStudent ( id ) + DESTINATION KEY ( dst ) REFERENCES VariedStudent ( id ) + ); + +query II +select id, local_clustering_coefficient from local_clustering_coefficient(varied_pg, variedstudent, variedknow); +---- +0 0.6666667 +1 1.0 +2 0.6666667 +3 0.33333334 +4 0.0 +5 0.0 diff --git a/test/sql/scalar/pagerank.test b/test/sql/scalar/pagerank.test new file mode 100644 index 00000000..cad94771 --- /dev/null +++ b/test/sql/scalar/pagerank.test @@ -0,0 +1,87 @@ +# name: test/sql/scalar/pagerank.test +# description: Testing the pagerank implementation +# group: [scalar] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + ); + +query II +select id, pagerank from pagerank(pg, student, know); +---- +0 0.30722555839452875 +1 0.11534940106637968 +2 0.16437299553018173 +3 0.32814638463154105 +4 0.028301886792456276 + + +statement ok +CREATE OR REPLACE TABLE Student ( + id BIGINT +); + +statement ok +INSERT INTO Student (id) VALUES +(0), +(1), +(2), +(3), +(4); + +statement ok +CREATE OR REPLACE TABLE know ( + src BIGINT, + dst BIGINT, + edge BIGINT +); + +statement ok +INSERT INTO know (src, dst, edge) VALUES +(2, 1, 4), +(3, 1, 5), +(3, 2, 6), +(1, 2, 4), +(1, 0, 0), +(2, 0, 1), +(3, 0, 2), +(0, 1, 0), +(4, 3, 7), +(0, 3, 3), +(1, 3, 5), +(2, 3, 6), +(3, 4, 7), +(0, 2, 1); + +statement ok +-CREATE 
OR REPLACE PROPERTY GRAPH pg +VERTEX TABLES ( + Student + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + ); + +query II +select id, pagerank from pagerank(pg, student, know); +---- +0 0.19672392385442233 +1 0.19672392385442233 +2 0.19672392385442233 +3 0.26797750004549203 +4 0.08524695480585476 \ No newline at end of file diff --git a/test/sql/scalar/weakly_connected_component.test b/test/sql/scalar/weakly_connected_component.test new file mode 100644 index 00000000..962cda89 --- /dev/null +++ b/test/sql/scalar/weakly_connected_component.test @@ -0,0 +1,324 @@ +# name: test/sql/scalar/weakly_connected_component.test +# description: Testing the weakly connected component implementation +# group: [scalar] + +require duckpgq + +statement ok +CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David'); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17); + +statement ok +CREATE TABLE Foo(id BIGINT);INSERT INTO Foo VALUES (0), (1), (2), (3), (4); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Student, + Foo + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) + ); + +query II +select id, componentId from weakly_connected_component(pg, student, know); +---- +0 4 +1 4 +2 4 +3 4 +4 4 + +statement ok +CREATE OR REPLACE TABLE Student(id BIGINT, name VARCHAR); +INSERT INTO Student VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'), (3, 'David'), (4, 'Eve'); + +statement ok +CREATE OR REPLACE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); +INSERT INTO know VALUES (0, 0, 10), (1, 1, 11), (2, 2, 12), (3, 3, 13), (4, 4, 14); +# Self loops + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH 
pg_disconnected +VERTEX TABLES ( + Student +) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) +); + +query II +select id, componentId from weakly_connected_component(pg_disconnected, student, know); +---- +0 0 +1 1 +2 2 +3 3 +4 4 + +statement ok +CREATE OR REPLACE TABLE Student(id BIGINT, name VARCHAR); +INSERT INTO Student VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'), (3, 'David'), (4, 'Eve'), (5, 'Frank'); + +statement ok +CREATE OR REPLACE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); +INSERT INTO know VALUES (0, 1, 10), (1, 2, 11), (2, 3, 12), (3, 0, 13); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH pg_isolated +VERTEX TABLES ( + Student +) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) +); + +query II +select id, componentId from weakly_connected_component(pg_isolated, student, know); +---- +0 2 +1 2 +2 2 +3 2 +4 4 +5 5 + + +statement ok +CREATE OR REPLACE TABLE Student(id BIGINT, name VARCHAR); +INSERT INTO Student VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'), (3, 'David'), (4, 'Eve'); + +statement ok +CREATE OR REPLACE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); +INSERT INTO know VALUES (0, 1, 10), (1, 2, 11), (3, 4, 12); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH pg_two_components +VERTEX TABLES ( + Student +) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) +); + +query II +select id, componentId from weakly_connected_component(pg_two_components, student, know); +---- +0 2 +1 2 +2 2 +3 4 +4 4 + +statement ok +CREATE OR REPLACE TABLE Student(id BIGINT, name VARCHAR); +INSERT INTO Student VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'), (3, 'David'), (4, 'Eve'); + +statement ok +CREATE OR REPLACE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); +INSERT INTO know VALUES (0, 1, 10), (1, 2, 11), (2, 3, 12), 
(3, 0, 13), (3, 4, 14); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH pg_cyclic +VERTEX TABLES ( + Student +) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) +); + +query II +select id, componentId from weakly_connected_component(pg_cyclic, student, know); +---- +0 4 +1 4 +2 4 +3 4 +4 4 + +statement ok +CREATE OR REPLACE TABLE Student(id BIGINT, name VARCHAR); +INSERT INTO Student VALUES (0, 'Node0'), (1, 'Node1'), (2, 'Node2'), (3, 'Node3'), + (4, 'Node4'), (5, 'Node5'), (6, 'Node6'), (7, 'Node7'), + (8, 'Node8'), (9, 'Node9'); + +statement ok +CREATE OR REPLACE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT); +INSERT INTO know VALUES (0, 1, 10), (1, 2, 11), (2, 3, 12), (3, 4, 13), + (5, 6, 14), (6, 7, 15), (7, 8, 16), (8, 9, 17), + (0, 4, 18), (5, 9, 19); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH pg_larger_graph +VERTEX TABLES ( + Student +) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) +); + +query I +select count(distinct componentId) as c from weakly_connected_component(pg_larger_graph, student, know) order by c; +---- +2 + +statement error +select id, componentId from weakly_connected_component(non_existent_graph, student, know); +---- +Invalid Error: Property graph non_existent_graph not found + +statement error +-CREATE PROPERTY GRAPH pg_no_vertex_table +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Student ( id ) + DESTINATION KEY ( dst ) REFERENCES Student ( id ) +); +---- +Parser Error: syntax error at or near "EDGE" + +statement error +select id, componentId from weakly_connected_component(pg_no_vertex_table, non_existent_vertex, know); +---- +Invalid Error: Property graph pg_no_vertex_table not found + +statement ok +CREATE OR REPLACE TABLE Student(id BIGINT, name VARCHAR); +INSERT INTO Student VALUES (0, 'Alice'), (1, 'Bob'); + +statement ok +-CREATE PROPERTY GRAPH pg_no_edge_table 
+VERTEX TABLES ( + Student +); + +statement error +select id, componentId from weakly_connected_component(pg_no_edge_table, student, non_existent_edge); +---- +Invalid Error: Label 'non_existent_edge' not found in the property graph for a edge table. + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb + VERTEX TABLES ( + Person + ) + EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows); + +query I +select count(distinct componentId) as c from weakly_connected_component(snb, person, knows) order by c; +---- +12 + +statement ok +CREATE or replace TABLE edges ( + source INTEGER, + target INTEGER +); + +statement ok +INSERT INTO edges VALUES (1, 2), (2, 3), (4, 5), (2, 4), (10,11); + +statement ok +CREATE OR REPLACE TABLE nodes AS + (SELECT DISTINCT id FROM + (SELECT DISTINCT source AS id FROM edges + UNION + SELECT DISTINCT target AS id FROM edges + ) ); + +statement ok +CREATE TABLE thisisadifferenttable (id INTEGER); + +statement ok +INSERT INTO thisisadifferenttable VALUES (1); + +statement ok +-CREATE OR REPLACE PROPERTY GRAPH my_graph + VERTEX TABLES ( + nodes LABEL nodes, + thisisadifferenttable label thisisadifferenttablelabel + ) + EDGE TABLES ( + edges SOURCE KEY (source) REFERENCES nodes (id) + DESTINATION KEY (target) REFERENCES nodes (id) + LABEL knows + ); + +statement error +SELECT * FROM weakly_connected_component(my_graph, nodes, edges); +---- +Invalid Error: Table 'edges' found in the property graph, but does not have the correct label. Did you mean the label 'knows' instead? + +statement error +SELECT * FROM weakly_connected_component(my_graph, nodes, kows); +---- +Invalid Error: Label 'kows' not found. Did you mean the edge label 'knows'? + +statement error +SELECT * FROM weakly_connected_component(my_graph, knows, knows); +---- +Invalid Error: Exact label 'knows' found, but it is not a vertex table. 
+ +statement error +SELECT * FROM weakly_connected_component(my_graph, no, knows); +---- +Invalid Error: Label 'no' not found. Did you mean the vertex label 'nodes'? + +statement error +SELECT * FROM weakly_connected_component(my_graph, qaaaaaa, knows); +---- +Invalid Error: Label 'qaaaaaa' not found. Did you mean the vertex label 'nodes'? + +statement error +SELECT * FROM weakly_connected_component(my_graph, thisisadifferent, knows); +---- +Invalid Error: Label 'thisisadifferent' not found. Did you mean the vertex label 'thisisadifferenttablelabel'? + +# statement ok +# create or replace table nodes as select unnest(generate_series(0, 5_000)) as id; +# +# statement ok +# CREATE OR REPLACE TABLE edges AS +# WITH +# sampled_sources AS ( +# SELECT id AS source +# FROM nodes +# WHERE RANDOM() < 0.8 +# ), +# sampled_destinations AS ( +# SELECT id AS target +# FROM nodes +# WHERE RANDOM() < 0.8 +# ) +# SELECT source, target +# FROM sampled_sources +# CROSS JOIN sampled_destinations +# WHERE source != target +# using sample 10% (bernoulli); +# +# statement ok +# -CREATE OR REPLACE PROPERTY GRAPH random_graph +# VERTEX TABLES ( +# nodes +# ) +# EDGE TABLES ( +# edges SOURCE KEY (source) REFERENCES nodes (id) +# DESTINATION KEY (target) REFERENCES nodes (id) +# ); +# +# statement ok +# select id, componentId from weakly_connected_component(random_graph, nodes, edges); \ No newline at end of file diff --git a/test/sql/snb/bi.test b/test/sql/snb/bi.test new file mode 100644 index 00000000..4f6b1d31 --- /dev/null +++ b/test/sql/snb/bi.test @@ -0,0 +1,58 @@ +# name: test/sql/snb/bi.test +# description: Testing the SNB bi queries +# group: [snb] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + Forum LABEL Forum, + Organisation LABEL Organisation IN typemask(company, university), + Place LABEL Place, + Tag LABEL Tag, + TagClass LABEL TagClass, + Country LABEL Country, 
+ City LABEL City, + Message LABEL Message + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + Forum_hasMember_Person SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasMember, + Forum_hasTag_Tag SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL Forum_hasTag, + Person_hasInterest_Tag SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL hasInterest, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation, + Person_likes_Message SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (id) REFERENCES Message (id) + LABEL likes_Message, + Message_hasTag_Tag SOURCE KEY (id) REFERENCES Message (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL message_hasTag, + Message_hasAuthor_Person SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasAuthor, + Message_replyOf_Message SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (ParentMessageId) REFERENCES Message (id) + LABEL replyOf + ); + +statement ok +-FROM GRAPH_TABLE (snb + MATCH (personA:Person)-[kAB:Knows where kAB.creationDate BETWEEN '2012-10-04' AND '2013-01-16']- + (personB:Person)-[kBC:Knows where kBC.creationDate BETWEEN '2012-10-04' AND '2013-01-16']- + (personC:Person)-[kCA:Knows where kCA.creationDate BETWEEN '2012-10-04' AND '2013-01-16'] + -(personA:Person)); diff --git a/test/sql/snb/snb.test b/test/sql/snb/snb.test new file mode 100644 index 00000000..9ecfdf7d --- /dev/null +++ b/test/sql/snb/snb.test @@ -0,0 +1,188 @@ +# name: test/sql/snb/snb.test +# description: Testing SNB Interactive queries and loading the property graph +# group: [snb] + +require duckpgq + 
+statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + Forum LABEL Forum, + Organisation LABEL Organisation IN typemask(company, university), + Place LABEL Place, + Tag LABEL Tag, + TagClass LABEL TagClass, + Country LABEL Country, + City LABEL City, + Message LABEL Message + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + Forum_hasMember_Person SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasMember, + Forum_hasTag_Tag SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL Forum_hasTag, + Person_hasInterest_Tag SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL hasInterest, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation, + Person_likes_Message SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (id) REFERENCES Message (id) + LABEL likes_Message, + Message_hasTag_Tag SOURCE KEY (id) REFERENCES Message (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL message_hasTag, + Message_hasAuthor_Person SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasAuthor, + Message_replyOf_Message SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (ParentMessageId) REFERENCES Message (id) + LABEL replyOf + ); + +query III +-FROM GRAPH_TABLE (snb + MATCH (p:Person)-[w:workAt_Organisation]->(u:University) + COLUMNS (p.id as p_id, u.id as u_id, u.type as u_type) + ) tmp + order by p_id, u_id + limit 10; +---- +14 4593 University +16 5809 University +32 5047 University +2199023255557 1953 University +2199023255573 5263 University +2199023255594 1597 
University +4398046511139 4929 University +6597069766702 5038 University +8796093022234 3008 University +8796093022244 3008 University + +#IC 2 +query IIIIII +-FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 17592186044461)-[k:knows]-(b:Person)<-[au:hasAuthor]-(m:message WHERE m.creationDate < '2010-10-16') + COLUMNS (a.id, a.firstName, a.lastName, m.id as messageId, coalesce(m.imageFile, m.content), m.creationDate) + ) tmp + ORDER BY creationDate DESC, Messageid ASC + LIMIT 20; +---- +17592186044461 Ali Abouba 274877907945 photo274877907945.jpg 2010-09-25 12:41:53.571+00 +17592186044461 Ali Abouba 274877907944 photo274877907944.jpg 2010-09-25 12:41:52.571+00 +17592186044461 Ali Abouba 274877907943 photo274877907943.jpg 2010-09-25 12:41:51.571+00 +17592186044461 Ali Abouba 274877907942 photo274877907942.jpg 2010-09-25 12:41:50.571+00 +17592186044461 Ali Abouba 274877908063 photo274877908063.jpg 2010-09-23 03:50:32.62+00 +17592186044461 Ali Abouba 274877908062 photo274877908062.jpg 2010-09-23 03:50:31.62+00 +17592186044461 Ali Abouba 274877908061 photo274877908061.jpg 2010-09-23 03:50:30.62+00 +17592186044461 Ali Abouba 274877908060 photo274877908060.jpg 2010-09-23 03:50:29.62+00 +17592186044461 Ali Abouba 274877908059 photo274877908059.jpg 2010-09-23 03:50:28.62+00 +17592186044461 Ali Abouba 274877908058 photo274877908058.jpg 2010-09-23 03:50:27.62+00 +17592186044461 Ali Abouba 274877908057 photo274877908057.jpg 2010-09-23 03:50:26.62+00 +17592186044461 Ali Abouba 274877908056 photo274877908056.jpg 2010-09-23 03:50:25.62+00 +17592186044461 Ali Abouba 274877908055 photo274877908055.jpg 2010-09-23 03:50:24.62+00 +17592186044461 Ali Abouba 274877908054 photo274877908054.jpg 2010-09-23 03:50:23.62+00 +17592186044461 Ali Abouba 274877908053 photo274877908053.jpg 2010-09-23 03:50:22.62+00 +17592186044461 Ali Abouba 274877908052 photo274877908052.jpg 2010-09-23 03:50:21.62+00 +17592186044461 Ali Abouba 274877908051 photo274877908051.jpg 2010-09-23 03:50:20.62+00 
+17592186044461 Ali Abouba 274877908050 photo274877908050.jpg 2010-09-23 03:50:19.62+00 +17592186044461 Ali Abouba 274877908049 photo274877908049.jpg 2010-09-23 03:50:18.62+00 +17592186044461 Ali Abouba 274877908048 photo274877908048.jpg 2010-09-23 03:50:17.62+00 + + +# IC 13 todo(dtenwolde) currently directed edge but should be undirected +# person1Id 17592186044461 +# person2Id 35184372088856 +query III +-FROM GRAPH_TABLE (snb + MATCH p = ANY SHORTEST (a:Person WHERE a.id = 17592186044461)-[k:knows]-> *(b:Person where b.id = 35184372088856) + COLUMNS (path_length(p), a.id as a_id, b.id as b_id) + ) tmp LIMIT 20 +---- +2 17592186044461 35184372088856 + +# IS 1 +# personId 17592186044461 +query IIIIIIII +-FROM GRAPH_TABLE (snb + MATCH (a:person where a.id = 17592186044461) + COLUMNS(firstName, lastName, birthday, locationIP, browserUsed, LocationCityId, gender, creationDate) + ) tmp; +---- +Ali Abouba 1987-05-29 41.203.147.168 Internet Explorer 1264 male 2011-05-12 02:46:47.595+00 + +# IS3. Friends of a Person +# set personId 17592186044461 +query IIII +-FROM GRAPH_TABLE (snb + MATCH (a:person WHERE a.id = 17592186044461)-[k:knows]-(b:person) + COLUMNS (b.id, b.firstname, b.lastname, k.creationDate) + ) tmp + ORDER BY creationDate DESC, id ASC; +---- +32 Miguel Gonzalez 2012-11-12 10:57:04.309+00 +26388279066658 Roberto Diaz 2012-10-17 21:32:52.428+00 +2199023255594 Ali Achiou 2012-07-08 23:38:19.049+00 + +# IS4. Content of a message +# set messageId 824633720985 +query II +-FROM GRAPH_TABLE (snb + MATCH (m:message WHERE m.id = 824633720985) + COLUMNS ( coalesce(imageFile, content, ''), creationDate) + ) tmp; +---- +photo824633720985.jpg 2012-01-12 00:17:04.151+00 + +# IS5. Creator of a message +# messageId 824633720985 +query III +-FROM GRAPH_TABLE (snb + MATCH (m:message where m.id = 824633720985)-[au:hasAuthor]->(p:person) + COLUMNS (p.id, p.firstName, p.lastName) + ) tmp; +---- +14 Hossein Forouhar + + +# IS7. 
Replies of a message +#set messageId 618475290624 +query IIIIII +-FROM GRAPH_TABLE (snb + MATCH (replyAuthor:person)<-[au2:hasAuthor]-(c:message where c.ParentMessageId is not null)-[r:replyOf]->(m:message where m.id = 618475290624)-[au:hasAuthor]->(messageAuthor:person), + (replyAuthor:person)-[k:knows]-(messageAuthor:person) + COLUMNS (c.id,c.content,c.creationDate, replyAuthor.id, replyAuthor.firstName, replyAuthor.lastName) + ) tmp + ORDER BY tmp.content; +---- +962072674306 thanks 2012-07-08 20:32:03.239+00 24189255811081 Alim Guliyev +962072674305 yes 2012-07-08 23:48:41.63+00 24189255811081 Alim Guliyev + +# IS7. Replies of a message +#set messageId 618475290624 +query IIIII +-FROM GRAPH_TABLE (snb + MATCH (replyAuthor:person)<-[au2:hasAuthor]-(c:message where c.ParentMessageId is not null)-[r:replyOf]->(m:message where m.id = 618475290624)-[au:hasAuthor]->(messageAuthor:person), + (replyAuthor:person)-[k:knows]-(messageAuthor:person) + COLUMNS (c.id,c.content,c.creationDate, replyAuthor.id % 10, replyAuthor.firstName || replyAuthor.lastName) + ) tmp + ORDER BY tmp.content; +---- +962072674306 thanks 2012-07-08 20:32:03.239+00 1 AlimGuliyev +962072674305 yes 2012-07-08 23:48:41.63+00 1 AlimGuliyev + +statement ok +-EXPLAIN ANALYZE COPY (FROM GRAPH_TABLE (snb +MATCH p = ANY SHORTEST (a:Person WHERE a.id = 19791209309999)-[k:knows]-> *(b:Person where b.id = 30786325579519) +COLUMNS (path_length(p), a.id as a_id, b.id as b_id) +) tmp LIMIT 20) TO '__TEST_DIR__/ic13-ea.txt'; diff --git a/test/sql/snb/snb_inheritance.test b/test/sql/snb/snb_inheritance.test new file mode 100644 index 00000000..ec52d42b --- /dev/null +++ b/test/sql/snb/snb_inheritance.test @@ -0,0 +1,149 @@ +# name: test/sql/snb/snb_inheritance.test +# description: Testing the inheritance support for the SNB dataset +# group: [snb] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003' + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person, + 
Organisation LABEL Organisation IN typemask(company, university), + Message + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL workAt_Organisation, + message_hasAuthor_Person SOURCE KEY (MessageId) REFERENCES Message (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasAuthor, + message_replyOf_Message SOURCE KEY (MessageId) REFERENCES Message (id) + DESTINATION KEY (ParentMessageId) REFERENCES Message (id) + LABEL replyOf +); + +query III +-FROM GRAPH_TABLE (snb + MATCH (p:Person)-[w:workAt_Organisation]->(u:University) + COLUMNS (p.id as p_id, u.id as u_id, u.type) + ) tmp + ORDER BY p_id, u_id + limit 10; +---- +14 4593 University +16 5809 University +32 5047 University +2199023255557 1953 University +2199023255573 5263 University +2199023255594 1597 University +4398046511139 4929 University +6597069766702 5038 University +8796093022234 3008 University +8796093022244 3008 University + +#IC 2 +query IIIIII +-FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 17592186044461)-[k:knows]-(b:Person)<-[au:hasAuthor]-(m:message WHERE m.creationDate < '2010-10-16') + COLUMNS (a.id, a.firstName, a.lastName, m.id as messageId, coalesce(m.imageFile, m.content), m.creationDate) + ) tmp + ORDER BY creationDate DESC, Messageid ASC + LIMIT 20 +---- +17592186044461 Ali Abouba 274877907945 photo274877907945.jpg 2010-09-25 12:41:53.571+00 +17592186044461 Ali Abouba 274877907944 photo274877907944.jpg 2010-09-25 12:41:52.571+00 +17592186044461 Ali Abouba 274877907943 photo274877907943.jpg 2010-09-25 12:41:51.571+00 +17592186044461 Ali Abouba 274877907942 photo274877907942.jpg 2010-09-25 12:41:50.571+00 +17592186044461 Ali Abouba 274877908063 photo274877908063.jpg 2010-09-23 03:50:32.62+00 +17592186044461 Ali Abouba 
274877908062 photo274877908062.jpg 2010-09-23 03:50:31.62+00 +17592186044461 Ali Abouba 274877908061 photo274877908061.jpg 2010-09-23 03:50:30.62+00 +17592186044461 Ali Abouba 274877908060 photo274877908060.jpg 2010-09-23 03:50:29.62+00 +17592186044461 Ali Abouba 274877908059 photo274877908059.jpg 2010-09-23 03:50:28.62+00 +17592186044461 Ali Abouba 274877908058 photo274877908058.jpg 2010-09-23 03:50:27.62+00 +17592186044461 Ali Abouba 274877908057 photo274877908057.jpg 2010-09-23 03:50:26.62+00 +17592186044461 Ali Abouba 274877908056 photo274877908056.jpg 2010-09-23 03:50:25.62+00 +17592186044461 Ali Abouba 274877908055 photo274877908055.jpg 2010-09-23 03:50:24.62+00 +17592186044461 Ali Abouba 274877908054 photo274877908054.jpg 2010-09-23 03:50:23.62+00 +17592186044461 Ali Abouba 274877908053 photo274877908053.jpg 2010-09-23 03:50:22.62+00 +17592186044461 Ali Abouba 274877908052 photo274877908052.jpg 2010-09-23 03:50:21.62+00 +17592186044461 Ali Abouba 274877908051 photo274877908051.jpg 2010-09-23 03:50:20.62+00 +17592186044461 Ali Abouba 274877908050 photo274877908050.jpg 2010-09-23 03:50:19.62+00 +17592186044461 Ali Abouba 274877908049 photo274877908049.jpg 2010-09-23 03:50:18.62+00 +17592186044461 Ali Abouba 274877908048 photo274877908048.jpg 2010-09-23 03:50:17.62+00 + + +# IC 13 +# person1Id 17592186044461 +# person2Id 35184372088856 +query III +-FROM GRAPH_TABLE (snb + MATCH p = ANY SHORTEST (a:Person WHERE a.id = 17592186044461)-[k:knows]- *(b:Person where b.id = 35184372088856) + COLUMNS (path_length(p), a.id as a_id, b.id as b_id) + ) tmp LIMIT 20 +---- +2 17592186044461 35184372088856 + +# IS 1 +# personId 17592186044461 +query IIIIIIII +-FROM GRAPH_TABLE (snb + MATCH (a:person where a.id = 17592186044461) + COLUMNS(firstName, lastName, birthday, locationIP, browserUsed, LocationCityId, gender, creationDate) + ) tmp; +---- +Ali Abouba 1987-05-29 41.203.147.168 Internet Explorer 1264 male 2011-05-12 02:46:47.595+00 + +# IS3. 
Friends of a Person +# set personId 17592186044461 +query IIII +-FROM GRAPH_TABLE (snb + MATCH (a:person WHERE a.id = 17592186044461)-[k:knows]-(b:person) + COLUMNS (b.id, b.firstname, b.lastname, k.creationDate) + ) tmp + ORDER BY creationDate DESC, id ASC; +---- +32 Miguel Gonzalez 2012-11-12 10:57:04.309+00 +26388279066658 Roberto Diaz 2012-10-17 21:32:52.428+00 +2199023255594 Ali Achiou 2012-07-08 23:38:19.049+00 + +# IS4. Content of a message +# set messageId 824633720985 +query II +-FROM GRAPH_TABLE (snb + MATCH (m:message WHERE m.id = 824633720985) + COLUMNS ( coalesce(imageFile, content, ''), creationDate) + ) tmp; +---- +photo824633720985.jpg 2012-01-12 00:17:04.151+00 + +# IS5. Creator of a message +# messageId 824633720985 +query III +-FROM GRAPH_TABLE (snb + MATCH (m:message where m.id = 824633720985)-[au:hasAuthor]->(p:person) + COLUMNS (p.id, p.firstName, p.lastName) + ) tmp; +---- +14 Hossein Forouhar + + +# IS7. Replies of a message +#set messageId 618475290624 +query IIIIII +-FROM GRAPH_TABLE (snb + MATCH (replyAuthor:person)<-[au2:hasAuthor]-(c:message where c.ParentMessageId is not null)-[r:replyOf]->(m:message where m.id = 618475290624)-[au:hasAuthor]->(messageAuthor:person), + (replyAuthor:person)-[k:knows]-(messageAuthor:person) + COLUMNS (c.id,c.content,c.creationDate, replyAuthor.id, replyAuthor.firstName, replyAuthor.lastName) + ) tmp; +---- +962072674306 thanks 2012-07-08 20:32:03.239+00 24189255811081 Alim Guliyev +962072674305 yes 2012-07-08 23:48:41.63+00 24189255811081 Alim Guliyev + + diff --git a/test/sql/snb/snb_projected.test_slow b/test/sql/snb/snb_projected.test_slow new file mode 100644 index 00000000..9823e982 --- /dev/null +++ b/test/sql/snb/snb_projected.test_slow @@ -0,0 +1,109 @@ +# name: test/sql/snb/snb_projected.test_slow +# description: Testing the SNB projected dataset +# group: [snb] + +#require duckpgq + +#statement ok +#import database 'duckdb/data/SNB1-projected|'; + +#statement ok +#-CREATE PROPERTY GRAPH 
snb_projected +#VERTEX TABLES ( +# Forum LABEL Forum, +# Message LABEL Message IN Subcategory(Comment, Post), +# Organisation LABEL Organisation IN Subcategory(University, Company), +# Person LABEL Person, +# Place LABEL Place IN Subcategory(Continent, Country, City), +# Tag LABEL Tag, +# TagClass LABEL TagClass +# ) +#EDGE TABLES ( +# Comment_hasCreator_Person SOURCE KEY (CommentId) REFERENCES Message (id) +# DESTINATION KEY (PersonId) REFERENCES Person (id) +# LABEL Comment_hasCreator, +# Comment_hasTag_Tag SOURCE KEY (CommentId) REFERENCES Message (id) +# DESTINATION KEY (TagId) REFERENCES Tag (id) +# LABEL Comment_hasTag, +# Comment_isLocatedIn_Country SOURCE KEY (CommentId) REFERENCES Message (id) +# DESTINATION KEY (CountryId) REFERENCES Tag (id) +# LABEL Comment_isLocatedIn, +# Comment_replyOf_Comment SOURCE KEY (Comment1Id) REFERENCES Message (id) +# DESTINATION KEY (Comment2Id) REFERENCES Message (id) +# LABEL replyOf_Comment, +# Comment_replyOf_Post SOURCE KEY (CommentId) REFERENCES Message (id) +# DESTINATION KEY (PostId) REFERENCES Message (id) +# LABEL replyOf_Post, +# Forum_containerOf_Post SOURCE KEY (ForumId) REFERENCES Forum (id) +# DESTINATION KEY (PostId) REFERENCES Message (id) +# LABEL containerOf, +# Forum_hasMember_Person SOURCE KEY (ForumId) REFERENCES Forum (id) +# DESTINATION KEY (PersonId) REFERENCES Person (id) +# LABEL hasMember, +# Forum_hasModerator_Person SOURCE KEY (ForumId) REFERENCES Forum (id) +# DESTINATION KEY (PersonId) REFERENCES Person (id) +# LABEL hasModerator, +# Forum_hasTag_Tag SOURCE KEY (ForumId) REFERENCES Forum (id) +# DESTINATION KEY (TagId) REFERENCES Tag (id) +# LABEL Forum_hasTag, +# Organisation_isLocatedIn_Place SOURCE KEY (OrganisationId) REFERENCES Organisation (id) +# DESTINATION KEY (PlaceId) REFERENCES Place (id) +# LABEL Organisation_isLocatedIn, +# Person_hasInterest_Tag SOURCE KEY (PersonId) REFERENCES Person (id) +# DESTINATION KEY (interestId) REFERENCES Tag (id) +# LABEL hasInterest, +# 
Person_isLocatedIn_City SOURCE KEY (PersonId) REFERENCES Person (id) +# DESTINATION KEY (CityId) REFERENCES Place (id) +# LABEL Person_isLocatedIn, +# Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) +# DESTINATION KEY (Person2Id) REFERENCES Person (id) +# LABEL Knows, +# Person_likes_Comment SOURCE KEY (PersonId) REFERENCES Person (id) +# DESTINATION KEY (CommentId) REFERENCES Message (id) +# LABEL likes_Comment, +# Person_likes_Post SOURCE KEY (PersonId) REFERENCES Person (id) +# DESTINATION KEY (PostId) REFERENCES Message (id) +# LABEL likes_Post, +# Person_studyAt_University SOURCE KEY (PersonId) REFERENCES Person (id) +# DESTINATION KEY (UniversityId) REFERENCES Organisation (id) +# LABEL studyAt, +# Person_workAt_Company SOURCE KEY (PersonId) REFERENCES Person (id) +# DESTINATION KEY (CompanyId) REFERENCES Organisation (id) +# LABEL workAt, +# Place_isPartOf_Place SOURCE KEY (Place1Id) REFERENCES Place (id) +# DESTINATION KEY (Place2Id) REFERENCES Place (id) +# LABEL isPartOf, +# Post_hasCreator_Person SOURCE KEY (PostId) REFERENCES Message (id) +# DESTINATION KEY (PersonId) REFERENCES Person (id) +# LABEL Post_hasCreator, +# Message_hasCreator_Person SOURCE KEY (MessageId) REFERENCES Message (id) +# DESTINATION KEY (PersonId) REFERENCES Person (id) +# LABEL Message_hasCreator, +# Message_hasTag_Tag SOURCE KEY (MessageId) REFERENCES Message (id) +# DESTINATION KEY (TagId) REFERENCES Tag (id) +# LABEL Message_hasTag, +# Message_isLocatedIn_Country SOURCE KEY (MessageId) REFERENCES Message (id) +# DESTINATION KEY (CountryId) REFERENCES Place (id) +# LABEL Message_isLocatedIn, +# Post_hasTag_Tag SOURCE KEY (PostId) REFERENCES Message (id) +# DESTINATION KEY (TagId) REFERENCES Tag (id) +# LABEL Post_hasTag, +# Post_isLocatedIn_Country SOURCE KEY (PostId) REFERENCES Message (id) +# DESTINATION KEY (CountryId) REFERENCES Place (id) +# LABEL Post_isLocatedIn, +# Tag_hasType_TagClass SOURCE KEY (TagId) REFERENCES Tag (id) +# DESTINATION KEY 
(TagClassId) REFERENCES TagClass (id) +# LABEL hasType, +# TagClass_isSubClassOf_TagClass SOURCE KEY (TagClass1Id) REFERENCES TagClass (id) +# DESTINATION KEY (TagClass2Id) REFERENCES TagClass (id) +# LABEL isSubClassOf +# ); + +# IS2 +#query IIIIIIII +#-FROM GRAPH_TABLE (snb_projected +# MATCH (a:person where a.id = 4026)-[i:Person_isLocatedIn]->(c:City) +# COLUMNS(a.firstName, a.lastName, a.birthday, a.locationIP, a.browserUsed, c.id, a.gender, a.creationDate) +# ) tmp; +#---- +#Ivan Dobrunov 1988-10-21 31.28.20.134 Firefox 865 female 2010-02-09 17:26:35.413 diff --git a/test/sql/source_keyword.test b/test/sql/source_keyword.test new file mode 100644 index 00000000..4cf929d2 --- /dev/null +++ b/test/sql/source_keyword.test @@ -0,0 +1,66 @@ +# name: test/sql/source_keyword.test +# description: Testing the SOURCE keyword +# group: [sql] + +require duckpgq + +#statement ok +#select 1 source; + +statement ok +FROM duckdb_constraints() + +statement ok +SELECT + *, + regexp_extract(constraint_text, 'FOREIGN KEY \\(([a-zA-Z_0-9]+)\\) REFERENCES ([a-zA-Z_0-9]+)\\(([a-zA-Z_0-9]+)\\)', ['source', 'target', 'target_column']) AS name_extract + FROM duckdb_constraints() + WHERE constraint_type = 'FOREIGN KEY' + +statement ok +SELECT + *, + name_extract['source'] AS source, + name_extract['target'] AS target, + name_extract['target_column'] AS target_column +FROM ( + SELECT + *, + regexp_extract(constraint_text, 'FOREIGN KEY \\(([a-zA-Z_0-9]+)\\) REFERENCES ([a-zA-Z_0-9]+)\\(([a-zA-Z_0-9]+)\\)', ['source', 'target', 'target_column']) AS name_extract + FROM duckdb_constraints() + WHERE constraint_type = 'FOREIGN KEY' +); + + +statement ok +SELECT + f.database_name AS constraint_catalog, + f.schema_name AS constraint_schema, + CONCAT(f.source, '_', f.target, '_', f.target_column, '_fkey') AS constraint_name, + current_database() AS unique_constraint_catalog, + c.schema_name AS unique_constraint_schema, + CONCAT(c.table_name, '_', f.target_column, '_', + CASE WHEN 
c.constraint_type = 'UNIQUE' THEN 'key' ELSE 'pkey' END) AS unique_constraint_name, + 'NONE' AS match_option, + 'NO ACTION' AS update_rule, + 'NO ACTION' AS delete_rule +FROM duckdb_constraints() c +JOIN ( + SELECT + *, + name_extract['source'] AS source, + name_extract['target'] AS target, + name_extract['target_column'] AS target_column + FROM ( + SELECT + *, + regexp_extract(constraint_text, 'FOREIGN KEY \\(([a-zA-Z_0-9]+)\\) REFERENCES ([a-zA-Z_0-9]+)\\(([a-zA-Z_0-9]+)\\)', ['source', 'target', 'target_column']) AS name_extract + FROM duckdb_constraints() + WHERE constraint_type = 'FOREIGN KEY' + ) +) f ON name_extract['target'] = c.table_name +AND (c.constraint_type = 'UNIQUE' OR c.constraint_type = 'PRIMARY KEY'); + + +statement ok +FROM information_schema.tables; diff --git a/test/sql/summarize_property_graph.test b/test/sql/summarize_property_graph.test new file mode 100644 index 00000000..e1dbbc66 --- /dev/null +++ b/test/sql/summarize_property_graph.test @@ -0,0 +1,67 @@ +# name: test/sql/summarize_property_graph.test +# group: [sql] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person LABEL Person + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows + ); + +query IIIIIIIIIIIIIIIIIIIIII +FROM summarize_property_graph(snb) order by table_name; +---- +Person 1 NULL NULL 50 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +Person_knows_person 0 Person Person NULL 83 28 29 22 21 2.8620689655172415 1 10 1 2 3 2.9642857142857144 1 13 1 2 4 + +statement ok +-CREATE PROPERTY GRAPH snb1 +VERTEX TABLES ( + Person LABEL Person, + Message LABEL Message + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + Person_likes_Message SOURCE KEY 
(PersonId) REFERENCES Person (id) + DESTINATION KEY (id) REFERENCES Message (id) + LABEL likes_Message, + Message_replyOf_Message SOURCE KEY (messageId) REFERENCES Message (id) + DESTINATION KEY (ParentMessageId) REFERENCES Message (id) + LABEL replyOf + ); + +query IIIIIIIIIIIIIIIIIIIIII +FROM summarize_property_graph(snb1) order by table_name; +---- +Message true NULL NULL 3660 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +Message_replyOf_Message false Message Message NULL 471 471 150 3189 3510 3.14 1 11 2 3 4 1.0 1 1 1 1 1 +Person true NULL NULL 50 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +Person_knows_person false Person Person NULL 83 28 29 22 21 2.8620689655172415 1 10 1 2 3 2.9642857142857144 1 13 1 2 4 +Person_likes_Message false Person Message NULL 492 48 234 2 3426 2.1025641025641026 1 41 1 1 1 10.25 1 57 2 6 15 + +statement ok +-CREATE PROPERTY GRAPH snb2 +VERTEX TABLES ( + Person LABEL Person +); + +query IIIIIIIIIIIIIIIIIIIIII +FROM summarize_property_graph(snb2) order by table_name; +---- +Person true NULL NULL 50 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL + +statement error +from summarize_property_graph(pgdoesnotexist) order by table_name; +---- +Binder Error: Property graph pgdoesnotexist does not exist \ No newline at end of file diff --git a/test/sql/unnamed_subquery.test b/test/sql/unnamed_subquery.test new file mode 100644 index 00000000..5611485d --- /dev/null +++ b/test/sql/unnamed_subquery.test @@ -0,0 +1,96 @@ +# name: test/sql/unnamed_subquery.test +# description: Testing unnamed subquery support +# group: [sql] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb +VERTEX TABLES ( + Person + ) +EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + ); + + +query II 
+-FROM GRAPH_TABLE (snb + MATCH (p:Person)-[k:Person_knows_Person]->(p2:Person) + COLUMNS (p.firstname, p2.firstname) + ) + limit 10; +---- +Hossein Ken +Hossein Alim +Hossein Alexei +Jan Ali +Jan Otto +Jan Bryn +Jan Hans +Miguel Ali +Miguel Celso +Miguel Ali + +query II +-FROM GRAPH_TABLE (snb + MATCH (p:Person)-[k:Person_knows_Person]->(p2:Person) + COLUMNS (p.firstname, p2.firstname) + ) tmp + limit 10; +---- +Hossein Ken +Hossein Alim +Hossein Alexei +Jan Ali +Jan Otto +Jan Bryn +Jan Hans +Miguel Ali +Miguel Celso +Miguel Ali + +# Be a bit more explicit +query II +-SELECT tmp.p_firstname, tmp.p2_firstname +FROM GRAPH_TABLE (snb + MATCH (p:Person)-[k:Person_knows_Person]->(p2:Person) + COLUMNS (p.firstname as p_firstname, p2.firstname as p2_firstname) + ) tmp + limit 10; +---- +Hossein Ken +Hossein Alim +Hossein Alexei +Jan Ali +Jan Otto +Jan Bryn +Jan Hans +Miguel Ali +Miguel Celso +Miguel Ali + + +# Be a bit more explicit +query II +-SELECT unnamed_subquery.p_firstname, unnamed_subquery.p2_firstname +FROM GRAPH_TABLE (snb + MATCH (p:Person)-[k:Person_knows_Person]->(p2:Person) + COLUMNS (p.firstname as p_firstname, p2.firstname as p2_firstname) + ) + limit 10; +---- +Hossein Ken +Hossein Alim +Hossein Alexei +Jan Ali +Jan Otto +Jan Bryn +Jan Hans +Miguel Ali +Miguel Celso +Miguel Ali diff --git a/test/sql/wcc_segfault.test b/test/sql/wcc_segfault.test new file mode 100644 index 00000000..8d5cb105 --- /dev/null +++ b/test/sql/wcc_segfault.test @@ -0,0 +1,32 @@ +# name: test/sql/wcc_segfault.test +# group: [sql] + +# require duckpgq +# +# +# statement ok +# create or replace table data_table as select * from read_csv("/Users/dljtw/git/duckpgq/test/python/links.tsv"); +# +# statement ok +# CREATE or replace TABLE devices AS SELECT a AS device FROM data_table UNION SELECT b AS device FROM data_table order by device; +# +# statement ok +# CREATE or replace TABLE edges AS SELECT a.rowid as a, b.rowid as b FROM data_table e join devices a on a.device = e.a join 
devices b on b.device = e.b; +# +# statement ok +# CREATE or replace TABLE nodes AS SELECT rowid as id from devices; +# +# statement ok +# -CREATE OR REPLACE PROPERTY GRAPH graph +# VERTEX TABLES ( +# nodes +# ) +# EDGE TABLES ( +# edges +# SOURCE KEY (a) REFERENCES nodes(id) +# DESTINATION KEY (b) REFERENCES nodes(id) +# LABEL connects +# ); +# +# statement ok +# -FROM weakly_connected_component(graph, nodes, connects); diff --git a/test/sql/with_clause.test b/test/sql/with_clause.test new file mode 100644 index 00000000..aa378df1 --- /dev/null +++ b/test/sql/with_clause.test @@ -0,0 +1,199 @@ +# name: test/sql/with_clause.test +# description: Testing queries where PGQ statement inside WITH statement +# group: [sql] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb + VERTEX TABLES ( + Person, + Message, + Forum, + Tag, + Organisation IN typemask(company, university) + ) + EDGE TABLES ( + Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) + DESTINATION KEY (Person2Id) REFERENCES Person (id) + LABEL Knows, + Forum_hasMember_Person SOURCE KEY (ForumId) REFERENCES Forum (id) + DESTINATION KEY (PersonId) REFERENCES Person (id) + LABEL hasMember, + Person_likes_Message SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (id) REFERENCES Message (id) + LABEL Likes, + person_workAt_Organisation SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (OrganisationId) REFERENCES Organisation (id) + LABEL worksAt, + Person_hasInterest_Tag SOURCE KEY (PersonId) REFERENCES Person (id) + DESTINATION KEY (TagId) REFERENCES Tag (id) + LABEL hasInterest +); + +statement ok +-select * FROM GRAPH_TABLE(snb MATCH (a:Person)); + +query I +-WITH foo as (SELECT id FROM GRAPH_TABLE(snb MATCH (a:Person))) select * from foo LIMIT 10; +---- +14 +16 +32 +2199023255557 +2199023255573 +2199023255594 +4398046511139 +6597069766702 +8796093022234 +8796093022237 + +query II +-WITH foo as (SELECT 
id FROM GRAPH_TABLE(snb MATCH (a:Person))), bar as (SELECT id from Person) select * from foo, bar LIMIT 10; +---- +14 14 +16 14 +32 14 +2199023255557 14 +2199023255573 14 +2199023255594 14 +4398046511139 14 +6597069766702 14 +8796093022234 14 +8796093022237 14 + +statement ok +-WITH foo AS (SELECT id, firstName FROM GRAPH_TABLE(snb MATCH (a:Person))) +SELECT * FROM foo LIMIT 10; + +statement ok +-WITH foo AS (SELECT id FROM GRAPH_TABLE(snb MATCH (a:Person))), + bar AS (SELECT id, firstName FROM Person WHERE id IN (SELECT id FROM foo)) +SELECT * FROM bar LIMIT 10; + +statement ok +-WITH foo AS (FROM GRAPH_TABLE(snb MATCH (a:Person)-[r:Knows]->(b:Person) COLUMNS (a.id as id, b.id as knows_id))) +SELECT * FROM foo LIMIT 10; + +statement ok +-WITH foo AS (FROM GRAPH_TABLE(snb MATCH (a:Person)-[r:Knows]->(b:Person) COLUMNS (a.id as id, b.id as knows_id))), + bar AS (SELECT id, COUNT(knows_id) AS knows_count FROM foo GROUP BY id) +SELECT * FROM bar LIMIT 10; + +statement ok +-WITH foo AS (SELECT id, firstName FROM GRAPH_TABLE(snb MATCH (a:Person))), + bar AS (SELECT id, title FROM GRAPH_TABLE(snb MATCH (f:Forum))) +SELECT foo.firstName, bar.title +FROM foo, bar +LIMIT 10; + +statement ok +-WITH person_aggregate AS ( + SELECT id, COUNT(friend_id) AS friend_count + FROM GRAPH_TABLE(snb MATCH (a:Person)-[r:Knows]->(b:Person) COLUMNS (a.id as id, b.id as friend_id)) + GROUP BY id +), message_likes AS ( + SELECT id, COUNT(person_id) AS like_count + FROM GRAPH_TABLE(snb MATCH (p:Person)-[r:Likes]->(m:Message) COLUMNS (m.id as id, p.id as person_id)) + GROUP BY id +) +SELECT * +FROM person_aggregate +JOIN message_likes ON person_aggregate.id = message_likes.id +LIMIT 10; + +statement ok +-WITH persons AS ( + SELECT id, firstName + FROM GRAPH_TABLE(snb MATCH (a:Person)) +), forums AS ( + SELECT id, title + FROM GRAPH_TABLE(snb MATCH (f:Forum)) +) +SELECT p.firstName, f.title +FROM persons p +CROSS JOIN forums f +LIMIT 10; + +statement ok +-WITH likes_per_person AS ( + SELECT 
person_id, COUNT(message_id) AS likes_count + FROM GRAPH_TABLE(snb MATCH (p:Person)-[r:Likes]->(m:Message) COLUMNS (p.id as person_id, m.id as message_id)) + GROUP BY person_id +), persons AS ( + SELECT id, firstName + FROM GRAPH_TABLE(snb MATCH (a:Person)) +) +SELECT persons.firstName, likes_per_person.likes_count +FROM persons +LEFT JOIN likes_per_person ON persons.id = likes_per_person.person_id +LIMIT 10; + +statement ok +-WITH person_with_friends AS ( + SELECT person_id, friend_id + FROM GRAPH_TABLE(snb MATCH (a:Person)-[r:Knows]->(b:Person) COLUMNS (a.id as person_id, b.id as friend_id)) +), persons AS ( + SELECT id, firstName + FROM GRAPH_TABLE(snb MATCH (a:Person)) +) +SELECT p.firstName, COUNT(pwf.friend_id) AS friend_count +FROM persons p +LEFT JOIN person_with_friends pwf ON p.id = pwf.person_id +GROUP BY p.firstName +LIMIT 10; + +statement ok +-WITH person_orgs AS ( + SELECT person_id, org_name + FROM GRAPH_TABLE(snb MATCH (p:Person)-[r:worksAt]->(o:Organisation) COLUMNS (p.id as person_id, o.name as org_name)) +), persons AS ( + SELECT id, firstName + FROM GRAPH_TABLE(snb MATCH (a:Person)) +) +SELECT p.firstName, po.org_name +FROM persons p +JOIN person_orgs po ON p.id = po.person_id +LIMIT 10; + +statement ok +-WITH person_companies AS ( + SELECT person_id, company_name + FROM GRAPH_TABLE(snb MATCH (p:Person)-[r:worksAt]->(c:Company) COLUMNS(p.id as person_id, c.name as company_name)) +), persons AS ( + SELECT id, firstName + FROM GRAPH_TABLE(snb MATCH (a:Person)) +) +SELECT p.firstName, pc.company_name +FROM persons p +JOIN person_companies pc ON p.id = pc.person_id +LIMIT 10; + +statement ok +-WITH person_universities AS ( + SELECT person_id, university_name + FROM GRAPH_TABLE(snb MATCH (p:Person)-[r:worksAt]->(u:University) COLUMNS (p.id as person_id, u.name as university_name)) +), persons AS ( + SELECT id, firstName + FROM GRAPH_TABLE(snb MATCH (a:Person)) +) +SELECT p.firstName, pu.university_name +FROM persons p +JOIN person_universities pu ON 
p.id = pu.person_id +LIMIT 10; + +statement ok +-WITH person_interests AS ( + SELECT person_id, tag_name + FROM GRAPH_TABLE(snb MATCH (p:Person)-[r:hasInterest]->(t:Tag) COLUMNS (p.id as person_id, t.name as tag_name)) +), persons AS ( + SELECT id, firstName + FROM GRAPH_TABLE(snb MATCH (a:Person)) +) +SELECT p.firstName, pi.tag_name +FROM persons p +JOIN person_interests pi ON p.id = pi.person_id +LIMIT 10; diff --git a/test/sql/with_statement_duckpgq.test b/test/sql/with_statement_duckpgq.test new file mode 100644 index 00000000..05b85669 --- /dev/null +++ b/test/sql/with_statement_duckpgq.test @@ -0,0 +1,74 @@ +# name: test/sql/with_statement_duckpgq.test +# description: Testing PGQ query and WITH in single query +# group: [sql] + +require duckpgq + +statement ok +import database 'duckdb/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb_projected +VERTEX TABLES (Message); + +query IIIIIII +-WITH message_count AS ( + SELECT count(*) as m_count + FROM Message m + WHERE m.creationDate < '2010-05-27 11:16:36.013' +) +SELECT year, isComment, + CASE WHEN m_length < 40 THEN 0 + WHEN m_length < 80 THEN 1 + WHEN m_length < 160 THEN 2 + ELSE 3 END as lengthCategory, + count(*) as messageCount, + avg(m_length) as averageMessageLength, + sum(m_length) as sumMessageLength, + count(*) / mc.m_count as percentageOfMessages +FROM GRAPH_TABLE(snb_projected + MATCH (message:Message where message.creationDate < '2010-05-27 11:16:36.013') + COLUMNS (date_part('year', message.creationDate::TIMESTAMP) as year, message.ImageFile is NULL as isComment, message.length as m_length, message.id) + ) tmp, message_count mc +GROUP BY year, isComment, lengthCategory, m_count +ORDER BY year DESC, isComment ASC, lengthCategory ASC; +---- +2010 false 0 63 0.0 0 0.9692307692307692 +2010 true 2 2 109.0 218 0.03076923076923077 + + +query II +-FROM GRAPH_TABLE (snb_projected + MATCH (m:message) + COLUMNS (m.id) + ) tmp, (SELECT id from message limit 1) +LIMIT 10; +---- +618475290624 
618475290624 +343597383683 618475290624 +343597383684 618475290624 +962072674309 618475290624 +962072674310 618475290624 +962072674311 618475290624 +962072674312 618475290624 +962072674313 618475290624 +962072674314 618475290624 +962072674315 618475290624 + +query II +-FROM (SELECT id from message limit 1), GRAPH_TABLE (snb_projected + MATCH (m:message) + COLUMNS (m.id) + ) tmp +LIMIT 10; +---- +618475290624 618475290624 +618475290624 343597383683 +618475290624 343597383684 +618475290624 962072674309 +618475290624 962072674310 +618475290624 962072674311 +618475290624 962072674312 +618475290624 962072674313 +618475290624 962072674314 +618475290624 962072674315 diff --git a/vcpkg.json b/vcpkg.json new file mode 100644 index 00000000..b97f01a6 --- /dev/null +++ b/vcpkg.json @@ -0,0 +1,10 @@ +{ + "dependencies": [ + "openssl" + ], + "vcpkg-configuration": { + "overlay-ports": [ + "./extension-ci-tools/vcpkg_ports" + ] + } +}