diff --git a/.github/workflows/tenzir.yaml b/.github/workflows/tenzir.yaml index 81c7157e310..db4338e6181 100644 --- a/.github/workflows/tenzir.yaml +++ b/.github/workflows/tenzir.yaml @@ -579,7 +579,7 @@ jobs: cc: clang cxx: clang++ dependencies-script-path: scripts/macOS/install-dev-dependencies.sh - cmake-extra-flags: -DTENZIR_ENABLE_BUNDLED_CAF:BOOL=ON + cmake-extra-flags: -DTENZIR_ENABLE_BUNDLED_CAF:BOOL=ON -DTENZIR_PLUGINS_BLACKLIST=snowflake bundled-plugins: # macOS runners in GitHub Actions CI are very limited, so we # prefer to have fewer jobs running and build the proprietary @@ -656,7 +656,8 @@ jobs: ./scripts/debian/install-fluent-bit.sh - name: Install Dependencies (macOS) if: ${{ matrix.tenzir.name == 'macOS' }} - run: ./scripts/macOS/install-dev-dependencies.sh + run: | + ./scripts/macOS/install-dev-dependencies.sh - name: Setup Python if: ${{ matrix.tenzir.name == 'macOS' }} uses: actions/setup-python@v5 diff --git a/Dockerfile b/Dockerfile index 54fc8f73cd8..1f3ed63c4d3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -416,6 +416,16 @@ RUN cmake -S contrib/tenzir-plugins/platform -B build-platform -G Ninja \ DESTDIR=/plugin/platform cmake --install build-platform --strip --component Runtime && \ rm -rf build-platform +FROM plugins-source AS snowflake-plugin + +COPY contrib/tenzir-plugins/snowflake ./contrib/tenzir-plugins/snowflake +RUN cmake -S contrib/tenzir-plugins/snowflake -B build-snowflake -G Ninja \ + -D CMAKE_INSTALL_PREFIX:STRING="$PREFIX" && \ + cmake --build build-snowflake --parallel && \ + cmake --build build-snowflake --target integration && \ + DESTDIR=/plugin/snowflake cmake --install build-snowflake --strip --component Runtime && \ + rm -rf build-snowflake + FROM plugins-source AS to_splunk-plugin COPY contrib/tenzir-plugins/to_splunk ./contrib/tenzir-plugins/to_splunk @@ -438,7 +448,7 @@ RUN cmake -S contrib/tenzir-plugins/vast -B build-vast -G Ninja \ # -- tenzir-ce ------------------------------------------------------------------- 
-FROM tenzir-de AS tenzir-ce +FROM tenzir-de AS tenzir-ce-arm64 COPY --from=azure-log-analytics-plugin --chown=tenzir:tenzir /plugin/azure-log-analytics / COPY --from=compaction-plugin --chown=tenzir:tenzir /plugin/compaction / @@ -449,6 +459,26 @@ COPY --from=platform-plugin --chown=tenzir:tenzir /plugin/platform / COPY --from=to_splunk-plugin --chown=tenzir:tenzir /plugin/to_splunk / COPY --from=vast-plugin --chown=tenzir:tenzir /plugin/vast / +FROM tenzir-ce-arm64 AS tenzir-ce-amd64 + +USER root:root + +RUN if [ "$(uname -m)" = "x86_64" ]; then \ + wget "https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb" && \ + apt-get -y --no-install-recommends install \ + ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ + apt-get update && \ + apt-get -y --no-install-recommends install libadbc-driver-manager103 libadbc-driver-snowflake103 && \ + snowflake_sopath="$(ldconfig -p | grep snowflake | grep -o '[^ ]*$')"; echo ${snowflake_sopath}; ln -s ${snowflake_sopath} ${snowflake_sopath%.*} && \ + rm -rf /var/lib/apt/lists/*; \ + fi + +COPY --from=snowflake-plugin --chown=tenzir:tenzir /plugin/snowflake / + +USER tenzir:tenzir + +FROM tenzir-ce-${TARGETARCH} AS tenzir-ce + # -- tenzir-node-ce ------------------------------------------------------------ FROM tenzir-ce AS tenzir-node-ce diff --git a/changelog/next/features/4589--snowflake-sink.md b/changelog/next/features/4589--snowflake-sink.md new file mode 100644 index 00000000000..21586fc43b7 --- /dev/null +++ b/changelog/next/features/4589--snowflake-sink.md @@ -0,0 +1,2 @@ +We have added a new `to_snowflake` sink operator, writing events into a +[snowflake](https://www.snowflake.com/) table. 
diff --git a/contrib/tenzir-plugins b/contrib/tenzir-plugins index 3ce6627bebd..24902ba81e6 160000 --- a/contrib/tenzir-plugins +++ b/contrib/tenzir-plugins @@ -1 +1 @@ -Subproject commit 3ce6627bebde90b702c724d9e2b1065522c983bb +Subproject commit 24902ba81e6d3635414e15c8caac0aa533aa39e3 diff --git a/libtenzir/include/tenzir/detail/actor_metrics.hpp b/libtenzir/include/tenzir/detail/actor_metrics.hpp index a47515b6488..07193000f59 100644 --- a/libtenzir/include/tenzir/detail/actor_metrics.hpp +++ b/libtenzir/include/tenzir/detail/actor_metrics.hpp @@ -6,6 +6,8 @@ // SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors // SPDX-License-Identifier: BSD-3-Clause +#pragma once + #include "tenzir/series_builder.hpp" namespace tenzir::detail { diff --git a/libtenzir/include/tenzir/detail/alarm_clock.hpp b/libtenzir/include/tenzir/detail/alarm_clock.hpp index 7c40ca1a6a9..7f2a4aa177b 100644 --- a/libtenzir/include/tenzir/detail/alarm_clock.hpp +++ b/libtenzir/include/tenzir/detail/alarm_clock.hpp @@ -1,3 +1,13 @@ +// _ _____ __________ +// | | / / _ | / __/_ __/ Visibility +// | |/ / __ |_\ \ / / Across +// |___/_/ |_/___/ /_/ Space and Time +// +// SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + #include #include diff --git a/libtenzir/include/tenzir/detail/url.hpp b/libtenzir/include/tenzir/detail/url.hpp index 8cc54adb13e..ae4622138ba 100644 --- a/libtenzir/include/tenzir/detail/url.hpp +++ b/libtenzir/include/tenzir/detail/url.hpp @@ -6,6 +6,8 @@ // SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors // SPDX-License-Identifier: BSD-3-Clause +#pragma once + #include #include #include diff --git a/nix/arrow-adbc-cpp/default.nix b/nix/arrow-adbc-cpp/default.nix new file mode 100644 index 00000000000..8fd0b7f0608 --- /dev/null +++ b/nix/arrow-adbc-cpp/default.nix @@ -0,0 +1,55 @@ +{ + lib, + stdenv, + fetchFromGitHub, + cmake, + arrow-adbc-go, + testers, +}: +stdenv.mkDerivation (finalAttrs: { + 
pname = "arrow-adbc-cpp"; + version = "1.3.0"; + + src = fetchFromGitHub { + owner = "apache"; + repo = "arrow-adbc"; + rev = "apache-arrow-adbc-15"; + hash = "sha256-QRWVmUYNdMxQqe9dIBxcY8pY8aAbKIh3dhX3rzCGYI4="; + }; + + patches = [ + ./use-prebuilt-go-lib.patch + ]; + + sourceRoot = "${finalAttrs.src.name}/c"; + + nativeBuildInputs = [ cmake ]; + + buildInputs = [ + arrow-adbc-go + ]; + + cmakeFlags = [ + (lib.cmakeBool "ADBC_BUILD_SHARED" (!stdenv.hostPlatform.isStatic)) + (lib.cmakeBool "ADBC_BUILD_STATIC" stdenv.hostPlatform.isStatic) + (lib.cmakeBool "ADBC_DRIVER_MANAGER" true) + (lib.cmakeBool "ADBC_DRIVER_SNOWFLAKE" true) + (lib.cmakeFeature "adbc_driver_snowflake_prebuilt" + "${arrow-adbc-go}/lib/snowflake${stdenv.hostPlatform.extensions.library}") + ]; + + meta = with lib; { + description = "Database connectivity API standard and libraries for Apache Arrow "; + homepage = "https://arrow.apache.org/adbc/"; + license = licenses.asl20; + platforms = platforms.unix; + maintainers = [ maintainers.tobim ]; + pkgConfigModules = [ + "arrow-adbc" + ]; + }; + + passthru = { + tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage; + }; +}) diff --git a/nix/arrow-adbc-cpp/use-prebuilt-go-lib.patch b/nix/arrow-adbc-cpp/use-prebuilt-go-lib.patch new file mode 100644 index 00000000000..864bc27aae0 --- /dev/null +++ b/nix/arrow-adbc-cpp/use-prebuilt-go-lib.patch @@ -0,0 +1,58 @@ +diff --git a/cmake_modules/BuildUtils.cmake b/cmake_modules/BuildUtils.cmake +index 88209ac5e..aefff1d5e 100644 +--- a/cmake_modules/BuildUtils.cmake ++++ b/cmake_modules/BuildUtils.cmake +@@ -310,7 +310,7 @@ function(ADD_ARROW_LIB LIB_NAME) + if(BUILD_STATIC) + add_library(${LIB_NAME}_static STATIC ${LIB_DEPS}) + target_compile_features(${LIB_NAME}_static PRIVATE cxx_std_11) +- set_property(TARGET ${LIB_NAME}_shared PROPERTY CXX_STANDARD_REQUIRED ON) ++ set_property(TARGET ${LIB_NAME}_static PROPERTY CXX_STANDARD_REQUIRED ON) + adbc_configure_target(${LIB_NAME}_static) + 
if(EXTRA_DEPS) + add_dependencies(${LIB_NAME}_static ${EXTRA_DEPS}) +diff --git a/cmake_modules/GoUtils.cmake b/cmake_modules/GoUtils.cmake +index 085d46fef..5c0a043c5 100644 +--- a/cmake_modules/GoUtils.cmake ++++ b/cmake_modules/GoUtils.cmake +@@ -15,8 +15,8 @@ + # specific language governing permissions and limitations + # under the License. + +-find_program(GO_BIN "go" REQUIRED) +-message(STATUS "Detecting Go executable: Found ${GO_BIN}") ++#find_program(GO_BIN "go" REQUIRED) ++#message(STATUS "Detecting Go executable: Found ${GO_BIN}") + + function(add_go_lib GO_MOD_DIR GO_LIBNAME) + set(options) +@@ -131,13 +131,8 @@ function(add_go_lib GO_MOD_DIR GO_LIBNAME) + add_custom_command(OUTPUT "${LIBOUT_SHARED}.${ADBC_FULL_SO_VERSION}" + WORKING_DIRECTORY ${GO_MOD_DIR} + DEPENDS ${ARG_SOURCES} +- COMMAND ${CMAKE_COMMAND} -E env ${GO_ENV_VARS} ${GO_BIN} build +- ${GO_BUILD_TAGS} "${GO_BUILD_FLAGS}" -o +- ${LIBOUT_SHARED}.${ADBC_FULL_SO_VERSION} +- -buildmode=c-shared ${GO_LDFLAGS} . +- COMMAND ${CMAKE_COMMAND} -E remove -f +- "${LIBOUT_SHARED}.${ADBC_SO_VERSION}.0.h" +- COMMENT "Building Go Shared lib ${GO_LIBNAME}" ++ COMMAND ${CMAKE_COMMAND} -E copy "${${GO_LIBNAME}_prebuilt}" "${LIBOUT_SHARED}.${ADBC_FULL_SO_VERSION}" ++ COMMENT "Copying Go Shared lib ${GO_LIBNAME}" + COMMAND_EXPAND_LISTS) + + add_custom_command(OUTPUT "${LIBOUT_SHARED}.${ADBC_SO_VERSION}" "${LIBOUT_SHARED}" +@@ -226,11 +221,8 @@ function(add_go_lib GO_MOD_DIR GO_LIBNAME) + add_custom_command(OUTPUT "${LIBOUT_STATIC}" + WORKING_DIRECTORY ${GO_MOD_DIR} + DEPENDS ${ARG_SOURCES} +- COMMAND ${CMAKE_COMMAND} -E env "${GO_ENV_VARS}" ${GO_BIN} build +- "${GO_BUILD_TAGS}" -o "${LIBOUT_STATIC}" +- -buildmode=c-archive "${GO_BUILD_FLAGS}" . 
+- COMMAND ${CMAKE_COMMAND} -E remove -f "${LIBOUT_HEADER}" +- COMMENT "Building Go Static lib ${GO_LIBNAME}" ++ COMMAND ${CMAKE_COMMAND} -E copy "${${GO_LIBNAME}_prebuilt}" "${LIBOUT_STATIC}" ++ COMMENT "Copying Go Static lib ${GO_LIBNAME}" + COMMAND_EXPAND_LISTS) + + add_custom_target(${GO_LIBNAME}_static_target ALL DEPENDS "${LIBOUT_STATIC}") diff --git a/nix/arrow-adbc-go/default.nix b/nix/arrow-adbc-go/default.nix new file mode 100644 index 00000000000..75778de0da2 --- /dev/null +++ b/nix/arrow-adbc-go/default.nix @@ -0,0 +1,103 @@ +{ + lib, + stdenv, + buildGoModule, + fetchFromGitHub, +}: +buildGoModule ({ + pname = "arrow-adbc-go"; + version = "1.3.0"; + + src = fetchFromGitHub { + owner = "apache"; + repo = "arrow-adbc"; + rev = "apache-arrow-adbc-15"; + hash = "sha256-QRWVmUYNdMxQqe9dIBxcY8pY8aAbKIh3dhX3rzCGYI4="; + }; + + sourceRoot = "source/go/adbc"; + + proxyVendor = true; + + vendorHash = "sha256-+hUYaFvmySnz2rzDszejcwzoVoCe1lAoj8qNwfMEfp4="; + + postUnpack = '' + rm -rf source/go/adbc/driver/flightsql/cmd + rm -rf source/go/adbc/driver/bigquery + rm -rf source/go/adbc/pkg/bigquery + ''; + + #subPackages = [ + # "driver/snowflake/..." 
+ #]; + + tags = [ + "driverlib" + ]; + + env = { + GOBIN = "${placeholder "out"}/lib"; + NIX_DEBUG = 3; + }; + + #GOFLAGS = [ + # "-shared" + #]; + + ldflags = + [ + "-s" + "-w" + ] + ++ (if stdenv.hostPlatform.isStatic then [ + "-buildmode=c-archive" + "-extar=${stdenv.cc.targetPrefix}ar" + ] else [ + "-buildmode=c-shared" + ]); + #++ [ "-buildmode=c-archive" ]; + + doCheck = false; + + postInstall = lib.optionalString stdenv.hostPlatform.isStatic '' + for f in $out/lib/*; do + mv $f $f.a + chmod -x $f.a + done + ''; + + meta = { + description = "Database connectivity API standard and libraries for Apache Arrow"; + homepage = "https://arrow.apache.org/adbc/"; + license = lib.licenses.asl20; + platforms = lib.platforms.unix; + maintainers = [ lib.maintainers.tobim ]; + }; +} // lib.optionalAttrs (!stdenv.hostPlatform.isStatic) { + buildPhase = '' + runHook preBuild + + if [ -z "$enableParallelBuilding" ]; then + export NIX_BUILD_CORES=1 + fi + cd pkg/snowflake + go build -tags=driverlib -buildmode=c-shared -o snowflake.so -v -p $NIX_BUILD_CORES . + + runHook postBuild + ''; + checkPhase = '' + runHook preCheck + + go test -v -p $NIX_BUILD_CORES . 
+ + runHook postCheck + ''; + installPhase = '' + runHook preInstall + + mkdir -p $out/lib + cp snowflake.so $out/lib + + runHook postInstall + ''; +}) diff --git a/nix/overlay.nix b/nix/overlay.nix index 8a4a9ec2901..240b730d7e6 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -175,6 +175,8 @@ in { doInstallCheck = false; env.NIX_LDFLAGS = lib.optionalString stdenv.isDarwin "-lc++abi"; }); + arrow-adbc-cpp = prev.callPackage ./arrow-adbc-cpp { }; + arrow-adbc-go = prev.callPackage ./arrow-adbc-go { }; zeromq = if !isStatic then prev.zeromq @@ -509,6 +511,8 @@ in { ps.pipeline-manager ps.platform ps.vast + ] ++ lib.optionals (!isStatic) [ + ps.snowflake ]); }; toChecked = diff --git a/nix/tenzir/default.nix b/nix/tenzir/default.nix index c92c7e7966f..3044ec8b600 100644 --- a/nix/tenzir/default.nix +++ b/nix/tenzir/default.nix @@ -16,6 +16,7 @@ curl, libpcap, arrow-cpp, + arrow-adbc-cpp, aws-sdk-cpp-tenzir, azure-sdk-for-cpp, fast_float, @@ -154,6 +155,8 @@ simdjson spdlog xxHash + ] ++ lib.optionals (!isStatic) [ + arrow-adbc-cpp ] ++ lib.optionals isMusl [ jemalloc ]; diff --git a/nix/tenzir/plugins/names.nix b/nix/tenzir/plugins/names.nix index c260c4b1611..4aa2b1bff46 100644 --- a/nix/tenzir/plugins/names.nix +++ b/nix/tenzir/plugins/names.nix @@ -5,6 +5,7 @@ "packages" "pipeline-manager" "platform" + "snowflake" "to_splunk" "vast" ] diff --git a/nix/tenzir/plugins/source.json b/nix/tenzir/plugins/source.json index 1296d541aad..467663ab5cb 100644 --- a/nix/tenzir/plugins/source.json +++ b/nix/tenzir/plugins/source.json @@ -2,7 +2,8 @@ "name": "tenzir-plugins", "url": "git@github.com:tenzir/tenzir-plugins", "ref": "main", - "rev": "3ce6627bebde90b702c724d9e2b1065522c983bb", + "rev": "24902ba81e6d3635414e15c8caac0aa533aa39e3", "submodules": true, - "shallow": true + "shallow": true, + "allRefs": true } diff --git a/plugins/azure-blob-storage/include/loader.hpp b/plugins/azure-blob-storage/include/loader.hpp index bf7a95c2800..f59a111c24d 100644 --- 
a/plugins/azure-blob-storage/include/loader.hpp +++ b/plugins/azure-blob-storage/include/loader.hpp @@ -6,6 +6,8 @@ // SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors // SPDX-License-Identifier: BSD-3-Clause +#pragma once + #include #include diff --git a/plugins/azure-blob-storage/include/saver.hpp b/plugins/azure-blob-storage/include/saver.hpp index 5ed852b2979..8c762108f74 100644 --- a/plugins/azure-blob-storage/include/saver.hpp +++ b/plugins/azure-blob-storage/include/saver.hpp @@ -6,6 +6,8 @@ // SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors // SPDX-License-Identifier: BSD-3-Clause +#pragma once + #include #include #include diff --git a/plugins/fluent-bit/include/fluent-bit/fluent-bit_operator.hpp b/plugins/fluent-bit/include/fluent-bit/fluent-bit_operator.hpp index da70ffc6bd1..a8bd466126e 100644 --- a/plugins/fluent-bit/include/fluent-bit/fluent-bit_operator.hpp +++ b/plugins/fluent-bit/include/fluent-bit/fluent-bit_operator.hpp @@ -6,6 +6,8 @@ // SPDX-FileCopyrightText: (c) 2024 The VAST Contributors // SPDX-License-Identifier: BSD-3-Clause +#pragma once + #include #include #include diff --git a/scripts/debian/install-dev-dependencies.sh b/scripts/debian/install-dev-dependencies.sh index fca3ed7a76e..f2cd3ecd4a3 100755 --- a/scripts/debian/install-dev-dependencies.sh +++ b/scripts/debian/install-dev-dependencies.sh @@ -70,9 +70,16 @@ apt-get update # The apt download sometimes fails with a 403. We employ a similar workaround as # arrow itself: https://github.com/apache/arrow/pull/36836. # See also: https://github.com/apache/arrow/issues/35292. 
-apt-get -y --no-install-recommends install libarrow-dev=18.0.0-1 libprotobuf-dev libparquet-dev=18.0.0-1 || \ - apt-get -y --no-install-recommends install libarrow-dev=18.0.0-1 libprotobuf-dev libparquet-dev=18.0.0-1 || \ - apt-get -y --no-install-recommends install libarrow-dev=18.0.0-1 libprotobuf-dev libparquet-dev=18.0.0-1 +apt-get -y --no-install-recommends install -o 'Acquire::Retries=3' \ + libarrow-dev=18.0.0-1 \ + libprotobuf-dev \ + libparquet-dev=18.0.0-1 + +if [ "$(uname -m)" == "x86_64" ]; then + apt-get -y --no-install-recommends install -o 'Acquire::Retries=3' \ + libadbc-driver-snowflake-dev \ + libadbc-driver-manager-dev +fi rm ./"apache-arrow-apt-source-latest-${codename}.deb" # Node 18.x and Yarn diff --git a/scripts/macOS/install-arrow-adbc.sh b/scripts/macOS/install-arrow-adbc.sh new file mode 100755 index 00000000000..dc5ca5aaa2d --- /dev/null +++ b/scripts/macOS/install-arrow-adbc.sh @@ -0,0 +1,12 @@ +#! /usr/bin/env bash + +set -euo pipefail + +brew --version +brew install --overwrite go + +git clone https://github.com/apache/arrow-adbc.git +cd arrow-adbc +cmake -B build c/ -DCMAKE_BUILD_TYPE=Release -DADBC_DRIVER_SNOWFLAKE=ON -DADBC_DRIVER_MANAGER=ON +cmake --build build +cmake --install build diff --git a/shell.nix b/shell.nix index a88b268614f..b9f072823d7 100644 --- a/shell.nix +++ b/shell.nix @@ -36,7 +36,7 @@ in pkgs.pandoc ]; # To build libcaf_openssl with bundled CAF. - buildInputs = [pkgs.openssl]; + buildInputs = [pkgs.openssl pkgs.arrow-adbc-go]; shellHook = '' # Use editable mode for python code part of the python operator. 
This # makes changes to the python code observable in the python operator diff --git a/tenzir/CMakeLists.txt b/tenzir/CMakeLists.txt index fd11f996546..d526c8c9c56 100644 --- a/tenzir/CMakeLists.txt +++ b/tenzir/CMakeLists.txt @@ -203,6 +203,12 @@ set(TENZIR_PLUGINS ${plugins_default} CACHE STRING "Specify a list of plugins to build with Tenzir (supports globbing)") +set(TENZIR_PLUGINS_BLACKLIST + "" + CACHE + STRING + "Specify a list of plugins to remove after the globs in TENZIR_PLUGINS are expanded." +) cmake_dependent_option( TENZIR_ENABLE_STATIC_PLUGINS "Force plugins to be linked statically" OFF "NOT TENZIR_ENABLE_STATIC_EXECUTABLE" ON) @@ -227,6 +233,10 @@ if (TENZIR_PLUGINS) OR NOT EXISTS "${plugin_source_dir}/CMakeLists.txt") continue() endif () + get_filename_component(plugin_source_name "${plugin_source_dir}" NAME) + if (plugin_source_name IN_LIST TENZIR_PLUGINS_BLACKLIST) + continue() + endif () get_filename_component(plugin_binary_dir "${plugin_source_dir}" NAME) string(PREPEND plugin_binary_dir "${PROJECT_BINARY_DIR}/plugins/") add_subdirectory("${plugin_source_dir}" "${plugin_binary_dir}") diff --git a/web/docs/tql2/operators.md b/web/docs/tql2/operators.md index 63214f2507f..fbfeded0936 100644 --- a/web/docs/tql2/operators.md +++ b/web/docs/tql2/operators.md @@ -97,6 +97,7 @@ Operator | Description | Example [`to_fluent_bit`](./operators/to_fluent_bit.md) | Sends events to Fluent Bit| `to_fluent_bit "elasticsearch" …` [`to_hive`](./operators/to_hive.md) | Writes events using hive partitioning | `to_hive "s3://…", partition_by=[x]` [`to_opensearch`](./operators/to_opensearch.md) | Sends incoming events to the OpenSearch Bulk API | `to_opensearch 'localhost:9200", ...` +[`to_snowflake`](./operators/to_snowflake.md) | Sends incoming events to a Snowflake database | `to_snowflake account_identifier="…` [`to_splunk`](./operators/to_splunk.md) | Sends incoming events to a Splunk HEC | `to_splunk "localhost:8088", …` #### Bytes diff --git
a/web/docs/tql2/operators/to_snowflake.md b/web/docs/tql2/operators/to_snowflake.md new file mode 100644 index 00000000000..695f193008d --- /dev/null +++ b/web/docs/tql2/operators/to_snowflake.md @@ -0,0 +1,84 @@ +# to_snowflake + +Sends events to a [snowflake](https://www.snowflake.com/) database via bulk ingestion. + +```tql +to_snowflake account_identifier=string, user_name=string, password=string, + database=string, schema=string, table=string, + [ingest_mode=string] +``` + +:::note +This plugin is currently only available in the amd64 docker images. +::: + +## Description + +The `snowflake` operator makes it possible to upload events to a snowflake database. + +It uploads the events via bulk-ingestion under the hood and then copies them into the target table. + +It supports nested types as +[snowflake semi-structured types](https://docs.snowflake.com/en/sql-reference/data-types-semistructured). +Alternatively, you can use Tenzir's [`flatten` function](../functions/flatten.md) +before the snowflake sink. + +### `account_identifier = string` + +The [snowflake account identifier](https://docs.snowflake.com/en/user-guide/admin-account-identifier) to use. + +### `user_name = string` + +The snowflake user name. The user must have the +[`CREATE STAGE`](https://docs.snowflake.com/en/sql-reference/sql/create-stage#access-control-requirements) +privilege on the given schema. + +### `password = string` + +The password for the user. + +### `database = string` + +The [snowflake database](https://docs.snowflake.com/en/sql-reference/ddl-database) +to write to. The user must be allowed to access it. + +### `schema = string` + +The [snowflake schema](https://docs.snowflake.com/en/sql-reference/ddl-database) +to use. The user must be allowed to access it. + +### `table = string` + +The name of the table that should be used/created. The user must have the required +permissions to create/write to it.
+ +Table columns that are not in the event will be null, while event fields +that are not in the table will be dropped. Type mismatches between the table and +events are a hard error. + +### `ingest_mode = string (optional)` + +You can set the ingest mode to one of three options: + +* `"create_append"`: (default) Creates the table if it does not exist, otherwise appends to it. +* `"create"`: creates the table, causing an error if it already exists. +* `"append"`: appends to the table, causing an error if it does not exist. + +In case the operator creates the table it will use the first event to infer +the columns. + +## Examples + +Upload `suricata.alert` events to a table `TENZIR` in `MY_DB@SURICATA_ALERT`: + +```tql
export
where @name == "suricata.alert"
to_snowflake \
  account_identifier="asldyuf-xgb47555",
  user_name="tenzir_user",
  password="password1234",
  database="MY_DB",
  schema="SURICATA_ALERT",
  table="TENZIR"
```