Skip to content

Commit

Permalink
Add Snowflake sink (#4589)
Browse files Browse the repository at this point in the history
This PR adds a simple snowflake sink, enabling writing data into a
snowflake database.

Under the hood, it uses arrow-adbc's snowflake driver, which does bulk
ingestion via file staging.

Fixes tenzir/issues#2031
  • Loading branch information
tobim authored Dec 23, 2024
2 parents 952250d 7a9ca90 commit f8c1f84
Show file tree
Hide file tree
Showing 23 changed files with 402 additions and 10 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/tenzir.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 579,7 @@ jobs:
cc: clang
cxx: clang
dependencies-script-path: scripts/macOS/install-dev-dependencies.sh
cmake-extra-flags: -DTENZIR_ENABLE_BUNDLED_CAF:BOOL=ON
cmake-extra-flags: -DTENZIR_ENABLE_BUNDLED_CAF:BOOL=ON -DTENZIR_PLUGINS_BLACKLIST=snowflake
bundled-plugins:
# macOS runners in GitHub Actions CI are very limited, so we
# prefer to have fewer jobs running and build the proprietary
Expand Down Expand Up @@ -656,7 656,8 @@ jobs:
./scripts/debian/install-fluent-bit.sh
- name: Install Dependencies (macOS)
if: ${{ matrix.tenzir.name == 'macOS' }}
run: ./scripts/macOS/install-dev-dependencies.sh
run: |
./scripts/macOS/install-dev-dependencies.sh
- name: Setup Python
if: ${{ matrix.tenzir.name == 'macOS' }}
uses: actions/setup-python@v5
Expand Down
32 changes: 31 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 416,16 @@ RUN cmake -S contrib/tenzir-plugins/platform -B build-platform -G Ninja \
DESTDIR=/plugin/platform cmake --install build-platform --strip --component Runtime && \
rm -rf build-platform

FROM plugins-source AS snowflake-plugin

COPY contrib/tenzir-plugins/snowflake ./contrib/tenzir-plugins/snowflake
RUN cmake -S contrib/tenzir-plugins/snowflake -B build-snowflake -G Ninja \
-D CMAKE_INSTALL_PREFIX:STRING="$PREFIX" && \
cmake --build build-snowflake --parallel && \
cmake --build build-snowflake --target integration && \
DESTDIR=/plugin/snowflake cmake --install build-snowflake --strip --component Runtime && \
rm -rf build-snowflake

FROM plugins-source AS to_splunk-plugin

COPY contrib/tenzir-plugins/to_splunk ./contrib/tenzir-plugins/to_splunk
Expand All @@ -438,7 448,7 @@ RUN cmake -S contrib/tenzir-plugins/vast -B build-vast -G Ninja \

# -- tenzir-ce -------------------------------------------------------------------

FROM tenzir-de AS tenzir-ce
FROM tenzir-de AS tenzir-ce-arm64

COPY --from=azure-log-analytics-plugin --chown=tenzir:tenzir /plugin/azure-log-analytics /
COPY --from=compaction-plugin --chown=tenzir:tenzir /plugin/compaction /
Expand All @@ -449,6 459,26 @@ COPY --from=platform-plugin --chown=tenzir:tenzir /plugin/platform /
COPY --from=to_splunk-plugin --chown=tenzir:tenzir /plugin/to_splunk /
COPY --from=vast-plugin --chown=tenzir:tenzir /plugin/vast /

FROM tenzir-ce-arm64 AS tenzir-ce-amd64

USER root:root

RUN if [ "$(uname -m)" = "x86_64" ]; then \
wget "https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb" && \
apt-get -y --no-install-recommends install \
./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
apt-get update && \
apt-get -y --no-install-recommends install libadbc-driver-manager103 libadbc-driver-snowflake103 && \
snowflake_sopath="$(ldconfig -p | grep snowflake | grep -o '[^ ]*$')"; echo ${snowflake_sopath}; ln -s ${snowflake_sopath} ${snowflake_sopath%.*} && \
rm -rf /var/lib/apt/lists/*; \
fi

COPY --from=snowflake-plugin --chown=tenzir:tenzir /plugin/snowflake /

USER tenzir:tenzir

FROM tenzir-ce-${TARGETARCH} AS tenzir-ce

# -- tenzir-node-ce ------------------------------------------------------------

FROM tenzir-ce AS tenzir-node-ce
Expand Down
2 changes: 2 additions & 0 deletions changelog/next/features/4589--snowflake-sink.md
Original file line number Diff line number Diff line change
@@ -0,0 1,2 @@
We have added a new `to_snowflake` sink operator, writing events into a
[snowflake](https://www.snowflake.com/) table.
2 changes: 1 addition & 1 deletion contrib/tenzir-plugins
2 changes: 2 additions & 0 deletions libtenzir/include/tenzir/detail/actor_metrics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 6,8 @@
// SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include "tenzir/series_builder.hpp"

namespace tenzir::detail {
Expand Down
10 changes: 10 additions & 0 deletions libtenzir/include/tenzir/detail/alarm_clock.hpp
Original file line number Diff line number Diff line change
@@ -1,3 1,13 @@
// _ _____ __________
// | | / / _ | / __/_ __/ Visibility
// | |/ / __ |_\ \ / / Across
// |___/_/ |_/___/ /_/ Space and Time
//
// SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <tenzir/detail/weak_run_delayed.hpp>

#include <caf/response_promise.hpp>
Expand Down
2 changes: 2 additions & 0 deletions libtenzir/include/tenzir/detail/url.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 6,8 @@
// SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <boost/url/url.hpp>
#include <fmt/format.h>
#include <fmt/ostream.h>
Expand Down
55 changes: 55 additions & 0 deletions nix/arrow-adbc-cpp/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 1,55 @@
{
lib,
stdenv,
fetchFromGitHub,
cmake,
arrow-adbc-go,
testers,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "arrow-adbc-cpp";
version = "1.3.0";

src = fetchFromGitHub {
owner = "apache";
repo = "arrow-adbc";
rev = "apache-arrow-adbc-15";
hash = "sha256-QRWVmUYNdMxQqe9dIBxcY8pY8aAbKIh3dhX3rzCGYI4=";
};

patches = [
./use-prebuilt-go-lib.patch
];

sourceRoot = "${finalAttrs.src.name}/c";

nativeBuildInputs = [ cmake ];

buildInputs = [
arrow-adbc-go
];

cmakeFlags = [
(lib.cmakeBool "ADBC_BUILD_SHARED" (!stdenv.hostPlatform.isStatic))
(lib.cmakeBool "ADBC_BUILD_STATIC" stdenv.hostPlatform.isStatic)
(lib.cmakeBool "ADBC_DRIVER_MANAGER" true)
(lib.cmakeBool "ADBC_DRIVER_SNOWFLAKE" true)
(lib.cmakeFeature "adbc_driver_snowflake_prebuilt"
"${arrow-adbc-go}/lib/snowflake${stdenv.hostPlatform.extensions.library}")
];

meta = with lib; {
description = "Database connectivity API standard and libraries for Apache Arrow ";
homepage = "https://arrow.apache.org/adbc/";
license = licenses.asl20;
platforms = platforms.unix;
maintainers = [ maintainers.tobim ];
pkgConfigModules = [
"arrow-adbc"
];
};

passthru = {
tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage;
};
})
58 changes: 58 additions & 0 deletions nix/arrow-adbc-cpp/use-prebuilt-go-lib.patch
Original file line number Diff line number Diff line change
@@ -0,0 1,58 @@
diff --git a/cmake_modules/BuildUtils.cmake b/cmake_modules/BuildUtils.cmake
index 88209ac5e..aefff1d5e 100644
--- a/cmake_modules/BuildUtils.cmake
b/cmake_modules/BuildUtils.cmake
@@ -310,7 310,7 @@ function(ADD_ARROW_LIB LIB_NAME)
if(BUILD_STATIC)
add_library(${LIB_NAME}_static STATIC ${LIB_DEPS})
target_compile_features(${LIB_NAME}_static PRIVATE cxx_std_11)
- set_property(TARGET ${LIB_NAME}_shared PROPERTY CXX_STANDARD_REQUIRED ON)
set_property(TARGET ${LIB_NAME}_static PROPERTY CXX_STANDARD_REQUIRED ON)
adbc_configure_target(${LIB_NAME}_static)
if(EXTRA_DEPS)
add_dependencies(${LIB_NAME}_static ${EXTRA_DEPS})
diff --git a/cmake_modules/GoUtils.cmake b/cmake_modules/GoUtils.cmake
index 085d46fef..5c0a043c5 100644
--- a/cmake_modules/GoUtils.cmake
b/cmake_modules/GoUtils.cmake
@@ -15,8 15,8 @@
# specific language governing permissions and limitations
# under the License.

-find_program(GO_BIN "go" REQUIRED)
-message(STATUS "Detecting Go executable: Found ${GO_BIN}")
#find_program(GO_BIN "go" REQUIRED)
#message(STATUS "Detecting Go executable: Found ${GO_BIN}")

function(add_go_lib GO_MOD_DIR GO_LIBNAME)
set(options)
@@ -131,13 131,8 @@ function(add_go_lib GO_MOD_DIR GO_LIBNAME)
add_custom_command(OUTPUT "${LIBOUT_SHARED}.${ADBC_FULL_SO_VERSION}"
WORKING_DIRECTORY ${GO_MOD_DIR}
DEPENDS ${ARG_SOURCES}
- COMMAND ${CMAKE_COMMAND} -E env ${GO_ENV_VARS} ${GO_BIN} build
- ${GO_BUILD_TAGS} "${GO_BUILD_FLAGS}" -o
- ${LIBOUT_SHARED}.${ADBC_FULL_SO_VERSION}
- -buildmode=c-shared ${GO_LDFLAGS} .
- COMMAND ${CMAKE_COMMAND} -E remove -f
- "${LIBOUT_SHARED}.${ADBC_SO_VERSION}.0.h"
- COMMENT "Building Go Shared lib ${GO_LIBNAME}"
COMMAND ${CMAKE_COMMAND} -E copy "${${GO_LIBNAME}_prebuilt}" "${LIBOUT_SHARED}.${ADBC_FULL_SO_VERSION}"
COMMENT "Copying Go Shared lib ${GO_LIBNAME}"
COMMAND_EXPAND_LISTS)

add_custom_command(OUTPUT "${LIBOUT_SHARED}.${ADBC_SO_VERSION}" "${LIBOUT_SHARED}"
@@ -226,11 221,8 @@ function(add_go_lib GO_MOD_DIR GO_LIBNAME)
add_custom_command(OUTPUT "${LIBOUT_STATIC}"
WORKING_DIRECTORY ${GO_MOD_DIR}
DEPENDS ${ARG_SOURCES}
- COMMAND ${CMAKE_COMMAND} -E env "${GO_ENV_VARS}" ${GO_BIN} build
- "${GO_BUILD_TAGS}" -o "${LIBOUT_STATIC}"
- -buildmode=c-archive "${GO_BUILD_FLAGS}" .
- COMMAND ${CMAKE_COMMAND} -E remove -f "${LIBOUT_HEADER}"
- COMMENT "Building Go Static lib ${GO_LIBNAME}"
COMMAND ${CMAKE_COMMAND} -E copy "${${GO_LIBNAME}_prebuilt}" "${LIBOUT_STATIC}"
COMMENT "Copying Go Static lib ${GO_LIBNAME}"
COMMAND_EXPAND_LISTS)

add_custom_target(${GO_LIBNAME}_static_target ALL DEPENDS "${LIBOUT_STATIC}")
103 changes: 103 additions & 0 deletions nix/arrow-adbc-go/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 1,103 @@
{
lib,
stdenv,
buildGoModule,
fetchFromGitHub,
}:
buildGoModule ({
pname = "arrow-adbc-go";
version = "1.3.0";

src = fetchFromGitHub {
owner = "apache";
repo = "arrow-adbc";
rev = "apache-arrow-adbc-15";
hash = "sha256-QRWVmUYNdMxQqe9dIBxcY8pY8aAbKIh3dhX3rzCGYI4=";
};

sourceRoot = "source/go/adbc";

proxyVendor = true;

vendorHash = "sha256- hUYaFvmySnz2rzDszejcwzoVoCe1lAoj8qNwfMEfp4=";

postUnpack = ''
rm -rf source/go/adbc/driver/flightsql/cmd
rm -rf source/go/adbc/driver/bigquery
rm -rf source/go/adbc/pkg/bigquery
'';

#subPackages = [
# "driver/snowflake/..."
#];

tags = [
"driverlib"
];

env = {
GOBIN = "${placeholder "out"}/lib";
NIX_DEBUG = 3;
};

#GOFLAGS = [
# "-shared"
#];

ldflags =
[
"-s"
"-w"
]
(if stdenv.hostPlatform.isStatic then [
"-buildmode=c-archive"
"-extar=${stdenv.cc.targetPrefix}ar"
] else [
"-buildmode=c-shared"
]);
# [ "-buildmode=c-archive" ];

doCheck = false;

postInstall = lib.optionalString stdenv.hostPlatform.isStatic ''
for f in $out/lib/*; do
mv $f $f.a
chmod -x $f.a
done
'';

meta = {
description = "Database connectivity API standard and libraries for Apache Arrow";
homepage = "https://arrow.apache.org/adbc/";
license = lib.licenses.asl20;
platforms = lib.platforms.unix;
maintainers = [ lib.maintainers.tobim ];
};
} // lib.optionalAttrs (!stdenv.hostPlatform.isStatic) {
buildPhase = ''
runHook preBuild
if [ -z "$enableParallelBuilding" ]; then
export NIX_BUILD_CORES=1
fi
cd pkg/snowflake
go build -tags=driverlib -buildmode=c-shared -o snowflake.so -v -p $NIX_BUILD_CORES .
runHook postBuild
'';
checkPhase = ''
runHook preCheck
go test -v -p $NIX_BUILD_CORES .
runHook postCheck
'';
installPhase = ''
runHook preInstall
mkdir -p $out/lib
cp snowflake.so $out/lib
runHook postInstall
'';
})
4 changes: 4 additions & 0 deletions nix/overlay.nix
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 175,8 @@ in {
doInstallCheck = false;
env.NIX_LDFLAGS = lib.optionalString stdenv.isDarwin "-lc abi";
});
arrow-adbc-cpp = prev.callPackage ./arrow-adbc-cpp { };
arrow-adbc-go = prev.callPackage ./arrow-adbc-go { };
zeromq =
if !isStatic
then prev.zeromq
Expand Down Expand Up @@ -509,6 511,8 @@ in {
ps.pipeline-manager
ps.platform
ps.vast
] lib.optionals (!isStatic) [
ps.snowflake
]);
};
toChecked =
Expand Down
3 changes: 3 additions & 0 deletions nix/tenzir/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 16,7 @@
curl,
libpcap,
arrow-cpp,
arrow-adbc-cpp,
aws-sdk-cpp-tenzir,
azure-sdk-for-cpp,
fast_float,
Expand Down Expand Up @@ -154,6 155,8 @@
simdjson
spdlog
xxHash
] lib.optionals (!isStatic) [
arrow-adbc-cpp
] lib.optionals isMusl [
jemalloc
];
Expand Down
1 change: 1 addition & 0 deletions nix/tenzir/plugins/names.nix
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 5,7 @@
"packages"
"pipeline-manager"
"platform"
"snowflake"
"to_splunk"
"vast"
]
5 changes: 3 additions & 2 deletions nix/tenzir/plugins/source.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 2,8 @@
"name": "tenzir-plugins",
"url": "[email protected]:tenzir/tenzir-plugins",
"ref": "main",
"rev": "3ce6627bebde90b702c724d9e2b1065522c983bb",
"rev": "24902ba81e6d3635414e15c8caac0aa533aa39e3",
"submodules": true,
"shallow": true
"shallow": true,
"allRefs": true
}
2 changes: 2 additions & 0 deletions plugins/azure-blob-storage/include/loader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 6,8 @@
// SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <tenzir/location.hpp>
#include <tenzir/plugin.hpp>

Expand Down
Loading

0 comments on commit f8c1f84

Please sign in to comment.