diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index dcad89f30cb..27590205548 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -271,6 +271,10 @@ jobs: }, "enable_core_check":false }, + {"test":"gpcontrib-yagp-hooks-collector", + "make_configs":["gpcontrib/yagp_hooks_collector:installcheck"], + "extension":"yagp_hooks_collector" + }, {"test":"ic-expandshrink", "make_configs":["src/test/isolation2:installcheck-expandshrink"] }, @@ -530,10 +534,11 @@ jobs: if: needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + CONFIGURE_EXTRA_OPTS: --with-yagp-hooks-collector run: | set -eo pipefail chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi @@ -1390,6 +1395,7 @@ jobs: if: success() && needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + BUILD_DESTINATION: /usr/local/cloudberry-db shell: bash {0} run: | set -o pipefail @@ -1419,6 +1425,30 @@ jobs: PG_OPTS="$PG_OPTS -c optimizer=${{ matrix.pg_settings.optimizer }}" fi + # Create extension if required + if [[ "${{ matrix.extension != '' }}" == "true" ]]; then + case "${{ matrix.extension }}" in + yagp_hooks_collector) + if ! 
su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ + source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ + gpconfig -c shared_preload_libraries -v 'yagp_hooks_collector' && \ + gpstop -ra && \ + echo 'CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; \ + SHOW shared_preload_libraries; \ + TABLE pg_extension;' | \ + psql postgres" + then + echo "Error creating yagp_hooks_collector extension" + exit 1 + fi + ;; + *) + echo "Unknown extension: ${{ matrix.extension }}" + exit 1 + ;; + esac + fi + if [[ "${{ matrix.pg_settings.default_table_access_method != '' }}" == "true" ]]; then PG_OPTS="$PG_OPTS -c default_table_access_method=${{ matrix.pg_settings.default_table_access_method }}" fi diff --git a/configure b/configure index 49362c1f015..1b06e6965ea 100755 --- a/configure +++ b/configure @@ -698,6 +698,7 @@ BISON MKDIR_P LN_S TAR +USE_MDBLOCALES install_bin INSTALL_DATA INSTALL_SCRIPT @@ -721,6 +722,7 @@ with_apr_config with_libcurl with_rt with_zstd +with_yagp_hooks_collector with_libbz2 LZ4_LIBS LZ4_CFLAGS @@ -941,10 +943,12 @@ with_zlib with_lz4 with_libbz2 with_zstd +with_yagp_hooks_collector with_rt with_libcurl with_apr_config with_gnu_ld +with_mdblocales with_ssl with_openssl enable_openssl_redirect @@ -1693,6 +1697,7 @@ Optional Packages: --without-libcurl do not use libcurl --with-apr-config=PATH path to apr-1-config utility --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --without-mdblocales build without MDB locales --with-ssl=LIB use LIB for SSL/TLS support (openssl) --with-openssl obsolete spelling of --with-ssl=openssl @@ -2909,7 +2914,6 @@ PG_PACKAGE_VERSION=14.4 - ac_aux_dir= for ac_dir in config "$srcdir"/config; do if test -f "$ac_dir/install-sh"; then @@ -11148,6 +11152,32 @@ $as_echo "yes" >&6; } fi fi +# +# yagp_hooks_collector +# + + + +# Check whether --with-yagp-hooks-collector was given. 
+if test "${with_yagp_hooks_collector+set}" = set; then : + withval=$with_yagp_hooks_collector; + case $withval in + yes) + : + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-yagp-hooks-collector option" "$LINENO" 5 + ;; + esac + +else + with_yagp_hooks_collector=no + +fi + # # Realtime library # @@ -12208,6 +12238,38 @@ case $INSTALL in esac +# +# MDB locales +# + + + + +# Check whether --with-mdblocales was given. +if test "${with_mdblocales+set}" = set; then : + withval=$with_mdblocales; + case $withval in + yes) + +$as_echo "#define USE_MDBLOCALES 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-mdblocales option" "$LINENO" 5 + ;; + esac + +else + with_mdblocales=no + +fi + + + + if test -z "$TAR"; then for ac_prog in tar do @@ -12844,6 +12906,56 @@ $as_echo "${python_libspec} ${python_additional_libs}" >&6; } +fi + +if test "$with_mdblocales" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mdb_setlocale in -lmdblocales" >&5 +$as_echo_n "checking for mdb_setlocale in -lmdblocales... " >&6; } +if ${ac_cv_lib_mdblocales_mdb_setlocale+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmdblocales $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char mdb_setlocale (); +int +main () +{ +return mdb_setlocale (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_mdblocales_mdb_setlocale=yes +else + ac_cv_lib_mdblocales_mdb_setlocale=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mdblocales_mdb_setlocale" >&5 +$as_echo "$ac_cv_lib_mdblocales_mdb_setlocale" >&6; } +if test "x$ac_cv_lib_mdblocales_mdb_setlocale" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBMDBLOCALES 1 +_ACEOF + + LIBS="-lmdblocales $LIBS" + +else + as_fn_error $? "mdblocales library not found" "$LINENO" 5 +fi + fi if test x"$cross_compiling" = x"yes" && test -z "$with_system_tzdata"; then @@ -17065,6 +17177,17 @@ fi done +fi + +if test "$with_mdblocales" = yes; then + ac_fn_c_check_header_mongrel "$LINENO" "mdblocales.h" "ac_cv_header_mdblocales_h" "$ac_includes_default" +if test "x$ac_cv_header_mdblocales_h" = xyes; then : + +else + as_fn_error $? "mdblocales header not found." "$LINENO" 5 +fi + + fi if test "$with_gssapi" = yes ; then diff --git a/configure.ac b/configure.ac index 8bfdcedf7f1..792878fde4b 100644 --- a/configure.ac +++ b/configure.ac @@ -1365,6 +1365,13 @@ PGAC_ARG_BOOL(with, zstd, yes, [do not build with Zstandard], AC_MSG_RESULT([$with_zstd]) AC_SUBST(with_zstd) +# +# yagp_hooks_collector +# +PGAC_ARG_BOOL(with, yagp_hooks_collector, no, + [build with YAGP hooks collector extension]) +AC_SUBST(with_yagp_hooks_collector) + if test "$with_zstd" = yes; then dnl zstd_errors.h was renamed from error_public.h in v1.4.0 PKG_CHECK_MODULES([ZSTD], [libzstd >= 1.4.0]) @@ -1462,6 +1469,14 @@ case $INSTALL in esac AC_SUBST(install_bin) +# +# MDB locales +# + +PGAC_ARG_BOOL(with, mdblocales, yes, [build without MDB locales], + [AC_DEFINE([USE_MDBLOCALES], 1, [Define to 1 to build with MDB locales. 
(--with-mdblocales)])]) +AC_SUBST(USE_MDBLOCALES) + PGAC_PATH_PROGS(TAR, tar) AC_PROG_LN_S AC_PROG_MKDIR_P @@ -1620,6 +1635,11 @@ failure. It is possible the compiler isn't looking in the proper directory. Use --without-zlib to disable zlib support.])]) fi +if test "$with_mdblocales" = yes; then + AC_CHECK_LIB(mdblocales, mdb_setlocale, [], + [AC_MSG_ERROR([mdblocales library not found])]) +fi + if test "$enable_external_fts" = yes; then AC_CHECK_LIB(jansson, jansson_version_str, [], [AC_MSG_ERROR([jansson library not found or version is too old, version must >= 2.13])]) @@ -1999,6 +2019,10 @@ if test "$with_lz4" = yes; then AC_CHECK_HEADERS(lz4.h, [], [AC_MSG_ERROR([lz4.h header file is required for LZ4])]) fi +if test "$with_mdblocales" = yes; then + AC_CHECK_HEADER(mdblocales.h, [], [AC_MSG_ERROR([mdblocales header not found.])]) +fi + if test "$with_gssapi" = yes ; then AC_CHECK_HEADERS(gssapi/gssapi.h, [], [AC_CHECK_HEADERS(gssapi.h, [], [AC_MSG_ERROR([gssapi.h header file is required for GSSAPI])])]) diff --git a/contrib/pax_storage/src/cpp/storage/oper/pax_oper.cc b/contrib/pax_storage/src/cpp/storage/oper/pax_oper.cc index 44d4e49d7f8..d08c7a445b9 100644 --- a/contrib/pax_storage/src/cpp/storage/oper/pax_oper.cc +++ b/contrib/pax_storage/src/cpp/storage/oper/pax_oper.cc @@ -25,6 +25,7 @@ *------------------------------------------------------------------------- */ +#include "common/mdb_locale.h" #include "storage/oper/pax_oper.h" #include "comm/cbdb_wrappers.h" @@ -588,9 +589,9 @@ static inline bool LocaleIsC(Oid collation) { return (bool)result; } - localeptr = setlocale(LC_COLLATE, NULL); + localeptr = SETLOCALE(LC_COLLATE, NULL); CBDB_CHECK(localeptr, cbdb::CException::ExType::kExTypeCError, - fmt("Invalid locale, fail to `setlocale`, errno: %d", errno)); + fmt("Invalid locale, fail to `SETLOCALE`, errno: %d", errno)); if (strcmp(localeptr, "C") == 0 || // cut line strcmp(localeptr, "POSIX") == 0) { diff --git 
a/contrib/pax_storage/src/test/regress/expected/create_function_3.out b/contrib/pax_storage/src/test/regress/expected/create_function_3.out index 8380df1591f..7842a3c1c82 100644 --- a/contrib/pax_storage/src/test/regress/expected/create_function_3.out +++ b/contrib/pax_storage/src/test/regress/expected/create_function_3.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT diff --git a/contrib/pax_storage/src/test/regress/expected/create_function_3_optimizer.out b/contrib/pax_storage/src/test/regress/expected/create_function_3_optimizer.out index 3ae669d518a..3256709e1aa 100644 --- a/contrib/pax_storage/src/test/regress/expected/create_function_3_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/create_function_3_optimizer.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT diff 
--git a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh index bc046695032..0134699b28a 100755 --- a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh +++ b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh @@ -53,6 +53,7 @@ # # Optional Environment Variables: # LOG_DIR - Directory for logs (defaults to ${SRC_DIR}/build-logs) +# CONFIGURE_EXTRA_OPTS - Extra space-separated arguments appended to the configure command (defaults to none) # ENABLE_DEBUG - Enable debug build options (true/false, defaults to # false) # @@ -165,7 +166,8 @@ execute_cmd ./configure --prefix=${BUILD_DESTINATION} \ --with-openssl \ --with-uuid=e2fs \ --with-includes=/usr/local/xerces-c/include \ - --with-libraries=${BUILD_DESTINATION}/lib || exit 4 + --with-libraries=${BUILD_DESTINATION}/lib \ + ${CONFIGURE_EXTRA_OPTS:-} || exit 4 log_section_end "Configure" # Capture version information diff --git a/gpcontrib/Makefile b/gpcontrib/Makefile index 60fef1778c6..4c92cce4c46 100644 --- a/gpcontrib/Makefile +++ b/gpcontrib/Makefile @@ -33,6 +33,9 @@ else pg_hint_plan endif +ifeq "$(with_yagp_hooks_collector)" "yes" + recurse_targets += yagp_hooks_collector +endif ifeq "$(with_zstd)" "yes" recurse_targets += zstd endif diff --git a/gpcontrib/orafce/others.c b/gpcontrib/orafce/others.c index 2fb612efe19..5bf8b650e4c 100644 --- a/gpcontrib/orafce/others.c +++ b/gpcontrib/orafce/others.c @@ -45,6 +45,7 @@ #include "utils/uuid.h" #include "orafce.h" #include "builtins.h" +#include "common/mdb_locale.h" /* * Source code for nlssort is taken from postgresql-nls-string @@ -322,7 +323,7 @@ _nls_run_strxfrm(text *string, text *locale) */ if (!lc_collate_cache) { - if ((lc_collate_cache = setlocale(LC_COLLATE, NULL))) + if ((lc_collate_cache = SETLOCALE(LC_COLLATE, NULL))) /* Make a copy of the locale name string.
*/ #ifdef _MSC_VER lc_collate_cache = _strdup(lc_collate_cache); @@ -364,7 +365,7 @@ _nls_run_strxfrm(text *string, text *locale) * If setlocale failed, we know the default stayed the same, * co we can safely elog. */ - if (!setlocale(LC_COLLATE, locale_str)) + if (!SETLOCALE(LC_COLLATE, locale_str)) elog(ERROR, "failed to set the requested LC_COLLATE value [%s]", locale_str); changed_locale = true; @@ -409,7 +410,7 @@ _nls_run_strxfrm(text *string, text *locale) /* * Set original locale */ - if (!setlocale(LC_COLLATE, lc_collate_cache)) + if (!SETLOCALE(LC_COLLATE, lc_collate_cache)) elog(FATAL, "failed to set back the default LC_COLLATE value [%s]", lc_collate_cache); } @@ -422,7 +423,7 @@ _nls_run_strxfrm(text *string, text *locale) /* * Set original locale */ - if (!setlocale(LC_COLLATE, lc_collate_cache)) + if (!SETLOCALE(LC_COLLATE, lc_collate_cache)) elog(FATAL, "failed to set back the default LC_COLLATE value [%s]", lc_collate_cache); pfree(locale_str); } diff --git a/gpcontrib/yagp_hooks_collector/.clang-format b/gpcontrib/yagp_hooks_collector/.clang-format new file mode 100644 index 00000000000..99130575c9a --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM +SortIncludes: false diff --git a/gpcontrib/yagp_hooks_collector/.gitignore b/gpcontrib/yagp_hooks_collector/.gitignore new file mode 100644 index 00000000000..e8dfe855dad --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/.gitignore @@ -0,0 +1,5 @@ +*.o +*.so +src/protos/ +.vscode +compile_commands.json diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile new file mode 100644 index 00000000000..49825c55f35 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -0,0 +1,41 @@ +MODULE_big = yagp_hooks_collector +EXTENSION = yagp_hooks_collector +DATA = $(wildcard *--*.sql) +REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache yagp_uds yagp_locale + +PROTO_BASES = 
yagpcc_plan yagpcc_metrics yagpcc_set_service +PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) + +C_OBJS = $(patsubst %.c,%.o,$(wildcard src/*.c src/*/*.c)) +CPP_OBJS = $(patsubst %.cpp,%.o,$(wildcard src/*.cpp src/*/*.cpp)) +OBJS = $(C_OBJS) $(CPP_OBJS) $(PROTO_OBJS) + +override CXXFLAGS = -Werror -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels \ + -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv \ + -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation \ + -Wno-stringop-truncation -g -ggdb -std=c++17 -Iinclude -Isrc/protos -Isrc -DGPBUILD + +PG_CXXFLAGS += -Isrc -Iinclude +SHLIB_LINK += -lprotobuf -lpthread -lstdc++ +EXTRA_CLEAN = src/protos + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = gpcontrib/yagp_hooks_collector +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +src/protos/%.pb.cpp src/protos/%.pb.h: protos/%.proto + @mkdir -p src/protos + sed -i 's/optional //g' $^ + sed -i 's|cloud/mdb/yagpcc/api/proto/common/|protos/|g' $^ + protoc -I /usr/include -I /usr/local/include -I . --cpp_out=src $^ + mv src/protos/$*.pb.cc src/protos/$*.pb.cpp + +$(CPP_OBJS): src/protos/yagpcc_metrics.pb.h src/protos/yagpcc_plan.pb.h src/protos/yagpcc_set_service.pb.h +src/protos/yagpcc_set_service.pb.o: src/protos/yagpcc_metrics.pb.h diff --git a/gpcontrib/yagp_hooks_collector/README.md b/gpcontrib/yagp_hooks_collector/README.md new file mode 100644 index 00000000000..9f465a190cb --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/README.md @@ -0,0 +1,28 @@ +## YAGP Hooks Collector + +An extension for collecting greenplum query execution metrics and reporting them to an external agent. + +### Collected Statistics + +#### 1. Query Lifecycle +- **What:** Captures query text, normalized query text, timestamps (submit, start, end, done), and user/database info. +- **GUC:** `yagpcc.enable`. 
+ +#### 2. `EXPLAIN` data +- **What:** Triggers generation of `EXPLAIN (TEXT, COSTS, VERBOSE)` output and captures it. +- **GUC:** `yagpcc.enable`. + +#### 3. `EXPLAIN ANALYZE` data +- **What:** Triggers generation of `EXPLAIN (TEXT, ANALYZE, BUFFERS, TIMING, VERBOSE)` output and captures it. +- **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats` (ANALYZE), `yagpcc.enable_analyze` (BUFFERS, TIMING, VERBOSE). + +#### 4. Other Metrics +- **What:** Captures Instrument, Greenplum, System, Network, Interconnect, and Spill metrics. +- **GUC:** `yagpcc.enable`. + +### General Configuration +- **Nested Queries:** When `yagpcc.report_nested_queries` is `false`, only top-level queries are reported from the coordinator and segments. When it is `true`, both top-level and nested queries are reported from the coordinator, and from segments they are collected as aggregates. +- **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. +- **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. +- **Trimming plans:** Query texts and execution plans are trimmed based on `yagpcc.max_text_size` and `yagpcc.max_plan_size` (default: 1024KB). For now, it is not recommended to set these GUCs higher than 1024KB. +- **Analyze collection:** `EXPLAIN ANALYZE` output is sent if execution time exceeds `yagpcc.min_analyze_time`, which is 10 seconds by default. It is collected if `yagpcc.enable_analyze` is true. 
diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out b/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out new file mode 100644 index 00000000000..df12e3e1b66 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out @@ -0,0 +1,163 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +-- DECLARE +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_0; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_0; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- DECLARE WITH HOLD +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; +RESET 
yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(14 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- ROLLBACK +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR SELECT 1; +ROLLBACK; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | 
QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(12 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- FETCH +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; + ?column? +---------- + 2 +(1 row) + +FETCH 1 IN cursor_stats_6; + ?column? +---------- + 3 +(1 row) + +CLOSE cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_6; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | 
QUERY_STATUS_DONE +(18 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out b/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out new file mode 100644 index 00000000000..3b1e3504923 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out @@ -0,0 +1,175 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; +-- Hash distributed table +CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); +INSERT INTO test_hash_dist SELECT 1; +SET yagpcc.logging_mode to 'TBL'; +SET optimizer_enable_direct_dispatch TO TRUE; +-- Direct dispatch is used here, only one segment is scanned. +select * from test_hash_dist where id = 1; + id +---- + 1 +(1 row) + +RESET optimizer_enable_direct_dispatch; +RESET yagpcc.logging_mode; +-- Should see 8 rows. 
+SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------------------+--------------------- + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_START + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_END + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +SET yagpcc.logging_mode to 'TBL'; +-- Scan all segments. +select * from test_hash_dist; + id +---- + 1 +(1 row) + +DROP TABLE test_hash_dist; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------+--------------------- + -1 | select * from test_hash_dist; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist; | QUERY_STATUS_START + -1 | select * from test_hash_dist; | QUERY_STATUS_END + -1 | select * from test_hash_dist; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE +(16 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Replicated table +CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ +BEGIN + RETURN NEXT 'seg'; +END; +$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; +CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; +INSERT INTO test_replicated SELECT 1; +SET yagpcc.logging_mode 
to 'TBL'; +SELECT COUNT(*) FROM test_replicated, force_segments(); + count +------- + 3 +(1 row) + +DROP TABLE test_replicated; +DROP FUNCTION force_segments(); +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------------------+--------------------- + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE +(16 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Partially distributed table (2 numsegments) +SET allow_system_table_mods = ON; +CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); +UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; +INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_partial_dist; + count +------- + 100 +(1 row) + +RESET yagpcc.logging_mode; +DROP TABLE test_partial_dist; +RESET allow_system_table_mods; +-- Should see 12 rows. 
+SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + query_text | query_status +-----------------------------------------+--------------------- + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_SUBMIT + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_START + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_END + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE +(12 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out b/gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out new file mode 100644 index 00000000000..3085cfa42e1 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out @@ -0,0 +1,57 @@ +-- +-- Test GUC caching for query lifecycle consistency. +-- +-- The extension logs SUBMIT and DONE events for each query. +-- GUC values that control logging (enable_utility, ignored_users_list, ...) +-- must be cached at SUBMIT time to ensure DONE uses the same filtering +-- criteria. Otherwise, a SET command that modifies these GUCs would +-- have its DONE event rejected, creating orphaned SUBMIT entries. +-- This is due to query being actually executed between SUBMIT and DONE. 
+-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +SELECT yagpcc.truncate_log(); +-- end_ignore +CREATE OR REPLACE FUNCTION print_last_query(query text) +RETURNS TABLE(query_status text) AS $$ + SELECT query_status + FROM yagpcc.log + WHERE segid = -1 AND query_text = query + ORDER BY ccnt DESC +$$ LANGUAGE sql; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.logging_mode TO 'TBL'; +-- SET below disables utility logging and DONE must still be logged. +SET yagpcc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET yagpcc.enable_utility TO FALSE;'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_DONE +(2 rows) + +-- SELECT below adds current user to ignore list and DONE must still be logged. +-- start_ignore +SELECT set_config('yagpcc.ignored_users_list', current_user, false); + set_config +------------ + gpadmin +(1 row) + +-- end_ignore +SELECT * FROM print_last_query('SELECT set_config(''yagpcc.ignored_users_list'', current_user, false);'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_START + QUERY_STATUS_END + QUERY_STATUS_DONE +(4 rows) + +DROP FUNCTION print_last_query(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out b/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out new file mode 100644 index 00000000000..6689b6a4ed3 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out @@ -0,0 +1,23 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. 
This test is partially taken from +-- gp_locale. +-- start_ignore +DROP DATABASE IF EXISTS yagp_test_locale; +-- end_ignore +CREATE DATABASE yagp_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c yagp_test_locale +CREATE EXTENSION yagp_hooks_collector; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.enable TO TRUE; +CREATE TABLE yagp_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO yagp_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE yagp_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +DROP TABLE yagp_hi_안녕세계; +DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_select.out b/gpcontrib/yagp_hooks_collector/expected/yagp_select.out new file mode 100644 index 00000000000..af08f2d1def --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_select.out @@ -0,0 +1,136 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; +-- Basic SELECT tests +SET yagpcc.logging_mode to 'TBL'; +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +SELECT COUNT(*) FROM generate_series(1,10); + count +------- + 10 +(1 row) + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_DONE +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Transaction test +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +SELECT 1; + ?column? +---------- + 1 +(1 row) + +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- CTE test +SET yagpcc.logging_mode to 'TBL'; +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + column1 +--------- + 1 + 2 +(2 rows) + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-----------------------------+--------------------- + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_SUBMIT + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_START + | SELECT * FROM t; | + -1 | 
WITH t AS (VALUES (1), (2))+| QUERY_STATUS_END + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_DONE + | SELECT * FROM t; | +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Prepared statement test +SET yagpcc.logging_mode to 'TBL'; +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; + ?column? +---------- + 1 +(1 row) + +DEALLOCATE test_stmt; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------+--------------------- + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_START + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_END + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out b/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out new file mode 100644 index 00000000000..d04929ffb4a --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out @@ -0,0 +1,42 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +-- end_ignore +\set UDS_PATH '/tmp/yagpcc_test.sock' +-- Configure extension to send via UDS +SET yagpcc.uds_path TO :'UDS_PATH'; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.logging_mode TO 'UDS'; +-- Start receiver +SELECT yagpcc.__test_uds_start_server(:'UDS_PATH'); + __test_uds_start_server +------------------------- +(0 rows) + +-- Send +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +-- Receive +SELECT yagpcc.__test_uds_receive() > 0 as received; + received +---------- + t +(1 row) + +-- Stop receiver +SELECT yagpcc.__test_uds_stop_server(); + __test_uds_stop_server +------------------------ +(0 rows) + +-- Cleanup +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.uds_path; +RESET yagpcc.ignored_users_list; +RESET yagpcc.enable; +RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out b/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out new file mode 100644 index 00000000000..9de126dd882 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out @@ -0,0 +1,68 @@ +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) +RETURNS TEXT AS $$ + SELECT query_text + FROM yagpcc.log + WHERE query_text LIKE '%' || marker || '%' + ORDER BY datetime DESC + LIMIT 1 +$$ LANGUAGE sql VOLATILE; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +-- Test 1: 1 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test1*/ 'HelloWorld'; + ?column? +------------ + HelloWorld +(1 row) + +RESET yagpcc.logging_mode; +SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 2: 2 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test2*/ 'РУССКИЙЯЗЫК'; + ?column? +------------- + РУССКИЙЯЗЫК +(1 row) + +RESET yagpcc.logging_mode; +-- Character 'Р' has two bytes and cut in the middle => not included. +SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 3: 4 byte chars +SET yagpcc.max_text_size to 21; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test3*/ '😀'; + ?column? 
+---------- + 😀 +(1 row) + +RESET yagpcc.logging_mode; +-- Emoji has 4 bytes and cut before the last byte => not included. +SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Cleanup +DROP FUNCTION get_marked_query(TEXT); +RESET yagpcc.max_text_size; +RESET yagpcc.logging_mode; +RESET yagpcc.enable; +RESET yagpcc.ignored_users_list; +DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out new file mode 100644 index 00000000000..7df1d2816eb --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out @@ -0,0 +1,248 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE test_table (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------------------+--------------------- + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_DONE + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_SUBMIT + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_DONE + -1 | DROP TABLE test_table; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE test_table; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Partitioning +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +DROP TABLE pt_test; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------+--------------------- + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT + -1 | DROP 
TABLE pt_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(6 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Views and Functions +SET yagpcc.logging_mode to 'TBL'; +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------------------------------------------------------------------------------+--------------------- + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_DONE + -1 | DROP VIEW test_view; | QUERY_STATUS_SUBMIT + -1 | DROP VIEW test_view; | QUERY_STATUS_DONE + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_SUBMIT + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Transaction Operations +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; +BEGIN; +SAVEPOINT sp2; +ABORT; +BEGIN; +ROLLBACK; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------+--------------------- + 
-1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(18 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- DML Operations +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE dml_test (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------+--------------------- + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE + -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE dml_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(6 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- COPY Operations +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE copy_test (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+COPY (SELECT 1) TO STDOUT; +1 +DROP TABLE copy_test; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------+--------------------- + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE + -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Prepared Statements and error during execute +SET yagpcc.logging_mode to 'TBL'; +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +ERROR: division by zero +DEALLOCATE test_prep; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------+--------------------- + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_ERROR + -1 | DEALLOCATE test_prep; | QUERY_STATUS_SUBMIT + -1 | DEALLOCATE test_prep; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- GUC Settings +SET yagpcc.logging_mode to 'TBL'; +SET yagpcc.report_nested_queries 
TO FALSE; +RESET yagpcc.report_nested_queries; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------------------+--------------------- + -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT + -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE + -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +(6 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/metric.md b/gpcontrib/yagp_hooks_collector/metric.md new file mode 100644 index 00000000000..5df56877edb --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/metric.md @@ -0,0 +1,126 @@ +## YAGP Hooks Collector Metrics + +### States +A Postgres process goes through 4 executor functions to execute a query: +1) `ExecutorStart()` - resource allocation for the query. +2) `ExecutorRun()` - query execution. +3) `ExecutorFinish()` - cleanup. +4) `ExecutorEnd()` - cleanup. + +yagp-hooks-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: +``` +submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end -> ExecutorEnd() -> done +``` + +### Key Points +- Some queries may skip the _end_ state, then the _end_ statistics is sent during _done_. 
+- If a query finishes with an error (`METRICS_QUERY_ERROR`), or is cancelled (`METRICS_QUERY_CANCELLED`), statistics are sent at _done_. +- Some statistics are calculated as the difference between the current value of a global metric and an earlier snapshot of it. The initial snapshot is taken at _submit_, and the diff is calculated at _end_/_done_. +- Nested queries on _Dispatcher_ become top-level on _Execute_. +- Each process (_Dispatcher_/_Execute_) sends its own statistics. + +### Notations +- **S** = Submit event. +- **T** = Start event. +- **E** = End event. +- **D** = Done event. +- **DIFF** = current_value - submit_value (the value captured at the submit event). +- **ABS** = Absolute value, or where diff is not applicable, the value taken. +- **Local*** - Statistics that start counting from zero for each new query. A nested query is also considered new. +- **Node** - PG process, either a `Query Dispatcher` (on master) or an `Execute` (on segment). + +### Statistics Table + +| Proto Field | Type | When | DIFF/ABS | Local* | Scope | Dispatcher | Execute | Units | Notes | +| :--------------------------- | :----- | :------ | :------- | ------ | :------ | :--------: | :-----: | :------ | :-------------------------------------------------- | +| **SystemStat** | | | | | | | | | | +| `runningTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | Wall clock time | +| `userTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat utime | +| `kernelTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat stime | +| `vsize` | uint64 | E, D | ABS | - | Node | + | + | bytes | /proc/pid/stat vsize | +| `rss` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat rss | +| `VmSizeKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmSize | +| `VmPeakKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmPeak | +| `rchar` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io rchar | +| `wchar` | uint64 | E, D |
DIFF | - | Node | + | + | bytes | /proc/pid/io wchar | +| `syscr` | uint64 | E, D | DIFF | - | Node | + | + | count | /proc/pid/io syscr | +| `syscw` | uint64 | E, D | DIFF | - | Node | + | + | count | /proc/pid/io syscw | +| `read_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io read_bytes | +| `write_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io write_bytes | +| `cancelled_write_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io cancelled_write_bytes | +| **MetricInstrumentation** | | | | | | | | | | +| `ntuples` | uint64 | E, D | ABS | + | Node | + | + | tuples | Accumulated total tuples | +| `nloops` | uint64 | E, D | ABS | + | Node | + | + | count | Number of cycles | +| `tuplecount` | uint64 | E, D | ABS | + | Node | + | + | tuples | Accumulated tuples per cycle | +| `firsttuple` | double | E, D | ABS | + | Node | + | + | seconds | Time for first tuple of this cycle | +| `startup` | double | E, D | ABS | + | Node | + | + | seconds | Start time of current iteration | +| `total` | double | E, D | ABS | + | Node | + | + | seconds | Total time taken | +| `shared_blks_hit` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared buffer blocks found in cache | +| `shared_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared buffer blocks read from disk | +| `shared_blks_dirtied` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared blocks dirtied | +| `shared_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Dirty shared buffer blocks written to disk | +| `local_blks_hit` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local buffer hits | +| `local_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Disk blocks read | +| `local_blks_dirtied` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local blocks dirtied | +| `local_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local blocks written to disk | +| `temp_blks_read` | 
uint64 | E, D | ABS | + | Node | + | + | blocks | Temp file blocks read | +| `temp_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Temp file blocks written | +| `blk_read_time` | double | E, D | ABS | + | Node | + | + | seconds | Time reading data blocks | +| `blk_write_time` | double | E, D | ABS | + | Node | + | + | seconds | Time writing data blocks | +| `inherited_calls` | uint64 | E, D | ABS | - | Node | + | + | count | Nested query count (YAGPCC-specific) | +| `inherited_time` | double | E, D | ABS | - | Node | + | + | seconds | Nested query time (YAGPCC-specific) | +| **NetworkStat (sent)** | | | | | | | | | | +| `sent.total_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes sent, including headers | +| `sent.tuple_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes of pure tuple-data sent | +| `sent.chunks` | uint32 | D | ABS | - | Node | + | + | count | Tuple-chunks sent | +| **NetworkStat (received)** | | | | | | | | | | +| `received.total_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes received, including headers | +| `received.tuple_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes of pure tuple-data received | +| `received.chunks` | uint32 | D | ABS | - | Node | + | + | count | Tuple-chunks received | +| **InterconnectStat** | | | | | | | | | | +| `total_recv_queue_size` | uint64 | D | DIFF | - | Node | + | + | bytes | Receive queue size sum | +| `recv_queue_size_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times when computing total_recv_queue_size | +| `total_capacity` | uint64 | D | DIFF | - | Node | + | + | bytes | Capacity sum for sent packets | +| `capacity_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times used to compute total_capacity | +| `total_buffers` | uint64 | D | DIFF | - | Node | + | + | count | Available buffers | +| `buffer_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times when computing
total_buffers | +| `active_connections_num` | uint64 | D | DIFF | - | Node | + | + | count | Active connections | +| `retransmits` | int64 | D | DIFF | - | Node | + | + | count | Packet retransmits | +| `startup_cached_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Startup cached packets | +| `mismatch_num` | int64 | D | DIFF | - | Node | + | + | count | Mismatched packets received | +| `crc_errors` | int64 | D | DIFF | - | Node | + | + | count | CRC errors | +| `snd_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Packets sent | +| `recv_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Packets received | +| `disordered_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Out-of-order packets | +| `duplicated_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Duplicate packets | +| `recv_ack_num` | int64 | D | DIFF | - | Node | + | + | count | ACKs received | +| `status_query_msg_num` | int64 | D | DIFF | - | Node | + | + | count | Status query messages sent | +| **SpillInfo** | | | | | | | | | | +| `fileCount` | int32 | E, D | DIFF | - | Node | + | + | count | Spill (temp) files created | +| `totalBytes` | int64 | E, D | DIFF | - | Node | + | + | bytes | Spill bytes written | +| **QueryInfo** | | | | | | | | | | +| `generator` | enum | T, E, D | ABS | - | Cluster | + | - | enum | Planner/Optimizer | +| `query_id` | uint64 | T, E, D | ABS | - | Cluster | + | - | id | Query ID | +| `plan_id` | uint64 | T, E, D | ABS | - | Cluster | + | - | id | Hash of normalized plan | +| `query_text` | string | S | ABS | - | Cluster | + | - | text | Query text | +| `plan_text` | string | T | ABS | - | Cluster | + | - | text | EXPLAIN text | +| `template_query_text` | string | S | ABS | - | Cluster | + | - | text | Normalized query text | +| `template_plan_text` | string | T | ABS | - | Cluster | + | - | text | Normalized plan text | +| `userName` | string | All | ABS | - | Cluster | + | - | text | Session user | +| `databaseName` | string | All 
| ABS | - | Cluster | + | - | text | Database name | +| `rsgname` | string | All | ABS | - | Cluster | + | - | text | Resource group name | +| `analyze_text` | string | D | ABS | - | Cluster | + | - | text | EXPLAIN ANALYZE | +| **AdditionalQueryInfo** | | | | | | | | | | +| `nested_level` | int64 | All | ABS | - | Node | + | + | count | Current nesting level | +| `error_message` | string | D | ABS | - | Node | + | + | text | Error message | +| `slice_id` | int64 | All | ABS | - | Node | + | + | id | Slice ID | +| **QueryKey** | | | | | | | | | | +| `tmid` | int32 | All | ABS | - | Node | + | + | id | Transaction start time | +| `ssid` | int32 | All | ABS | - | Node | + | + | id | Session ID | +| `ccnt` | int32 | All | ABS | - | Node | + | + | count | Command counter | +| **SegmentKey** | | | | | | | | | | +| `dbid` | int32 | All | ABS | - | Node | + | + | id | Database ID | +| `segindex` | int32 | All | ABS | - | Node | + | + | id | Segment index (-1=coordinator) | + +--- + diff --git a/gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto b/gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto new file mode 100644 index 00000000000..91ac0c4941a --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto @@ -0,0 +1,185 @@ +syntax = "proto3"; + +package yagpcc; +option java_outer_classname = "SegmentYAGPCCM"; +option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; + +enum QueryStatus { + QUERY_STATUS_UNSPECIFIED = 0; + QUERY_STATUS_SUBMIT = 1; + QUERY_STATUS_START = 2; + QUERY_STATUS_DONE = 3; + QUERY_STATUS_QUERY_DONE = 4; + QUERY_STATUS_ERROR = 5; + QUERY_STATUS_CANCELLING = 6; + QUERY_STATUS_CANCELED = 7; + QUERY_STATUS_END = 8; +} + +enum PlanNodeStatus { + PLAN_NODE_STATUS_UNSPECIFIED = 0; + PLAN_NODE_STATUS_INITIALIZED = 1; + PLAN_NODE_STATUS_EXECUTING = 2; + PLAN_NODE_STATUS_FINISHED = 3; +} + +message QueryInfo { + PlanGenerator generator = 1; + uint64 query_id = 2; + uint64 plan_id = 3; +
string query_text = 4; /* query text */ + string plan_text = 5; /* EXPLAIN text */ + string template_query_text = 6; /* normalized query text */ + string template_plan_text = 7; /* normalized plan text */ + string userName = 8; /* session user */ + string databaseName = 9; /* database name */ + string rsgname = 10; /* resource group name */ + string analyze_text = 11; /* EXPLAIN ANALYZE text */ +} + +message AdditionalQueryInfo { + int64 nested_level = 1; /* current nesting level */ + string error_message = 2; /* error message */ + int64 slice_id = 3; /* slice ID */ +} + +message AdditionalQueryStat { + string error_message = 1; + repeated int64 slices = 2; +} + +enum PlanGenerator +{ + PLAN_GENERATOR_UNSPECIFIED = 0; + PLAN_GENERATOR_PLANNER = 1; /* plan produced by the planner */ + PLAN_GENERATOR_OPTIMIZER = 2; /* plan produced by the optimizer */ +} + +message GPMetrics { + SystemStat systemStat = 1; + MetricInstrumentation instrumentation = 2; + SpillInfo spill = 3; +} + +message QueryKey { + int32 tmid = 1; /* A time identifier for a particular query. All records associated with the query will have the same tmid. */ + int32 ssid = 2; /* The session id as shown by gp_session_id. All records associated with the query will have the same ssid. */ + int32 ccnt = 3; /* The command number within this session as shown by gp_command_count.
All records associated with the query will have the same ccnt */ +} + +message SegmentKey { + int32 dbid = 1; /* the dbid of this database */ + int32 segindex = 2; /* content indicator: -1 for entry database, + * 0, ..., n-1 for segment database. + * A primary and its mirror have the same segindex */ +} + +message SystemStat { + /* CPU stat */ + double runningTimeSeconds = 1; /* wall clock time */ + double userTimeSeconds = 2; /* /proc/pid/stat utime */ + double kernelTimeSeconds = 3; /* /proc/pid/stat stime */ + + /* Memory stat */ + uint64 vsize = 4; /* /proc/pid/stat vsize (bytes) */ + uint64 rss = 5; /* /proc/pid/stat rss (pages) */ + uint64 VmSizeKb = 6; /* /proc/pid/status VmSize */ + uint64 VmPeakKb = 7; /* /proc/pid/status VmPeak */ + + /* Storage stat (counters from /proc/pid/io) */ + uint64 rchar = 8; + uint64 wchar = 9; + uint64 syscr = 10; + uint64 syscw = 11; + uint64 read_bytes = 12; + uint64 write_bytes = 13; + uint64 cancelled_write_bytes = 14; +} + +message NetworkStat { + uint32 total_bytes = 1; + uint32 tuple_bytes = 2; /* bytes of pure tuple data */ + uint32 chunks = 3; /* number of tuple chunks */ +} + +message InterconnectStat { + // Receive queue size sum when main thread is trying to get a packet + uint64 total_recv_queue_size = 1; + // Counting times when computing total_recv_queue_size + uint64 recv_queue_size_counting_time = 2; + + // The capacity sum when trying to send packets + uint64 total_capacity = 3; + // Counting times used to compute total_capacity + uint64 capacity_counting_time = 4; + + // Total buffers available when sending packets + uint64 total_buffers = 5; + // Counting times when computing total_buffers + uint64 buffer_counting_time = 6; + + // The number of active connections + uint64 active_connections_num = 7; + + // The number of packet retransmits + int64 retransmits = 8; + + // The number of cached future packets + int64 startup_cached_pkt_num = 9; + + // The number of mismatched packets received + int64 mismatch_num = 10; + + // The number of crc errors + int64 crc_errors = 11; + + // The number of packets sent by sender + int64 snd_pkt_num = 12; + + // The number of packets received by receiver + int64 recv_pkt_num = 13; + + // Disordered packet number + int64 disordered_pkt_num = 14; + + //
Duplicate packet number + int64 duplicated_pkt_num = 15; + + // The number of Acks received + int64 recv_ack_num = 16; + + // The number of status query messages sent + int64 status_query_msg_num = 17; +} + +message MetricInstrumentation { + uint64 ntuples = 1; /* Total tuples produced */ + uint64 nloops = 2; /* # of run cycles for this node */ + uint64 tuplecount = 3; /* Tuples emitted so far this cycle */ + double firsttuple = 4; /* Time for first tuple of this cycle */ + double startup = 5; /* Total startup time (in seconds) (optimiser's cost estimation) */ + double total = 6; /* Total total time (in seconds) */ + uint64 shared_blks_hit = 7; /* shared blocks stats*/ + uint64 shared_blks_read = 8; + uint64 shared_blks_dirtied = 9; + uint64 shared_blks_written = 10; + uint64 local_blks_hit = 11; /* data read from disks */ + uint64 local_blks_read = 12; + uint64 local_blks_dirtied = 13; + uint64 local_blks_written = 14; + uint64 temp_blks_read = 15; /* temporary tables read stat */ + uint64 temp_blks_written = 16; + double blk_read_time = 17; /* measured read/write time */ + double blk_write_time = 18; + NetworkStat sent = 19; + NetworkStat received = 20; + double startup_time = 21; /* real query startup time (planning + queue time) */ + uint64 inherited_calls = 22; /* the number of executed sub-queries */ + double inherited_time = 23; /* total time spent on inherited execution */ + InterconnectStat interconnect = 24; +} + +message SpillInfo { + int32 fileCount = 1; + int64 totalBytes = 2; +} diff --git a/gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto b/gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto new file mode 100644 index 00000000000..962fab4bbdd --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto @@ -0,0 +1,570 @@ +syntax = "proto3"; + +package yagpcc; +option java_outer_classname = "SegmentYAGPCCP"; +option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; + +message MetricPlan { + 
GpdbNodeType type = 1; + + int32 plan_node_id = 2; + int32 parent_plan_node_id = 3; // Valid only for QueryInfoMetricQuerySubmit + + double startup_cost = 4; /* cost expended before fetching any tuples */ + double total_cost = 5; /* total cost (assuming all tuples fetched) */ + double plan_rows = 6; /* number of rows plan is expected to emit */ + int32 plan_width = 7; /* average row width in bytes */ + + int32 arg1 = 8; // for some nodes it's additional operand type + int32 arg2 = 9; // for some nodes it's additional operand type + + MetricMotionInfo motion_info = 10; + MetricRelationInfo relation_info = 11; + + string scan_index_name = 12; + ScanDirection scan_direction = 13; + MetricSliceInfo slice_info = 14; + string statement = 15; +} + +message MetricMotionInfo { + MotionType type = 1; + bool isBroadcast = 2; + CdbLocusType locusType = 3; + + int32 sliceId = 4; + int32 parentSliceId = 5; +} + +message MetricRelationInfo { + int32 oid = 1; + string name = 2; + string schema = 3; + string alias = 4; + int32 dynamicScanId = 5; +} + +message MetricSliceInfo { + int32 slice = 1; + int32 segments = 2; + GangType gangType = 3; + int32 gang = 4; +} + +enum ScanDirection +{ + SCAN_DIRECTION_UNSPECIFIED = 0; + SCAN_DIRECTION_BACKWARD = 1; + SCAN_DIRECTION_FORWARD = 2; +} + +/* GangType enumeration is used in several structures related to CDB + * slice plan support. 
+ */ +enum GangType +{ + GANG_TYPE_UNSPECIFIED = 0; + GANG_TYPE_UNALLOCATED = 1; /* a root slice executed by the qDisp */ + GANG_TYPE_ENTRYDB_READER = 2; /* a 1-gang with read access to the entry db */ + GANG_TYPE_SINGLETON_READER = 3; /* a 1-gang to read the segment dbs */ + GANG_TYPE_PRIMARY_READER = 4; /* a 1-gang or N-gang to read the segment dbs */ + GANG_TYPE_PRIMARY_WRITER = 5; /* the N-gang that can update the segment dbs */ +} + + +enum CdbLocusType +{ + CDB_LOCUS_TYPE_UNSPECIFIED = 0; + CDB_LOCUS_TYPE_ENTRY = 1; /* a single backend process on the entry db: + * usually the qDisp itself, but could be a + * qExec started by the entry postmaster. + */ + + CDB_LOCUS_TYPE_SINGLE_QE = 2; /* a single backend process on any db: the + * qDisp itself, or a qExec started by a + * segment postmaster or the entry postmaster. + */ + + CDB_LOCUS_TYPE_GENERAL = 3; /* compatible with any locus (data is + * self-contained in the query plan or + * generally available in any qExec or qDisp) */ + + CDB_LOCUS_TYPE_SEGMENT_GENERAL = 4; /* generally available in any qExec, but not + * available in qDisp */ + + CDB_LOCUS_TYPE_REPLICATED = 5; /* replicated over all qExecs of an N-gang */ + CDB_LOCUS_TYPE_HASHED = 6; /* hash partitioned over all qExecs of N-gang */ + CDB_LOCUS_TYPE_HASHED_OJ = 7; /* result of hash partitioned outer join, NULLs can be anywhere */ + CDB_LOCUS_TYPE_STREWN = 8; /* partitioned on no known function */ + CDB_LOCUS_TYPE_END = 9; /* = last valid CdbLocusType + 1 */ +} + +enum MotionType +{ + MOTION_TYPE_UNSPECIFIED = 0; + MOTION_TYPE_HASH = 1; // Use hashing to select a segindex destination + MOTION_TYPE_FIXED = 2; // Send tuples to a fixed set of segindexes + MOTION_TYPE_EXPLICIT = 3; // Send tuples to the segment explicitly specified in their segid column +} + +enum GpdbNodeType { + GPDB_NODE_TYPE_UNSPECIFIED = 0; + INDEX_INFO = 1; + EXPR_CONTEXT = 2; + PROJECTION_INFO = 3; + JUNK_FILTER = 4; + RESULT_REL_INFO = 5; + E_STATE = 6; + TUPLE_TABLE_SLOT = 7; + 
CDB_PROCESS = 8; + SLICE = 9; + SLICE_TABLE = 10; + CURSOR_POS_INFO = 11; + SHARE_NODE_ENTRY = 12; + PARTITION_STATE = 13; + QUERY_DISPATCH_DESC = 14; + OID_ASSIGNMENT = 15; + PLAN = 16; + SCAN = 17; + JOIN = 18; + RESULT = 19; + MODIFY_TABLE = 20; + APPEND = 21; + MERGE_APPEND = 22; + RECURSIVE_UNION = 23; + SEQUENCE = 24; + BITMAP_AND = 25; + BITMAP_OR = 26; + SEQ_SCAN = 27; + DYNAMIC_SEQ_SCAN = 28; + EXTERNAL_SCAN = 29; + INDEX_SCAN = 30; + DYNAMIC_INDEX_SCAN = 31; + INDEX_ONLY_SCAN = 32; + BITMAP_INDEX_SCAN = 33; + DYNAMIC_BITMAP_INDEX_SCAN = 34; + BITMAP_HEAP_SCAN = 35; + DYNAMIC_BITMAP_HEAP_SCAN = 36; + TID_SCAN = 37; + SUBQUERY_SCAN = 38; + FUNCTION_SCAN = 39; + TABLE_FUNCTION_SCAN = 40; + VALUES_SCAN = 41; + CTE_SCAN = 42; + WORK_TABLE_SCAN = 43; + FOREIGN_SCAN = 44; + NEST_LOOP = 45; + MERGE_JOIN = 46; + HASH_JOIN = 47; + MATERIAL = 48; + SORT = 49; + AGG = 50; + WINDOW_AGG = 51; + UNIQUE = 52; + HASH = 53; + SET_OP = 54; + LOCK_ROWS = 55; + LIMIT = 56; + MOTION = 57; + SHARE_INPUT_SCAN = 58; + REPEAT = 59; + DML = 60; + SPLIT_UPDATE = 61; + ROW_TRIGGER = 62; + ASSERT_OP = 63; + PARTITION_SELECTOR = 64; + PLAN_END = 65; + NEST_LOOP_PARAM = 66; + PLAN_ROW_MARK = 67; + PLAN_INVAL_ITEM = 68; + PLAN_STATE = 69; + SCAN_STATE = 70; + JOIN_STATE = 71; + RESULT_STATE = 72; + MODIFY_TABLE_STATE = 73; + APPEND_STATE = 74; + MERGE_APPEND_STATE = 75; + RECURSIVE_UNION_STATE = 76; + SEQUENCE_STATE = 77; + BITMAP_AND_STATE = 78; + BITMAP_OR_STATE = 79; + SEQ_SCAN_STATE = 80; + DYNAMIC_SEQ_SCAN_STATE = 81; + EXTERNAL_SCAN_STATE = 82; + INDEX_SCAN_STATE = 83; + DYNAMIC_INDEX_SCAN_STATE = 84; + INDEX_ONLY_SCAN_STATE = 85; + BITMAP_INDEX_SCAN_STATE = 86; + DYNAMIC_BITMAP_INDEX_SCAN_STATE = 87; + BITMAP_HEAP_SCAN_STATE = 88; + DYNAMIC_BITMAP_HEAP_SCAN_STATE = 89; + TID_SCAN_STATE = 90; + SUBQUERY_SCAN_STATE = 91; + FUNCTION_SCAN_STATE = 92; + TABLE_FUNCTION_STATE = 93; + VALUES_SCAN_STATE = 94; + CTE_SCAN_STATE = 95; + WORK_TABLE_SCAN_STATE = 96; + FOREIGN_SCAN_STATE = 97; + 
NEST_LOOP_STATE = 98; + MERGE_JOIN_STATE = 99; + HASH_JOIN_STATE = 100; + MATERIAL_STATE = 101; + SORT_STATE = 102; + AGG_STATE = 103; + WINDOW_AGG_STATE = 104; + UNIQUE_STATE = 105; + HASH_STATE = 106; + SET_OP_STATE = 107; + LOCK_ROWS_STATE = 108; + LIMIT_STATE = 109; + MOTION_STATE = 110; + SHARE_INPUT_SCAN_STATE = 111; + REPEAT_STATE = 112; + DML_STATE = 113; + SPLIT_UPDATE_STATE = 114; + ROW_TRIGGER_STATE = 115; + ASSERT_OP_STATE = 116; + PARTITION_SELECTOR_STATE = 117; + TUPLE_DESC_NODE = 118; + SERIALIZED_PARAM_EXTERN_DATA = 119; + ALIAS = 120; + RANGE_VAR = 121; + EXPR = 122; + VAR = 123; + CONST = 124; + PARAM = 125; + AGGREF = 126; + WINDOW_FUNC = 127; + ARRAY_REF = 128; + FUNC_EXPR = 129; + NAMED_ARG_EXPR = 130; + OP_EXPR = 131; + DISTINCT_EXPR = 132; + NULL_IF_EXPR = 133; + SCALAR_ARRAY_OP_EXPR = 134; + BOOL_EXPR = 135; + SUB_LINK = 136; + SUB_PLAN = 137; + ALTERNATIVE_SUB_PLAN = 138; + FIELD_SELECT = 139; + FIELD_STORE = 140; + RELABEL_TYPE = 141; + COERCE_VIA_IO = 142; + ARRAY_COERCE_EXPR = 143; + CONVERT_ROWTYPE_EXPR = 144; + COLLATE_EXPR = 145; + CASE_EXPR = 146; + CASE_WHEN = 147; + CASE_TEST_EXPR = 148; + ARRAY_EXPR = 149; + ROW_EXPR = 150; + ROW_COMPARE_EXPR = 151; + COALESCE_EXPR = 152; + MIN_MAX_EXPR = 153; + XML_EXPR = 154; + NULL_TEST = 155; + BOOLEAN_TEST = 156; + COERCE_TO_DOMAIN = 157; + COERCE_TO_DOMAIN_VALUES = 158; + SET_TO_DEFAULT = 159; + CURRENT_OF_EXPR = 160; + TARGET_ENTRY = 161; + RANGE_TBL_REF = 162; + JOIN_EXPR = 163; + FROM_EXPR = 164; + INTO_CLAUSE = 165; + COPY_INTO_CLAUSE = 166; + REFRESH_CLAUSE = 167; + FLOW = 168; + GROUPING = 169; + GROUP_ID = 170; + DISTRIBUTED_BY = 171; + DML_ACTION_EXPR = 172; + PART_SELECTED_EXPR = 173; + PART_DEFAULT_EXPR = 174; + PART_BOUND_EXPR = 175; + PART_BOUND_INCLUSION_EXPR = 176; + PART_BOUND_OPEN_EXPR = 177; + PART_LIST_RULE_EXPR = 178; + PART_LIST_NULL_TEST_EXPR = 179; + TABLE_OID_INFO = 180; + EXPR_STATE = 181; + GENERIC_EXPR_STATE = 182; + WHOLE_ROW_VAR_EXPR_STATE = 183; + 
AGGREF_EXPR_STATE = 184; + WINDOW_FUNC_EXPR_STATE = 185; + ARRAY_REF_EXPR_STATE = 186; + FUNC_EXPR_STATE = 187; + SCALAR_ARRAY_OP_EXPR_STATE = 188; + BOOL_EXPR_STATE = 189; + SUB_PLAN_STATE = 190; + ALTERNATIVE_SUB_PLAN_STATE = 191; + FIELD_SELECT_STATE = 192; + FIELD_STORE_STATE = 193; + COERCE_VIA_IO_STATE = 194; + ARRAY_COERCE_EXPR_STATE = 195; + CONVERT_ROWTYPE_EXPR_STATE = 196; + CASE_EXPR_STATE = 197; + CASE_WHEN_STATE = 198; + ARRAY_EXPR_STATE = 199; + ROW_EXPR_STATE = 200; + ROW_COMPARE_EXPR_STATE = 201; + COALESCE_EXPR_STATE = 202; + MIN_MAX_EXPR_STATE = 203; + XML_EXPR_STATE = 204; + NULL_TEST_STATE = 205; + COERCE_TO_DOMAIN_STATE = 206; + DOMAIN_CONSTRAINT_STATE = 207; + GROUPING_FUNC_EXPR_STATE = 208; + PART_SELECTED_EXPR_STATE = 209; + PART_DEFAULT_EXPR_STATE = 210; + PART_BOUND_EXPR_STATE = 211; + PART_BOUND_INCLUSION_EXPR_STATE = 212; + PART_BOUND_OPEN_EXPR_STATE = 213; + PART_LIST_RULE_EXPR_STATE = 214; + PART_LIST_NULL_TEST_EXPR_STATE = 215; + PLANNER_INFO = 216; + PLANNER_GLOBAL = 217; + REL_OPT_INFO = 218; + INDEX_OPT_INFO = 219; + PARAM_PATH_INFO = 220; + PATH = 221; + APPEND_ONLY_PATH = 222; + AOCS_PATH = 223; + EXTERNAL_PATH = 224; + INDEX_PATH = 225; + BITMAP_HEAP_PATH = 226; + BITMAP_AND_PATH = 227; + BITMAP_OR_PATH = 228; + NEST_PATH = 229; + MERGE_PATH = 230; + HASH_PATH = 231; + TID_PATH = 232; + FOREIGN_PATH = 233; + APPEND_PATH = 234; + MERGE_APPEND_PATH = 235; + RESULT_PATH = 236; + MATERIAL_PATH = 237; + UNIQUE_PATH = 238; + PROJECTION_PATH = 239; + EQUIVALENCE_CLASS = 240; + EQUIVALENCE_MEMBER = 241; + PATH_KEY = 242; + RESTRICT_INFO = 243; + PLACE_HOLDER_VAR = 244; + SPECIAL_JOIN_INFO = 245; + LATERAL_JOIN_INFO = 246; + APPEND_REL_INFO = 247; + PLACE_HOLDER_INFO = 248; + MIN_MAX_AGG_INFO = 249; + PARTITION = 250; + PARTITION_RULE = 251; + PARTITION_NODE = 252; + PG_PART_RULE = 253; + SEGFILE_MAP_NODE = 254; + PLANNER_PARAM_ITEM = 255; + CDB_MOTION_PATH = 256; + PARTITION_SELECTOR_PATH = 257; + CDB_REL_COLUMN_INFO = 258; + 
DISTRIBUTION_KEY = 259; + MEMORY_CONTEXT = 260; + ALLOC_SET_CONTEXT = 261; + MEMORY_ACCOUNT = 262; + VALUE = 263; + INTEGER = 264; + FLOAT = 265; + STRING = 266; + BIT_STRING = 267; + NULL_VALUE = 268; + LIST = 269; + INT_LIST = 270; + OID_LIST = 271; + QUERY = 272; + PLANNED_STMT = 273; + INSERT_STMT = 274; + DELETE_STMT = 275; + UPDATE_STMT = 276; + SELECT_STMT = 277; + ALTER_TABLE_STMT = 278; + ALTER_TABLE_CMD = 279; + ALTER_DOMAIN_STMT = 280; + SET_OPERATION_STMT = 281; + GRANT_STMT = 282; + GRANT_ROLE_STMT = 283; + ALTER_DEFAULT_PRIVILEGES_STMT = 284; + CLOSE_PORTAL_STMT = 285; + CLUSTER_STMT = 286; + COPY_STMT = 287; + CREATE_STMT = 288; + SINGLE_ROW_ERROR_DESC = 289; + EXT_TABLE_TYPE_DESC = 290; + CREATE_EXTERNAL_STMT = 291; + DEFINE_STMT = 292; + DROP_STMT = 293; + TRUNCATE_STMT = 294; + COMMENT_STMT = 295; + FETCH_STMT = 296; + INDEX_STMT = 297; + CREATE_FUNCTION_STMT = 298; + ALTER_FUNCTION_STMT = 299; + DO_STMT = 300; + RENAME_STMT = 301; + RULE_STMT = 302; + NOTIFY_STMT = 303; + LISTEN_STMT = 304; + UNLISTEN_STMT = 305; + TRANSACTION_STMT = 306; + VIEW_STMT = 307; + LOAD_STMT = 308; + CREATE_DOMAIN_STMT = 309; + CREATEDB_STMT = 310; + DROPDB_STMT = 311; + VACUUM_STMT = 312; + EXPLAIN_STMT = 313; + CREATE_TABLE_AS_STMT = 314; + CREATE_SEQ_STMT = 315; + ALTER_SEQ_STMT = 316; + VARIABLE_SET_STMT = 317; + VARIABLE_SHOW_STMT = 318; + DISCARD_STMT = 319; + CREATE_TRIG_STMT = 320; + CREATE_P_LANG_STMT = 321; + CREATE_ROLE_STMT = 322; + ALTER_ROLE_STMT = 323; + DROP_ROLE_STMT = 324; + CREATE_QUEUE_STMT = 325; + ALTER_QUEUE_STMT = 326; + DROP_QUEUE_STMT = 327; + CREATE_RESOURCE_GROUP_STMT = 328; + DROP_RESOURCE_GROUP_STMT = 329; + ALTER_RESOURCE_GROUP_STMT = 330; + LOCK_STMT = 331; + CONSTRAINTS_SET_STMT = 332; + REINDEX_STMT = 333; + CHECK_POINT_STMT = 334; + CREATE_SCHEMA_STMT = 335; + ALTER_DATABASE_STMT = 336; + ALTER_DATABASE_SET_STMT = 337; + ALTER_ROLE_SET_STMT = 338; + CREATE_CONVERSION_STMT = 339; + CREATE_CAST_STMT = 340; + CREATE_OP_CLASS_STMT = 341; 
+ CREATE_OP_FAMILY_STMT = 342; + ALTER_OP_FAMILY_STMT = 343; + PREPARE_STMT = 344; + EXECUTE_STMT = 345; + DEALLOCATE_STMT = 346; + DECLARE_CURSOR_STMT = 347; + CREATE_TABLE_SPACE_STMT = 348; + DROP_TABLE_SPACE_STMT = 349; + ALTER_OBJECT_SCHEMA_STMT = 350; + ALTER_OWNER_STMT = 351; + DROP_OWNED_STMT = 352; + REASSIGN_OWNED_STMT = 353; + COMPOSITE_TYPE_STMT = 354; + CREATE_ENUM_STMT = 355; + CREATE_RANGE_STMT = 356; + ALTER_ENUM_STMT = 357; + ALTER_TS_DICTIONARY_STMT = 358; + ALTER_TS_CONFIGURATION_STMT = 359; + CREATE_FDW_STMT = 360; + ALTER_FDW_STMT = 361; + CREATE_FOREIGN_SERVER_STMT = 362; + ALTER_FOREIGN_SERVER_STMT = 363; + CREATE_USER_MAPPING_STMT = 364; + ALTER_USER_MAPPING_STMT = 365; + DROP_USER_MAPPING_STMT = 366; + ALTER_TABLE_SPACE_OPTIONS_STMT = 367; + ALTER_TABLE_MOVE_ALL_STMT = 368; + SEC_LABEL_STMT = 369; + CREATE_FOREIGN_TABLE_STMT = 370; + CREATE_EXTENSION_STMT = 371; + ALTER_EXTENSION_STMT = 372; + ALTER_EXTENSION_CONTENTS_STMT = 373; + CREATE_EVENT_TRIG_STMT = 374; + ALTER_EVENT_TRIG_STMT = 375; + REFRESH_MAT_VIEW_STMT = 376; + REPLICA_IDENTITY_STMT = 377; + ALTER_SYSTEM_STMT = 378; + PARTITION_BY = 379; + PARTITION_ELEM = 380; + PARTITION_RANGE_ITEM = 381; + PARTITION_BOUND_SPEC = 382; + PARTITION_SPEC = 383; + PARTITION_VALUES_SPEC = 384; + ALTER_PARTITION_ID = 385; + ALTER_PARTITION_CMD = 386; + INHERIT_PARTITION_CMD = 387; + CREATE_FILE_SPACE_STMT = 388; + FILE_SPACE_ENTRY = 389; + DROP_FILE_SPACE_STMT = 390; + TABLE_VALUE_EXPR = 391; + DENY_LOGIN_INTERVAL = 392; + DENY_LOGIN_POINT = 393; + ALTER_TYPE_STMT = 394; + SET_DISTRIBUTION_CMD = 395; + EXPAND_STMT_SPEC = 396; + A_EXPR = 397; + COLUMN_REF = 398; + PARAM_REF = 399; + A_CONST = 400; + FUNC_CALL = 401; + A_STAR = 402; + A_INDICES = 403; + A_INDIRECTION = 404; + A_ARRAY_EXPR = 405; + RES_TARGET = 406; + TYPE_CAST = 407; + COLLATE_CLAUSE = 408; + SORT_BY = 409; + WINDOW_DEF = 410; + RANGE_SUBSELECT = 411; + RANGE_FUNCTION = 412; + TYPE_NAME = 413; + COLUMN_DEF = 414; + INDEX_ELEM = 415; + 
CONSTRAINT = 416; + DEF_ELEM = 417; + RANGE_TBL_ENTRY = 418; + RANGE_TBL_FUNCTION = 419; + WITH_CHECK_OPTION = 420; + GROUPING_CLAUSE = 421; + GROUPING_FUNC = 422; + SORT_GROUP_CLAUSE = 423; + WINDOW_CLAUSE = 424; + PRIV_GRANTEE = 425; + FUNC_WITH_ARGS = 426; + ACCESS_PRIV = 427; + CREATE_OP_CLASS_ITEM = 428; + TABLE_LIKE_CLAUSE = 429; + FUNCTION_PARAMETER = 430; + LOCKING_CLAUSE = 431; + ROW_MARK_CLAUSE = 432; + XML_SERIALIZE = 433; + WITH_CLAUSE = 434; + COMMON_TABLE_EXPR = 435; + COLUMN_REFERENCE_STORAGE_DIRECTIVE = 436; + IDENTIFY_SYSTEM_CMD = 437; + BASE_BACKUP_CMD = 438; + CREATE_REPLICATION_SLOT_CMD = 439; + DROP_REPLICATION_SLOT_CMD = 440; + START_REPLICATION_CMD = 441; + TIME_LINE_HISTORY_CMD = 442; + TRIGGER_DATA = 443; + EVENT_TRIGGER_DATA = 444; + RETURN_SET_INFO = 445; + WINDOW_OBJECT_DATA = 446; + TID_BITMAP = 447; + INLINE_CODE_BLOCK = 448; + FDW_ROUTINE = 449; + STREAM_BITMAP = 450; + FORMATTER_DATA = 451; + EXT_PROTOCOL_DATA = 452; + EXT_PROTOCOL_VALIDATOR_DATA = 453; + SELECTED_PARTS = 454; + COOKED_CONSTRAINT = 455; + CDB_EXPLAIN_STAT_HDR = 456; + GP_POLICY = 457; + RETRIEVE_STMT = 458; +} diff --git a/gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto b/gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto new file mode 100644 index 00000000000..0b9e34df49d --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto @@ -0,0 +1,53 @@ +syntax = "proto3"; + +import "google/protobuf/timestamp.proto"; + +import "protos/yagpcc_metrics.proto"; +import "protos/yagpcc_plan.proto"; + +package yagpcc; +option java_outer_classname = "SegmentYAGPCCAS"; +option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/agent_segment;greenplum"; + +service SetQueryInfo { + rpc SetMetricPlanNode (SetPlanNodeReq) returns (MetricResponse) {} + + rpc SetMetricQuery (SetQueryReq) returns (MetricResponse) {} +} + +message MetricResponse { + MetricResponseStatusCode error_code = 1; + string error_text = 2; +} + +enum 
MetricResponseStatusCode { + METRIC_RESPONSE_STATUS_CODE_UNSPECIFIED = 0; + METRIC_RESPONSE_STATUS_CODE_SUCCESS = 1; + METRIC_RESPONSE_STATUS_CODE_ERROR = 2; +} + +message SetQueryReq { + QueryStatus query_status = 1; + google.protobuf.Timestamp datetime = 2; + QueryKey query_key = 3; + SegmentKey segment_key = 4; + QueryInfo query_info = 5; + GPMetrics query_metrics = 6; + repeated MetricPlan plan_tree = 7; + google.protobuf.Timestamp submit_time = 8; + google.protobuf.Timestamp start_time = 9; + google.protobuf.Timestamp end_time = 10; + AdditionalQueryInfo add_info = 11; +} + +message SetPlanNodeReq { + PlanNodeStatus node_status = 1; + google.protobuf.Timestamp datetime = 2; + QueryKey query_key = 3; + SegmentKey segment_key = 4; + GPMetrics node_metrics = 5; + MetricPlan plan_node = 6; + google.protobuf.Timestamp submit_time = 7; + google.protobuf.Timestamp start_time = 8; + google.protobuf.Timestamp end_time = 9; +} diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql new file mode 100644 index 00000000000..f56351e0d43 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql @@ -0,0 +1,85 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; + +-- DECLARE +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; 
+SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- DECLARE WITH HOLD +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- ROLLBACK +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR SELECT 1; +ROLLBACK; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- FETCH +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; +FETCH 1 IN cursor_stats_6; +CLOSE cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql new file mode 100644 index 00000000000..d5519d0cd96 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql @@ -0,0 +1,88 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION yagp_status_order(status text) 
+RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; + +-- Hash distributed table + +CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); +INSERT INTO test_hash_dist SELECT 1; + +SET yagpcc.logging_mode to 'TBL'; +SET optimizer_enable_direct_dispatch TO TRUE; +-- Direct dispatch is used here, only one segment is scanned. +select * from test_hash_dist where id = 1; +RESET optimizer_enable_direct_dispatch; + +RESET yagpcc.logging_mode; +-- Should see 8 rows. +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +SET yagpcc.logging_mode to 'TBL'; + +-- Scan all segments. 
+select * from test_hash_dist; + +DROP TABLE test_hash_dist; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Replicated table +CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ +BEGIN + RETURN NEXT 'seg'; +END; +$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; +INSERT INTO test_replicated SELECT 1; + +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_replicated, force_segments(); +DROP TABLE test_replicated; +DROP FUNCTION force_segments(); + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Partially distributed table (2 numsegments) +SET allow_system_table_mods = ON; +CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); +UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; +INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); + +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_partial_dist; +RESET yagpcc.logging_mode; + +DROP TABLE test_partial_dist; +RESET allow_system_table_mods; +-- Should see 12 rows. 
+SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql new file mode 100644 index 00000000000..9e6de69d61e --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql @@ -0,0 +1,43 @@ +-- +-- Test GUC caching for query lifecycle consistency. +-- +-- The extension logs SUBMIT and DONE events for each query. +-- GUC values that control logging (enable_utility, ignored_users_list, ...) +-- must be cached at SUBMIT time to ensure DONE uses the same filtering +-- criteria. Otherwise, a SET command that modifies these GUCs would +-- have its DONE event rejected, creating orphaned SUBMIT entries. +-- This is due to query being actually executed between SUBMIT and DONE. +-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +SELECT yagpcc.truncate_log(); +-- end_ignore + +CREATE OR REPLACE FUNCTION print_last_query(query text) +RETURNS TABLE(query_status text) AS $$ + SELECT query_status + FROM yagpcc.log + WHERE segid = -1 AND query_text = query + ORDER BY ccnt DESC +$$ LANGUAGE sql; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.logging_mode TO 'TBL'; + +-- SET below disables utility logging and DONE must still be logged. +SET yagpcc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET yagpcc.enable_utility TO FALSE;'); + +-- SELECT below adds current user to ignore list and DONE must still be logged. 
+-- start_ignore +SELECT set_config('yagpcc.ignored_users_list', current_user, false); +-- end_ignore +SELECT * FROM print_last_query('SELECT set_config(''yagpcc.ignored_users_list'', current_user, false);'); + +DROP FUNCTION print_last_query(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql new file mode 100644 index 00000000000..65d867d1680 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql @@ -0,0 +1,29 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. This test is partially taken from +-- gp_locale. 
+ +-- start_ignore +DROP DATABASE IF EXISTS yagp_test_locale; +-- end_ignore + +CREATE DATABASE yagp_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c yagp_test_locale + +CREATE EXTENSION yagp_hooks_collector; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.enable TO TRUE; + +CREATE TABLE yagp_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO yagp_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE yagp_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; + +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +DROP TABLE yagp_hi_안녕세계; +DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql new file mode 100644 index 00000000000..90e972ae4c1 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql @@ -0,0 +1,69 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; + +-- Basic SELECT tests +SET yagpcc.logging_mode to 'TBL'; + +SELECT 1; +SELECT COUNT(*) FROM generate_series(1,10); + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Transaction test +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +SELECT 1; +COMMIT; + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM 
yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- CTE test +SET yagpcc.logging_mode to 'TBL'; + +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Prepared statement test +SET yagpcc.logging_mode to 'TBL'; + +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; +DEALLOCATE test_stmt; + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql new file mode 100644 index 00000000000..3eef697a4e7 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql @@ -0,0 +1,31 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +-- end_ignore + +\set UDS_PATH '/tmp/yagpcc_test.sock' + +-- Configure extension to send via UDS +SET yagpcc.uds_path TO :'UDS_PATH'; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.logging_mode TO 'UDS'; + +-- Start receiver +SELECT yagpcc.__test_uds_start_server(:'UDS_PATH'); + +-- Send +SELECT 1; + +-- Receive +SELECT yagpcc.__test_uds_receive() > 0 as received; + +-- Stop receiver +SELECT yagpcc.__test_uds_stop_server(); + +-- Cleanup +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.uds_path; +RESET yagpcc.ignored_users_list; +RESET yagpcc.enable; +RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql 
b/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql new file mode 100644 index 00000000000..c3053e4af0c --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql @@ -0,0 +1,45 @@ +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) +RETURNS TEXT AS $$ + SELECT query_text + FROM yagpcc.log + WHERE query_text LIKE '%' || marker || '%' + ORDER BY datetime DESC + LIMIT 1 +$$ LANGUAGE sql VOLATILE; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; + +-- Test 1: 1 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test1*/ 'HelloWorld'; +RESET yagpcc.logging_mode; +SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; + +-- Test 2: 2 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test2*/ 'РУССКИЙЯЗЫК'; +RESET yagpcc.logging_mode; +-- Character 'Р' has two bytes and cut in the middle => not included. +SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; + +-- Test 3: 4 byte chars +SET yagpcc.max_text_size to 21; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test3*/ '😀'; +RESET yagpcc.logging_mode; +-- Emoji has 4 bytes and cut before the last byte => not included. 
+SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; + +-- Cleanup +DROP FUNCTION get_marked_query(TEXT); +RESET yagpcc.max_text_size; +RESET yagpcc.logging_mode; +RESET yagpcc.enable; +RESET yagpcc.ignored_users_list; + +DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql new file mode 100644 index 00000000000..cf9c1d253d0 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql @@ -0,0 +1,135 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; + +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE test_table (a int, b text); +CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Partitioning +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +DROP TABLE pt_test; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Views and Functions +SET yagpcc.logging_mode to 'TBL'; + +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE 
FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Transaction Operations +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; + +BEGIN; +SAVEPOINT sp2; +ABORT; + +BEGIN; +ROLLBACK; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- DML Operations +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE dml_test (a int, b text); +INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- COPY Operations +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE copy_test (a int); +COPY (SELECT 1) TO STDOUT; +DROP TABLE copy_test; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Prepared Statements and error during execute +SET yagpcc.logging_mode to 'TBL'; + +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +DEALLOCATE test_prep; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT 
yagpcc.truncate_log() IS NOT NULL AS t; + +-- GUC Settings +SET yagpcc.logging_mode to 'TBL'; + +SET yagpcc.report_nested_queries TO FALSE; +RESET yagpcc.report_nested_queries; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/src/Config.cpp b/gpcontrib/yagp_hooks_collector/src/Config.cpp new file mode 100644 index 00000000000..62c16e91d1f --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/Config.cpp @@ -0,0 +1,177 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * Config.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/Config.cpp + * + *------------------------------------------------------------------------- + */ + +#include "Config.h" +#include "memory/gpdbwrappers.h" +#include +#include +#include +#include + +extern "C" { +#include "postgres.h" +#include "utils/guc.h" +} + +static char *guc_uds_path = nullptr; +static bool guc_enable_analyze = true; +static bool guc_enable_cdbstats = true; +static bool guc_enable_collector = true; +static bool guc_report_nested_queries = true; +static char *guc_ignored_users = nullptr; +static int guc_max_text_size = 1 << 20; // in bytes (1MB) +static int guc_max_plan_size = 1024; // in KB +static int guc_min_analyze_time = 10000; // in ms +static int guc_logging_mode = LOG_MODE_UDS; +static bool guc_enable_utility = false; + +static const struct config_enum_entry logging_mode_options[] = { + {"uds", LOG_MODE_UDS, false /* hidden */}, + {"tbl", LOG_MODE_TBL, false}, + {NULL, 0, false}}; + +static bool ignored_users_guc_dirty = false; + +static void assign_ignored_users_hook(const char *, void *) { + ignored_users_guc_dirty = true; +} + +void Config::init_gucs() { + DefineCustomStringVariable( + "yagpcc.uds_path", "Sets filesystem path of the agent socket", 0LL, + &guc_uds_path, "/tmp/yagpcc_agent.sock", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "yagpcc.enable", "Enable metrics collector", 0LL, &guc_enable_collector, + true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "yagpcc.enable_analyze", "Collect analyze metrics in yagpcc", 0LL, + &guc_enable_analyze, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "yagpcc.enable_cdbstats", "Collect CDB metrics in yagpcc", 0LL, + &guc_enable_cdbstats, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + 
"yagpcc.report_nested_queries", "Collect stats on nested queries", 0LL, + &guc_report_nested_queries, true, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomStringVariable("yagpcc.ignored_users_list", + "Make yagpcc ignore queries issued by given users", + 0LL, &guc_ignored_users, + "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, + assign_ignored_users_hook, 0LL); + + DefineCustomIntVariable( + "yagpcc.max_text_size", + "Make yagpcc trim query texts longer than configured size in bytes", NULL, + &guc_max_text_size, 1 << 20 /* 1MB */, 0, INT_MAX, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); + + DefineCustomIntVariable( + "yagpcc.max_plan_size", + "Make yagpcc trim plan longer than configured size", NULL, + &guc_max_plan_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); + + DefineCustomIntVariable( + "yagpcc.min_analyze_time", + "Sets the minimum execution time above which plans will be logged.", + "Zero prints all plans. 
-1 turns this feature off.",
+      &guc_min_analyze_time, 10000, -1, INT_MAX, PGC_USERSET,
+      GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL);
+
+  DefineCustomEnumVariable(
+      "yagpcc.logging_mode", "Logging mode: UDS or PG Table", NULL,
+      &guc_logging_mode, LOG_MODE_UDS, logging_mode_options, PGC_SUSET,
+      GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_SUPERUSER_ONLY, NULL, NULL,
+      NULL);
+
+  DefineCustomBoolVariable(
+      "yagpcc.enable_utility", "Collect utility statement stats", NULL,
+      &guc_enable_utility, false, PGC_USERSET,
+      GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL);
+}
+
+// Rebuilds the ignored-users set from a comma-separated identifier list.
+// A null or empty list yields an empty set. On a syntax error the problem is
+// logged and the previously built set is left untouched.
+void Config::update_ignored_users(const char *new_guc_ignored_users) {
+  auto new_ignored_users_set = std::make_unique<IgnoredUsers>();
+  if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') {
+    /* Need a modifiable copy of string */
+    char *rawstring = ya_gpdb::pstrdup(new_guc_ignored_users);
+    List *elemlist;
+    ListCell *l;
+
+    /* Parse string into list of identifiers */
+    if (!ya_gpdb::split_identifier_string(rawstring, ',', &elemlist)) {
+      /* syntax error in list */
+      ya_gpdb::pfree(rawstring);
+      ya_gpdb::list_free(elemlist);
+      ereport(
+          LOG,
+          (errcode(ERRCODE_SYNTAX_ERROR),
+           errmsg(
+               "invalid list syntax in parameter yagpcc.ignored_users_list")));
+      return;
+    }
+    /* Names are copied into std::string before rawstring is released */
+    foreach (l, elemlist) {
+      new_ignored_users_set->insert((char *)lfirst(l));
+    }
+    ya_gpdb::pfree(rawstring);
+    ya_gpdb::list_free(elemlist);
+  }
+  ignored_users_ = std::move(new_ignored_users_set);
+}
+
+// Returns true when queries issued by `username` must be skipped.
+// Until the ignored-users set has been built, every user is filtered.
+bool Config::filter_user(const std::string &username) const {
+  if (!ignored_users_) {
+    return true;
+  }
+  return ignored_users_->find(username) != ignored_users_->end();
+}
+
+// Copies the current GUC values into this object. The ignored-users list is
+// re-parsed only when its assign hook has flagged a change.
+void Config::sync() {
+  if (ignored_users_guc_dirty) {
+    update_ignored_users(guc_ignored_users);
+    ignored_users_guc_dirty = false;
+  }
+  // guc_uds_path starts out as nullptr; never build a std::string from it.
+  uds_path_ = guc_uds_path ? guc_uds_path : "";
+  enable_analyze_ = guc_enable_analyze;
+  enable_cdbstats_ = guc_enable_cdbstats;
+  enable_collector_ = guc_enable_collector;
+  enable_utility_ =
guc_enable_utility; + report_nested_queries_ = guc_report_nested_queries; + max_text_size_ = guc_max_text_size; + max_plan_size_ = guc_max_plan_size; + min_analyze_time_ = guc_min_analyze_time; + logging_mode_ = guc_logging_mode; +} diff --git a/gpcontrib/yagp_hooks_collector/src/Config.h b/gpcontrib/yagp_hooks_collector/src/Config.h new file mode 100644 index 00000000000..01ae5ea328e --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/Config.h @@ -0,0 +1,71 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ *
+ * Config.h
+ *
+ * IDENTIFICATION
+ *    gpcontrib/yagp_hooks_collector/src/Config.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_set>
+
+#define LOG_MODE_UDS 0
+#define LOG_MODE_TBL 1
+
+using IgnoredUsers = std::unordered_set<std::string>;
+
+// Snapshot of the yagpcc.* GUCs. Values are refreshed only by sync(), so the
+// getters stay stable between two sync() calls.
+class Config {
+public:
+  // Registers every yagpcc.* GUC.
+  static void init_gucs();
+
+  // Copies the current GUC values into this object.
+  void sync();
+
+  const std::string &uds_path() const { return uds_path_; }
+  bool enable_analyze() const { return enable_analyze_; }
+  bool enable_cdbstats() const { return enable_cdbstats_; }
+  bool enable_collector() const { return enable_collector_; }
+  bool enable_utility() const { return enable_utility_; }
+  bool report_nested_queries() const { return report_nested_queries_; }
+  int max_text_size() const { return max_text_size_; }
+  // Stored in KB, returned in bytes.
+  int max_plan_size() const { return max_plan_size_ * 1024; }
+  int min_analyze_time() const { return min_analyze_time_; }
+  int logging_mode() const { return logging_mode_; }
+  // True when queries of `username` must not be reported.
+  bool filter_user(const std::string &username) const;
+
+private:
+  void update_ignored_users(const char *new_guc_ignored_users);
+
+  // Defaults below mirror the boot values declared in Config.cpp, so the
+  // getters never read indeterminate values before the first sync().
+  std::unique_ptr<IgnoredUsers> ignored_users_;
+  std::string uds_path_;
+  bool enable_analyze_ = true;
+  bool enable_cdbstats_ = true;
+  bool enable_collector_ = true;
+  bool enable_utility_ = false;
+  bool report_nested_queries_ = true;
+  int max_text_size_ = 1 << 20;  // bytes
+  int max_plan_size_ = 1024;     // KB
+  int min_analyze_time_ = 10000; // ms
+  int logging_mode_ = LOG_MODE_UDS;
+};
diff --git a/gpcontrib/yagp_hooks_collector/src/EventSender.cpp b/gpcontrib/yagp_hooks_collector/src/EventSender.cpp
new file mode 100644
index 00000000000..f1cc0cc6ea1
--- /dev/null
+++ b/gpcontrib/yagp_hooks_collector/src/EventSender.cpp
@@ -0,0 +1,531 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * EventSender.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/EventSender.cpp + * + *------------------------------------------------------------------------- + */ + +#include "UDSConnector.h" +#include "memory/gpdbwrappers.h" +#include "log/LogOps.h" + +#define typeid __typeid +extern "C" { +#include "postgres.h" + +#include "executor/executor.h" +#include "utils/elog.h" +#include "utils/guc.h" + +#include "cdb/cdbexplain.h" +#include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" +} +#undef typeid + +#include "EventSender.h" +#include "PgUtils.h" +#include "ProtoUtils.h" + +#define need_collect_analyze() \ + (Gp_role == GP_ROLE_DISPATCH && config.min_analyze_time() >= 0 && \ + config.enable_analyze()) + +bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, + bool utility) { + if (!proto_verified) { + return false; + } + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return false; + } + + switch (state) { + case QueryState::SUBMIT: + // Cache GUCs once at SUBMIT. Synced GUCs are visible to all subsequent + // states. Without caching, a query that unsets/sets filtering GUCs would + // see different filter criteria at DONE, because at SUBMIT the query was + // not executed yet, causing DONE to be skipped/added. 
+    config.sync();
+
+    if (utility && !config.enable_utility()) {
+      return false;
+    }
+
+    // Register qkey for a nested query we won't report,
+    // so we can detect nesting_level > 0 and skip reporting at end/done.
+    if (!need_report_nested_query() && nesting_level > 0) {
+      QueryKey::register_qkey(query_desc, nesting_level);
+      return false;
+    }
+    if (is_top_level_query(query_desc, nesting_level)) {
+      nested_timing = 0;
+      nested_calls = 0;
+    }
+    break;
+  case QueryState::START:
+    if (!qdesc_submitted(query_desc)) {
+      collect_query_submit(query_desc, false /* utility */);
+    }
+    break;
+  case QueryState::DONE:
+    if (utility && !config.enable_utility()) {
+      return false;
+    }
+    break;
+  default:
+    break;
+  }
+
+  if (filter_query(query_desc)) {
+    return false;
+  }
+  if (!nesting_is_valid(query_desc, nesting_level)) {
+    return false;
+  }
+
+  return true;
+}
+
+// Hands the request to the sink selected by yagpcc.logging_mode.
+// Returns true when the caller should clear the big fields of the message;
+// only the UDS path can ask for that, the table path always returns false.
+bool EventSender::log_query_req(const yagpcc::SetQueryReq &req,
+                                const std::string &event, bool utility) {
+  bool clear_big_fields = false;
+  switch (config.logging_mode()) {
+  case LOG_MODE_UDS:
+    clear_big_fields = UDSConnector::report_query(req, event, config);
+    break;
+  case LOG_MODE_TBL:
+    ya_gpdb::insert_log(req, utility);
+    clear_big_fields = false;
+    break;
+  default:
+    Assert(false);
+  }
+  return clear_big_fields;
+}
+
+// Routes a metrics-hook status to the matching collector; `arg` carries the
+// QueryDesc of the query the status refers to.
+void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg,
+                                        bool utility, ErrorData *edata) {
+  auto *query_desc = reinterpret_cast<QueryDesc *>(arg);
+  switch (status) {
+  case METRICS_PLAN_NODE_INITIALIZE:
+  case METRICS_PLAN_NODE_EXECUTING:
+  case METRICS_PLAN_NODE_FINISHED:
+    // TODO
+    break;
+  case METRICS_QUERY_SUBMIT:
+    collect_query_submit(query_desc, utility);
+    break;
+  case METRICS_QUERY_START:
+    // no-op: executor_after_start is enough
+    break;
+  case METRICS_QUERY_CANCELING:
+    // it appears we're only interested in the actual CANCELED event.
+ // for now we will ignore CANCELING state unless otherwise requested from + // end users + break; + case METRICS_QUERY_DONE: + case METRICS_QUERY_ERROR: + case METRICS_QUERY_CANCELED: + case METRICS_INNER_QUERY_DONE: + collect_query_done(query_desc, utility, status, edata); + break; + default: + ereport(FATAL, (errmsg("Unknown query status: %d", status))); + } +} + +void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { + if (!verify_query(query_desc, QueryState::START, false /* utility*/)) { + return; + } + + if (Gp_role == GP_ROLE_DISPATCH && config.enable_analyze() && + (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { + query_desc->instrument_options |= INSTRUMENT_BUFFERS; + query_desc->instrument_options |= INSTRUMENT_ROWS; + query_desc->instrument_options |= INSTRUMENT_TIMER; + if (config.enable_cdbstats()) { + query_desc->instrument_options |= INSTRUMENT_CDB; + if (!query_desc->showstatctx) { + instr_time starttime; + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + ya_gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); + } + } + } +} + +void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { + if (!verify_query(query_desc, QueryState::START, false /* utility */)) { + return; + } + + auto &query = get_query(query_desc); + auto query_msg = query.message.get(); + *query_msg->mutable_start_time() = current_ts(); + update_query_state(query, QueryState::START, false /* utility */); + set_query_plan(query_msg, query_desc, config); + if (need_collect_analyze()) { + // Set up to track total elapsed time during query run. + // Make sure the space is allocated in the per-query + // context so it will go away at executor_end. 
+ if (query_desc->totaltime == NULL) { + MemoryContext oldcxt = + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL, false); + ya_gpdb::mem_ctx_switch_to(oldcxt); + } + } + yagpcc::GPMetrics stats; + std::swap(stats, *query_msg->mutable_query_metrics()); + if (log_query_req(*query_msg, "started", false /* utility */)) { + clear_big_fields(query_msg); + } + std::swap(stats, *query_msg->mutable_query_metrics()); +} + +void EventSender::executor_end(QueryDesc *query_desc) { + if (!verify_query(query_desc, QueryState::END, false /* utility */)) { + return; + } + + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg->mutable_end_time() = current_ts(); + update_query_state(query, QueryState::END, false /* utility */); + if (is_top_level_query(query_desc, nesting_level)) { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, + nested_timing); + } else { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + } + if (log_query_req(*query_msg, "ended", false /* utility */)) { + clear_big_fields(query_msg); + } +} + +void EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) { + if (!verify_query(query_desc, QueryState::SUBMIT, utility)) { + return; + } + + submit_query(query_desc); + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg = create_query_req(yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + *query_msg->mutable_submit_time() = current_ts(); + set_query_info(query_msg); + set_qi_nesting_level(query_msg, nesting_level); + set_qi_slice_id(query_msg); + set_query_text(query_msg, query_desc, config); + if (log_query_req(*query_msg, "submit", utility)) { + clear_big_fields(query_msg); + } + // take initial metrics snapshot so that we can safely take diff afterwards + // in END or DONE events. 
+  set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0);
+#ifdef IC_TEARDOWN_HOOK
+  // same for interconnect statistics
+  ic_metrics_collect();
+  set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(),
+               &ic_statistics);
+#endif
+}
+
+// Fills in the final status of `query` (done / error / cancelled), attaches
+// the error text when there is one, collects execution stats if ExecutorEnd
+// was never reached, and sends the message to the configured sink.
+// `edata` may be NULL (it defaults to NULL in the declaration).
+void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query,
+                                    QueryMetricsStatus status, bool utility,
+                                    ErrorData *edata) {
+  yagpcc::QueryStatus query_status;
+  std::string msg;
+  switch (status) {
+  case METRICS_QUERY_DONE:
+  case METRICS_INNER_QUERY_DONE:
+    query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE;
+    msg = "done";
+    break;
+  case METRICS_QUERY_ERROR:
+    query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR;
+    msg = "error";
+    break;
+  case METRICS_QUERY_CANCELING:
+    // at the moment we don't track this event, but I'll leave this code
+    // here just in case
+    Assert(false);
+    query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING;
+    msg = "cancelling";
+    break;
+  case METRICS_QUERY_CANCELED:
+    query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED;
+    msg = "cancelled";
+    break;
+  default:
+    ereport(FATAL,
+            (errmsg("Unexpected query status in query_done hook: %d", status)));
+  }
+  auto prev_state = query.state;
+  update_query_state(query, QueryState::DONE, utility,
+                     query_status == yagpcc::QueryStatus::QUERY_STATUS_DONE);
+  auto query_msg = query.message.get();
+  query_msg->set_query_status(query_status);
+  if (status == METRICS_QUERY_ERROR) {
+    bool error_flushed = elog_message() == NULL;
+    // edata is optional: without it there is no fallback message to read.
+    if (error_flushed && (edata == NULL || edata->message == NULL)) {
+      ereport(WARNING, (errmsg("YAGPCC missing error message")));
+      ereport(DEBUG3,
+              (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText)));
+    } else {
+      set_qi_error_message(
+          query_msg, error_flushed ? edata->message : elog_message(), config);
+    }
+  }
+  if (prev_state == START) {
+    // We've missed ExecutorEnd call due to query cancel or error. It's
+    // fine, but now we need to collect and report execution stats
+    *query_msg->mutable_end_time() = current_ts();
+    set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls,
+                   nested_timing);
+  }
+#ifdef IC_TEARDOWN_HOOK
+  ic_metrics_collect();
+  set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(),
+               &ic_statistics);
+#endif
+  (void)log_query_req(*query_msg, msg, utility);
+}
+
+// Handles the DONE / ERROR / CANCELED hooks: reports the final state of a
+// previously submitted query, updates nested-query counters and drops the
+// query from the tracking map together with its key.
+void EventSender::collect_query_done(QueryDesc *query_desc, bool utility,
+                                     QueryMetricsStatus status,
+                                     ErrorData *edata) {
+  if (!verify_query(query_desc, QueryState::DONE, utility)) {
+    return;
+  }
+
+  // Skip sending done message if query errored before submit.
+  if (!qdesc_submitted(query_desc)) {
+    if (status != METRICS_QUERY_ERROR) {
+      ereport(WARNING, (errmsg("YAGPCC trying to process DONE hook for "
+                               "unsubmitted and unerrored query")));
+      ereport(DEBUG3,
+              (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText)));
+    }
+    return;
+  }
+
+  if (queries.empty()) {
+    ereport(WARNING, (errmsg("YAGPCC cannot find query to process DONE hook")));
+    ereport(DEBUG3,
+            (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText)));
+    return;
+  }
+  auto &query = get_query(query_desc);
+
+  report_query_done(query_desc, query, status, utility, edata);
+
+  if (need_report_nested_query())
+    update_nested_counters(query_desc);
+
+  queries.erase(QueryKey::from_qdesc(query_desc));
+  // Use the same wrapper as the rest of the extension for backend calls.
+  ya_gpdb::pfree(query_desc->yagp_query_key);
+  query_desc->yagp_query_key = NULL;
+}
+
+void EventSender::ic_metrics_collect() {
+#ifdef IC_TEARDOWN_HOOK
+  if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) {
+    return;
+  }
+  if (!proto_verified || gp_command_count == 0 || !config.enable_collector() ||
+      config.filter_user(get_user_name())) {
+    return;
+  }
+  // we also would like to know nesting level here and filter queries BUT we
+  // don't have this kind of information from this callback.
Will have to + // collect stats anyways and throw it away later, if necessary + auto metrics = UDPIFCGetICStats(); + ic_statistics.totalRecvQueueSize += metrics.totalRecvQueueSize; + ic_statistics.recvQueueSizeCountingTime += metrics.recvQueueSizeCountingTime; + ic_statistics.totalCapacity += metrics.totalCapacity; + ic_statistics.capacityCountingTime += metrics.capacityCountingTime; + ic_statistics.totalBuffers += metrics.totalBuffers; + ic_statistics.bufferCountingTime += metrics.bufferCountingTime; + ic_statistics.activeConnectionsNum += metrics.activeConnectionsNum; + ic_statistics.retransmits += metrics.retransmits; + ic_statistics.startupCachedPktNum += metrics.startupCachedPktNum; + ic_statistics.mismatchNum += metrics.mismatchNum; + ic_statistics.crcErrors += metrics.crcErrors; + ic_statistics.sndPktNum += metrics.sndPktNum; + ic_statistics.recvPktNum += metrics.recvPktNum; + ic_statistics.disorderedPktNum += metrics.disorderedPktNum; + ic_statistics.duplicatedPktNum += metrics.duplicatedPktNum; + ic_statistics.recvAckNum += metrics.recvAckNum; + ic_statistics.statusQueryMsgNum += metrics.statusQueryMsgNum; +#endif +} + +void EventSender::analyze_stats_collect(QueryDesc *query_desc) { + if (!verify_query(query_desc, QueryState::END, false /* utility */)) { + return; + } + if (Gp_role != GP_ROLE_DISPATCH) { + return; + } + if (!query_desc->totaltime || !need_collect_analyze()) { + return; + } + // Make sure stats accumulation is done. + // (Note: it's okay if several levels of hook all do this.) 
+ ya_gpdb::instr_end_loop(query_desc->totaltime); + + double ms = query_desc->totaltime->total * 1000.0; + if (ms >= config.min_analyze_time()) { + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + set_analyze_plan_text(query_desc, query_msg, config); + } +} + +EventSender::EventSender() { + // Perform initial sync to get default GUC values + config.sync(); + + if (config.enable_collector()) { + try { + GOOGLE_PROTOBUF_VERIFY_VERSION; + proto_verified = true; + } catch (const std::exception &e) { + ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); + } + } +#ifdef IC_TEARDOWN_HOOK + memset(&ic_statistics, 0, sizeof(ICStatistics)); +#endif +} + +EventSender::~EventSender() { + for (const auto &[qkey, _] : queries) { + ereport(LOG, (errmsg("YAGPCC query with missing done event: " + "tmid=%d ssid=%d ccnt=%d nlvl=%d", + qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); + } +} + +// That's basically a very simplistic state machine to fix or highlight any bugs +// coming from GP +void EventSender::update_query_state(QueryItem &query, QueryState new_state, + bool utility, bool success) { + switch (new_state) { + case QueryState::SUBMIT: + Assert(false); + break; + case QueryState::START: + if (query.state == QueryState::SUBMIT) { + query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + } else { + Assert(false); + } + break; + case QueryState::END: + // Example of below assert triggering: CURSOR closes before ever being + // executed Assert(query->state == QueryState::START || + // IsAbortInProgress()); + query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + break; + case QueryState::DONE: + Assert(query.state == QueryState::END || !success || utility); + query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + break; + default: + Assert(false); + } + query.state = new_state; +} + +EventSender::QueryItem &EventSender::get_query(QueryDesc *query_desc) { + if 
(!qdesc_submitted(query_desc)) { + ereport(WARNING, + (errmsg("YAGPCC attempting to get query that was not submitted"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + throw std::runtime_error("Attempting to get query that was not submitted"); + } + return queries.find(QueryKey::from_qdesc(query_desc))->second; +} + +void EventSender::submit_query(QueryDesc *query_desc) { + if (query_desc->yagp_query_key) { + ereport(WARNING, + (errmsg("YAGPCC trying to submit already submitted query"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + } + QueryKey::register_qkey(query_desc, nesting_level); + auto key = QueryKey::from_qdesc(query_desc); + auto [_, inserted] = queries.emplace(key, QueryItem(QueryState::SUBMIT)); + if (!inserted) { + ereport(WARNING, (errmsg("YAGPCC duplicate query submit detected"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + } +} + +void EventSender::update_nested_counters(QueryDesc *query_desc) { + if (!is_top_level_query(query_desc, nesting_level)) { + auto &query = get_query(query_desc); + nested_calls++; + double end_time = protots_to_double(query.message->end_time()); + double start_time = protots_to_double(query.message->start_time()); + if (end_time >= start_time) { + nested_timing += end_time - start_time; + } else { + ereport(WARNING, (errmsg("YAGPCC query start_time > end_time (%f > %f)", + start_time, end_time))); + ereport(DEBUG3, + (errmsg("YAGPCC nested query text %s", query_desc->sourceText))); + } + } +} + +bool EventSender::qdesc_submitted(QueryDesc *query_desc) { + if (query_desc->yagp_query_key == NULL) { + return false; + } + return queries.find(QueryKey::from_qdesc(query_desc)) != queries.end(); +} + +bool EventSender::nesting_is_valid(QueryDesc *query_desc, int nesting_level) { + return need_report_nested_query() || + is_top_level_query(query_desc, nesting_level); +} + +bool 
EventSender::need_report_nested_query() { + return config.report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; +} + +bool EventSender::filter_query(QueryDesc *query_desc) { + return gp_command_count == 0 || query_desc->sourceText == nullptr || + !config.enable_collector() || config.filter_user(get_user_name()); +} + +EventSender::QueryItem::QueryItem(QueryState st) + : message(std::make_unique()), state(st) {} diff --git a/gpcontrib/yagp_hooks_collector/src/EventSender.h b/gpcontrib/yagp_hooks_collector/src/EventSender.h new file mode 100644 index 00000000000..ef7dcb0bf8c --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/EventSender.h @@ -0,0 +1,168 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * EventSender.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/EventSender.h + * + *------------------------------------------------------------------------- + */ + +#pragma once + +#include +#include +#include + +#define typeid __typeid +extern "C" { +#include "utils/metrics_utils.h" +#ifdef IC_TEARDOWN_HOOK +#include "cdb/ic_udpifc.h" +#endif +} +#undef typeid + +#include "memory/gpdbwrappers.h" +#include "Config.h" + +class UDSConnector; +struct QueryDesc; +namespace yagpcc { +class SetQueryReq; +} + +#include + +extern void gp_gettmid(int32 *); + +struct QueryKey { + int tmid; + int ssid; + int ccnt; + int nesting_level; + uintptr_t query_desc_addr; + + bool operator==(const QueryKey &other) const { + return std::tie(tmid, ssid, ccnt, nesting_level, query_desc_addr) == + std::tie(other.tmid, other.ssid, other.ccnt, other.nesting_level, + other.query_desc_addr); + } + + static void register_qkey(QueryDesc *query_desc, size_t nesting_level) { + query_desc->yagp_query_key = + (YagpQueryKey *)ya_gpdb::palloc0(sizeof(YagpQueryKey)); + int32 tmid; + gp_gettmid(&tmid); + query_desc->yagp_query_key->tmid = tmid; + query_desc->yagp_query_key->ssid = gp_session_id; + query_desc->yagp_query_key->ccnt = gp_command_count; + query_desc->yagp_query_key->nesting_level = nesting_level; + query_desc->yagp_query_key->query_desc_addr = (uintptr_t)query_desc; + } + + static QueryKey from_qdesc(QueryDesc *query_desc) { + return { + .tmid = query_desc->yagp_query_key->tmid, + .ssid = query_desc->yagp_query_key->ssid, + .ccnt = query_desc->yagp_query_key->ccnt, + .nesting_level = query_desc->yagp_query_key->nesting_level, + .query_desc_addr = query_desc->yagp_query_key->query_desc_addr, + }; + } +}; + +// https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html +template inline void hash_combine(std::size_t &seed, const T &v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +namespace std { +template 
// Declares the object that receives executor / metrics hook callbacks and
// keeps per-query bookkeeping. Only declarations are visible here; the member
// functions are defined in a separate translation unit.
//
// NOTE(review): in this copy of the header the template arguments of
// std::unique_ptr and std::unordered_map are missing. They are presumably
// std::unique_ptr<yagpcc::SetQueryReq> and
// std::unordered_map<QueryKey, QueryItem> -- confirm against the repository.
class EventSender {
public:
  // Hook entry points, one per executor / metrics event.
  void executor_before_start(QueryDesc *query_desc, int eflags);
  void executor_after_start(QueryDesc *query_desc, int eflags);
  void executor_end(QueryDesc *query_desc);
  // `arg` is an untyped pointer; its meaning per status is not visible here.
  void query_metrics_collect(QueryMetricsStatus status, void *arg, bool utility,
                             ErrorData *edata = NULL);
  void ic_metrics_collect();
  void analyze_stats_collect(QueryDesc *query_desc);
  // Raise / lower the current nesting depth counter by one.
  void incr_depth() { nesting_level++; }
  void decr_depth() { nesting_level--; }
  EventSender();
  ~EventSender();

private:
  // States a tracked query can be in (presumably in lifecycle order -- confirm).
  enum QueryState { SUBMIT, START, END, DONE };

  // One tracked query: the message being built for it plus its current state.
  struct QueryItem {
    std::unique_ptr message;
    QueryState state;

    explicit QueryItem(QueryState st);
  };

  bool log_query_req(const yagpcc::SetQueryReq &req, const std::string &event,
                     bool utility);
  bool verify_query(QueryDesc *query_desc, QueryState state, bool utility);
  void update_query_state(QueryItem &query, QueryState new_state, bool utility,
                          bool success = true);
  QueryItem &get_query(QueryDesc *query_desc);
  void submit_query(QueryDesc *query_desc);
  void collect_query_submit(QueryDesc *query_desc, bool utility);
  void report_query_done(QueryDesc *query_desc, QueryItem &query,
                         QueryMetricsStatus status, bool utility,
                         ErrorData *edata = NULL);
  void collect_query_done(QueryDesc *query_desc, bool utility,
                          QueryMetricsStatus status, ErrorData *edata = NULL);
  void update_nested_counters(QueryDesc *query_desc);
  bool qdesc_submitted(QueryDesc *query_desc);
  bool nesting_is_valid(QueryDesc *query_desc, int nesting_level);
  // True only when nested-query reporting is enabled in the configuration and
  // the process runs with Gp_role == GP_ROLE_DISPATCH.
  bool need_report_nested_query();
  // True when there is no command count yet, the query has no source text, the
  // collector is disabled, or the user filter matches the current user.
  bool filter_query(QueryDesc *query_desc);

  bool proto_verified = false;
  // Current nesting depth, maintained through incr_depth() / decr_depth().
  int nesting_level = 0;
  int64_t nested_calls = 0;
  double nested_timing = 0;
#ifdef IC_TEARDOWN_HOOK
  // Only present when the build defines IC_TEARDOWN_HOOK.
  ICStatistics ic_statistics;
#endif
  // Tracked queries (presumably keyed by QueryKey -- see the note above).
  std::unordered_map queries;

  Config config;
};
std::string(username) : ""; +} + +std::string get_db_name() { + char *dbname = ya_gpdb::get_database_name(MyDatabaseId); + if (dbname) { + std::string result(dbname); + ya_gpdb::pfree(dbname); + return result; + } + return ""; +} + +std::string get_rg_name() { + auto groupId = ya_gpdb::get_rg_id_by_session_id(MySessionState->sessionId); + if (!OidIsValid(groupId)) + return ""; + + char *rgname = ya_gpdb::get_rg_name_for_id(groupId); + if (rgname == nullptr) + return ""; + + std::string result(rgname); + ya_gpdb::pfree(rgname); + return result; +} + +/** + * Things get tricky with nested queries. + * a) A nested query on master is a real query optimized and executed from + * master. An example would be `select some_insert_function();`, where + * some_insert_function does something like `insert into tbl values (1)`. Master + * will create two statements. Outer select statement and inner insert statement + * with nesting level 1. + * For segments both statements are top-level statements with nesting level 0. + * b) A nested query on segment is something executed as sub-statement on + * segment. An example would be `select a from tbl where is_good_value(b);`. In + * this case master will issue one top-level statement, but segments will change + * contexts for UDF execution and execute is_good_value(b) once for each tuple + * as a nested query. Creating massive load on gpcc agent. + * + * Hence, here is a decision: + * 1) ignore all queries that are nested on segments + * 2) record (if enabled) all queries that are nested on master + * NODE: The truth is, we can't really ignore nested master queries, because + * segment sees those as top-level. 
+ */ + +bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { + if (query_desc->yagp_query_key == NULL) { + return nesting_level == 0; + } + return query_desc->yagp_query_key->nesting_level == 0; +} diff --git a/gpcontrib/yagp_hooks_collector/src/PgUtils.h b/gpcontrib/yagp_hooks_collector/src/PgUtils.h new file mode 100644 index 00000000000..5113fadbff2 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/PgUtils.h @@ -0,0 +1,38 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * PgUtils.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/PgUtils.h + * + *------------------------------------------------------------------------- + */ + +extern "C" { +#include "postgres.h" +#include "commands/explain.h" +} + +#include + +std::string get_user_name(); +std::string get_db_name(); +std::string get_rg_name(); +bool is_top_level_query(QueryDesc *query_desc, int nesting_level); diff --git a/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp b/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp new file mode 100644 index 00000000000..72a12e8ca00 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp @@ -0,0 +1,125 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProcStats.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProcStats.cpp + * + *------------------------------------------------------------------------- + */ + +#include "ProcStats.h" +#include "yagpcc_metrics.pb.h" +#include +#include +#include + +extern "C" { +#include "postgres.h" +#include "utils/elog.h" +} + +namespace { +#define FILL_IO_STAT(stat_name) \ + uint64_t stat_name; \ + proc_stat >> tmp >> stat_name; \ + stats->set_##stat_name(stat_name - stats->stat_name()); + +void fill_io_stats(yagpcc::SystemStat *stats) { + std::ifstream proc_stat("/proc/self/io"); + std::string tmp; + FILL_IO_STAT(rchar); + FILL_IO_STAT(wchar); + FILL_IO_STAT(syscr); + FILL_IO_STAT(syscw); + FILL_IO_STAT(read_bytes); + FILL_IO_STAT(write_bytes); + FILL_IO_STAT(cancelled_write_bytes); +} + +void fill_cpu_stats(yagpcc::SystemStat *stats) { + static const int UTIME_ID = 13; + static const int STIME_ID = 14; + static const int VSIZE_ID = 22; + static const int RSS_ID = 23; + static const double tps = sysconf(_SC_CLK_TCK); + + std::ifstream proc_stat("/proc/self/stat"); + std::string trash; + for (int i = 0; i <= RSS_ID; ++i) { + switch (i) { + case UTIME_ID: + double utime; + proc_stat >> utime; + stats->set_usertimeseconds(utime / tps - stats->usertimeseconds()); + break; + case STIME_ID: + double stime; + proc_stat >> stime; + stats->set_kerneltimeseconds(stime / tps - stats->kerneltimeseconds()); + break; + case VSIZE_ID: + uint64_t vsize; + proc_stat >> vsize; + stats->set_vsize(vsize); + break; + case RSS_ID: + uint64_t rss; + proc_stat >> rss; + // NOTE: this is a double AFAIU, need to double-check + stats->set_rss(rss); + break; + default: + proc_stat >> trash; + } + } +} + +void fill_status_stats(yagpcc::SystemStat *stats) { + std::ifstream proc_stat("/proc/self/status"); + std::string key, measure; + while (proc_stat >> key) { + if (key == "VmPeak:") { + uint64_t value; + proc_stat >> value; + stats->set_vmpeakkb(value); + proc_stat >> measure; + 
if (measure != "kB") { + throw std::runtime_error("Expected memory sizes in kB, but got in " + + measure); + } + } else if (key == "VmSize:") { + uint64_t value; + proc_stat >> value; + stats->set_vmsizekb(value); + if (measure != "kB") { + throw std::runtime_error("Expected memory sizes in kB, but got in " + + measure); + } + } + } +} +} // namespace + +void fill_self_stats(yagpcc::SystemStat *stats) { + fill_io_stats(stats); + fill_cpu_stats(stats); + fill_status_stats(stats); +} \ No newline at end of file diff --git a/gpcontrib/yagp_hooks_collector/src/ProcStats.h b/gpcontrib/yagp_hooks_collector/src/ProcStats.h new file mode 100644 index 00000000000..7629edd0aea --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/ProcStats.h @@ -0,0 +1,34 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProcStats.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProcStats.h + * + *------------------------------------------------------------------------- + */ + +#pragma once + +namespace yagpcc { +class SystemStat; +} + +void fill_self_stats(yagpcc::SystemStat *stats); \ No newline at end of file diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp new file mode 100644 index 00000000000..b449ae20900 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp @@ -0,0 +1,317 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProtoUtils.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp + * + *------------------------------------------------------------------------- + */ + +#include "ProtoUtils.h" +#include "PgUtils.h" +#include "ProcStats.h" +#include "Config.h" +#include "memory/gpdbwrappers.h" + +#define typeid __typeid +#define operator __operator +extern "C" { +#include "postgres.h" +#include "access/hash.h" +#include "access/xact.h" +#include "cdb/cdbinterconnect.h" +#include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" +#ifdef IC_TEARDOWN_HOOK +#include "cdb/ic_udpifc.h" +#endif +#include "utils/workfile_mgr.h" +} +#undef typeid +#undef operator + +#include +#include + +extern void gp_gettmid(int32 *); + +namespace { +constexpr uint8_t UTF8_CONTINUATION_BYTE_MASK = (1 << 7) | (1 << 6); +constexpr uint8_t UTF8_CONTINUATION_BYTE = (1 << 7); +constexpr uint8_t UTF8_MAX_SYMBOL_BYTES = 4; + +// Returns true if byte is the starting byte of utf8 +// character, false if byte is the continuation (10xxxxxx). 
+inline bool utf8_start_byte(uint8_t byte) { + return (byte & UTF8_CONTINUATION_BYTE_MASK) != UTF8_CONTINUATION_BYTE; +} +} // namespace + +google::protobuf::Timestamp current_ts() { + google::protobuf::Timestamp current_ts; + struct timeval tv; + gettimeofday(&tv, nullptr); + current_ts.set_seconds(tv.tv_sec); + current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); + return current_ts; +} + +void set_query_key(yagpcc::QueryKey *key) { + key->set_ccnt(gp_command_count); + key->set_ssid(gp_session_id); + int32 tmid = 0; + gp_gettmid(&tmid); + key->set_tmid(tmid); +} + +void set_segment_key(yagpcc::SegmentKey *key) { + key->set_dbid(GpIdentity.dbid); + key->set_segindex(GpIdentity.segindex); +} + +std::string trim_str_shrink_utf8(const char *str, size_t len, size_t lim) { + if (unlikely(str == nullptr)) { + return std::string(); + } + if (likely(len <= lim || GetDatabaseEncoding() != PG_UTF8)) { + return std::string(str, std::min(len, lim)); + } + + // Handle trimming of utf8 correctly, do not cut multi-byte characters. + size_t cut_pos = lim; + size_t visited_bytes = 1; + while (visited_bytes < UTF8_MAX_SYMBOL_BYTES && cut_pos > 0) { + if (utf8_start_byte(static_cast(str[cut_pos]))) { + break; + } + ++visited_bytes; + --cut_pos; + } + + return std::string(str, cut_pos); +} + +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config) { + if (Gp_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { + auto qi = req->mutable_query_info(); + qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + MemoryContext oldcxt = + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = ya_gpdb::get_explain_state(query_desc, true); + if (es.str) { + *qi->mutable_plan_text() = trim_str_shrink_utf8(es.str->data, es.str->len, + config.max_plan_size()); + StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); + if (norm_plan) { + *qi->mutable_template_plan_text() = trim_str_shrink_utf8( + norm_plan->data, norm_plan->len, config.max_plan_size()); + qi->set_plan_id( + hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + ya_gpdb::pfree(norm_plan->data); + } + qi->set_query_id(query_desc->plannedstmt->queryId); + ya_gpdb::pfree(es.str->data); + } + ya_gpdb::mem_ctx_switch_to(oldcxt); + } +} + +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config) { + if (Gp_role == GP_ROLE_DISPATCH && query_desc->sourceText) { + auto qi = req->mutable_query_info(); + *qi->mutable_query_text() = trim_str_shrink_utf8( + query_desc->sourceText, strlen(query_desc->sourceText), + config.max_text_size()); + char *norm_query = ya_gpdb::gen_normquery(query_desc->sourceText); + if (norm_query) { + *qi->mutable_template_query_text() = trim_str_shrink_utf8( + norm_query, strlen(norm_query), config.max_text_size()); + ya_gpdb::pfree(norm_query); + } + } +} + +void clear_big_fields(yagpcc::SetQueryReq *req) { + if (Gp_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); + qi->clear_plan_text(); + qi->clear_template_plan_text(); + qi->clear_query_text(); + qi->clear_template_query_text(); + qi->clear_analyze_text(); + } +} + +void set_query_info(yagpcc::SetQueryReq *req) { + if (Gp_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); + qi->set_username(get_user_name()); + if (IsTransactionState()) + qi->set_databasename(get_db_name()); + qi->set_rsgname(get_rg_name()); + } +} + +void 
set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { + auto aqi = req->mutable_add_info(); + aqi->set_nested_level(nesting_level); +} + +void set_qi_slice_id(yagpcc::SetQueryReq *req) { + auto aqi = req->mutable_add_info(); + aqi->set_slice_id(currentSliceId); +} + +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg, + const Config &config) { + auto aqi = req->mutable_add_info(); + *aqi->mutable_error_message() = + trim_str_shrink_utf8(err_msg, strlen(err_msg), config.max_text_size()); +} + +void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, + QueryDesc *query_desc, int nested_calls, + double nested_time) { + auto instrument = query_desc->planstate->instrument; + if (instrument) { + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time( + INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); + } + if (query_desc->estate && query_desc->estate->motionlayer_context) { + MotionLayerState *mlstate = + (MotionLayerState 
*)query_desc->estate->motionlayer_context; + metrics->mutable_sent()->set_total_bytes(mlstate->stat_total_bytes_sent); + metrics->mutable_sent()->set_tuple_bytes(mlstate->stat_tuple_bytes_sent); + metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); + metrics->mutable_received()->set_total_bytes( + mlstate->stat_total_bytes_recvd); + metrics->mutable_received()->set_tuple_bytes( + mlstate->stat_tuple_bytes_recvd); + metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); + } + metrics->set_inherited_calls(nested_calls); + metrics->set_inherited_time(nested_time); +} + +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time) { + if (query_desc->planstate && query_desc->planstate->instrument) { + set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc, + nested_calls, nested_time); + } + fill_self_stats(metrics->mutable_systemstat()); + metrics->mutable_systemstat()->set_runningtimeseconds( + time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); + metrics->mutable_spill()->set_filecount( + WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); + metrics->mutable_spill()->set_totalbytes( + WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); +} + +#define UPDATE_IC_STATS(proto_name, stat_name) \ + metrics->mutable_interconnect()->set_##proto_name( \ + ic_statistics->stat_name - \ + metrics->mutable_interconnect()->proto_name()); \ + Assert(metrics->mutable_interconnect()->proto_name() >= 0 && \ + metrics->mutable_interconnect()->proto_name() <= \ + ic_statistics->stat_name) + +void set_ic_stats(yagpcc::MetricInstrumentation *metrics, + const ICStatistics *ic_statistics) { +#ifdef IC_TEARDOWN_HOOK + UPDATE_IC_STATS(total_recv_queue_size, totalRecvQueueSize); + UPDATE_IC_STATS(recv_queue_size_counting_time, recvQueueSizeCountingTime); + UPDATE_IC_STATS(total_capacity, totalCapacity); + 
UPDATE_IC_STATS(capacity_counting_time, capacityCountingTime); + UPDATE_IC_STATS(total_buffers, totalBuffers); + UPDATE_IC_STATS(buffer_counting_time, bufferCountingTime); + UPDATE_IC_STATS(active_connections_num, activeConnectionsNum); + UPDATE_IC_STATS(retransmits, retransmits); + UPDATE_IC_STATS(startup_cached_pkt_num, startupCachedPktNum); + UPDATE_IC_STATS(mismatch_num, mismatchNum); + UPDATE_IC_STATS(crc_errors, crcErrors); + UPDATE_IC_STATS(snd_pkt_num, sndPktNum); + UPDATE_IC_STATS(recv_pkt_num, recvPktNum); + UPDATE_IC_STATS(disordered_pkt_num, disorderedPktNum); + UPDATE_IC_STATS(duplicated_pkt_num, duplicatedPktNum); + UPDATE_IC_STATS(recv_ack_num, recvAckNum); + UPDATE_IC_STATS(status_query_msg_num, statusQueryMsgNum); +#endif +} + +yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status) { + yagpcc::SetQueryReq req; + req.set_query_status(status); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key()); + set_segment_key(req.mutable_segment_key()); + return req; +} + +double protots_to_double(const google::protobuf::Timestamp &ts) { + return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; +} + +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req, + const Config &config) { + // Make sure it is a valid txn and it is not an utility + // statement for ExplainPrintPlan() later. + if (!IsTransactionState() || !query_desc->plannedstmt) { + return; + } + MemoryContext oldcxt = + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = ya_gpdb::get_analyze_state( + query_desc, query_desc->instrument_options && config.enable_analyze()); + ya_gpdb::mem_ctx_switch_to(oldcxt); + if (es.str) { + // Remove last line break. 
+ if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { + es.str->data[--es.str->len] = '\0'; + } + auto trimmed_analyze = + trim_str_shrink_utf8(es.str->data, es.str->len, config.max_plan_size()); + req->mutable_query_info()->set_analyze_text(trimmed_analyze); + ya_gpdb::pfree(es.str->data); + } +} diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h new file mode 100644 index 00000000000..c954545494f --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h @@ -0,0 +1,54 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProtoUtils.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProtoUtils.h + * + *------------------------------------------------------------------------- + */ + +#pragma once + +#include "protos/yagpcc_set_service.pb.h" + +struct QueryDesc; +struct ICStatistics; +class Config; + +google::protobuf::Timestamp current_ts(); +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config); +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config); +void clear_big_fields(yagpcc::SetQueryReq *req); +void set_query_info(yagpcc::SetQueryReq *req); +void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level); +void set_qi_slice_id(yagpcc::SetQueryReq *req); +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg, + const Config &config); +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time); +void set_ic_stats(yagpcc::MetricInstrumentation *metrics, + const ICStatistics *ic_statistics); +yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); +double protots_to_double(const google::protobuf::Timestamp &ts); +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *message, + const Config &config); diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp new file mode 100644 index 00000000000..d13a82a5ca9 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp @@ -0,0 +1,130 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
// Sends one serialized SetQueryReq to the Unix domain socket at
// config.uds_path().
//
// The wire format is a length prefix of sizeof(uint32_t) bytes holding the
// serialized size, written by a direct store into the buffer (so in host byte
// order), followed by the serialized message.
//
// Returns true after the send loop ends without a negative send() result, and
// false on any earlier failure. Every outcome is also recorded via YagpStat.
// `event` is used only in the failure log line.
//
// NOTE(review): in this copy the template arguments of the casts below are
// missing (presumably sockaddr *, and a byte pointer / uint32_t * for the
// buffer) -- confirm against the repository.
// NOTE(review): if send() ever returned 0 before the whole buffer was written,
// the loop would stop and the call would still be reported as a successful
// send -- confirm whether that is intended.
bool UDSConnector::report_query(const yagpcc::SetQueryReq &req,
                                const std::string &event,
                                const Config &config) {
  sockaddr_un address{};
  address.sun_family = AF_UNIX;
  const auto &uds_path = config.uds_path();

  // Reject paths that would not fit sun_path together with its terminating
  // NUL; this is what makes the strcpy below safe.
  if (uds_path.size() >= sizeof(address.sun_path)) {
    ereport(WARNING, (errmsg("UDS path is too long for socket buffer")));
    YagpStat::report_error();
    return false;
  }
  strcpy(address.sun_path, uds_path.c_str());

  const auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
  if (sockfd == -1) {
    // log_tracing_failure formats errno via %m, so it must run right after the
    // failing call.
    log_tracing_failure(req, event);
    YagpStat::report_error();
    return false;
  }

  // Closes the socket on every exit path from here on, success included.
  struct SockGuard {
    int fd;
    ~SockGuard() { close(fd); }
  } sock_guard{sockfd};

  if (fcntl(sockfd, F_SETFL, O_NONBLOCK) == -1) {
    // That's a very important error that should never happen, so make it
    // visible to an end-user and admins.
    ereport(WARNING,
            (errmsg("Unable to create non-blocking socket connection %m")));
    YagpStat::report_error();
    return false;
  }

  if (connect(sockfd, reinterpret_cast(&address),
              sizeof(address)) == -1) {
    log_tracing_failure(req, event);
    YagpStat::report_bad_connection();
    return false;
  }

  // ByteSizeLong() is called before SerializeWithCachedSizesToArray(), which
  // relies on the sizes cached by it.
  const auto data_size = req.ByteSizeLong();
  const auto total_size = data_size + sizeof(uint32_t);
  auto *buf = static_cast(ya_gpdb::palloc(total_size));
  // Frees the buffer on every exit path from here on, success included.
  struct BufGuard {
    void *p;
    ~BufGuard() { ya_gpdb::pfree(p); }
  } buf_guard{buf};

  // Length prefix first, payload right after it.
  *reinterpret_cast(buf) = data_size;
  req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t));

  // Keep sending until everything is written or send() stops making progress.
  int64_t sent = 0, sent_total = 0;
  do {
    sent =
        send(sockfd, buf + sent_total, total_size - sent_total, MSG_DONTWAIT);
    if (sent > 0)
      sent_total += sent;
  } while (sent > 0 && size_t(sent_total) != total_size &&
           // the line below is a small throttling hack:
           // if a message does not fit a single packet, we take a nap
           // before sending the next one.
           // Otherwise, MSG_DONTWAIT send might overflow the UDS
           (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true));

  // A negative result ends the loop immediately (no sleep in between), so
  // errno still belongs to the failed send() when it is logged.
  if (sent < 0) {
    log_tracing_failure(req, event);
    YagpStat::report_bad_send(total_size);
    return false;
  }

  YagpStat::report_send(total_size);
  return true;
}
+ * + * UDSConnector.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/UDSConnector.h + * + *------------------------------------------------------------------------- + */ + +#pragma once + +#include "protos/yagpcc_set_service.pb.h" + +class Config; + +class UDSConnector { +public: + bool static report_query(const yagpcc::SetQueryReq &req, + const std::string &event, const Config &config); +}; diff --git a/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp b/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp new file mode 100644 index 00000000000..3a760b6ea97 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp @@ -0,0 +1,118 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ *
+ * YagpStat.cpp
+ *
+ * IDENTIFICATION
+ *    gpcontrib/yagp_hooks_collector/src/YagpStat.cpp
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "YagpStat.h"
+
+#include <algorithm>
+
+extern "C" {
+#include "postgres.h"
+#include "miscadmin.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/shmem.h"
+#include "storage/spin.h"
+}
+
+namespace {
+/* Delivery counters together with the spinlock that serializes access. */
+struct ProtectedData {
+  slock_t mutex;
+  YagpStat::Data data;
+};
+shmem_startup_hook_type prev_shmem_startup_hook = NULL;
+/* Stays nullptr unless yagp_shmem_startup() has run (see YagpStat::init). */
+ProtectedData *data = nullptr;
+
+/*
+ * shmem_startup_hook: chain to the previous hook, then attach to (or create
+ * and zero) the "yagp_stat_messages" shared-memory struct.
+ */
+void yagp_shmem_startup() {
+  if (prev_shmem_startup_hook)
+    prev_shmem_startup_hook();
+  LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+  bool found;
+  data = reinterpret_cast<ProtectedData *>(
+      ShmemInitStruct("yagp_stat_messages", sizeof(ProtectedData), &found));
+  if (!found) {
+    SpinLockInit(&data->mutex);
+    data->data = YagpStat::Data();
+  }
+  LWLockRelease(AddinShmemInitLock);
+}
+
+/* Scoped spinlock holder: acquires on construction, releases on scope exit. */
+class LockGuard {
+public:
+  explicit LockGuard(slock_t *mutex) : mutex_(mutex) {
+    SpinLockAcquire(mutex_);
+  }
+  ~LockGuard() { SpinLockRelease(mutex_); }
+
+  /* Copying would release the same spinlock twice. */
+  LockGuard(const LockGuard &) = delete;
+  LockGuard &operator=(const LockGuard &) = delete;
+
+private:
+  slock_t *mutex_;
+};
+} // namespace
+
+/*
+ * Reserve shared memory and install the startup hook. This is only possible
+ * while shared_preload_libraries is being processed; otherwise nothing is
+ * set up and loaded() keeps returning false.
+ */
+void YagpStat::init() {
+  if (!process_shared_preload_libraries_in_progress)
+    return;
+  RequestAddinShmemSpace(sizeof(ProtectedData));
+  prev_shmem_startup_hook = shmem_startup_hook;
+  shmem_startup_hook = yagp_shmem_startup;
+}
+
+void YagpStat::deinit() { shmem_startup_hook = prev_shmem_startup_hook; }
+
+/*
+ * All accessors below are no-ops (get_stats() returns zeroed counters) when
+ * the shared-memory block was never attached. Without that guard a library
+ * loaded outside shared_preload_libraries would dereference a null pointer
+ * on the first reported event.
+ */
+
+void YagpStat::reset() {
+  if (!loaded())
+    return;
+  LockGuard lg(&data->mutex);
+  data->data = YagpStat::Data();
+}
+
+void YagpStat::report_send(int32_t msg_size) {
+  if (!loaded())
+    return;
+  LockGuard lg(&data->mutex);
+  data->data.total++;
+  data->data.max_message_size = std::max(msg_size, data->data.max_message_size);
+}
+
+void YagpStat::report_bad_connection() {
+  if (!loaded())
+    return;
+  LockGuard lg(&data->mutex);
+  data->data.total++;
+  data->data.failed_connects++;
+}
+
+void YagpStat::report_bad_send(int32_t msg_size) {
+  if (!loaded())
+    return;
+  LockGuard lg(&data->mutex);
+  data->data.total++;
+  data->data.failed_sends++;
+  data->data.max_message_size = std::max(msg_size, data->data.max_message_size);
+}
+
+void YagpStat::report_error() {
+  if (!loaded())
+    return;
+  LockGuard lg(&data->mutex);
+  data->data.total++;
+  data->data.failed_other++;
+}
+
+YagpStat::Data YagpStat::get_stats() {
+  if (!loaded())
+    return YagpStat::Data();
+  LockGuard lg(&data->mutex);
+  return data->data;
+}
+
+bool YagpStat::loaded() { return data != nullptr; }
diff --git a/gpcontrib/yagp_hooks_collector/src/YagpStat.h b/gpcontrib/yagp_hooks_collector/src/YagpStat.h
new file mode 100644
index 00000000000..57fc90cd4d1
--- /dev/null
+++ b/gpcontrib/yagp_hooks_collector/src/YagpStat.h
@@ -0,0 +1,48 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * YagpStat.h
+ *
+ * IDENTIFICATION
+ *    gpcontrib/yagp_hooks_collector/src/YagpStat.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#pragma once
+
+#include <cstdint>
+
+/* Counters describing how event delivery has been going. */
+class YagpStat {
+public:
+  struct Data {
+    /* Bumped by every report_*() call, successful or not. */
+    int64_t total;
+    /* Bumped by report_bad_send(). */
+    int64_t failed_sends;
+    /* Bumped by report_bad_connection(). */
+    int64_t failed_connects;
+    /* Bumped by report_error(). */
+    int64_t failed_other;
+    /* Largest msg_size passed to report_send() / report_bad_send(). */
+    int32_t max_message_size;
+  };
+
+  /* Register / unregister the shared-memory startup hook. */
+  static void init();
+  static void deinit();
+  /* Zero all counters. */
+  static void reset();
+  static void report_send(int32_t msg_size);
+  static void report_bad_connection();
+  static void report_bad_send(int32_t msg_size);
+  static void report_error();
+  /* Snapshot of the current counters. */
+  static Data get_stats();
+  /* True once the shared-memory block has been attached. */
+  static bool loaded();
+};
diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp
new file mode 100644
index 00000000000..cb4970d60d9
--- /dev/null
+++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp
@@ -0,0 +1,414 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * + * hook_wrappers.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp + * + *------------------------------------------------------------------------- + */ + +#define typeid __typeid +extern "C" { +#include "postgres.h" +#include "funcapi.h" +#include "executor/executor.h" +#include "executor/execUtils.h" +#include "utils/elog.h" +#include "utils/builtins.h" +#include "utils/metrics_utils.h" +#include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" +#include "tcop/utility.h" +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" + +#include +#include +#include +#include +#include +} +#undef typeid + +#include "Config.h" +#include "YagpStat.h" +#include "EventSender.h" +#include "hook_wrappers.h" +#include "memory/gpdbwrappers.h" + +static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; +static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; +static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; +static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; +static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; +#ifdef ANALYZE_STATS_COLLECT_HOOK +static analyze_stats_collect_hook_type previous_analyze_stats_collect_hook = + nullptr; +#endif +#ifdef IC_TEARDOWN_HOOK +static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; +#endif +static ProcessUtility_hook_type previous_ProcessUtility_hook = nullptr; + +static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); +static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, + uint64 count, bool execute_once); +static void ya_ExecutorFinish_hook(QueryDesc *query_desc); +static void ya_ExecutorEnd_hook(QueryDesc *query_desc); +static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); +#ifdef IC_TEARDOWN_HOOK +static void ya_ic_teardown_hook(ChunkTransportState *transportStates, + bool hasErrors); +#endif +#ifdef ANALYZE_STATS_COLLECT_HOOK +static 
void ya_analyze_stats_collect_hook(QueryDesc *query_desc);
+#endif
+static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString,
+                                    bool readOnlyTree,
+                                    ProcessUtilityContext context,
+                                    ParamListInfo params,
+                                    QueryEnvironment *queryEnv,
+                                    DestReceiver *dest, QueryCompletion *qc);
+
+#define TEST_MAX_CONNECTIONS 4
+#define TEST_RCV_BUF_SIZE 8192
+#define TEST_POLL_TIMEOUT_MS 200
+
+static int test_server_fd = -1;
+static char *test_sock_path = NULL;
+
+static EventSender *sender = nullptr;
+
+/* Per-process EventSender, created on first use. */
+static inline EventSender *get_sender() {
+  if (!sender) {
+    sender = new EventSender();
+  }
+  return sender;
+}
+
+/*
+ * Call a member function and turn any std::exception into a FATAL report,
+ * so that C++ exceptions never unwind through PostgreSQL's C frames.
+ * ereport(FATAL) does not return, hence no value is produced on that path.
+ */
+template <typename T, typename R, typename... Args>
+R cpp_call(T *obj, R (T::*func)(Args...), Args... args) {
+  try {
+    return (obj->*func)(args...);
+  } catch (const std::exception &e) {
+    ereport(FATAL, (errmsg("Unexpected exception in yagpcc %s", e.what())));
+  }
+}
+
+/*
+ * Register GUCs and statistics, then chain every hook: remember the
+ * previously installed value and install ours.
+ */
+void hooks_init() {
+  Config::init_gucs();
+  YagpStat::init();
+  previous_ExecutorStart_hook = ExecutorStart_hook;
+  ExecutorStart_hook = ya_ExecutorStart_hook;
+  previous_ExecutorRun_hook = ExecutorRun_hook;
+  ExecutorRun_hook = ya_ExecutorRun_hook;
+  previous_ExecutorFinish_hook = ExecutorFinish_hook;
+  ExecutorFinish_hook = ya_ExecutorFinish_hook;
+  previous_ExecutorEnd_hook = ExecutorEnd_hook;
+  ExecutorEnd_hook = ya_ExecutorEnd_hook;
+  previous_query_info_collect_hook = query_info_collect_hook;
+  query_info_collect_hook = ya_query_info_collect_hook;
+#ifdef IC_TEARDOWN_HOOK
+  previous_ic_teardown_hook = ic_teardown_hook;
+  ic_teardown_hook = ya_ic_teardown_hook;
+#endif
+#ifdef ANALYZE_STATS_COLLECT_HOOK
+  previous_analyze_stats_collect_hook = analyze_stats_collect_hook;
+  analyze_stats_collect_hook = ya_analyze_stats_collect_hook;
+#endif
+  stat_statements_parser_init();
+  previous_ProcessUtility_hook = ProcessUtility_hook;
+  ProcessUtility_hook = ya_process_utility_hook;
+}
+
+/* Restore every hook saved by hooks_init() and release the sender. */
+void hooks_deinit() {
+  ExecutorStart_hook = previous_ExecutorStart_hook;
+  ExecutorEnd_hook = previous_ExecutorEnd_hook;
+  ExecutorRun_hook = previous_ExecutorRun_hook;
+  ExecutorFinish_hook = previous_ExecutorFinish_hook;
+  query_info_collect_hook = previous_query_info_collect_hook;
+#ifdef IC_TEARDOWN_HOOK
+  ic_teardown_hook = previous_ic_teardown_hook;
+#endif
+#ifdef ANALYZE_STATS_COLLECT_HOOK
+  analyze_stats_collect_hook = previous_analyze_stats_collect_hook;
+#endif
+  stat_statements_parser_deinit();
+  /*
+   * delete on nullptr is a no-op. The pointer must be cleared afterwards:
+   * get_sender() only allocates when it is null, so a stale value would be
+   * handed out as a dangling pointer on any later call.
+   */
+  delete sender;
+  sender = nullptr;
+  YagpStat::deinit();
+  ProcessUtility_hook = previous_ProcessUtility_hook;
+}
+
+/* Notify the sender before and after the (previous or standard) start. */
+void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) {
+  cpp_call(get_sender(), &EventSender::executor_before_start, query_desc,
+           eflags);
+  if (previous_ExecutorStart_hook) {
+    (*previous_ExecutorStart_hook)(query_desc, eflags);
+  } else {
+    standard_ExecutorStart(query_desc, eflags);
+  }
+  cpp_call(get_sender(), &EventSender::executor_after_start, query_desc,
+           eflags);
+}
+
+/* Track nesting depth around ExecutorRun; decremented on error as well. */
+void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction,
+                         uint64 count, bool execute_once) {
+  get_sender()->incr_depth();
+  PG_TRY();
+  {
+    if (previous_ExecutorRun_hook)
+      previous_ExecutorRun_hook(query_desc, direction, count, execute_once);
+    else
+      standard_ExecutorRun(query_desc, direction, count, execute_once);
+    get_sender()->decr_depth();
+  }
+  PG_CATCH();
+  {
+    get_sender()->decr_depth();
+    PG_RE_THROW();
+  }
+  PG_END_TRY();
+}
+
+/* Track nesting depth around ExecutorFinish; decremented on error as well. */
+void ya_ExecutorFinish_hook(QueryDesc *query_desc) {
+  get_sender()->incr_depth();
+  PG_TRY();
+  {
+    if (previous_ExecutorFinish_hook)
+      previous_ExecutorFinish_hook(query_desc);
+    else
+      standard_ExecutorFinish(query_desc);
+    get_sender()->decr_depth();
+  }
+  PG_CATCH();
+  {
+    get_sender()->decr_depth();
+    PG_RE_THROW();
+  }
+  PG_END_TRY();
+}
+
+/* Notify the sender, then run the previous or standard ExecutorEnd. */
+void ya_ExecutorEnd_hook(QueryDesc *query_desc) {
+  cpp_call(get_sender(), &EventSender::executor_end, query_desc);
+  if (previous_ExecutorEnd_hook) {
+    (*previous_ExecutorEnd_hook)(query_desc);
+  } else {
+    standard_ExecutorEnd(query_desc);
+  }
+}
+
+void
ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) {
+  cpp_call(get_sender(), &EventSender::query_metrics_collect, status,
+           arg /* queryDesc */, false /* utility */, (ErrorData *)NULL);
+  if (previous_query_info_collect_hook) {
+    (*previous_query_info_collect_hook)(status, arg);
+  }
+}
+
+#ifdef IC_TEARDOWN_HOOK
+void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) {
+  cpp_call(get_sender(), &EventSender::ic_metrics_collect);
+  if (previous_ic_teardown_hook) {
+    (*previous_ic_teardown_hook)(transportStates, hasErrors);
+  }
+}
+#endif
+
+#ifdef ANALYZE_STATS_COLLECT_HOOK
+void ya_analyze_stats_collect_hook(QueryDesc *query_desc) {
+  cpp_call(get_sender(), &EventSender::analyze_stats_collect, query_desc);
+  if (previous_analyze_stats_collect_hook) {
+    (*previous_analyze_stats_collect_hook)(query_desc);
+  }
+}
+#endif
+
+/*
+ * ProcessUtility hook: report SUBMIT before the statement runs and DONE or
+ * ERROR afterwards, using a stub QueryDesc that only carries the query text.
+ */
+static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString,
+                                    bool readOnlyTree,
+                                    ProcessUtilityContext context,
+                                    ParamListInfo params,
+                                    QueryEnvironment *queryEnv,
+                                    DestReceiver *dest, QueryCompletion *qc) {
+  /*
+   * Context the stub QueryDesc is allocated in. The error copy made in
+   * PG_CATCH goes here too, so it is released with the caller's context.
+   * Not modified after PG_TRY, so it is safe to read after the longjmp.
+   */
+  MemoryContext caller_ctx = CurrentMemoryContext;
+
+  /* Project utility data on QueryDesc to use existing logic */
+  QueryDesc *query_desc = (QueryDesc *)palloc0(sizeof(QueryDesc));
+  query_desc->sourceText = queryString;
+
+  cpp_call(get_sender(), &EventSender::query_metrics_collect,
+           METRICS_QUERY_SUBMIT, (void *)query_desc, true /* utility */,
+           (ErrorData *)NULL);
+
+  get_sender()->incr_depth();
+  PG_TRY();
+  {
+    if (previous_ProcessUtility_hook) {
+      (*previous_ProcessUtility_hook)(pstmt, queryString, readOnlyTree, context,
+                                      params, queryEnv, dest, qc);
+    } else {
+      standard_ProcessUtility(pstmt, queryString, readOnlyTree, context, params,
+                              queryEnv, dest, qc);
+    }
+
+    get_sender()->decr_depth();
+    cpp_call(get_sender(), &EventSender::query_metrics_collect,
+             METRICS_QUERY_DONE, (void *)query_desc, true /* utility */,
+             (ErrorData *)NULL);
+
+    pfree(query_desc);
+  }
+  PG_CATCH();
+  {
+    ErrorData *edata;
+    MemoryContext oldctx;
+
+    /*
+     * CopyErrorData() must not run in ErrorContext. Copy into the caller's
+     * context rather than TopMemoryContext: ReThrowError() makes its own
+     * copy, so a TopMemoryContext copy would never be freed and every
+     * failed utility statement would leak for the life of the backend.
+     */
+    oldctx = MemoryContextSwitchTo(caller_ctx);
+    edata = CopyErrorData();
+    FlushErrorState();
+    MemoryContextSwitchTo(oldctx);
+
+    get_sender()->decr_depth();
+    cpp_call(get_sender(), &EventSender::query_metrics_collect,
+             METRICS_QUERY_ERROR, (void *)query_desc, true /* utility */,
+             edata);
+
+    pfree(query_desc);
+    ReThrowError(edata);
+  }
+  PG_END_TRY();
+}
+
+/* Raise an ERROR unless the statistics shared memory is attached. */
+static void check_stats_loaded() {
+  if (!YagpStat::loaded()) {
+    ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                    errmsg("yagp_hooks_collector must be loaded via "
+                           "shared_preload_libraries")));
+  }
+}
+
+/* Zero the delivery counters of this segment. */
+void yagp_functions_reset() {
+  check_stats_loaded();
+  YagpStat::reset();
+}
+
+/*
+ * Return this segment's delivery counters as one composite row:
+ * (segid, total_messages, send_failures, connection_failures, other_errors,
+ * max_message_size).
+ */
+Datum yagp_functions_get(FunctionCallInfo fcinfo) {
+  const int ATTNUM = 6;
+  check_stats_loaded();
+  auto stats = YagpStat::get_stats();
+  TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM);
+  TupleDescInitEntry(tupdesc, (AttrNumber)1, "segid", INT4OID, -1 /* typmod */,
+                     0 /* attdim */);
+  TupleDescInitEntry(tupdesc, (AttrNumber)2, "total_messages", INT8OID,
+                     -1 /* typmod */, 0 /* attdim */);
+  TupleDescInitEntry(tupdesc, (AttrNumber)3, "send_failures", INT8OID,
+                     -1 /* typmod */, 0 /* attdim */);
+  TupleDescInitEntry(tupdesc, (AttrNumber)4, "connection_failures", INT8OID,
+                     -1 /* typmod */, 0 /* attdim */);
+  TupleDescInitEntry(tupdesc, (AttrNumber)5, "other_errors", INT8OID,
+                     -1 /* typmod */, 0 /* attdim */);
+  TupleDescInitEntry(tupdesc, (AttrNumber)6, "max_message_size", INT4OID,
+                     -1 /* typmod */, 0 /* attdim */);
+  tupdesc = BlessTupleDesc(tupdesc);
+  Datum values[ATTNUM];
+  bool nulls[ATTNUM];
+  MemSet(nulls, 0, sizeof(nulls));
+  values[0] = Int32GetDatum(GpIdentity.segindex);
+  values[1] = Int64GetDatum(stats.total);
+  values[2] = Int64GetDatum(stats.failed_sends);
+  values[3] = Int64GetDatum(stats.failed_connects);
+  values[4] = Int64GetDatum(stats.failed_other);
+  values[5] = Int32GetDatum(stats.max_message_size);
+  HeapTuple tuple = ya_gpdb::heap_form_tuple(tupdesc, values, nulls);
+  Datum result = HeapTupleGetDatum(tuple);
+  PG_RETURN_DATUM(result);
+}
+
+/* Close the test listener and remove its socket file; safe to call twice. */
+void test_uds_stop_server() {
+  if (test_server_fd >= 0) {
+    close(test_server_fd);
+    test_server_fd = -1;
+  }
+  if (test_sock_path) {
+    unlink(test_sock_path);
+    pfree(test_sock_path);
+    test_sock_path = NULL;
+  }
+}
+
+/*
+ * (Re)start the test listener on the given socket path. Any previous
+ * listener is stopped first and a stale socket file is removed.
+ */
+void test_uds_start_server(const char *path) {
+  struct sockaddr_un addr = {.sun_family = AF_UNIX};
+
+  if (strlen(path) >= sizeof(addr.sun_path))
+    ereport(ERROR, (errmsg("path too long")));
+
+  test_uds_stop_server();
+
+  strlcpy(addr.sun_path, path, sizeof(addr.sun_path));
+  test_sock_path = MemoryContextStrdup(TopMemoryContext, path);
+  unlink(path);
+
+  if ((test_server_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0 ||
+      bind(test_server_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
+      listen(test_server_fd, TEST_MAX_CONNECTIONS) < 0) {
+    test_uds_stop_server();
+    ereport(ERROR, (errmsg("socket setup failed: %m")));
+  }
+}
+
+/*
+ * Wait up to timeout_ms for one client, drain it until EOF and return the
+ * number of bytes received (0 on timeout). The wait is sliced into
+ * TEST_POLL_TIMEOUT_MS polls so that interrupts are honoured.
+ */
+int64 test_uds_receive(int timeout_ms) {
+  char buf[TEST_RCV_BUF_SIZE];
+  int rc;
+  struct pollfd pfd = {.fd = test_server_fd, .events = POLLIN};
+  int64 total = 0;
+
+  if (test_server_fd < 0)
+    ereport(ERROR, (errmsg("server not started")));
+
+  for (;;) {
+    CHECK_FOR_INTERRUPTS();
+    rc = poll(&pfd, 1, Min(timeout_ms, TEST_POLL_TIMEOUT_MS));
+    if (rc > 0)
+      break;
+    if (rc < 0 && errno != EINTR)
+      ereport(ERROR, (errmsg("poll: %m")));
+    timeout_ms -= TEST_POLL_TIMEOUT_MS;
+    if (timeout_ms <= 0)
+      return total;
+  }
+
+  if (pfd.revents & POLLIN) {
+    int client = accept(test_server_fd, NULL, NULL);
+    ssize_t n;
+
+    if (client < 0)
+      ereport(ERROR, (errmsg("accept: %m")));
+
+    /* recv() == 0 means the peer closed; retry only on EINTR */
+    while ((n = recv(client, buf, sizeof(buf), 0)) != 0) {
+      if (n > 0)
+        total += n;
+      else if (errno != EINTR)
+        break;
+    }
+
+    close(client);
+  }
+
+  return total;
+}
diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h
new file mode 100644
index 00000000000..443406a5259
--- /dev/null
+++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h
@@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * hook_wrappers.h
+ *
+ * IDENTIFICATION
+ *    gpcontrib/yagp_hooks_collector/src/hook_wrappers.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Install / restore the executor, utility and metrics hooks. (void): real C prototypes. */
+extern void hooks_init(void);
+extern void hooks_deinit(void);
+
+/* Delivery statistics: zero the counters / return them as one row. */
+extern void yagp_functions_reset(void);
+extern Datum yagp_functions_get(FunctionCallInfo fcinfo);
+
+/* Create / truncate the yagpcc.__log table. */
+extern void init_log(void);
+extern void truncate_log(void);
+
+/* Test-only Unix domain socket listener. */
+extern void test_uds_start_server(const char *path);
+extern int64_t test_uds_receive(int timeout_ms);
+extern void test_uds_stop_server(void);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp b/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp
new file mode 100644
index 00000000000..e8c927ece84
--- /dev/null
+++ b/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp
@@ -0,0 +1,158 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the
Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * LogOps.cpp
+ *
+ * IDENTIFICATION
+ *    gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "protos/yagpcc_set_service.pb.h"
+
+#include "LogOps.h"
+#include "LogSchema.h"
+
+extern "C" {
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/xact.h"
+#include "catalog/dependency.h"
+#include "catalog/heap.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_type.h"
+#include "cdb/cdbvars.h"
+#include "commands/tablecmds.h"
+#include "funcapi.h"
+#include "fmgr.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/timestamp.h"
+}
+
+/*
+ * Create the log table inside the extension schema and tie its lifetime to
+ * that schema. Errors out if the schema does not exist.
+ */
+void init_log() {
+  Oid namespaceId;
+  Oid relationId;
+  ObjectAddress tableAddr;
+  ObjectAddress schemaAddr;
+
+  namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */);
+
+  /* Create table */
+  relationId = heap_create_with_catalog(
+      log_relname.data() /* relname */, namespaceId /* namespace */,
+      0 /* tablespace */, InvalidOid /* relid */, InvalidOid /* reltype oid */,
+      InvalidOid /* reloftypeid */, GetUserId() /* owner */, HEAP_TABLE_AM_OID,
+      DescribeTuple() /* rel tuple */, NIL /* cooked_constraints */, RELKIND_RELATION,
+      RELPERSISTENCE_PERMANENT, false /* shared_relation */, false /* mapped_relation */, ONCOMMIT_NOOP,
+      NULL /* GP Policy */, (Datum)0 /* reloptions */, false /* use_user_acl */, true /* allow_system_table_mods */, true /* is_internal */,
+      InvalidOid /* relrewrite */, NULL /* typaddress */,
+      false /* valid_opts */);
+
+  /* Make the table visible */
+  CommandCounterIncrement();
+
+  /* Record dependency of the table on the schema */
+  if (OidIsValid(relationId) && OidIsValid(namespaceId)) {
+    ObjectAddressSet(tableAddr, RelationRelationId, relationId);
+    ObjectAddressSet(schemaAddr, NamespaceRelationId, namespaceId);
+
+    /* Table can be dropped only via DROP EXTENSION */
+    recordDependencyOn(&tableAddr, &schemaAddr, DEPENDENCY_EXTENSION);
+  } else {
+    ereport(NOTICE, (errmsg("YAGPCC failed to create log table or schema")));
+  }
+
+  /* Make changes visible */
+  CommandCounterIncrement();
+}
+
+/*
+ * Append one row built from the request to the log table. Silently does
+ * nothing outside a transaction or while the schema / table is missing.
+ */
+void insert_log(const yagpcc::SetQueryReq &req, bool utility) {
+  Oid namespaceId;
+  Oid relationId;
+  Relation rel;
+  HeapTuple tuple;
+
+  /* Return if xact is not valid (needed for catalog lookups). */
+  if (!IsTransactionState()) {
+    return;
+  }
+
+  /* Return if extension was not loaded */
+  namespaceId = get_namespace_oid(schema_name.data(), true /* missing_ok */);
+  if (!OidIsValid(namespaceId)) {
+    return;
+  }
+
+  /* Return if the table was not created yet */
+  relationId = get_relname_relid(log_relname.data(), namespaceId);
+  if (!OidIsValid(relationId)) {
+    return;
+  }
+
+  bool nulls[natts_yagp_log];
+  Datum values[natts_yagp_log];
+
+  /* Every column starts out NULL; extract_query_req fills what is present */
+  memset(nulls, true, sizeof(nulls));
+  memset(values, 0, sizeof(values));
+
+  extract_query_req(req, "", values, nulls);
+  nulls[attnum_yagp_log_utility] = false;
+  values[attnum_yagp_log_utility] = BoolGetDatum(utility);
+
+  rel = heap_open(relationId, RowExclusiveLock);
+
+  /* Insert the tuple as a frozen one to ensure it is logged even if txn rolls
+   * back or aborts */
+  tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+  frozen_heap_insert(rel, tuple);
+
+  heap_freetuple(tuple);
+  /* Keep lock on rel until end of xact */
+  heap_close(rel, NoLock);
+
+  /* Make changes visible */
+  CommandCounterIncrement();
+}
+
+/*
+ * Truncate the log table. Errors out if the schema or the table is missing.
+ */
+void truncate_log() {
+  Oid namespaceId;
+  Oid relationId;
+  Relation relation;
+
+  namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */);
+  relationId = get_relname_relid(log_relname.data(), namespaceId);
+
+  /*
+   * get_relname_relid() returns InvalidOid for a missing table. Report that
+   * as a regular "undefined table" error instead of letting heap_open() fail
+   * with an internal "could not open relation with OID 0".
+   */
+  if (!OidIsValid(relationId)) {
+    ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE),
+                    errmsg("relation \"%s.%s\" does not exist",
+                           schema_name.data(), log_relname.data())));
+  }
+
+  relation = heap_open(relationId, AccessExclusiveLock);
+
+  /* Truncate the main table */
+  heap_truncate_one_rel(relation);
+
+  /* Keep lock on rel until end of xact */
+  heap_close(relation, NoLock);
+
+  /* Make changes visible */
+  CommandCounterIncrement();
+}
diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogOps.h b/gpcontrib/yagp_hooks_collector/src/log/LogOps.h
new file mode 100644
index 00000000000..1fc30c21030
--- /dev/null
+++ b/gpcontrib/yagp_hooks_collector/src/log/LogOps.h
@@ -0,0 +1,46 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * LogOps.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogOps.h + * + *------------------------------------------------------------------------- + */ + +#pragma once + +#include + +extern "C" { +#include "postgres.h" +#include "fmgr.h" +} + +extern "C" { +/* CREATE TABLE yagpcc.__log (...); */ +void init_log(); + +/* TRUNCATE yagpcc.__log */ +void truncate_log(); +} + +/* INSERT INTO yagpcc.__log VALUES (...) */ +void insert_log(const yagpcc::SetQueryReq &req, bool utility); diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp new file mode 100644 index 00000000000..a391b1a2209 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp @@ -0,0 +1,162 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * LogSchema.cpp
+ *
+ * IDENTIFICATION
+ *    gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "google/protobuf/reflection.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/timestamp.pb.h"
+
+#include "LogSchema.h"
+
+/*
+ * Lookup table from dotted protobuf field name to log-table column index,
+ * built once on first use from log_tbl_desc.
+ *
+ * NOTE(review): the map's template arguments were missing from the copy of
+ * this patch under review; <std::string_view, int> is assumed - confirm
+ * against the declaration in LogSchema.h.
+ */
+const std::unordered_map<std::string_view, int> &proto_name_to_col_idx() {
+  static const auto name_col_idx = [] {
+    std::unordered_map<std::string_view, int> map;
+    map.reserve(log_tbl_desc.size());
+
+    for (size_t idx = 0; idx < natts_yagp_log; ++idx) {
+      map.emplace(log_tbl_desc[idx].proto_field_name, idx);
+    }
+
+    return map;
+  }();
+  return name_col_idx;
+}
+
+/* Build the tuple descriptor of the log table from log_tbl_desc. */
+TupleDesc DescribeTuple() {
+  TupleDesc tupdesc = CreateTemplateTupleDesc(natts_yagp_log);
+
+  /* Attribute numbers are 1-based, log_tbl_desc is 0-based */
+  for (size_t anum = 1; anum <= natts_yagp_log; ++anum) {
+    TupleDescInitEntry(tupdesc, anum, log_tbl_desc[anum - 1].pg_att_name.data(),
+                       log_tbl_desc[anum - 1].type_oid, -1 /* typmod */,
+                       0 /* attdim */);
+  }
+
+  return tupdesc;
+}
+
+/*
+ * Convert a protobuf Timestamp (Unix epoch, seconds + nanoseconds) into a
+ * timestamptz Datum (microseconds since the PostgreSQL epoch).
+ */
+Datum protots_to_timestamptz(const google::protobuf::Timestamp &ts) {
+  TimestampTz pgtimestamp =
+      (TimestampTz)ts.seconds() * USECS_PER_SEC + (ts.nanos() / 1000);
+  pgtimestamp -= (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY;
+  return TimestampTzGetDatum(pgtimestamp);
+}
+
+/*
+ * Convert a scalar protobuf field into a Datum. Unsigned integers are
+ * stored as int8, enums by value name as text; unsupported field types
+ * yield a zero Datum.
+ */
+Datum field_to_datum(const google::protobuf::FieldDescriptor *field,
+                     const google::protobuf::Reflection *reflection,
+                     const google::protobuf::Message &msg) {
+  using namespace google::protobuf;
+
+  switch (field->cpp_type()) {
+  case FieldDescriptor::CPPTYPE_INT32:
+    return Int32GetDatum(reflection->GetInt32(msg, field));
+  case FieldDescriptor::CPPTYPE_INT64:
+    return Int64GetDatum(reflection->GetInt64(msg, field));
+  case FieldDescriptor::CPPTYPE_UINT32:
+    return Int64GetDatum(reflection->GetUInt32(msg, field));
+  case FieldDescriptor::CPPTYPE_UINT64:
+    return Int64GetDatum(
+        static_cast<int64>(reflection->GetUInt64(msg, field)));
+  case FieldDescriptor::CPPTYPE_DOUBLE:
+    return Float8GetDatum(reflection->GetDouble(msg, field));
+  case FieldDescriptor::CPPTYPE_FLOAT:
+    return Float4GetDatum(reflection->GetFloat(msg, field));
+  case FieldDescriptor::CPPTYPE_BOOL:
+    return BoolGetDatum(reflection->GetBool(msg, field));
+  case FieldDescriptor::CPPTYPE_ENUM:
+    return CStringGetTextDatum(reflection->GetEnum(msg, field)->name().data());
+  case FieldDescriptor::CPPTYPE_STRING:
+    return CStringGetTextDatum(reflection->GetString(msg, field).c_str());
+  default:
+    return (Datum)0;
+  }
+}
+
+/*
+ * Store one leaf field in values/nulls at the column registered for
+ * field_name. Unregistered names only produce a NOTICE; unset fields are
+ * stored as NULL.
+ */
+void process_field(const google::protobuf::FieldDescriptor *field,
+                   const google::protobuf::Reflection *reflection,
+                   const google::protobuf::Message &msg,
+                   const std::string &field_name, Datum *values, bool *nulls) {
+
+  /* Bind by reference: a plain `auto` would copy the whole map per field */
+  const auto &proto_idx_map = proto_name_to_col_idx();
+  auto it = proto_idx_map.find(field_name);
+
+  if (it == proto_idx_map.end()) {
+    ereport(NOTICE,
+            (errmsg("YAGPCC protobuf field %s is not registered in log table",
+                    field_name.c_str())));
+    return;
+  }
+
+  int idx = it->second;
+
+  if (!reflection->HasField(msg, field)) {
+    nulls[idx] = true;
+    return;
+  }
+
+  if (field->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE &&
+      field->message_type()->full_name() == "google.protobuf.Timestamp") {
+    const auto &ts = static_cast<const google::protobuf::Timestamp &>(
+        reflection->GetMessage(msg, field));
+    values[idx] = protots_to_timestamptz(ts);
+  } else {
+    values[idx] = field_to_datum(field, reflection, msg);
+  }
+  nulls[idx] = false;
+}
+
+/*
+ * Walk the message recursively and fill values/nulls. Nested messages
+ * extend the dotted name prefix; Timestamp messages and scalars are leaves.
+ */
+void extract_query_req(const google::protobuf::Message &msg,
+                       const std::string &prefix, Datum *values, bool *nulls) {
+  using namespace google::protobuf;
+
+  const Descriptor *descriptor = msg.GetDescriptor();
+  const Reflection *reflection = msg.GetReflection();
+
+  /* Same for every field of this message, so build it once */
+  const std::string curr_pref = prefix.empty() ? "" : prefix + ".";
+
+  for (int i = 0; i < descriptor->field_count(); ++i) {
+    const FieldDescriptor *field = descriptor->field(i);
+
+    // For now, we do not log any repeated fields plus they need special
+    // treatment.
+    if (field->is_repeated()) {
+      continue;
+    }
+
+    std::string field_name = curr_pref + field->name().data();
+
+    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
+        field->message_type()->full_name() != "google.protobuf.Timestamp") {
+
+      if (reflection->HasField(msg, field)) {
+        const Message &nested = reflection->GetMessage(msg, field);
+        extract_query_req(nested, field_name, values, nulls);
+      }
+    } else {
+      process_field(field, reflection, msg, field_name, values, nulls);
+    }
+  }
+}
diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h
new file mode 100644
index 00000000000..f78acec7ce9
--- /dev/null
+++ b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h
@@ -0,0 +1,193 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + * + * LogSchema.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogSchema.h + * + *------------------------------------------------------------------------- + */ + +#pragma once + +#include +#include +#include +#include + +extern "C" { +#include "postgres.h" +#include "access/htup_details.h" +#include "access/tupdesc.h" +#include "catalog/pg_type.h" +#include "utils/timestamp.h" +#include "utils/builtins.h" +} + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Message; +class Reflection; +class Timestamp; +} // namespace protobuf +} // namespace google + +inline constexpr std::string_view schema_name = "yagpcc"; +inline constexpr std::string_view log_relname = "__log"; + +struct LogDesc { + std::string_view pg_att_name; + std::string_view proto_field_name; + Oid type_oid; +}; + +/* + * Definition of the log table structure. + * + * System stats collected as %lu (unsigned) may + * overflow INT8OID (signed), but this is acceptable. 
+ */ +/* clang-format off */ +inline constexpr std::array log_tbl_desc = { + /* 8-byte aligned types first - Query Info */ + LogDesc{"query_id", "query_info.query_id", INT8OID}, + LogDesc{"plan_id", "query_info.plan_id", INT8OID}, + LogDesc{"nested_level", "add_info.nested_level", INT8OID}, + LogDesc{"slice_id", "add_info.slice_id", INT8OID}, + /* 8-byte aligned types - System Stats */ + LogDesc{"systemstat_vsize", "query_metrics.systemStat.vsize", INT8OID}, + LogDesc{"systemstat_rss", "query_metrics.systemStat.rss", INT8OID}, + LogDesc{"systemstat_vmsizekb", "query_metrics.systemStat.VmSizeKb", INT8OID}, + LogDesc{"systemstat_vmpeakkb", "query_metrics.systemStat.VmPeakKb", INT8OID}, + LogDesc{"systemstat_rchar", "query_metrics.systemStat.rchar", INT8OID}, + LogDesc{"systemstat_wchar", "query_metrics.systemStat.wchar", INT8OID}, + LogDesc{"systemstat_syscr", "query_metrics.systemStat.syscr", INT8OID}, + LogDesc{"systemstat_syscw", "query_metrics.systemStat.syscw", INT8OID}, + LogDesc{"systemstat_read_bytes", "query_metrics.systemStat.read_bytes", INT8OID}, + LogDesc{"systemstat_write_bytes", "query_metrics.systemStat.write_bytes", INT8OID}, + LogDesc{"systemstat_cancelled_write_bytes", "query_metrics.systemStat.cancelled_write_bytes", INT8OID}, + /* 8-byte aligned types - Metric Instrumentation */ + LogDesc{"instrumentation_ntuples", "query_metrics.instrumentation.ntuples", INT8OID}, + LogDesc{"instrumentation_nloops", "query_metrics.instrumentation.nloops", INT8OID}, + LogDesc{"instrumentation_tuplecount", "query_metrics.instrumentation.tuplecount", INT8OID}, + LogDesc{"instrumentation_shared_blks_hit", "query_metrics.instrumentation.shared_blks_hit", INT8OID}, + LogDesc{"instrumentation_shared_blks_read", "query_metrics.instrumentation.shared_blks_read", INT8OID}, + LogDesc{"instrumentation_shared_blks_dirtied", "query_metrics.instrumentation.shared_blks_dirtied", INT8OID}, + LogDesc{"instrumentation_shared_blks_written", 
"query_metrics.instrumentation.shared_blks_written", INT8OID}, + LogDesc{"instrumentation_local_blks_hit", "query_metrics.instrumentation.local_blks_hit", INT8OID}, + LogDesc{"instrumentation_local_blks_read", "query_metrics.instrumentation.local_blks_read", INT8OID}, + LogDesc{"instrumentation_local_blks_dirtied", "query_metrics.instrumentation.local_blks_dirtied", INT8OID}, + LogDesc{"instrumentation_local_blks_written", "query_metrics.instrumentation.local_blks_written", INT8OID}, + LogDesc{"instrumentation_temp_blks_read", "query_metrics.instrumentation.temp_blks_read", INT8OID}, + LogDesc{"instrumentation_temp_blks_written", "query_metrics.instrumentation.temp_blks_written", INT8OID}, + LogDesc{"instrumentation_inherited_calls", "query_metrics.instrumentation.inherited_calls", INT8OID}, + /* 8-byte aligned types - Network Stats */ + LogDesc{"instrumentation_sent_total_bytes", "query_metrics.instrumentation.sent.total_bytes", INT8OID}, + LogDesc{"instrumentation_sent_tuple_bytes", "query_metrics.instrumentation.sent.tuple_bytes", INT8OID}, + LogDesc{"instrumentation_sent_chunks", "query_metrics.instrumentation.sent.chunks", INT8OID}, + LogDesc{"instrumentation_received_total_bytes", "query_metrics.instrumentation.received.total_bytes", INT8OID}, + LogDesc{"instrumentation_received_tuple_bytes", "query_metrics.instrumentation.received.tuple_bytes", INT8OID}, + LogDesc{"instrumentation_received_chunks", "query_metrics.instrumentation.received.chunks", INT8OID}, + /* 8-byte aligned types - Interconnect Stats and spilled bytes */ + LogDesc{"interconnect_total_recv_queue_size", "query_metrics.instrumentation.interconnect.total_recv_queue_size", INT8OID}, + LogDesc{"interconnect_recv_queue_size_counting_time", "query_metrics.instrumentation.interconnect.recv_queue_size_counting_time", INT8OID}, + LogDesc{"interconnect_total_capacity", "query_metrics.instrumentation.interconnect.total_capacity", INT8OID}, + LogDesc{"interconnect_capacity_counting_time", 
"query_metrics.instrumentation.interconnect.capacity_counting_time", INT8OID}, + LogDesc{"interconnect_total_buffers", "query_metrics.instrumentation.interconnect.total_buffers", INT8OID}, + LogDesc{"interconnect_buffer_counting_time", "query_metrics.instrumentation.interconnect.buffer_counting_time", INT8OID}, + LogDesc{"interconnect_active_connections_num", "query_metrics.instrumentation.interconnect.active_connections_num", INT8OID}, + LogDesc{"interconnect_retransmits", "query_metrics.instrumentation.interconnect.retransmits", INT8OID}, + LogDesc{"interconnect_startup_cached_pkt_num", "query_metrics.instrumentation.interconnect.startup_cached_pkt_num", INT8OID}, + LogDesc{"interconnect_mismatch_num", "query_metrics.instrumentation.interconnect.mismatch_num", INT8OID}, + LogDesc{"interconnect_crc_errors", "query_metrics.instrumentation.interconnect.crc_errors", INT8OID}, + LogDesc{"interconnect_snd_pkt_num", "query_metrics.instrumentation.interconnect.snd_pkt_num", INT8OID}, + LogDesc{"interconnect_recv_pkt_num", "query_metrics.instrumentation.interconnect.recv_pkt_num", INT8OID}, + LogDesc{"interconnect_disordered_pkt_num", "query_metrics.instrumentation.interconnect.disordered_pkt_num", INT8OID}, + LogDesc{"interconnect_duplicated_pkt_num", "query_metrics.instrumentation.interconnect.duplicated_pkt_num", INT8OID}, + LogDesc{"interconnect_recv_ack_num", "query_metrics.instrumentation.interconnect.recv_ack_num", INT8OID}, + LogDesc{"interconnect_status_query_msg_num", "query_metrics.instrumentation.interconnect.status_query_msg_num", INT8OID}, + LogDesc{"spill_totalbytes", "query_metrics.spill.totalBytes", INT8OID}, + /* 8-byte aligned types - Float and Timestamp */ + LogDesc{"systemstat_runningtimeseconds", "query_metrics.systemStat.runningTimeSeconds", FLOAT8OID}, + LogDesc{"systemstat_usertimeseconds", "query_metrics.systemStat.userTimeSeconds", FLOAT8OID}, + LogDesc{"systemstat_kerneltimeseconds", "query_metrics.systemStat.kernelTimeSeconds", FLOAT8OID}, + 
LogDesc{"instrumentation_firsttuple", "query_metrics.instrumentation.firsttuple", FLOAT8OID}, + LogDesc{"instrumentation_startup", "query_metrics.instrumentation.startup", FLOAT8OID}, + LogDesc{"instrumentation_total", "query_metrics.instrumentation.total", FLOAT8OID}, + LogDesc{"instrumentation_blk_read_time", "query_metrics.instrumentation.blk_read_time", FLOAT8OID}, + LogDesc{"instrumentation_blk_write_time", "query_metrics.instrumentation.blk_write_time", FLOAT8OID}, + LogDesc{"instrumentation_startup_time", "query_metrics.instrumentation.startup_time", FLOAT8OID}, + LogDesc{"instrumentation_inherited_time", "query_metrics.instrumentation.inherited_time", FLOAT8OID}, + LogDesc{"datetime", "datetime", TIMESTAMPTZOID}, + LogDesc{"submit_time", "submit_time", TIMESTAMPTZOID}, + LogDesc{"start_time", "start_time", TIMESTAMPTZOID}, + LogDesc{"end_time", "end_time", TIMESTAMPTZOID}, + /* 4-byte aligned types - Query Key */ + LogDesc{"tmid", "query_key.tmid", INT4OID}, + LogDesc{"ssid", "query_key.ssid", INT4OID}, + LogDesc{"ccnt", "query_key.ccnt", INT4OID}, + /* 4-byte aligned types - Segment Key */ + LogDesc{"dbid", "segment_key.dbid", INT4OID}, + LogDesc{"segid", "segment_key.segindex", INT4OID}, + LogDesc{"spill_filecount", "query_metrics.spill.fileCount", INT4OID}, + /* Variable-length types - Query Info */ + LogDesc{"generator", "query_info.generator", TEXTOID}, + LogDesc{"query_text", "query_info.query_text", TEXTOID}, + LogDesc{"plan_text", "query_info.plan_text", TEXTOID}, + LogDesc{"template_query_text", "query_info.template_query_text", TEXTOID}, + LogDesc{"template_plan_text", "query_info.template_plan_text", TEXTOID}, + LogDesc{"user_name", "query_info.userName", TEXTOID}, + LogDesc{"database_name", "query_info.databaseName", TEXTOID}, + LogDesc{"rsgname", "query_info.rsgname", TEXTOID}, + LogDesc{"analyze_text", "query_info.analyze_text", TEXTOID}, + LogDesc{"error_message", "add_info.error_message", TEXTOID}, + LogDesc{"query_status", "query_status", 
TEXTOID}, + /* Extra field */ + LogDesc{"utility", "", BOOLOID}, +}; +/* clang-format on */ + +inline constexpr size_t natts_yagp_log = log_tbl_desc.size(); +inline constexpr size_t attnum_yagp_log_utility = natts_yagp_log - 1; + +const std::unordered_map &proto_name_to_col_idx(); + +TupleDesc DescribeTuple(); + +Datum protots_to_timestamptz(const google::protobuf::Timestamp &ts); + +Datum field_to_datum(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg); + +/* Process a single proto field and store in values/nulls arrays */ +void process_field(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg, + const std::string &field_name, Datum *values, bool *nulls); + +/* + * Extracts values from msg into values/nulls arrays. Caller must + * pre-init nulls[] to true (this function does not set nulls + * to true for nested messages if parent message is missing). + */ +void extract_query_req(const google::protobuf::Message &msg, + const std::string &prefix, Datum *values, bool *nulls); diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp new file mode 100644 index 00000000000..22083e8bdaf --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp @@ -0,0 +1,252 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * gpdbwrappers.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp + * + *------------------------------------------------------------------------- + */ + +#include "gpdbwrappers.h" +#include "log/LogOps.h" + +extern "C" { +#include "postgres.h" +#include "utils/guc.h" +#include "commands/dbcommands.h" +#include "commands/resgroupcmds.h" +#include "utils/builtins.h" +#include "utils/varlena.h" +#include "nodes/pg_list.h" +#include "commands/explain.h" +#include "executor/instrument.h" +#include "access/tupdesc.h" +#include "access/htup.h" +#include "utils/elog.h" +#include "cdb/cdbexplain.h" +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" +} + +namespace { + +template +auto wrap(Func &&func, Args &&...args) noexcept(!Throws) + -> decltype(func(std::forward(args)...)) { + + using RetType = decltype(func(std::forward(args)...)); + + // Empty struct for void return type. 
+ struct VoidResult {}; + using ResultHolder = std::conditional_t, VoidResult, + std::optional>; + + bool success; + ErrorData *edata; + ResultHolder result_holder; + + PG_TRY(); + { + if constexpr (!std::is_void_v) { + result_holder.emplace(func(std::forward(args)...)); + } else { + func(std::forward(args)...); + } + edata = NULL; + success = true; + } + PG_CATCH(); + { + MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + MemoryContextSwitchTo(oldctx); + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) { + std::string err; + if (edata && edata->message) { + err = std::string(edata->message); + } else { + err = "Unknown error occurred"; + } + + if (edata) { + FreeErrorData(edata); + } + + if constexpr (Throws) { + throw std::runtime_error(err); + } + + if constexpr (!std::is_void_v) { + return RetType{}; + } else { + return; + } + } + + if constexpr (!std::is_void_v) { + return *std::move(result_holder); + } else { + return; + } +} + +template +auto wrap_throw(Func &&func, Args &&...args) + -> decltype(func(std::forward(args)...)) { + return wrap(std::forward(func), std::forward(args)...); +} + +template +auto wrap_noexcept(Func &&func, Args &&...args) noexcept + -> decltype(func(std::forward(args)...)) { + return wrap(std::forward(func), std::forward(args)...); +} +} // namespace + +void *ya_gpdb::palloc(Size size) { return wrap_throw(::palloc, size); } + +void *ya_gpdb::palloc0(Size size) { return wrap_throw(::palloc0, size); } + +char *ya_gpdb::pstrdup(const char *str) { return wrap_throw(::pstrdup, str); } + +char *ya_gpdb::get_database_name(Oid dbid) noexcept { + return wrap_noexcept(::get_database_name, dbid); +} + +bool ya_gpdb::split_identifier_string(char *rawstring, char separator, + List **namelist) noexcept { + return wrap_noexcept(SplitIdentifierString, rawstring, separator, namelist); +} + +ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, + bool costs) noexcept { + 
return wrap_noexcept([&]() { + ExplainState *es = NewExplainState(); + es->costs = costs; + es->verbose = true; + es->format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(es); + ExplainPrintPlan(es, query_desc); + ExplainEndOutput(es); + return *es; + }); +} + +ExplainState ya_gpdb::get_analyze_state(QueryDesc *query_desc, + bool analyze) noexcept { + return wrap_noexcept([&]() { + ExplainState *es = NewExplainState(); + es->analyze = analyze; + es->verbose = true; + es->buffers = es->analyze; + es->timing = es->analyze; + es->summary = es->analyze; + es->format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(es); + if (analyze) { + ExplainPrintPlan(es, query_desc); + ExplainPrintExecStatsEnd(es, query_desc); + } + ExplainEndOutput(es); + return *es; + }); +} + +Instrumentation *ya_gpdb::instr_alloc(size_t n, int instrument_options, + bool async_mode) { + return wrap_throw(InstrAlloc, n, instrument_options, async_mode); +} + +HeapTuple ya_gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, + bool *isnull) { + if (!tupleDescriptor || !values || !isnull) + throw std::runtime_error( + "Invalid input parameters for heap tuple formation"); + + return wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); +} + +void ya_gpdb::pfree(void *pointer) noexcept { + // Note that ::pfree asserts that pointer != NULL. 
+ if (!pointer) + return; + + wrap_noexcept(::pfree, pointer); +} + +MemoryContext ya_gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { + return MemoryContextSwitchTo(context); +} + +const char *ya_gpdb::get_config_option(const char *name, bool missing_ok, + bool restrict_superuser) noexcept { + if (!name) + return nullptr; + + return wrap_noexcept(GetConfigOption, name, missing_ok, restrict_superuser); +} + +void ya_gpdb::list_free(List *list) noexcept { + if (!list) + return; + + wrap_noexcept(::list_free, list); +} + +CdbExplain_ShowStatCtx * +ya_gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, + instr_time starttime) { + if (!query_desc) + throw std::runtime_error("Invalid query descriptor"); + + return wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, starttime); +} + +void ya_gpdb::instr_end_loop(Instrumentation *instr) { + if (!instr) + throw std::runtime_error("Invalid instrumentation pointer"); + + wrap_throw(::InstrEndLoop, instr); +} + +char *ya_gpdb::gen_normquery(const char *query) noexcept { + return wrap_noexcept(::gen_normquery, query); +} + +StringInfo ya_gpdb::gen_normplan(const char *exec_plan) noexcept { + return wrap_noexcept(::gen_normplan, exec_plan); +} + +char *ya_gpdb::get_rg_name_for_id(Oid group_id) { + return wrap_throw(GetResGroupNameForId, group_id); +} + +Oid ya_gpdb::get_rg_id_by_session_id(int session_id) { + return wrap_throw(ResGroupGetGroupIdBySessionId, session_id); +} + +void ya_gpdb::insert_log(const yagpcc::SetQueryReq &req, bool utility) { + return wrap_throw(::insert_log, req, utility); +} diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h new file mode 100644 index 00000000000..fe9b3ba0487 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h @@ -0,0 +1,81 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * gpdbwrappers.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h + * + *------------------------------------------------------------------------- + */ + +#pragma once + +extern "C" { +#include "postgres.h" +#include "nodes/pg_list.h" +#include "commands/explain.h" +#include "executor/instrument.h" +#include "access/htup.h" +#include "utils/elog.h" +#include "utils/memutils.h" +} + +#include +#include +#include +#include +#include + +namespace yagpcc { +class SetQueryReq; +} // namespace yagpcc + +namespace ya_gpdb { + +// Functions that call palloc(). +// Make sure correct memory context is set. 
+void *palloc(Size size); +void *palloc0(Size size); +char *pstrdup(const char *str); +char *get_database_name(Oid dbid) noexcept; +bool split_identifier_string(char *rawstring, char separator, + List **namelist) noexcept; +ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; +ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept; +Instrumentation *instr_alloc(size_t n, int instrument_options, bool async_mode); +HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, + bool *isnull); +CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc, + instr_time starttime); +void instr_end_loop(Instrumentation *instr); +char *gen_normquery(const char *query) noexcept; +StringInfo gen_normplan(const char *executionPlan) noexcept; +char *get_rg_name_for_id(Oid group_id); +void insert_log(const yagpcc::SetQueryReq &req, bool utility); + +// Palloc-free functions. +void pfree(void *pointer) noexcept; +MemoryContext mem_ctx_switch_to(MemoryContext context) noexcept; +const char *get_config_option(const char *name, bool missing_ok, + bool restrict_superuser) noexcept; +void list_free(List *list) noexcept; +Oid get_rg_id_by_session_id(int session_id); + +} // namespace ya_gpdb diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md new file mode 100644 index 00000000000..291e31a3099 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md @@ -0,0 +1 @@ +This directory contains a slightly modified subset of pg_stat_statements for PG v9.4 to be used in query and plan ID generation. 
diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c new file mode 100644 index 00000000000..7404208055f --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -0,0 +1,378 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * pg_stat_statements_ya_parser.c + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c + * + *------------------------------------------------------------------------- + */ + +// NOTE: this file is just a bunch of code borrowed from pg_stat_statements for PG 9.4 +// and from our own inhouse implementation of pg_stat_statements for managed PG + +#include "postgres.h" + +#include +#include + +#include "common/hashfn.h" +#include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "parser/scanner.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/queryjumble.h" + +#include "pg_stat_statements_ya_parser.h" + +/* + * NOTE(review): fallback token codes, used only when the names are not + * already defined. Presumably they must match the values generated for the + * core grammar of the target server -- verify when the server version changes. + */ +#ifndef FCONST +#define FCONST 260 +#endif +#ifndef SCONST +#define SCONST 261 +#endif +#ifndef BCONST +#define BCONST 263 +#endif +#ifndef XCONST +#define XCONST 264 +#endif +#ifndef ICONST +#define ICONST 266 +#endif + +static void fill_in_constant_lengths(JumbleState *jstate, const char *query); +static int comp_location(const void *a, const void *b); +StringInfo gen_normplan(const char *execution_plan); +static bool need_replace(int token); +static char *generate_normalized_query(JumbleState *jstate, const char *query, + int *query_len_p, int encoding); + +void stat_statements_parser_init(void) +{ + EnableQueryId(); +} + +void stat_statements_parser_deinit(void) +{ + /* NO-OP */ +} + +/* check if token should be replaced by substitute variable */ +static bool +need_replace(int token) +{ + return (token == FCONST) || (token == ICONST) || (token == SCONST) || (token == BCONST) || (token == XCONST); +} + +/* + * gen_normplan - parse execution plan using flex and replace all CONST to + * substitute variables.
+ * + * Returns a StringInfo holding the rewritten plan text, in which every + * constant token accepted by need_replace() is replaced by $1, $2, ... in + * order of appearance. Returns NULL when execution_plan is NULL. + */ +StringInfo +gen_normplan(const char *execution_plan) +{ + core_yyscan_t yyscanner; + core_yy_extra_type yyextra; + core_YYSTYPE yylval; + YYLTYPE yylloc; + int tok; + int bind_prefix = 1; + YYLTYPE last_yylloc = 0; + int last_tok = 0; + StringInfo plan_out; + + /* Nothing to scan; same NULL-in/NULL-out contract as gen_normquery(). */ + if (execution_plan == NULL) + return NULL; + + plan_out = makeStringInfo(); + + yyscanner = scanner_init(execution_plan, + &yyextra, +#if PG_VERSION_NUM >= 120000 + &ScanKeywords, + ScanKeywordTokens +#else + ScanKeywords, + NumScanKeywords +#endif + ); + + for (;;) + { + /* get the next lexeme */ + tok = core_yylex(&yylval, &yylloc, yyscanner); + + /* + * yylloc now marks where the previous lexeme's span ends, so the + * previous lexeme can be emitted. + */ + if (need_replace(last_tok)) + { + /* substitute a numbered placeholder for the constant */ + appendStringInfo(plan_out, "$%d", bind_prefix++); + } + else if (yylloc > last_yylloc) + { + /* + * Not a constant: copy the source bytes as-is, directly into the + * StringInfo. This avoids a malloc'd temporary whose allocation + * failure used to be passed on unchecked. 
+ */ + appendBinaryStringInfo(plan_out, execution_plan + last_yylloc, + yylloc - last_yylloc); + } + /* stop once the scanner reports end of input */ + if (tok == 0) + break; + last_tok = tok; + last_yylloc = yylloc; + } + + scanner_finish(yyscanner); + + return plan_out; +} + +/* + * comp_location: comparator for qsorting LocationLen structs by location + */ +static int +comp_location(const void *a, const void *b) +{ + int l = ((const LocationLen *) a)->location; + int r = ((const LocationLen *) b)->location; + + if (l < r) + return -1; + else if (l > r) + return +1; + else + return 0; +} + +/* + * Given a valid SQL string and an array of constant-location records, + * fill in the textual lengths of those constants. + * + * The constants may use any allowed constant syntax, such as float literals, + * bit-strings, single-quoted strings and dollar-quoted strings. This is + * accomplished by using the public API for the core scanner.
+ * + * It is the caller's job to ensure that the string is a valid SQL statement + * with constants at the indicated locations. Since in practice the string + * has already been parsed, and the locations that the caller provides will + * have originated from within the authoritative parser, this should not be + * a problem. + * + * Duplicate constant pointers are possible, and will have their lengths + * marked as '-1', so that they are later ignored. (Actually, we assume the + * lengths were initialized as -1 to start with, and don't change them here.) + * + * N.B. There is an assumption that a '-' character at a Const location begins + * a negative numeric constant. This precludes there ever being another + * reason for a constant to start with a '-'. + */ +static void +fill_in_constant_lengths(JumbleState *jstate, const char *query) +{ + LocationLen *locs; + core_yyscan_t yyscanner; + core_yy_extra_type yyextra; + core_YYSTYPE yylval; + YYLTYPE yylloc; + int last_loc = -1; + int i; + + /* + * Sort the records by location so that we can process them in order while + * scanning the query text. 
+ */ + if (jstate->clocations_count > 1) + qsort(jstate->clocations, jstate->clocations_count, + sizeof(LocationLen), comp_location); + locs = jstate->clocations; + + /* initialize the flex scanner --- should match raw_parser() */ + yyscanner = scanner_init(query, + &yyextra, + &ScanKeywords, + ScanKeywordTokens); + + /* Search for each constant, in sequence */ + for (i = 0; i < jstate->clocations_count; i++) + { + int loc = locs[i].location; + int tok; + + Assert(loc >= 0); + + if (loc <= last_loc) + continue; /* Duplicate constant, ignore */ + + /* Lex tokens until we find the desired constant */ + for (;;) + { + tok = core_yylex(&yylval, &yylloc, yyscanner); + + /* We should not hit end-of-string, but if we do, behave sanely */ + if (tok == 0) + break; /* out of inner for-loop */ + + /* + * We should find the token position exactly, but if we somehow + * run past it, work with that. + */ + if (yylloc >= loc) + { + if (query[loc] == '-') + { + /* + * It's a negative value - this is the one and only case + * where we replace more than a single token. + * + * Do not compensate for the core system's special-case + * adjustment of location to that of the leading '-' + * operator in the event of a negative constant. It is + * also useful for our purposes to start from the minus + * symbol. In this way, queries like "select * from foo + * where bar = 1" and "select * from foo where bar = -2" + * will have identical normalized query strings. + */ + tok = core_yylex(&yylval, &yylloc, yyscanner); + if (tok == 0) + break; /* out of inner for-loop */ + } + + /* + * We now rely on the assumption that flex has placed a zero + * byte after the text of the current token in scanbuf. 
+ */ + locs[i].length = strlen(yyextra.scanbuf + loc); + break; /* out of inner for-loop */ + } + } + + /* If we hit end-of-string, give up, leaving remaining lengths -1 */ + if (tok == 0) + break; + + last_loc = loc; + } + + scanner_finish(yyscanner); +} + +/* + * Generate a normalized version of the query string that will be used to + * represent all similar queries. + * + * Note that the normalized representation may well vary depending on + * just which "equivalent" query is used to create the hashtable entry. + * We assume this is OK. + * + * *query_len_p contains the input string length, and is updated with + * the result string length (which cannot be longer) on exit. + * + * Returns a palloc'd string. + */ +static char * +generate_normalized_query(JumbleState *jstate, const char *query, + int *query_len_p, int encoding) +{ + char *norm_query; + int query_len = *query_len_p; + int i, + len_to_wrt, /* Length (in bytes) to write */ + quer_loc = 0, /* Source query byte location */ + n_quer_loc = 0, /* Normalized query byte location */ + last_off = 0, /* Offset from start for previous tok */ + last_tok_len = 0; /* Length (in bytes) of that tok */ + + /* + * Get constants' lengths (core system only gives us locations). Note + * this also ensures the items are sorted by location. + */ + fill_in_constant_lengths(jstate, query); + + /* Allocate result buffer */ + norm_query = palloc(query_len + 1); + + for (i = 0; i < jstate->clocations_count; i++) + { + int off, /* Offset from start for cur tok */ + tok_len; /* Length (in bytes) of that tok */ + + off = jstate->clocations[i].location; + tok_len = jstate->clocations[i].length; + + if (tok_len < 0) + continue; /* ignore any duplicates */ + + /* Copy next chunk (what precedes the next constant) */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; + + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* And insert a '?' 
in place of the constant token */ + norm_query[n_quer_loc++] = '?'; + + quer_loc = off + tok_len; + last_off = off; + last_tok_len = tok_len; + } + + /* + * We've copied up until the last ignorable constant. Copy over the + * remaining bytes of the original query string. + */ + len_to_wrt = query_len - quer_loc; + + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + Assert(n_quer_loc <= query_len); + norm_query[n_quer_loc] = '\0'; + + *query_len_p = n_quer_loc; + return norm_query; +} + +/* + * gen_normquery - run query through generate_normalized_query() and return + * the palloc'd result, or NULL when query is NULL. + * + * NOTE(review): the JumbleState is handed over with clocations_count = 0, so + * generate_normalized_query() has no constant locations to replace and the + * result is a plain copy of query -- confirm this is intended. + */ +char *gen_normquery(const char *query) +{ + JumbleState jstate; + int query_len; + char *norm_query; + + if (!query) { + return NULL; + } + + /* Zero the struct so members not assigned below are not indeterminate. */ + memset(&jstate, 0, sizeof(jstate)); + jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); + jstate.jumble_len = 0; + jstate.clocations_buf_size = 32; + jstate.clocations = (LocationLen *) + palloc(jstate.clocations_buf_size * sizeof(LocationLen)); + jstate.clocations_count = 0; + query_len = strlen(query); + norm_query = generate_normalized_query(&jstate, query, &query_len, GetDatabaseEncoding()); + + /* The result does not point into the scratch buffers; release them. 
*/ + pfree(jstate.clocations); + pfree(jstate.jumble); + + return norm_query; +} \ No newline at end of file diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h new file mode 100644 index 00000000000..96c6a776dba --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -0,0 +1,43 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * pg_stat_statements_ya_parser.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h + * + *------------------------------------------------------------------------- + */ + +#pragma once + +#ifdef __cplusplus +extern "C" +{ +#endif + +extern void stat_statements_parser_init(void); +extern void stat_statements_parser_deinit(void); + +StringInfo gen_normplan(const char *executionPlan); +char *gen_normquery(const char *query); + +#ifdef __cplusplus +} +#endif diff --git a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c new file mode 100644 index 00000000000..271bceee178 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c @@ -0,0 +1,150 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * yagp_hooks_collector.c + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "cdb/cdbvars.h" +#include "funcapi.h" +#include "utils/builtins.h" + +#include "hook_wrappers.h" + +PG_MODULE_MAGIC; + +void _PG_init(void); +void _PG_fini(void); +PG_FUNCTION_INFO_V1(yagp_stat_messages_reset); +PG_FUNCTION_INFO_V1(yagp_stat_messages); +PG_FUNCTION_INFO_V1(yagp_init_log); +PG_FUNCTION_INFO_V1(yagp_truncate_log); + +PG_FUNCTION_INFO_V1(yagp_test_uds_start_server); +PG_FUNCTION_INFO_V1(yagp_test_uds_receive); +PG_FUNCTION_INFO_V1(yagp_test_uds_stop_server); + +void _PG_init(void) { + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) + hooks_init(); +} + +void _PG_fini(void) { + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) + hooks_deinit(); +} + +Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + yagp_functions_reset(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum yagp_stat_messages(PG_FUNCTION_ARGS) { + return yagp_functions_get(fcinfo); +} + +Datum yagp_init_log(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + init_log(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum yagp_truncate_log(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + + if 
(SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + truncate_log(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum yagp_test_uds_start_server(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + char *path = text_to_cstring(PG_GETARG_TEXT_PP(0)); + test_uds_start_server(path); + pfree(path); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum yagp_test_uds_receive(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + int64 *result; + + if (SRF_IS_FIRSTCALL()) { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + result = (int64 *)palloc(sizeof(int64)); + funcctx->user_fctx = result; + funcctx->max_calls = 1; + MemoryContextSwitchTo(oldcontext); + + int timeout_ms = PG_GETARG_INT32(0); + *result = test_uds_receive(timeout_ms); + } + + funcctx = SRF_PERCALL_SETUP(); + + if (funcctx->call_cntr < funcctx->max_calls) { + result = (int64 *)funcctx->user_fctx; + SRF_RETURN_NEXT(funcctx, Int64GetDatum(*result)); + } + + SRF_RETURN_DONE(funcctx); +} + +Datum yagp_test_uds_stop_server(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + test_uds_stop_server(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql new file mode 100644 index 00000000000..8684ca73915 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql @@ -0,0 +1,113 @@ +/* yagp_hooks_collector--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION yagp_hooks_collector UPDATE TO '1.1'" to load this file. \quit + +CREATE SCHEMA yagpcc; + +-- Unlink existing objects from extension. 
+ALTER EXTENSION yagp_hooks_collector DROP VIEW yagp_stat_messages; +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION yagp_stat_messages_reset(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_f_on_segments(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_f_on_master(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_reset_f_on_segments(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); + +-- Now drop the objects. +DROP VIEW yagp_stat_messages; +DROP FUNCTION yagp_stat_messages_reset(); +DROP FUNCTION __yagp_stat_messages_f_on_segments(); +DROP FUNCTION __yagp_stat_messages_f_on_master(); +DROP FUNCTION __yagp_stat_messages_reset_f_on_segments(); +DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); + +-- Recreate functions and view in new schema. +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.stat_messages_reset() +RETURNS SETOF void +AS +$$ + SELECT yagpcc.__stat_messages_reset_f_on_master(); + SELECT yagpcc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW yagpcc.stat_messages AS + SELECT C.* + FROM yagpcc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors 
bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM yagpcc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +-- Create new objects. +CREATE FUNCTION yagpcc.__init_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__init_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside yagpcc schema. +SELECT yagpcc.__init_log_on_master(); +SELECT yagpcc.__init_log_on_segments(); + +CREATE VIEW yagpcc.log AS + SELECT * FROM yagpcc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('yagpcc.__log') -- segments + ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION yagpcc.__truncate_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__truncate_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.truncate_log() +RETURNS SETOF void AS $$ +BEGIN + PERFORM yagpcc.__truncate_log_on_master(); + PERFORM yagpcc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql new file mode 100644 index 00000000000..270cab92382 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql @@ -0,0 +1,55 @@ +/* yagp_hooks_collector--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION yagp_hooks_collector" to load this file. 
\quit + +CREATE FUNCTION __yagp_stat_messages_reset_f_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION __yagp_stat_messages_reset_f_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagp_stat_messages_reset() +RETURNS SETOF void +AS +$$ + SELECT __yagp_stat_messages_reset_f_on_master(); + SELECT __yagp_stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION __yagp_stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION __yagp_stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW yagp_stat_messages AS + SELECT C.* + FROM __yagp_stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM __yagp_stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql new file mode 100644 index 00000000000..83bfb553638 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql @@ -0,0 +1,110 @@ +/* yagp_hooks_collector--1.1.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION yagp_hooks_collector" to load this file. 
\quit + +CREATE SCHEMA yagpcc; + +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.stat_messages_reset() +RETURNS SETOF void +AS +$$ + SELECT yagpcc.__stat_messages_reset_f_on_master(); + SELECT yagpcc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW yagpcc.stat_messages AS + SELECT C.* + FROM yagpcc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM yagpcc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +CREATE FUNCTION yagpcc.__init_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__init_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside yagpcc schema. 
+SELECT yagpcc.__init_log_on_master(); +SELECT yagpcc.__init_log_on_segments(); + +CREATE VIEW yagpcc.log AS + SELECT * FROM yagpcc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('yagpcc.__log') -- segments +ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION yagpcc.__truncate_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__truncate_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.truncate_log() +RETURNS SETOF void AS $$ +BEGIN + PERFORM yagpcc.__truncate_log_on_master(); + PERFORM yagpcc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; + +CREATE FUNCTION yagpcc.__test_uds_start_server(path text) +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_test_uds_start_server' +LANGUAGE C STRICT EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__test_uds_receive(timeout_ms int DEFAULT 2000) +RETURNS SETOF bigint +AS 'MODULE_PATHNAME', 'yagp_test_uds_receive' +LANGUAGE C STRICT EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__test_uds_stop_server() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_test_uds_stop_server' +LANGUAGE C EXECUTE ON MASTER; diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control new file mode 100644 index 00000000000..cb5906a1302 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control @@ -0,0 +1,5 @@ +# yagp_hooks_collector extension +comment = 'Intercept query and plan execution hooks and report them to Yandex GPCC agents' +default_version = '1.1' +module_pathname = '$libdir/yagp_hooks_collector' +superuser = true diff --git a/pom.xml b/pom.xml index 6d33d7e9de5..35bd5b13442 100644 --- a/pom.xml +++ b/pom.xml @@ -153,6 +153,13 @@ code or new licensing patterns. 
gpcontrib/gp_exttable_fdw/data/** gpcontrib/gp_exttable_fdw/gp_exttable_fdw.control + gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control + gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto + gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto + gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto + gpcontrib/yagp_hooks_collector/.clang-format + gpcontrib/yagp_hooks_collector/Makefile + getversion .git-blame-ignore-revs .dir-locals.el diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 062ec75b039..edc49b72e05 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -271,6 +271,7 @@ with_zstd = @with_zstd@ ZSTD_CFLAGS = @ZSTD_CFLAGS@ ZSTD_LIBS = @ZSTD_LIBS@ EVENT_LIBS = @EVENT_LIBS@ +with_yagp_hooks_collector = @with_yagp_hooks_collector@ ########################################################################## # diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index f367b00a675..be09847022b 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -2971,7 +2971,6 @@ LookupExplicitNamespace(const char *nspname, bool missing_ok) { Oid namespaceId; AclResult aclresult; - /* check for pg_temp alias */ if (strcmp(nspname, "pg_temp") == 0) { @@ -2989,7 +2988,24 @@ LookupExplicitNamespace(const char *nspname, bool missing_ok) if (missing_ok && !OidIsValid(namespaceId)) return InvalidOid; - aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_USAGE); + HeapTuple tuple; + Oid ownerId; + + tuple = SearchSysCache1(NAMESPACEOID, ObjectIdGetDatum(namespaceId)); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema with OID %u does not exist", namespaceId))); + + ownerId = ((Form_pg_namespace) GETSTRUCT(tuple))->nspowner; + + ReleaseSysCache(tuple); + + if (!mdb_admin_allow_bypass_owner_checks(GetUserId(), ownerId)) { + aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_USAGE); + } else { + 
aclresult = ACLCHECK_OK; + } if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, OBJECT_SCHEMA, nspname); diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index f5dfd6ff126..6f370a2c9aa 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -1085,7 +1085,8 @@ AlterObjectOwner_internal(Relation rel, Oid objectId, Oid new_ownerId) if (!superuser()) { /* must be owner */ - if (!has_privs_of_role(GetUserId(), old_ownerId)) + if (!has_privs_of_role(GetUserId(), old_ownerId) + && !mdb_admin_allow_bypass_owner_checks(GetUserId(), old_ownerId)) { char *objname; char namebuf[NAMEDATALEN]; @@ -1105,14 +1106,13 @@ AlterObjectOwner_internal(Relation rel, Oid objectId, Oid new_ownerId) aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objectId), objname); } - /* Must be able to become new owner */ - check_is_member_of_role(GetUserId(), new_ownerId); + + check_mdb_admin_is_member_of_role(GetUserId(), new_ownerId); /* New owner must have CREATE privilege on namespace */ if (OidIsValid(namespaceId)) { AclResult aclresult; - aclresult = pg_namespace_aclcheck(namespaceId, new_ownerId, ACL_CREATE); if (aclresult != ACLCHECK_OK) diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 6822032fe0d..a3d2f155fd8 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -478,10 +478,6 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, dest, params, queryEnv, 0); } - /* GPDB hook for collecting query info */ - if (query_info_collect_hook) - (*query_info_collect_hook)(METRICS_QUERY_SUBMIT, queryDesc); - if (into->skipData) { /* @@ -495,6 +491,10 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, } else { + /* GPDB hook for collecting query info */ + if (query_info_collect_hook) + (*query_info_collect_hook)(METRICS_QUERY_SUBMIT, queryDesc); + check_and_unassign_from_resgroup(queryDesc->plannedstmt); queryDesc->plannedstmt->query_mem = 
ResourceManagerGetQueryMemoryLimit(queryDesc->plannedstmt); diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c index b99b2419fcc..1ab3b36dd59 100644 --- a/src/backend/commands/functioncmds.c +++ b/src/backend/commands/functioncmds.c @@ -1525,9 +1525,13 @@ CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt) * by security barrier views or row-level security policies. */ if (isLeakProof && !superuser()) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("only superuser can define a leakproof function"))); + { + Oid role = get_role_oid("mdb_admin", true /*if nodoby created mdb_admin role in this database*/); + if (!is_member_of_role(GetUserId(), role)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("only superuser or mdb_admin can define a leakproof function"))); + } if (transformDefElem) { @@ -1852,9 +1856,13 @@ AlterFunction(ParseState *pstate, AlterFunctionStmt *stmt) { procForm->proleakproof = intVal(leakproof_item->arg); if (procForm->proleakproof && !superuser()) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("only superuser can define a leakproof function"))); + { + Oid role = get_role_oid("mdb_admin", true /*if nodoby created mdb_admin role in this database*/); + if (!is_member_of_role(GetUserId(), role)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("only superuser or mdb_admin can define a leakproof function"))); + } } if (cost_item) { diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 1555ea9d334..dc8efd4d892 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -63,6 +63,7 @@ #include "tcop/tcopprot.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/metrics_utils.h" #include "utils/rel.h" #include "utils/snapmgr.h" #include "utils/syscache.h" @@ -842,6 +843,10 @@ refresh_matview_datafill(DestReceiver *dest, Query *query, 
GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); + /* GPDB hook for collecting query info */ + if (query_info_collect_hook) + (*query_info_collect_hook)(METRICS_QUERY_SUBMIT, queryDesc); + RestoreOidAssignments(saved_dispatch_oids); /* call ExecutorStart to prepare the plan for execution */ diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 4817c14f07d..553830e8599 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -35,6 +35,7 @@ #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "utils/memutils.h" +#include "utils/metrics_utils.h" #include "utils/snapmgr.h" #include "cdb/cdbendpoint.h" @@ -373,6 +374,10 @@ PortalCleanup(Portal portal) FreeQueryDesc(queryDesc); CurrentResourceOwner = saveResourceOwner; + } else { + /* GPDB hook for collecting query info */ + if (queryDesc->yagp_query_key && query_info_collect_hook) + (*query_info_collect_hook)(METRICS_QUERY_ERROR, queryDesc); } } diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c index 96757eaa814..03f96bb6499 100644 --- a/src/backend/commands/schemacmds.c +++ b/src/backend/commands/schemacmds.c @@ -598,12 +598,12 @@ AlterSchemaOwner_internal(HeapTuple tup, Relation rel, Oid newOwnerId) AclResult aclresult; /* Otherwise, must be owner of the existing object */ - if (!pg_namespace_ownercheck(nspForm->oid, GetUserId())) + if (!mdb_admin_allow_bypass_owner_checks(GetUserId(), nspForm->nspowner) + && !pg_namespace_ownercheck(nspForm->oid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA, NameStr(nspForm->nspname)); - /* Must be able to become new owner */ - check_is_member_of_role(GetUserId(), newOwnerId); + check_mdb_admin_is_member_of_role(GetUserId(), newOwnerId); /* * must have create-schema rights @@ -614,8 +614,13 @@ AlterSchemaOwner_internal(HeapTuple tup, Relation rel, Oid newOwnerId) * schemas. 
Because superusers will always have this right, we need * no special case for them. */ - aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), + if (mdb_admin_allow_bypass_owner_checks(GetUserId(), nspForm->nspowner)) { + aclresult = ACLCHECK_OK; + } else { + aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE); + } + if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, OBJECT_DATABASE, get_database_name(MyDatabaseId)); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 42e00efe81d..07f00a212b0 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -15704,13 +15704,14 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock AclResult aclresult; /* Otherwise, must be owner of the existing object */ - if (!pg_class_ownercheck(relationOid, GetUserId())) + if (!mdb_admin_allow_bypass_owner_checks(GetUserId(), tuple_class->relowner) + && !pg_class_ownercheck(relationOid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relationOid)), RelationGetRelationName(target_rel)); - /* Must be able to become new owner */ - check_is_member_of_role(GetUserId(), newOwnerId); + check_mdb_admin_is_member_of_role(GetUserId(), newOwnerId); + /* New owner must have CREATE privilege on namespace */ aclresult = pg_namespace_aclcheck(namespaceOid, newOwnerId, ACL_CREATE); @@ -20791,7 +20792,7 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, Form_pg_class classform; AclResult aclresult; char relkind; - + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); if (!HeapTupleIsValid(tuple)) return; /* concurrently dropped */ @@ -20799,7 +20800,8 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, relkind = classform->relkind; /* Must own relation. 
*/ - if (!pg_class_ownercheck(relid, GetUserId())) + if (!mdb_admin_allow_bypass_owner_checks(GetUserId(), classform->relowner) + && !pg_class_ownercheck(relid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname); /* No system table modifications unless explicitly allowed. */ diff --git a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp index 60bccf59341..bb086954403 100644 --- a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp +++ b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp @@ -12,6 +12,7 @@ #include "unittest/gpos/string/CWStringTest.h" #include +#include "common/mdb_locale.h" #include "gpos/base.h" #include "gpos/error/CAutoTrace.h" @@ -177,18 +178,18 @@ CWStringTest::EresUnittest_AppendFormatInvalidLocale() CWStringDynamic *expected = GPOS_NEW(mp) CWStringDynamic(mp, GPOS_WSZ_LIT("UNKNOWN")); - CHAR *oldLocale = setlocale(LC_CTYPE, nullptr); + CHAR *oldLocale = SETLOCALE(LC_CTYPE, nullptr); CWStringDynamic *pstr1 = GPOS_NEW(mp) CWStringDynamic(mp); GPOS_RESULT eres = GPOS_OK; - setlocale(LC_CTYPE, "C"); + SETLOCALE(LC_CTYPE, "C"); pstr1->AppendFormat(GPOS_WSZ_LIT("%s"), (CHAR *) "ÃË", 123); pstr1->Equals(expected); // cleanup - setlocale(LC_CTYPE, oldLocale); + SETLOCALE(LC_CTYPE, oldLocale); GPOS_DELETE(pstr1); GPOS_DELETE(expected); diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index 0d5ccaa201d..753b94752d3 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -52,6 +52,7 @@ static int pg_signal_backend(int pid, int sig, char *msg) { PGPROC *proc = BackendPidGetProc(pid); + LocalPgBackendStatus *local_beentry; /* * BackendPidGetProc returns NULL if the pid isn't valid; but by the time @@ -72,9 +73,34 @@ pg_signal_backend(int pid, int sig, char *msg) 
return SIGNAL_BACKEND_ERROR; } + local_beentry = pgstat_fetch_stat_local_beentry_by_pid(pid); + /* Only allow superusers to signal superuser-owned backends. */ if (superuser_arg(proc->roleId) && !superuser()) - return SIGNAL_BACKEND_NOSUPERUSER; + { + Oid role; + char * appname; + + if (local_beentry == NULL) { + return SIGNAL_BACKEND_NOSUPERUSER; + } + + role = get_role_oid("mdb_admin", true /*if nodoby created mdb_admin role in this database*/); + appname = local_beentry->backendStatus.st_appname; + + // only allow mdb_admin to kill su queries + if (!is_member_of_role(GetUserId(), role)) { + return SIGNAL_BACKEND_NOSUPERUSER; + } + + if (local_beentry->backendStatus.st_backendType == B_AUTOVAC_WORKER) { + // ok + } else if (appname != NULL && strcmp(appname, "MDB") == 0) { + // ok + } else { + return SIGNAL_BACKEND_NOSUPERUSER; + } + } /* Users can signal backends they have role membership in. */ if (!has_privs_of_role(GetUserId(), proc->roleId) && diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 532690f1d51..7c1dbc480bc 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -127,6 +127,9 @@ CreateQueryDesc(PlannedStmt *plannedstmt, if (Gp_role != GP_ROLE_EXECUTE) increment_command_count(); + /* null this field until set by YAGP Hooks collector */ + qd->yagp_query_key = NULL; + return qd; } diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c index 9a0918bceff..217483c1c61 100644 --- a/src/backend/utils/activity/backend_status.c +++ b/src/backend/utils/activity/backend_status.c @@ -1102,6 +1102,22 @@ pgstat_fetch_stat_local_beentry(int beid) return &localBackendStatusTable[beid - 1]; } +/* -- mdb admin patch -- */ +LocalPgBackendStatus * +pgstat_fetch_stat_local_beentry_by_pid(int pid) +{ + pgstat_read_current_status(); + + for (int i = 1; i <= localNumBackends; ++i) { + if (localBackendStatusTable[i - 1].backendStatus.st_procpid == pid) { + return 
&localBackendStatusTable[i - 1]; + } + } + + return NULL; +} + +/* -- mdb admin patch end -- */ /* ---------- * pgstat_fetch_stat_numbackends() - diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index bd5479c546b..58dd15a6f8b 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -117,7 +117,8 @@ OBJS = \ windowfuncs.o \ xid.o \ xid8funcs.o \ - xml.o + xml.o \ + mdb.o jsonpath_scan.c: FLEXFLAGS = -CF -p -p jsonpath_scan.c: FLEX_NO_BACKUP=yes diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c index 714a536e93d..e3463f636ae 100644 --- a/src/backend/utils/adt/acl.c +++ b/src/backend/utils/adt/acl.c @@ -116,6 +116,7 @@ static AclResult pg_role_aclcheck(Oid role_oid, Oid roleid, AclMode mode); static void RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue); +static bool has_privs_of_unwanted_system_role(Oid role); /* * getid @@ -4991,9 +4992,65 @@ roles_is_member_of(Oid roleid, enum RoleRecurseType type, * set; for such roles, membership implies the ability to do SET ROLE, but * the privileges are not available until you've done so. */ + +/* +* This is basically original postgresql privs-check function +*/ + +// -- mdb_superuser patch + +bool +has_privs_of_role_strict(Oid member, Oid role) +{ + /* Fast path for simple case */ + if (member == role) + return true; + + /* Superusers have every privilege, so are part of every role */ + if (superuser_arg(member)) + return true; + + /* + * Find all the roles that member has the privileges of, including + * multi-level recursion, then see if target role is any one of them. 
+ */ + return list_member_oid(roles_is_member_of(member, ROLERECURSE_PRIVS, + InvalidOid, NULL), + role); +} + +/* +* Check whether role is one of the "dangerous" system roles +* or has "strict" (not through mdb_admin or mdb_superuser) +* privs of one of them +*/ + +static bool +has_privs_of_unwanted_system_role(Oid role) { + if (has_privs_of_role_strict(role, ROLE_PG_READ_SERVER_FILES)) { + return true; + } + if (has_privs_of_role_strict(role, ROLE_PG_WRITE_SERVER_FILES)) { + return true; + } + if (has_privs_of_role_strict(role, ROLE_PG_EXECUTE_SERVER_PROGRAM)) { + return true; + } + if (has_privs_of_role_strict(role, ROLE_PG_READ_ALL_DATA)) { + return true; + } + if (has_privs_of_role_strict(role, ROLE_PG_WRITE_ALL_DATA)) { + return true; + } + + return false; +} + bool has_privs_of_role(Oid member, Oid role) { + Oid mdb_superuser_roleoid; + /* Fast path for simple case */ if (member == role) return true; @@ -5002,6 +5059,23 @@ has_privs_of_role(Oid member, Oid role) if (superuser_arg(member)) return true; + mdb_superuser_roleoid = get_role_oid("mdb_superuser", true /*if nobody created mdb_superuser role in this database*/); + + if (is_member_of_role(member, mdb_superuser_roleoid)) { + /* if target role is superuser, disallow */ + if (!superuser_arg(role)) { + /* we want mdb_superuser to bypass + * has_privs_of_role test + * if target role is neither superuser nor + * some dangerous system role + */ + if (!has_privs_of_unwanted_system_role(role)) { + return true; + } + } + } + + /* * Find all the roles that member has the privileges of, including * multi-level recursion, then see if target role is any one of them. @@ -5011,6 +5085,49 @@ has_privs_of_role(Oid member, Oid role) role); } +// -- mdb_superuser patch + +// -- non-upstream patch begin +/* + * Is userId allowed to bypass ownership check + * and transfer ownership to ownerId role?
+ */ +bool +mdb_admin_allow_bypass_owner_checks(Oid userId, Oid ownerId) +{ + Oid mdb_admin_roleoid; + /* + * Never allow anybody to give objects to + * superusers. + * This can result in various CVEs. + * Out of paranoia, check this even before + * membership of mdb_admin role. + */ + if (superuser_arg(ownerId)) { + return false; + } + + mdb_admin_roleoid = get_role_oid("mdb_admin", true /* in case nobody created the mdb_admin role */); + /* Is userId actually a member of mdb_admin? */ + if (!is_member_of_role(userId, mdb_admin_roleoid)) { + /* if no, disallow. */ + return false; + } + + /* + * Now, we need to check if ownerId + * is some dangerous role to transfer ownership to. + * + * For now, we check that ownerId does not have + * the privilege to execute server programs and/or + * read/write server files, and/or pg read/write all data + */ + + /* All checks passed, hope will not be hacked here (again) */ + return !has_privs_of_unwanted_system_role(ownerId); +} + +// -- non-upstream patch end /* * Is member a member of role (directly or indirectly)? @@ -5051,6 +5168,53 @@ check_is_member_of_role(Oid member, Oid role) GetUserNameFromId(role, false)))); } +// -- mdb admin patch +/* + * check_mdb_admin_is_member_of_role + * is_member_of_role with a standard permission-violation error if not in usual case + * In case `member` is in mdb_admin we check that role is neither superuser, pg_read/write + * server files nor pg_execute_server_program or pg_read/write all data + */ +void +check_mdb_admin_is_member_of_role(Oid member, Oid role) +{ + Oid mdb_admin_roleoid; + /* fast path - if we are superuser, it's ok */ + if (superuser_arg(member)) { + return; + } + + mdb_admin_roleoid = get_role_oid("mdb_admin", true /* in case nobody created the mdb_admin role */); + /* Is member actually a member of mdb_admin?
*/ + if (OidIsValid(mdb_admin_roleoid) && is_member_of_role(member, mdb_admin_roleoid)) { + + /* member is in mdb_admin */ + if (superuser_arg(role)) { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot transfer ownership to superuser \"%s\"", + GetUserNameFromId(role, false)))); + } + + if (has_privs_of_unwanted_system_role(role)) { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("forbidden to transfer ownership to this system role in Cloud"))); + } + } else { + /* if no, check membership transfer in usual way. */ + + if (!is_member_of_role(member, role)) { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be member of role \"%s\"", + GetUserNameFromId(role, false)))); + } + } +} + +// -- mdb admin patch + /* * Is member a member of role, not considering superuserness? * @@ -5175,6 +5339,7 @@ select_best_grantor(Oid roleId, AclMode privileges, List *roles_list; int nrights; ListCell *l; + Oid mdb_superuser_roleoid; /* * The object owner is always treated as having all grant options, so if @@ -5189,6 +5354,16 @@ select_best_grantor(Oid roleId, AclMode privileges, return; } + mdb_superuser_roleoid = get_role_oid("mdb_superuser", true /* missing_ok: InvalidOid if nobody created the mdb_superuser role */); + + if (OidIsValid(mdb_superuser_roleoid) && is_member_of_role(GetUserId(), mdb_superuser_roleoid) + && has_privs_of_role(GetUserId(), ownerId)) { + AclMode mdb_superuser_allowed_privs = needed_goptions; + *grantorId = mdb_superuser_roleoid; + *grantOptions = mdb_superuser_allowed_privs; + return; + } + /* * Otherwise we have to do a careful search to see if roleId has the * privileges of any suitable role.
Note: we can hang onto the result of @@ -5197,7 +5372,6 @@ select_best_grantor(Oid roleId, AclMode privileges, */ roles_list = roles_is_member_of(roleId, ROLERECURSE_PRIVS, InvalidOid, NULL); - /* initialize candidate result as default */ *grantorId = roleId; *grantOptions = ACL_NO_RIGHTS; diff --git a/src/backend/utils/adt/mdb.c b/src/backend/utils/adt/mdb.c new file mode 100644 index 00000000000..e5c695de1b6 --- /dev/null +++ b/src/backend/utils/adt/mdb.c @@ -0,0 +1,37 @@ +/*------------------------------------------------------------------------- + * + * mdb.c + * mdb routines + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/mdb.c + * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "fmgr.h" +#include "utils/fmgrprotos.h" + +/* + * mdb_locale_enabled + * Report whether this build was compiled with USE_MDBLOCALES + */ +Datum +mdb_locale_enabled(PG_FUNCTION_ARGS) +{ + bool res; + +#ifdef USE_MDBLOCALES + res = true; +#else + res = false; +#endif + + PG_RETURN_BOOL(res); +} diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 11392891538..a9acb875eee 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -66,6 +66,7 @@ #include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/syscache.h" +#include "common/mdb_locale.h" #ifdef USE_ICU #include @@ -147,7 +148,7 @@ pg_perm_setlocale(int category, const char *locale) const char *envvar; #ifndef WIN32 - result = setlocale(category, locale); + result = SETLOCALE(category, locale); #else /* @@ -165,7 +166,7 @@ pg_perm_setlocale(int category, const char *locale) } else #endif - result = setlocale(category, locale); + result = SETLOCALE(category, locale); #endif /* WIN32 */ if (result == NULL) @@ -252,7 +253,7 @@ check_locale(int category,
const char *locale, char **canonname) if (canonname) *canonname = NULL; /* in case of failure */ - save = setlocale(category, NULL); + save = SETLOCALE(category, NULL); if (!save) return false; /* won't happen, we hope */ @@ -260,14 +261,14 @@ check_locale(int category, const char *locale, char **canonname) save = pstrdup(save); /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); + res = SETLOCALE(category, locale); /* save canonical name if requested. */ if (res && canonname) *canonname = pstrdup(res); /* restore old value. */ - if (!setlocale(category, save)) + if (!SETLOCALE(category, save)) elog(WARNING, "failed to restore old locale \"%s\"", save); pfree(save); @@ -501,12 +502,12 @@ PGLC_localeconv(void) memset(&worklconv, 0, sizeof(worklconv)); /* Save prevailing values of monetary and numeric locales */ - save_lc_monetary = setlocale(LC_MONETARY, NULL); + save_lc_monetary = SETLOCALE(LC_MONETARY, NULL); if (!save_lc_monetary) elog(ERROR, "setlocale(NULL) failed"); save_lc_monetary = pstrdup(save_lc_monetary); - save_lc_numeric = setlocale(LC_NUMERIC, NULL); + save_lc_numeric = SETLOCALE(LC_NUMERIC, NULL); if (!save_lc_numeric) elog(ERROR, "setlocale(NULL) failed"); save_lc_numeric = pstrdup(save_lc_numeric); @@ -528,7 +529,7 @@ PGLC_localeconv(void) */ /* Save prevailing value of ctype locale */ - save_lc_ctype = setlocale(LC_CTYPE, NULL); + save_lc_ctype = SETLOCALE(LC_CTYPE, NULL); if (!save_lc_ctype) elog(ERROR, "setlocale(NULL) failed"); save_lc_ctype = pstrdup(save_lc_ctype); @@ -536,11 +537,11 @@ PGLC_localeconv(void) /* Here begins the critical section where we must not throw error */ /* use numeric to set the ctype */ - setlocale(LC_CTYPE, locale_numeric); + SETLOCALE(LC_CTYPE, locale_numeric); #endif /* Get formatting information for numeric */ - setlocale(LC_NUMERIC, locale_numeric); + SETLOCALE(LC_NUMERIC, locale_numeric); extlconv = localeconv(); /* Must copy data now in case setlocale() overwrites 
it */ @@ -550,11 +551,11 @@ PGLC_localeconv(void) #ifdef WIN32 /* use monetary to set the ctype */ - setlocale(LC_CTYPE, locale_monetary); + SETLOCALE(LC_CTYPE, locale_monetary); #endif /* Get formatting information for monetary */ - setlocale(LC_MONETARY, locale_monetary); + SETLOCALE(LC_MONETARY, locale_monetary); extlconv = localeconv(); /* Must copy data now in case setlocale() overwrites it */ @@ -584,12 +585,12 @@ PGLC_localeconv(void) * should fail. */ #ifdef WIN32 - if (!setlocale(LC_CTYPE, save_lc_ctype)) + if (!SETLOCALE(LC_CTYPE, save_lc_ctype)) elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype); #endif - if (!setlocale(LC_MONETARY, save_lc_monetary)) + if (!SETLOCALE(LC_MONETARY, save_lc_monetary)) elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary); - if (!setlocale(LC_NUMERIC, save_lc_numeric)) + if (!SETLOCALE(LC_NUMERIC, save_lc_numeric)) elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric); /* @@ -773,7 +774,7 @@ cache_locale_time(void) */ /* Save prevailing value of time locale */ - save_lc_time = setlocale(LC_TIME, NULL); + save_lc_time = SETLOCALE(LC_TIME, NULL); if (!save_lc_time) elog(ERROR, "setlocale(NULL) failed"); save_lc_time = pstrdup(save_lc_time); @@ -788,16 +789,16 @@ cache_locale_time(void) */ /* Save prevailing value of ctype locale */ - save_lc_ctype = setlocale(LC_CTYPE, NULL); + save_lc_ctype = SETLOCALE(LC_CTYPE, NULL); if (!save_lc_ctype) elog(ERROR, "setlocale(NULL) failed"); save_lc_ctype = pstrdup(save_lc_ctype); /* use lc_time to set the ctype */ - setlocale(LC_CTYPE, locale_time); + SETLOCALE(LC_CTYPE, locale_time); #endif - setlocale(LC_TIME, locale_time); + SETLOCALE(LC_TIME, locale_time); /* We use times close to current time as data for strftime(). */ timenow = time(NULL); @@ -846,10 +847,10 @@ cache_locale_time(void) * failure to do so is fatal. 
*/ #ifdef WIN32 - if (!setlocale(LC_CTYPE, save_lc_ctype)) + if (!SETLOCALE(LC_CTYPE, save_lc_ctype)) elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype); #endif - if (!setlocale(LC_TIME, save_lc_time)) + if (!SETLOCALE(LC_TIME, save_lc_time)) elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time); /* @@ -1225,7 +1226,7 @@ check_strxfrm_bug(void) ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length", - setlocale(LC_COLLATE, NULL)), + SETLOCALE(LC_COLLATE, NULL)), errhint("Apply system library package updates."))); } @@ -1339,7 +1340,7 @@ lc_collate_is_c(Oid collation) if (result >= 0) return (bool) result; - localeptr = setlocale(LC_COLLATE, NULL); + localeptr = SETLOCALE(LC_COLLATE, NULL); if (!localeptr) elog(ERROR, "invalid LC_COLLATE setting"); @@ -1389,7 +1390,7 @@ lc_ctype_is_c(Oid collation) if (result >= 0) return (bool) result; - localeptr = setlocale(LC_CTYPE, NULL); + localeptr = SETLOCALE(LC_CTYPE, NULL); if (!localeptr) elog(ERROR, "invalid LC_CTYPE setting"); @@ -1518,8 +1519,10 @@ pg_newlocale_from_collation(Oid collid) /* Normal case where they're the same */ errno = 0; #ifndef WIN32 - loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, + + loc = NEWLOCALE(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, NULL); + #else loc = _create_locale(LC_ALL, collcollate); #endif @@ -1533,11 +1536,11 @@ pg_newlocale_from_collation(Oid collid) locale_t loc1; errno = 0; - loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); + loc1 = NEWLOCALE(LC_COLLATE_MASK, collcollate, NULL); if (!loc1) report_newlocale_failure(collcollate); errno = 0; - loc = newlocale(LC_CTYPE_MASK, collctype, loc1); + loc = NEWLOCALE(LC_CTYPE_MASK, collctype, loc1); if (!loc) report_newlocale_failure(collctype); #else @@ -1680,12 +1683,16 @@ get_collation_actual_version(char collprovider, const char *collcollate) { #if defined(__GLIBC__) /* Use the glibc version because we don't 
have anything better. */ +#ifdef USE_MDBLOCALES + collversion = pstrdup(mdb_localesversion()); +#else collversion = pstrdup(gnu_get_libc_version()); +#endif #elif defined(LC_VERSION_MASK) locale_t loc; /* Look up FreeBSD collation version. */ - loc = newlocale(LC_COLLATE, collcollate, NULL); + loc = NEWLOCALE(LC_COLLATE, collcollate, NULL); if (loc) { collversion = diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 29287088ecf..952d1474870 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -40,6 +40,7 @@ #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/syscache.h" +#include "common/mdb_locale.h" /* * We maintain a simple linked list caching the fmgr lookup info for the @@ -1308,7 +1309,7 @@ pg_bind_textdomain_codeset(const char *domainname) int new_msgenc; #ifndef WIN32 - const char *ctype = setlocale(LC_CTYPE, NULL); + const char *ctype = SETLOCALE(LC_CTYPE, NULL); if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0) #endif diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index fb09180ebe9..3b9d6da07fb 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -4928,7 +4928,7 @@ static struct config_enum ConfigureNamesEnum[] = { {"session_replication_role", PGC_SUSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the session's behavior for triggers and rewrite rules."), - NULL + NULL, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, 0, true, }, &SessionReplicationRole, SESSION_REPLICATION_ROLE_ORIGIN, session_replication_role_options, @@ -7625,6 +7625,7 @@ set_config_option(const char *name, const char *value, void *newextra = NULL; bool prohibitValueChange = false; bool makeDefault; + Oid role; if (elevel == 0) { @@ -7782,10 +7783,13 @@ set_config_option(const char *name, const char *value, case PGC_SUSET: if (context == PGC_USERSET || context == PGC_BACKEND) { - ereport(elevel, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), 
- errmsg("permission denied to set parameter \"%s\"", - name))); + role = get_role_oid("mdb_admin", true /* in case nobody created the mdb_admin role */); + if (!(record->mdb_admin_allowed && is_member_of_role(GetUserId(), role))) { + ereport(elevel, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to set parameter \"%s\"", + name))); return 0; + } } break; diff --git a/src/backend/utils/workfile_manager/workfile_mgr.c b/src/backend/utils/workfile_manager/workfile_mgr.c index e5b311cf9ba..21b4463e5f1 100644 --- a/src/backend/utils/workfile_manager/workfile_mgr.c +++ b/src/backend/utils/workfile_manager/workfile_mgr.c @@ -192,6 +192,9 @@ static void unpin_workset(workfile_set *work_set); static bool proc_exit_hook_registered = false; +static uint64 total_bytes_written = 0; +static uint64 total_files_created = 0; + Datum gp_workfile_mgr_cache_entries(PG_FUNCTION_ARGS); Datum gp_workfile_mgr_used_diskspace(PG_FUNCTION_ARGS); @@ -371,6 +374,7 @@ RegisterFileWithSet(File file, workfile_set *work_set) localCtl.entries[file].work_set = work_set; work_set->num_files++; work_set->perquery->num_files++; + total_files_created++; /* Enforce the limit on number of files */ if (gp_workfile_limit_files_per_query > 0 && @@ -447,6 +451,7 @@ UpdateWorkFileSize(File file, uint64 newsize) (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("workfile per segment size limit exceeded"))); } + total_bytes_written += diff; } /* @@ -986,3 +991,22 @@ workfile_is_active(workfile_set *workfile) { return workfile ?
workfile->active : false; } + +uint64 +WorkfileTotalBytesWritten(void) +{ + return total_bytes_written; +} + +uint64 +WorkfileTotalFilesCreated(void) +{ + return total_files_created; +} + +void +WorkfileResetBackendStats(void) +{ + total_bytes_written = 0; + total_files_created = 0; +} diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 4ed9869a2c9..708cf77ffdf 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -75,6 +75,7 @@ #include "getopt_long.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "common/mdb_locale.h" #include "catalog/catalog.h" @@ -2274,12 +2275,13 @@ locale_date_order(const char *locale) result = DATEORDER_MDY; /* default */ - save = setlocale(LC_TIME, NULL); + save = SETLOCALE(LC_TIME, NULL); + if (!save) return result; save = pg_strdup(save); - setlocale(LC_TIME, locale); + SETLOCALE(LC_TIME, locale); memset(&testtime, 0, sizeof(testtime)); testtime.tm_mday = 22; @@ -2288,7 +2290,7 @@ locale_date_order(const char *locale) res = my_strftime(buf, sizeof(buf), "%x", &testtime); - setlocale(LC_TIME, save); + SETLOCALE(LC_TIME, save); free(save); if (res == 0) @@ -2332,7 +2334,7 @@ check_locale_name(int category, const char *locale, char **canonname) if (canonname) *canonname = NULL; /* in case of failure */ - save = setlocale(category, NULL); + save = SETLOCALE(category, NULL); if (!save) { pg_log_error("setlocale() failed"); @@ -2347,14 +2349,14 @@ check_locale_name(int category, const char *locale, char **canonname) locale = ""; /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); + res = SETLOCALE(category, locale); /* save canonical name if requested. */ if (res && canonname) *canonname = pg_strdup(res); /* restore old value. 
*/ - if (!setlocale(category, save)) + if (!SETLOCALE(category, save)) { pg_log_error("failed to restore old locale \"%s\"", save); exit(1); diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index d0905f3d588..1859443ed87 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -16,6 +16,8 @@ #include "mb/pg_wchar.h" #include "pg_upgrade.h" #include "greenplum/pg_upgrade_greenplum.h" +#include "common/mdb_locale.h" + static void check_new_cluster_is_empty(void); static void check_databases_are_compatible(void); @@ -1629,7 +1631,8 @@ get_canonical_locale_name(int category, const char *locale) char *res; /* get the current setting, so we can restore it. */ - save = setlocale(category, NULL); + + save = SETLOCALE(category, NULL); if (!save) pg_fatal("failed to get the current locale\n"); @@ -1637,7 +1640,7 @@ get_canonical_locale_name(int category, const char *locale) save = (char *) pg_strdup(save); /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); + res = SETLOCALE(category, locale); if (!res) pg_fatal("failed to get system locale name for \"%s\"\n", locale); @@ -1645,7 +1648,7 @@ get_canonical_locale_name(int category, const char *locale) res = pg_strdup(res); /* restore old value. 
*/ - if (!setlocale(category, save)) + if (!SETLOCALE(category, save)) pg_fatal("failed to restore old locale \"%s\"\n", save); pg_free(save); diff --git a/src/common/exec.c b/src/common/exec.c index 7dd2f8c4942..5159b616a39 100644 --- a/src/common/exec.c +++ b/src/common/exec.c @@ -24,6 +24,8 @@ #include #include #include +#include "common/mdb_locale.h" + /* Inhibit mingw CRT's auto-globbing of command line arguments */ #if defined(WIN32) && !defined(_MSC_VER) @@ -443,7 +445,7 @@ set_pglocale_pgservice(const char *argv0, const char *app) /* don't set LC_ALL in the backend */ if (strcmp(app, PG_TEXTDOMAIN("postgres")) != 0) { - setlocale(LC_ALL, ""); + SETLOCALE(LC_ALL, ""); /* * One could make a case for reproducing here PostmasterMain()'s test diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 4bbb035eaea..f053a30b009 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -30,8 +30,8 @@ #define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF) /* Number of SLRU buffers to use for multixact */ -#define NUM_MULTIXACTOFFSET_BUFFERS 8 -#define NUM_MULTIXACTMEMBER_BUFFERS 16 +#define NUM_MULTIXACTOFFSET_BUFFERS 32 +#define NUM_MULTIXACTMEMBER_BUFFERS 64 /* * Possible multixact lock modes ("status"). 
The first four modes are for diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h index 9a54dc0fb3b..73503a26dcc 100644 --- a/src/include/access/subtrans.h +++ b/src/include/access/subtrans.h @@ -12,7 +12,7 @@ #define SUBTRANS_H /* Number of SLRU buffers to use for subtrans */ -#define NUM_SUBTRANS_BUFFERS 32 +#define NUM_SUBTRANS_BUFFERS 64 typedef struct SubTransData { diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index a47b1ef1615..1093fa948b8 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11758,7 +11758,9 @@ # # GPDB ADDITIONS START HERE # - +{ oid => '16383', descr => 'is the server built with MDB locales support', + proname => 'mdb_locale_enabled', prorettype => 'bool', + proargtypes => '', prosrc => 'mdb_locale_enabled' }, { oid => '7178', descr => 'for use by pg_upgrade', proname => 'binary_upgrade_set_preassigned_oids', provolatile => 'v', proparallel => 'u', prorettype => 'void', proargtypes => '_oid', diff --git a/src/include/common/mdb_locale.h b/src/include/common/mdb_locale.h new file mode 100644 index 00000000000..91d8656c2c2 --- /dev/null +++ b/src/include/common/mdb_locale.h @@ -0,0 +1,41 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + * + * mdb_locale.h + * Generic headers for custom MDB-locales patch. + * + * IDENTIFICATION + * src/include/common/mdb_locale.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PG_MDB_LOCALE_H +#define PG_MDB_LOCALE_H + +#ifdef USE_MDBLOCALES +#include +#define SETLOCALE(category, locale) mdb_setlocale(category, locale) +#define NEWLOCALE(category, locale, base) mdb_newlocale(category, locale, base) +#else +#define SETLOCALE(category, locale) setlocale(category, locale) +#define NEWLOCALE(category, locale, base) newlocale(category, locale, base) +#endif + +#endif /* PG_MDB_LOCALE_H */ diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index e3ecf31b664..e469945a4c5 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -22,6 +22,14 @@ struct CdbExplain_ShowStatCtx; /* private, in "cdb/cdbexplain.c" */ +typedef struct YagpQueryKey +{ + int tmid; /* transaction time */ + int ssid; /* session id */ + int ccnt; /* command count */ + int nesting_level; + uintptr_t query_desc_addr; +} YagpQueryKey; /* * SerializedParams is used to serialize external query parameters @@ -330,6 +338,9 @@ typedef struct QueryDesc /* This is always set NULL by the core system, but plugins can change it */ struct Instrumentation *totaltime; /* total time spent in ExecutorRun */ + + /* YAGP Hooks collector */ + YagpQueryKey *yagp_query_key; } QueryDesc; /* in pquery.c */ diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index aaa3ea32e8a..54de6844f58 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -392,6 +392,9 @@ /* Define to 1 if you have the `m' library (-lm). */ #undef HAVE_LIBM +/* Define to 1 if you have the `mdblocales' library (-lmdblocales). 
*/ +#undef HAVE_LIBMDBLOCALES + /* Define to 1 if you have the `numa' library (-lnuma). */ #undef HAVE_LIBNUMA @@ -1041,6 +1044,9 @@ /* Define to 1 to build with LZ4 support. (--with-lz4) */ #undef USE_LZ4 +/* Define to 1 to build with MDB locales. (--with-mdblocales) */ +#undef USE_MDBLOCALES + /* Define to 1 to build with Mapreduce capabilities (--enable-mapreduce) */ #undef USE_MAPREDUCE diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h index 223175099bd..49068f04b2f 100644 --- a/src/include/utils/acl.h +++ b/src/include/utils/acl.h @@ -207,9 +207,17 @@ extern AclMode aclmask(const Acl *acl, Oid roleid, Oid ownerId, extern int aclmembers(const Acl *acl, Oid **roleids); extern bool has_privs_of_role(Oid member, Oid role); +extern bool has_privs_of_role_strict(Oid member, Oid role); extern bool is_member_of_role(Oid member, Oid role); extern bool is_member_of_role_nosuper(Oid member, Oid role); extern bool is_admin_of_role(Oid member, Oid role); + +// -- non-upstream patch begin +extern bool mdb_admin_allow_bypass_owner_checks(Oid userId, Oid ownerId); + +extern void check_mdb_admin_is_member_of_role(Oid member, Oid role); +// -- non-upstream patch end + extern void check_is_member_of_role(Oid member, Oid role); extern Oid get_role_oid(const char *rolename, bool missing_ok); extern Oid get_role_oid_or_public(const char *rolename); diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h index 139b7355d13..139646d4a40 100644 --- a/src/include/utils/backend_status.h +++ b/src/include/utils/backend_status.h @@ -319,6 +319,9 @@ extern uint64 pgstat_get_my_query_id(void); extern int pgstat_fetch_stat_numbackends(void); extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid); extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid); +/* -- mdb admin patch -- */ +extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry_by_pid(int pid); +/* -- mdb admin patch end -- */ extern char *pgstat_clip_activity(const 
char *raw_activity); diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h index 17d2a166b09..08584e4db54 100644 --- a/src/include/utils/guc_tables.h +++ b/src/include/utils/guc_tables.h @@ -204,6 +204,8 @@ struct config_generic char *sourcefile; /* file current setting is from (NULL if not * set in config file) */ int sourceline; /* line in source file */ + + bool mdb_admin_allowed; /* is mdb admin allowed to change this, makes sence only for superuser/not superuser ctx */ }; /* bit values in status field */ diff --git a/src/include/utils/workfile_mgr.h b/src/include/utils/workfile_mgr.h index dfbd17bca57..48c83620610 100644 --- a/src/include/utils/workfile_mgr.h +++ b/src/include/utils/workfile_mgr.h @@ -74,4 +74,8 @@ extern workfile_set *workfile_mgr_cache_entries_get_copy(int* num_actives); extern uint64 WorkfileSegspace_GetSize(void); extern bool workfile_is_active(workfile_set *workfile); +extern uint64 WorkfileTotalBytesWritten(void); +extern uint64 WorkfileTotalFilesCreated(void); +extern void WorkfileResetBackendStats(void); + #endif /* __WORKFILE_MGR_H__ */ diff --git a/src/interfaces/ecpg/ecpglib/connect.c b/src/interfaces/ecpg/ecpglib/connect.c index 056940cb252..f4d2da9173a 100644 --- a/src/interfaces/ecpg/ecpglib/connect.c +++ b/src/interfaces/ecpg/ecpglib/connect.c @@ -9,6 +9,7 @@ #include "ecpglib_extern.h" #include "ecpgtype.h" #include "sqlca.h" +#include "common/mdb_locale.h" #ifdef HAVE_USELOCALE locale_t ecpg_clocale = (locale_t) 0; @@ -517,7 +518,7 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p #ifdef HAVE_USELOCALE if (!ecpg_clocale) { - ecpg_clocale = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0); + ecpg_clocale = NEWLOCALE(LC_NUMERIC_MASK, "C", (locale_t) 0); if (!ecpg_clocale) { #ifdef ENABLE_THREAD_SAFETY diff --git a/src/interfaces/ecpg/ecpglib/descriptor.c b/src/interfaces/ecpg/ecpglib/descriptor.c index f1898dec6a6..2238febbbdd 100644 --- 
a/src/interfaces/ecpg/ecpglib/descriptor.c +++ b/src/interfaces/ecpg/ecpglib/descriptor.c @@ -15,6 +15,8 @@ #include "sql3types.h" #include "sqlca.h" #include "sqlda.h" +#include "common/mdb_locale.h" + static void descriptor_free(struct descriptor *desc); @@ -500,8 +502,8 @@ ECPGget_desc(int lineno, const char *desc_name, int index,...) #ifdef HAVE__CONFIGTHREADLOCALE stmt.oldthreadlocale = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); #endif - stmt.oldlocale = ecpg_strdup(setlocale(LC_NUMERIC, NULL), lineno); - setlocale(LC_NUMERIC, "C"); + stmt.oldlocale = ecpg_strdup(SETLOCALE(LC_NUMERIC, NULL), lineno); + SETLOCALE(LC_NUMERIC, "C"); #endif /* desperate try to guess something sensible */ @@ -514,7 +516,7 @@ ECPGget_desc(int lineno, const char *desc_name, int index,...) #else if (stmt.oldlocale) { - setlocale(LC_NUMERIC, stmt.oldlocale); + SETLOCALE(LC_NUMERIC, stmt.oldlocale); ecpg_free(stmt.oldlocale); } #ifdef HAVE__CONFIGTHREADLOCALE diff --git a/src/interfaces/ecpg/ecpglib/execute.c b/src/interfaces/ecpg/ecpglib/execute.c index e8e8fb2b2c3..eafdd8e421a 100644 --- a/src/interfaces/ecpg/ecpglib/execute.c +++ b/src/interfaces/ecpg/ecpglib/execute.c @@ -31,6 +31,7 @@ #include "sqlca.h" #include "sqlda-compat.h" #include "sqlda-native.h" +#include "common/mdb_locale.h" /* * This function returns a newly malloced string that has ' and \ @@ -2002,13 +2003,13 @@ ecpg_do_prologue(int lineno, const int compat, const int force_indicator, #ifdef HAVE__CONFIGTHREADLOCALE stmt->oldthreadlocale = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); #endif - stmt->oldlocale = ecpg_strdup(setlocale(LC_NUMERIC, NULL), lineno); + stmt->oldlocale = ecpg_strdup(SETLOCALE(LC_NUMERIC, NULL), lineno); if (stmt->oldlocale == NULL) { ecpg_do_epilogue(stmt); return false; } - setlocale(LC_NUMERIC, "C"); + SETLOCALE(LC_NUMERIC, "C"); #endif /* @@ -2222,7 +2223,7 @@ ecpg_do_epilogue(struct statement *stmt) uselocale(stmt->oldlocale); #else if (stmt->oldlocale) - setlocale(LC_NUMERIC, 
stmt->oldlocale); + SETLOCALE(LC_NUMERIC, stmt->oldlocale); #ifdef HAVE__CONFIGTHREADLOCALE /* diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile index 43682574b23..ed3df424ae4 100644 --- a/src/interfaces/libpq/Makefile +++ b/src/interfaces/libpq/Makefile @@ -83,7 +83,7 @@ endif # that are built correctly for use in a shlib. SHLIB_LINK_INTERNAL = -lpgcommon_shlib -lpgport_shlib ifneq ($(PORTNAME), win32) -SHLIB_LINK += $(filter -lcrypt -ldes -lcom_err -lcrypto -lk5crypto -lkrb5 -lgssapi_krb5 -lgss -lgssapi -lssl -lsocket -lnsl -lresolv -lintl -lm, $(LIBS)) $(LDAP_LIBS_FE) $(PTHREAD_LIBS) +SHLIB_LINK += $(filter -lcrypt -ldes -lcom_err -lcrypto -lk5crypto -lkrb5 -lgssapi_krb5 -lgss -lgssapi -lssl -lsocket -lnsl -lresolv -lintl -lm -lmdblocales, $(LIBS)) $(LDAP_LIBS_FE) $(PTHREAD_LIBS) else SHLIB_LINK += $(filter -lcrypt -ldes -lcom_err -lcrypto -lk5crypto -lkrb5 -lgssapi32 -lssl -lsocket -lnsl -lresolv -lintl -lm $(PTHREAD_LIBS), $(LIBS)) $(LDAP_LIBS_FE) endif diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index 48591e48429..3aff8e95450 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -38,6 +38,7 @@ #include "utils/rel.h" #include "utils/syscache.h" #include "utils/typcache.h" +#include "common/mdb_locale.h" /* define our text domain for translations */ #undef TEXTDOMAIN @@ -743,15 +744,15 @@ plperl_init_interp(void) *save_numeric, *save_time; - loc = setlocale(LC_COLLATE, NULL); + loc = SETLOCALE(LC_COLLATE, NULL); save_collate = loc ? pstrdup(loc) : NULL; - loc = setlocale(LC_CTYPE, NULL); + loc = SETLOCALE(LC_CTYPE, NULL); save_ctype = loc ? pstrdup(loc) : NULL; - loc = setlocale(LC_MONETARY, NULL); + loc = SETLOCALE(LC_MONETARY, NULL); save_monetary = loc ? pstrdup(loc) : NULL; - loc = setlocale(LC_NUMERIC, NULL); + loc = SETLOCALE(LC_NUMERIC, NULL); save_numeric = loc ? pstrdup(loc) : NULL; - loc = setlocale(LC_TIME, NULL); + loc = SETLOCALE(LC_TIME, NULL); save_time = loc ? 
pstrdup(loc) : NULL; #define PLPERL_RESTORE_LOCALE(name, saved) \ @@ -4167,7 +4168,7 @@ static char * setlocale_perl(int category, char *locale) { dTHX; - char *RETVAL = setlocale(category, locale); + char *RETVAL = SETLOCALE(category, locale); if (RETVAL) { @@ -4182,7 +4183,7 @@ setlocale_perl(int category, char *locale) #ifdef LC_ALL if (category == LC_ALL) - newctype = setlocale(LC_CTYPE, NULL); + newctype = SETLOCALE(LC_CTYPE, NULL); else #endif newctype = RETVAL; @@ -4200,7 +4201,7 @@ setlocale_perl(int category, char *locale) #ifdef LC_ALL if (category == LC_ALL) - newcoll = setlocale(LC_COLLATE, NULL); + newcoll = SETLOCALE(LC_COLLATE, NULL); else #endif newcoll = RETVAL; @@ -4219,7 +4220,7 @@ setlocale_perl(int category, char *locale) #ifdef LC_ALL if (category == LC_ALL) - newnum = setlocale(LC_NUMERIC, NULL); + newnum = SETLOCALE(LC_NUMERIC, NULL); else #endif newnum = RETVAL; diff --git a/src/port/chklocale.c b/src/port/chklocale.c index 3d47d37eae4..2dae78e74e9 100644 --- a/src/port/chklocale.c +++ b/src/port/chklocale.c @@ -18,6 +18,8 @@ #else #include "postgres_fe.h" #endif +#include "common/mdb_locale.h" + #ifdef HAVE_LANGINFO_H #include @@ -343,7 +345,7 @@ pg_get_encoding_from_locale(const char *ctype, bool write_message) pg_strcasecmp(ctype, "POSIX") == 0) return PG_SQL_ASCII; - save = setlocale(LC_CTYPE, NULL); + save = SETLOCALE(LC_CTYPE, NULL); if (!save) return -1; /* setlocale() broken? */ /* must copy result, or it might change after setlocale */ @@ -351,7 +353,7 @@ pg_get_encoding_from_locale(const char *ctype, bool write_message) if (!save) return -1; /* out of memory; unlikely */ - name = setlocale(LC_CTYPE, ctype); + name = SETLOCALE(LC_CTYPE, ctype); if (!name) { free(save); @@ -366,13 +368,13 @@ pg_get_encoding_from_locale(const char *ctype, bool write_message) sys = win32_langinfo(name); #endif - setlocale(LC_CTYPE, save); + SETLOCALE(LC_CTYPE, save); free(save); } else { /* much easier... 
*/ - ctype = setlocale(LC_CTYPE, NULL); + ctype = SETLOCALE(LC_CTYPE, NULL); if (!ctype) return -1; /* setlocale() broken? */ diff --git a/src/test/Makefile b/src/test/Makefile index d84edb282df..150c4e97b73 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -18,6 +18,9 @@ SUBDIRS = perl regress isolation modules authentication recovery SUBDIRS += fsync walrep heap_checksum isolation2 fdw singlenode_regress singlenode_isolation2 +# MDB addon +SUBDIRS += mdb_admin + # Test suites that are not safe by default but can be run if selected # by the user via the whitespace-separated list in variable # PG_TEST_EXTRA: diff --git a/src/test/locale/test-ctype.c b/src/test/locale/test-ctype.c index a3f896c5ecb..10c2b49cb92 100644 --- a/src/test/locale/test-ctype.c +++ b/src/test/locale/test-ctype.c @@ -23,6 +23,8 @@ the author shall be liable for any damage, etc. #include #include #include +#include "common/mdb_locale.h" + char *flag(int b); void describe_char(int c); @@ -62,7 +64,7 @@ main() short c; char *cur_locale; - cur_locale = setlocale(LC_ALL, ""); + cur_locale = SETLOCALE(LC_ALL, ""); if (cur_locale) fprintf(stderr, "Successfully set locale to \"%s\"\n", cur_locale); else diff --git a/src/test/mdb_admin/.gitignore b/src/test/mdb_admin/.gitignore new file mode 100644 index 00000000000..871e943d50e --- /dev/null +++ b/src/test/mdb_admin/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/mdb_admin/Makefile b/src/test/mdb_admin/Makefile new file mode 100644 index 00000000000..e4e82367da9 --- /dev/null +++ b/src/test/mdb_admin/Makefile @@ -0,0 +1,23 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/mdb_admin +# +# Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/test/mdb_admin/Makefile +# +#------------------------------------------------------------------------- + 
+subdir = src/test/mdb_admin +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/mdb_admin/t/signals.pl b/src/test/mdb_admin/t/signals.pl new file mode 100644 index 00000000000..a11db27a527 --- /dev/null +++ b/src/test/mdb_admin/t/signals.pl @@ -0,0 +1,74 @@ + +# Copyright (c) 2024-2024, MDB, Mother Russia + +# Minimal test testing streaming replication +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(); +$node_primary->start; + +# Create some content on primary and check its presence in standby nodes +$node_primary->safe_psql('postgres', + " + CREATE DATABASE regress; + CREATE ROLE mdb_admin; + CREATE ROLE mdb_reg_lh_1; + CREATE ROLE mdb_reg_lh_2; + GRANT pg_signal_backend TO mdb_admin; + GRANT pg_signal_backend TO mdb_reg_lh_1; + GRANT mdb_admin TO mdb_reg_lh_2; +"); + +# Create some content on primary and check its presence in standby nodes +$node_primary->safe_psql('regress', + " + CREATE TABLE tab_int(i int); + INSERT INTO tab_int SELECT * FROm generate_series(1, 1000000); + ALTER SYSTEM SET autovacuum_vacuum_cost_limit TO 1; + ALTER SYSTEM SET autovacuum_vacuum_cost_delay TO 100; + ALTER SYSTEM SET autovacuum_naptime TO 1; +"); + +$node_primary->restart; + +sleep 1; + +my $res_pid = $node_primary->safe_psql('regress', + " + SELECT pid FROM pg_stat_activity WHERE backend_type = 'autovacuum worker' and datname = 'regress';; +"); + + +print "pid is $res_pid\n"; + +ok(1); + + +my ($res_reg_lh_1, $stdout_reg_lh_1, $stderr_reg_lh_1) = $node_primary->psql('regress', + " + SET ROLE mdb_reg_lh_1; + SELECT pg_terminate_backend($res_pid); +"); + +# print ($res_reg_lh_1, $stdout_reg_lh_1, $stderr_reg_lh_1, "\n"); + +ok($res_reg_lh_1 != 0, 
"should fail for non-mdb_admin"); +like($stderr_reg_lh_1, qr/ERROR: must be a superuser to terminate superuser process/, "matches"); + +my ($res_reg_lh_2, $stdout_reg_lh_2, $stderr_reg_lh_2) = $node_primary->psql('regress', + " + SET ROLE mdb_reg_lh_2; + SELECT pg_terminate_backend($res_pid); +"); + +ok($res_reg_lh_2 == 0, "should success for mdb_admin"); + +# print ($res_reg_lh_2, $stdout_reg_lh_2, $stderr_reg_lh_2, "\n"); + +done_testing(); \ No newline at end of file diff --git a/src/test/regress/expected/create_function_3.out b/src/test/regress/expected/create_function_3.out index 8380df1591f..7842a3c1c82 100644 --- a/src/test/regress/expected/create_function_3.out +++ b/src/test/regress/expected/create_function_3.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT diff --git a/src/test/regress/expected/create_function_3_optimizer.out b/src/test/regress/expected/create_function_3_optimizer.out index 3ae669d518a..3256709e1aa 100644 --- a/src/test/regress/expected/create_function_3_optimizer.out +++ b/src/test/regress/expected/create_function_3_optimizer.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof 
function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT diff --git a/src/test/regress/expected/mdb_admin.out b/src/test/regress/expected/mdb_admin.out new file mode 100644 index 00000000000..e4dfc436802 --- /dev/null +++ b/src/test/regress/expected/mdb_admin.out @@ -0,0 +1,100 @@ +CREATE ROLE regress_mdb_admin_user1; +CREATE ROLE regress_mdb_admin_user2; +CREATE ROLE regress_mdb_admin_user3; +CREATE ROLE regress_superuser WITH SUPERUSER; +GRANT mdb_admin TO regress_mdb_admin_user1; +GRANT CREATE ON DATABASE regression TO regress_mdb_admin_user2; +GRANT CREATE ON DATABASE regression TO regress_mdb_admin_user3; +-- mdb admin trasfers ownership to another role +SET ROLE regress_mdb_admin_user2; +CREATE FUNCTION regress_mdb_admin_add(integer, integer) RETURNS integer + AS 'SELECT $1 + $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; +CREATE SCHEMA regress_mdb_admin_schema; +GRANT CREATE ON SCHEMA regress_mdb_admin_schema TO regress_mdb_admin_user3; +CREATE TABLE regress_mdb_admin_schema.regress_mdb_admin_table(); +CREATE TABLE regress_mdb_admin_table(); +CREATE VIEW regress_mdb_admin_view as SELECT 1; +SET ROLE regress_mdb_admin_user1; +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO regress_mdb_admin_user3; +ALTER VIEW regress_mdb_admin_view OWNER TO regress_mdb_admin_user3; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; +ALTER TABLE regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_mdb_admin_user3; +-- mdb admin fails to transfer ownership to superusers and particular system roles +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO 
regress_superuser; +ERROR: cannot transfer ownership to superuser "regress_superuser" +ALTER VIEW regress_mdb_admin_view OWNER TO regress_superuser; +ERROR: cannot transfer ownership to superuser "regress_superuser" +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO regress_superuser; +ERROR: cannot transfer ownership to superuser "regress_superuser" +ALTER TABLE regress_mdb_admin_table OWNER TO regress_superuser; +ERROR: cannot transfer ownership to superuser "regress_superuser" +ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_superuser; +ERROR: cannot transfer ownership to superuser "regress_superuser" +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_execute_server_program; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_execute_server_program; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_execute_server_program; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_execute_server_program; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_execute_server_program; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_write_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_write_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_write_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_write_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER 
SCHEMA regress_mdb_admin_schema OWNER TO pg_write_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_read_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_read_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_read_server_files; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_read_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_read_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_all_data; +ERROR: forbidden to transfer ownership to this 
system role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_read_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +-- end tests +RESET SESSION AUTHORIZATION; +-- +REVOKE CREATE ON DATABASE regression FROM regress_mdb_admin_user2; +REVOKE CREATE ON DATABASE regression FROM regress_mdb_admin_user3; +DROP VIEW regress_mdb_admin_view; +DROP FUNCTION regress_mdb_admin_add; +DROP TABLE regress_mdb_admin_schema.regress_mdb_admin_table; +DROP TABLE regress_mdb_admin_table; +DROP SCHEMA regress_mdb_admin_schema; +DROP ROLE regress_mdb_admin_user1; +DROP ROLE regress_mdb_admin_user2; +DROP ROLE regress_mdb_admin_user3; +DROP ROLE regress_superuser; diff --git a/src/test/regress/expected/mdb_superuser.out b/src/test/regress/expected/mdb_superuser.out new file mode 100644 index 00000000000..21bafb1011b --- /dev/null +++ b/src/test/regress/expected/mdb_superuser.out @@ -0,0 +1,115 @@ +CREATE ROLE regress_mdb_superuser_user1; +CREATE ROLE regress_mdb_superuser_user2; +CREATE ROLE regress_mdb_superuser_user3; +GRANT mdb_admin TO mdb_superuser; +CREATE ROLE regress_superuser WITH SUPERUSER; +GRANT mdb_superuser TO regress_mdb_superuser_user1; +GRANT CREATE ON DATABASE regression TO regress_mdb_superuser_user2; +GRANT CREATE ON DATABASE regression TO regress_mdb_superuser_user3; +SET ROLE regress_mdb_superuser_user2; +CREATE FUNCTION regress_mdb_superuser_add(integer, integer) RETURNS integer + AS 'SELECT $1 + $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; +CREATE SCHEMA regress_mdb_superuser_schema; +CREATE TABLE regress_mdb_superuser_schema.regress_mdb_superuser_table(); +CREATE TABLE regress_mdb_superuser_table(); +CREATE VIEW regress_mdb_superuser_view as SELECT 1; +SET ROLE regress_mdb_superuser_user3; +INSERT INTO regress_mdb_superuser_table SELECT * FROM regress_mdb_superuser_table; 
+ERROR: permission denied for table regress_mdb_superuser_table +SET ROLE regress_mdb_superuser_user1; +-- mdb_superuser can grant to other role +GRANT USAGE, CREATE ON SCHEMA regress_mdb_superuser_schema TO regress_mdb_superuser_user3; +GRANT ALL PRIVILEGES ON TABLE regress_mdb_superuser_table TO regress_mdb_superuser_user3; +REVOKE ALL PRIVILEGES ON TABLE regress_mdb_superuser_table FROM regress_mdb_superuser_user3; +GRANT INSERT, SELECT ON TABLE regress_mdb_superuser_table TO regress_mdb_superuser_user3; +-- grant works +SET ROLE regress_mdb_superuser_user3; +INSERT INTO regress_mdb_superuser_table SELECT * FROM regress_mdb_superuser_table; +SET ROLE mdb_superuser; +-- mdb_superuser drop object of other role +DROP TABLE regress_mdb_superuser_table; +-- mdb admin fails to transfer ownership to superusers and system roles +RESET SESSION AUTHORIZATION; +CREATE TABLE regress_superuser_table(); +SET ROLE pg_read_server_files; +CREATE TABLE regress_pgrsf_table(); +SET ROLE pg_write_server_files; +CREATE TABLE regress_pgwsf_table(); +SET ROLE pg_execute_server_program; +CREATE TABLE regress_pgxsp_table(); +SET ROLE pg_read_all_data; +CREATE TABLE regress_pgrad_table(); +SET ROLE pg_write_all_data; +CREATE TABLE regress_pgrwd_table(); +SET ROLE mdb_superuser; +-- cannot read all data (fail) +SELECT * FROM pg_authid; +ERROR: permission denied for table pg_authid +-- can not drop superuser objects, because does not has_privs_of pg_database_owner +DROP TABLE regress_superuser_table; +ERROR: must be owner of table regress_superuser_table +DROP TABLE regress_pgrsf_table; +ERROR: must be owner of table regress_pgrsf_table +DROP TABLE regress_pgwsf_table; +ERROR: must be owner of table regress_pgwsf_table +DROP TABLE regress_pgxsp_table; +ERROR: must be owner of table regress_pgxsp_table +DROP TABLE regress_pgrad_table; +ERROR: must be owner of table regress_pgrad_table +DROP TABLE regress_pgrwd_table; +ERROR: must be owner of table regress_pgrwd_table +-- does allowed to 
creare database, role or extension +-- or grant such priviledge +CREATE DATABASE regress_db_fail; +ERROR: permission denied to create database +CREATE ROLE regress_role_fail; +ERROR: permission denied to create role +ALTER ROLE mdb_superuser WITH CREATEROLE; +ERROR: permission denied +ALTER ROLE mdb_superuser WITH CREATEDB; +ERROR: permission denied +ALTER ROLE regress_mdb_superuser_user2 WITH CREATEROLE; +ERROR: permission denied +ALTER ROLE regress_mdb_superuser_user2 WITH CREATEDB; +ERROR: permission denied +-- mdb_superuser more powerfull than pg_database_owner +RESET SESSION AUTHORIZATION; +CREATE DATABASE regress_check_owner OWNER regress_mdb_superuser_user2; +\c regress_check_owner; +SET ROLE regress_mdb_superuser_user2; +CREATE SCHEMA regtest; +CREATE TABLE regtest.regtest(); +-- this should fail +SET ROLE regress_mdb_superuser_user3; +GRANT ALL ON TABLE regtest.regtest TO regress_mdb_superuser_user3; +ERROR: permission denied for schema regtest +ALTER TABLE regtest.regtest OWNER TO regress_mdb_superuser_user3; +ERROR: permission denied for schema regtest +SET ROLE regress_mdb_superuser_user1; +GRANT ALL ON TABLE regtest.regtest TO regress_mdb_superuser_user1; +ALTER TABLE regtest.regtest OWNER TO regress_mdb_superuser_user1; +\c regression +DROP DATABASE regress_check_owner; +-- end tests +RESET SESSION AUTHORIZATION; +-- +REVOKE CREATE ON DATABASE regression FROM regress_mdb_superuser_user2; +REVOKE CREATE ON DATABASE regression FROM regress_mdb_superuser_user3; +DROP VIEW regress_mdb_superuser_view; +DROP FUNCTION regress_mdb_superuser_add; +DROP TABLE regress_mdb_superuser_schema.regress_mdb_superuser_table; +DROP TABLE regress_mdb_superuser_table; +ERROR: table "regress_mdb_superuser_table" does not exist +DROP SCHEMA regress_mdb_superuser_schema; +DROP ROLE regress_mdb_superuser_user1; +DROP ROLE regress_mdb_superuser_user2; +DROP ROLE regress_mdb_superuser_user3; +DROP TABLE regress_superuser_table; +DROP TABLE regress_pgrsf_table; +DROP TABLE 
regress_pgwsf_table; +DROP TABLE regress_pgxsp_table; +DROP TABLE regress_pgrad_table; +DROP TABLE regress_pgrwd_table; diff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out new file mode 100644 index 00000000000..c1cb724ef37 --- /dev/null +++ b/src/test/regress/expected/test_setup.out @@ -0,0 +1,5 @@ +-- +-- TEST_SETUP --- prepare environment expected by regression test scripts +-- +CREATE ROLE mdb_admin; +CREATE ROLE mdb_superuser; diff --git a/src/test/regress/input/misc.source b/src/test/regress/input/misc.source index 331499a2aba..2abe2c82eb8 100644 --- a/src/test/regress/input/misc.source +++ b/src/test/regress/input/misc.source @@ -264,3 +264,8 @@ SELECT *, (equipment(CAST((h.*) AS hobbies_r))).name FROM hobbies_r h; -- -- rewrite rules -- + + +--- mdb-related + +SELECT mdb_locale_enabled(); diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source index 18bcc227f0a..f2f7c0dee32 100644 --- a/src/test/regress/output/misc.source +++ b/src/test/regress/output/misc.source @@ -609,3 +609,10 @@ CONTEXT: SQL function "equipment" during startup -- -- rewrite rules -- +--- mdb-related +SELECT mdb_locale_enabled(); + mdb_locale_enabled +-------------------- + f +(1 row) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index e2df0208627..b2ed818f677 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -5,10 +5,18 @@ # this limits the number of connections needed to run the tests. # ---------- +# mdb admin simple checks +test: test_setup + # run tablespace by itself, and first, because it forces a checkpoint; # we'd prefer not to have checkpoints later in the tests because that # interferes with crash-recovery testing. 
test: tablespace + +test: mdb_admin + +test: mdb_superuser + # ---------- # The first group of parallel tests # ---------- diff --git a/src/test/regress/sql/mdb_admin.sql b/src/test/regress/sql/mdb_admin.sql new file mode 100644 index 00000000000..b6b048e5692 --- /dev/null +++ b/src/test/regress/sql/mdb_admin.sql @@ -0,0 +1,87 @@ +CREATE ROLE regress_mdb_admin_user1; +CREATE ROLE regress_mdb_admin_user2; +CREATE ROLE regress_mdb_admin_user3; + +CREATE ROLE regress_superuser WITH SUPERUSER; + +GRANT mdb_admin TO regress_mdb_admin_user1; +GRANT CREATE ON DATABASE regression TO regress_mdb_admin_user2; +GRANT CREATE ON DATABASE regression TO regress_mdb_admin_user3; + +-- mdb admin trasfers ownership to another role + +SET ROLE regress_mdb_admin_user2; +CREATE FUNCTION regress_mdb_admin_add(integer, integer) RETURNS integer + AS 'SELECT $1 + $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; + +CREATE SCHEMA regress_mdb_admin_schema; +GRANT CREATE ON SCHEMA regress_mdb_admin_schema TO regress_mdb_admin_user3; +CREATE TABLE regress_mdb_admin_schema.regress_mdb_admin_table(); +CREATE TABLE regress_mdb_admin_table(); +CREATE VIEW regress_mdb_admin_view as SELECT 1; +SET ROLE regress_mdb_admin_user1; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO regress_mdb_admin_user3; +ALTER VIEW regress_mdb_admin_view OWNER TO regress_mdb_admin_user3; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; +ALTER TABLE regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_mdb_admin_user3; + + +-- mdb admin fails to transfer ownership to superusers and particular system roles + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO regress_superuser; +ALTER VIEW regress_mdb_admin_view OWNER TO regress_superuser; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO regress_superuser; +ALTER TABLE regress_mdb_admin_table OWNER TO 
regress_superuser; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_superuser; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_execute_server_program; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_execute_server_program; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_execute_server_program; +ALTER TABLE regress_mdb_admin_table OWNER TO pg_execute_server_program; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_execute_server_program; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_write_server_files; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_write_server_files; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_write_server_files; +ALTER TABLE regress_mdb_admin_table OWNER TO pg_write_server_files; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_write_server_files; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_read_server_files; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_read_server_files; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_server_files; +ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_server_files; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_read_server_files; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_write_all_data; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_write_all_data; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_write_all_data; +ALTER TABLE regress_mdb_admin_table OWNER TO pg_write_all_data; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_write_all_data; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_read_all_data; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_read_all_data; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_all_data; +ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_all_data; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO 
pg_read_all_data; + +-- end tests + +RESET SESSION AUTHORIZATION; +-- +REVOKE CREATE ON DATABASE regression FROM regress_mdb_admin_user2; +REVOKE CREATE ON DATABASE regression FROM regress_mdb_admin_user3; + +DROP VIEW regress_mdb_admin_view; +DROP FUNCTION regress_mdb_admin_add; +DROP TABLE regress_mdb_admin_schema.regress_mdb_admin_table; +DROP TABLE regress_mdb_admin_table; +DROP SCHEMA regress_mdb_admin_schema; +DROP ROLE regress_mdb_admin_user1; +DROP ROLE regress_mdb_admin_user2; +DROP ROLE regress_mdb_admin_user3; +DROP ROLE regress_superuser; diff --git a/src/test/regress/sql/mdb_superuser.sql b/src/test/regress/sql/mdb_superuser.sql new file mode 100644 index 00000000000..f96338f3aec --- /dev/null +++ b/src/test/regress/sql/mdb_superuser.sql @@ -0,0 +1,144 @@ +CREATE ROLE regress_mdb_superuser_user1; +CREATE ROLE regress_mdb_superuser_user2; +CREATE ROLE regress_mdb_superuser_user3; + +GRANT mdb_admin TO mdb_superuser; + +CREATE ROLE regress_superuser WITH SUPERUSER; + +GRANT mdb_superuser TO regress_mdb_superuser_user1; + +GRANT CREATE ON DATABASE regression TO regress_mdb_superuser_user2; +GRANT CREATE ON DATABASE regression TO regress_mdb_superuser_user3; + + +SET ROLE regress_mdb_superuser_user2; + +CREATE FUNCTION regress_mdb_superuser_add(integer, integer) RETURNS integer + AS 'SELECT $1 + $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; + +CREATE SCHEMA regress_mdb_superuser_schema; +CREATE TABLE regress_mdb_superuser_schema.regress_mdb_superuser_table(); +CREATE TABLE regress_mdb_superuser_table(); +CREATE VIEW regress_mdb_superuser_view as SELECT 1; + +SET ROLE regress_mdb_superuser_user3; +INSERT INTO regress_mdb_superuser_table SELECT * FROM regress_mdb_superuser_table; + +SET ROLE regress_mdb_superuser_user1; + +-- mdb_superuser can grant to other role +GRANT USAGE, CREATE ON SCHEMA regress_mdb_superuser_schema TO regress_mdb_superuser_user3; +GRANT ALL PRIVILEGES ON TABLE regress_mdb_superuser_table TO regress_mdb_superuser_user3; 
+REVOKE ALL PRIVILEGES ON TABLE regress_mdb_superuser_table FROM regress_mdb_superuser_user3; + +GRANT INSERT, SELECT ON TABLE regress_mdb_superuser_table TO regress_mdb_superuser_user3; + +-- grant works +SET ROLE regress_mdb_superuser_user3; +INSERT INTO regress_mdb_superuser_table SELECT * FROM regress_mdb_superuser_table; + +SET ROLE mdb_superuser; + +-- mdb_superuser drop object of other role +DROP TABLE regress_mdb_superuser_table; +-- mdb admin fails to transfer ownership to superusers and system roles + +RESET SESSION AUTHORIZATION; + +CREATE TABLE regress_superuser_table(); + +SET ROLE pg_read_server_files; + +CREATE TABLE regress_pgrsf_table(); + +SET ROLE pg_write_server_files; + +CREATE TABLE regress_pgwsf_table(); + +SET ROLE pg_execute_server_program; + +CREATE TABLE regress_pgxsp_table(); + +SET ROLE pg_read_all_data; + +CREATE TABLE regress_pgrad_table(); + +SET ROLE pg_write_all_data; + +CREATE TABLE regress_pgrwd_table(); + +SET ROLE mdb_superuser; + +-- cannot read all data (fail) +SELECT * FROM pg_authid; + +-- can not drop superuser objects, because does not has_privs_of pg_database_owner +DROP TABLE regress_superuser_table; +DROP TABLE regress_pgrsf_table; +DROP TABLE regress_pgwsf_table; +DROP TABLE regress_pgxsp_table; +DROP TABLE regress_pgrad_table; +DROP TABLE regress_pgrwd_table; + + +-- does allowed to creare database, role or extension +-- or grant such priviledge + +CREATE DATABASE regress_db_fail; +CREATE ROLE regress_role_fail; + +ALTER ROLE mdb_superuser WITH CREATEROLE; +ALTER ROLE mdb_superuser WITH CREATEDB; + +ALTER ROLE regress_mdb_superuser_user2 WITH CREATEROLE; +ALTER ROLE regress_mdb_superuser_user2 WITH CREATEDB; + +-- mdb_superuser more powerfull than pg_database_owner + +RESET SESSION AUTHORIZATION; +CREATE DATABASE regress_check_owner OWNER regress_mdb_superuser_user2; + +\c regress_check_owner; + +SET ROLE regress_mdb_superuser_user2; +CREATE SCHEMA regtest; +CREATE TABLE regtest.regtest(); + +-- this should fail + 
+SET ROLE regress_mdb_superuser_user3; +GRANT ALL ON TABLE regtest.regtest TO regress_mdb_superuser_user3; +ALTER TABLE regtest.regtest OWNER TO regress_mdb_superuser_user3; + +SET ROLE regress_mdb_superuser_user1; +GRANT ALL ON TABLE regtest.regtest TO regress_mdb_superuser_user1; +ALTER TABLE regtest.regtest OWNER TO regress_mdb_superuser_user1; + +\c regression +DROP DATABASE regress_check_owner; + +-- end tests + +RESET SESSION AUTHORIZATION; +-- +REVOKE CREATE ON DATABASE regression FROM regress_mdb_superuser_user2; +REVOKE CREATE ON DATABASE regression FROM regress_mdb_superuser_user3; + +DROP VIEW regress_mdb_superuser_view; +DROP FUNCTION regress_mdb_superuser_add; +DROP TABLE regress_mdb_superuser_schema.regress_mdb_superuser_table; +DROP TABLE regress_mdb_superuser_table; +DROP SCHEMA regress_mdb_superuser_schema; +DROP ROLE regress_mdb_superuser_user1; +DROP ROLE regress_mdb_superuser_user2; +DROP ROLE regress_mdb_superuser_user3; +DROP TABLE regress_superuser_table; +DROP TABLE regress_pgrsf_table; +DROP TABLE regress_pgwsf_table; +DROP TABLE regress_pgxsp_table; +DROP TABLE regress_pgrad_table; +DROP TABLE regress_pgrwd_table; diff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql new file mode 100644 index 00000000000..7ec5ccc7471 --- /dev/null +++ b/src/test/regress/sql/test_setup.sql @@ -0,0 +1,6 @@ +-- +-- TEST_SETUP --- prepare environment expected by regression test scripts +-- + +CREATE ROLE mdb_admin; +CREATE ROLE mdb_superuser; diff --git a/src/test/singlenode_regress/expected/create_function_3.out b/src/test/singlenode_regress/expected/create_function_3.out index 3a4fd451471..6423fdb7965 100644 --- a/src/test/singlenode_regress/expected/create_function_3.out +++ b/src/test/singlenode_regress/expected/create_function_3.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) 
LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT