From d994d97a2e431b253661a4d27f2112afbe60b9ec Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 23 Mar 2023 15:15:26 +0300 Subject: [PATCH 001/133] Initial commit: completely useless yet Just create the overall project structure with a Makefile to generate protobufs, compile it into a shared library extension and install it --- Makefile | 33 + protos/yagpcc_metrics.proto | 129 ++++ protos/yagpcc_plan.proto | 569 ++++++++++++++++++ protos/yagpcc_set_service.proto | 44 ++ sql/yagp-hooks-collector--1.0.sql | 2 + sql/yagp-hooks-collector--unpackaged--1.0.sql | 2 + src/yagp_hooks_collector.c | 20 + yagp-hooks-collector.control | 5 + 8 files changed, 804 insertions(+) create mode 100644 Makefile create mode 100644 protos/yagpcc_metrics.proto create mode 100644 protos/yagpcc_plan.proto create mode 100644 protos/yagpcc_set_service.proto create mode 100644 sql/yagp-hooks-collector--1.0.sql create mode 100644 sql/yagp-hooks-collector--unpackaged--1.0.sql create mode 100644 src/yagp_hooks_collector.c create mode 100644 yagp-hooks-collector.control diff --git a/Makefile b/Makefile new file mode 100644 index 00000000000..515c6cbdc85 --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD +override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -ggdb -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -fPIC -I/usr/include/libxml2 -I/usr/local/opt/openssl/include -Iinclude -g -DGPBUILD +COMMON_CPP_FLAGS 
:= -Isrc -Iinclude +PG_CXXFLAGS += $(COMMON_CPP_FLAGS) + +PROTOC = protoc +SRC_DIR = ./src +GEN_DIR = ./src/protos +PROTO_DIR = ./protos +PROTO_GEN_OBJECTS = $(GEN_DIR)/yagpcc_plan.pb.o $(GEN_DIR)/yagpcc_metrics.pb.o $(GEN_DIR)/yagpcc_set_service.pb.o + +$(GEN_DIR)/%.pb.cpp : $(PROTO_DIR)/%.proto + sed -i 's/optional //g' $^ + sed -i 's/cloud\/mdb\/yagpcc\/api\/proto\/common\//\protos\//g' $^ + $(PROTOC) --cpp_out=$(SRC_DIR) $^ + mv $(GEN_DIR)/$*.pb.cc $(GEN_DIR)/$*.pb.cpp + +#$(PROTO_GEN_OBJECTS) : $(GEN_DIR)/%.pb.o : $(GEN_DIR)/%.pb.cpp + +OBJS := $(PROTO_GEN_OBJECTS) $(SRC_DIR)/yagp_hooks_collector.o +EXTRA_CLEAN := $(GEN_DIR) +DATA := $(wildcard sql/*--*.sql) +EXTENSION := yagp-hooks-collector +EXTVERSION := $(shell grep default_version $(EXTENSION).control | \ + sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/") +MODULE_big := yagp-hooks-collector +PG_CONFIG := pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) + +gen: $(PROTO_GEN_OBJECTS) + +.DEFAULT_GOAL := all diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto new file mode 100644 index 00000000000..2c5136429cb --- /dev/null +++ b/protos/yagpcc_metrics.proto @@ -0,0 +1,129 @@ +syntax = "proto3"; + +option java_outer_classname = "SegmentYAGPCCM"; +option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; + +enum QueryStatus { + QUERY_STATUS_UNSPECIFIED = 0; + QUERY_STATUS_SUBMIT = 1; + QUERY_STATUS_START = 2; + QUERY_STATUS_DONE = 3; + QUERY_STATUS_QUERY_DONE = 4; + QUERY_STATUS_ERROR = 5; + QUERY_STATUS_CANCELLING = 6; + QUERY_STATUS_CANCELED = 7; + QUERY_STATUS_END = 8; +} + +enum PlanNodeStatus { + PLAN_NODE_STATUS_UNSPECIFIED = 0; + PLAN_NODE_STATUS_INITIALIZED = 1; + PLAN_NODE_STATUS_EXECUTING = 2; + PLAN_NODE_STATUS_FINISHED = 3; +} + +message QueryInfo { + PlanGenerator generator = 1; + uint64 query_id = 2; + uint64 plan_id = 3; + string queryText = 4; + string planText = 5; + SessionInfo sessionInfo = 6; +} + +enum 
PlanGenerator +{ + PLAN_GENERATOR_UNSPECIFIED = 0; + PLAN_GENERATOR_PLANNER = 1; /* plan produced by the planner*/ + PLAN_GENERATOR_OPTIMIZER = 2; /* plan produced by the optimizer*/ +} + +message GPMetrics { + SystemStat systemStat = 1; + MetricInstrumentation instrumentation = 2; + SpillInfo spill = 3; +} + +message QueryInfoHeader { + int32 pid = 1; + GpId gpIdentity = 2; + + int32 tmid = 3; /* A time identifier for a particular query. All records associated with the query will have the same tmid. */ + int32 ssid = 4; /* The session id as shown by gp_session_id. All records associated with the query will have the same ssid */ + int32 ccnt = 5; /* The command number within this session as shown by gp_command_count. All records associated with the query will have the same ccnt */ + int32 sliceid = 6; /* slice identificator, 0 means general info for the whole query */ +} + +message GpId { + int32 dbid = 1; /* the dbid of this database */ + int32 segindex = 2; /* content indicator: -1 for entry database, + * 0, ..., n-1 for segment database * + * a primary and its mirror have the same segIndex */ + GpRole gp_role = 3; + GpRole gp_session_role = 4; +} + +enum GpRole +{ + GP_ROLE_UNSPECIFIED = 0; + GP_ROLE_UTILITY = 1; /* Operating as a simple database engine */ + GP_ROLE_DISPATCH = 2; /* Operating as the parallel query dispatcher */ + GP_ROLE_EXECUTE = 3; /* Operating as a parallel query executor */ + GP_ROLE_UNDEFINED = 4; /* Should never see this role in use */ +} + +message SessionInfo { + string sql = 1; + string userName = 2; + string databaseName = 3; + string resourceGroup = 4; + string applicationName = 5; +} + +message SystemStat { + /* CPU stat*/ + double runningTimeSeconds = 1; + double userTimeSeconds = 2; + double kernelTimeSeconds = 3; + + /* Memory stat */ + uint64 vsize = 4; + uint64 rss = 5; + uint64 VmSizeKb = 6; + uint64 VmPeakKb = 7; + + /* Storage stat */ + uint64 rchar = 8; + uint64 wchar = 9; + uint64 syscr = 10; + uint64 syscw = 11; + uint64 
read_bytes = 12; + uint64 write_bytes = 13; + uint64 cancelled_write_bytes = 14; +} + +message MetricInstrumentation { + uint64 ntuples = 1; /* Total tuples produced */ + uint64 nloops = 2; /* # of run cycles for this node */ + uint64 tuplecount = 3; /* Tuples emitted so far this cycle */ + double firsttuple = 4; /* Time for first tuple of this cycle */ + double startup = 5; /* Total startup time (in seconds) */ + double total = 6; /* Total total time (in seconds) */ + uint64 shared_blks_hit = 7; /* shared blocks stats*/ + uint64 shared_blks_read = 8; + uint64 shared_blks_dirtied = 9; + uint64 shared_blks_written = 10; + uint64 local_blks_hit = 11; /* data read from disks */ + uint64 local_blks_read = 12; + uint64 local_blks_dirtied = 13; + uint64 local_blks_written = 14; + uint64 temp_blks_read = 15; /* temporary tables read stat */ + uint64 temp_blks_written = 16; + double blk_read_time = 17; /* measured read/write time */ + double blk_write_time = 18; +} + +message SpillInfo { + int32 fileCount = 1; + int64 totalBytes = 2; +} diff --git a/protos/yagpcc_plan.proto b/protos/yagpcc_plan.proto new file mode 100644 index 00000000000..4ea3a3db6e9 --- /dev/null +++ b/protos/yagpcc_plan.proto @@ -0,0 +1,569 @@ +syntax = "proto3"; + +option java_outer_classname = "SegmentYAGPCCP"; +option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; + +message MetricPlan { + GpdbNodeType type = 1; + + int32 plan_node_id = 2; + int32 parent_plan_node_id = 3; // Valid only for QueryInfoMetricQuerySubmit + + double startup_cost = 4; /* cost expended before fetching any tuples */ + double total_cost = 5; /* total cost (assuming all tuples fetched) */ + double plan_rows = 6; /* number of rows plan is expected to emit */ + int32 plan_width = 7; /* average row width in bytes */ + + int32 arg1 = 8; // for some nodes it's additional opperand type + int32 arg2 = 9; // for some nodes it's additional opperand type + + MetricMotionInfo motion_info = 10; + 
MetricRelationInfo relation_info = 11; + + string scan_index_name = 12; + ScanDirection scan_direction = 13; + MetricSliceInfo slice_info = 14; + string statement = 15; +} + +message MetricMotionInfo { + MotionType type = 1; + bool isBroadcast = 2; + CdbLocusType locusType = 3; + + int32 sliceId = 4; + int32 parentSliceId = 5; +} + +message MetricRelationInfo { + int32 oid = 1; + string name = 2; + string schema = 3; + string alias = 4; + int32 dynamicScanId = 5; +} + +message MetricSliceInfo { + int32 slice = 1; + int32 segments = 2; + GangType gangType = 3; + int32 gang = 4; +} + +enum ScanDirection +{ + SCAN_DIRECTION_UNSPECIFIED = 0; + SCAN_DIRECTION_BACKWARD = 1; + SCAN_DIRECTION_FORWARD = 2; +} + +/* GangType enumeration is used in several structures related to CDB + * slice plan support. + */ +enum GangType +{ + GANG_TYPE_UNSPECIFIED = 0; + GANG_TYPE_UNALLOCATED = 1; /* a root slice executed by the qDisp */ + GANG_TYPE_ENTRYDB_READER = 2; /* a 1-gang with read access to the entry db */ + GANG_TYPE_SINGLETON_READER = 3; /* a 1-gang to read the segment dbs */ + GANG_TYPE_PRIMARY_READER = 4; /* a 1-gang or N-gang to read the segment dbs */ + GANG_TYPE_PRIMARY_WRITER = 5; /* the N-gang that can update the segment dbs */ +} + + +enum CdbLocusType +{ + CDB_LOCUS_TYPE_UNSPECIFIED = 0; + CDB_LOCUS_TYPE_ENTRY = 1; /* a single backend process on the entry db: + * usually the qDisp itself, but could be a + * qExec started by the entry postmaster. + */ + + CDB_LOCUS_TYPE_SINGLE_QE = 2; /* a single backend process on any db: the + * qDisp itself, or a qExec started by a + * segment postmaster or the entry postmaster. 
+ */ + + CDB_LOCUS_TYPE_GENERAL = 3; /* compatible with any locus (data is + * self-contained in the query plan or + * generally available in any qExec or qDisp) */ + + CDB_LOCUS_TYPE_SEGMENT_GENERAL = 4; /* generally available in any qExec, but not + * available in qDisp */ + + CDB_LOCUS_TYPE_REPLICATED = 5; /* replicated over all qExecs of an N-gang */ + CDB_LOCUS_TYPE_HASHED = 6; /* hash partitioned over all qExecs of N-gang */ + CDB_LOCUS_TYPE_HASHED_OJ = 7; /* result of hash partitioned outer join, NULLs can be anywhere */ + CDB_LOCUS_TYPE_STREWN = 8; /* partitioned on no known function */ + CDB_LOCUS_TYPE_END = 9; /* = last valid CdbLocusType + 1 */ +} + +enum MotionType +{ + MOTION_TYPE_UNSPECIFIED = 0; + MOTION_TYPE_HASH = 1; // Use hashing to select a segindex destination + MOTION_TYPE_FIXED = 2; // Send tuples to a fixed set of segindexes + MOTION_TYPE_EXPLICIT = 3; // Send tuples to the segment explicitly specified in their segid column +} + +enum GpdbNodeType { + GPDB_NODE_TYPE_UNSPECIFIED = 0; + INDEX_INFO = 1; + EXPR_CONTEXT = 2; + PROJECTION_INFO = 3; + JUNK_FILTER = 4; + RESULT_REL_INFO = 5; + E_STATE = 6; + TUPLE_TABLE_SLOT = 7; + CDB_PROCESS = 8; + SLICE = 9; + SLICE_TABLE = 10; + CURSOR_POS_INFO = 11; + SHARE_NODE_ENTRY = 12; + PARTITION_STATE = 13; + QUERY_DISPATCH_DESC = 14; + OID_ASSIGNMENT = 15; + PLAN = 16; + SCAN = 17; + JOIN = 18; + RESULT = 19; + MODIFY_TABLE = 20; + APPEND = 21; + MERGE_APPEND = 22; + RECURSIVE_UNION = 23; + SEQUENCE = 24; + BITMAP_AND = 25; + BITMAP_OR = 26; + SEQ_SCAN = 27; + DYNAMIC_SEQ_SCAN = 28; + EXTERNAL_SCAN = 29; + INDEX_SCAN = 30; + DYNAMIC_INDEX_SCAN = 31; + INDEX_ONLY_SCAN = 32; + BITMAP_INDEX_SCAN = 33; + DYNAMIC_BITMAP_INDEX_SCAN = 34; + BITMAP_HEAP_SCAN = 35; + DYNAMIC_BITMAP_HEAP_SCAN = 36; + TID_SCAN = 37; + SUBQUERY_SCAN = 38; + FUNCTION_SCAN = 39; + TABLE_FUNCTION_SCAN = 40; + VALUES_SCAN = 41; + CTE_SCAN = 42; + WORK_TABLE_SCAN = 43; + FOREIGN_SCAN = 44; + NEST_LOOP = 45; + MERGE_JOIN = 46; + 
HASH_JOIN = 47; + MATERIAL = 48; + SORT = 49; + AGG = 50; + WINDOW_AGG = 51; + UNIQUE = 52; + HASH = 53; + SET_OP = 54; + LOCK_ROWS = 55; + LIMIT = 56; + MOTION = 57; + SHARE_INPUT_SCAN = 58; + REPEAT = 59; + DML = 60; + SPLIT_UPDATE = 61; + ROW_TRIGGER = 62; + ASSERT_OP = 63; + PARTITION_SELECTOR = 64; + PLAN_END = 65; + NEST_LOOP_PARAM = 66; + PLAN_ROW_MARK = 67; + PLAN_INVAL_ITEM = 68; + PLAN_STATE = 69; + SCAN_STATE = 70; + JOIN_STATE = 71; + RESULT_STATE = 72; + MODIFY_TABLE_STATE = 73; + APPEND_STATE = 74; + MERGE_APPEND_STATE = 75; + RECURSIVE_UNION_STATE = 76; + SEQUENCE_STATE = 77; + BITMAP_AND_STATE = 78; + BITMAP_OR_STATE = 79; + SEQ_SCAN_STATE = 80; + DYNAMIC_SEQ_SCAN_STATE = 81; + EXTERNAL_SCAN_STATE = 82; + INDEX_SCAN_STATE = 83; + DYNAMIC_INDEX_SCAN_STATE = 84; + INDEX_ONLY_SCAN_STATE = 85; + BITMAP_INDEX_SCAN_STATE = 86; + DYNAMIC_BITMAP_INDEX_SCAN_STATE = 87; + BITMAP_HEAP_SCAN_STATE = 88; + DYNAMIC_BITMAP_HEAP_SCAN_STATE = 89; + TID_SCAN_STATE = 90; + SUBQUERY_SCAN_STATE = 91; + FUNCTION_SCAN_STATE = 92; + TABLE_FUNCTION_STATE = 93; + VALUES_SCAN_STATE = 94; + CTE_SCAN_STATE = 95; + WORK_TABLE_SCAN_STATE = 96; + FOREIGN_SCAN_STATE = 97; + NEST_LOOP_STATE = 98; + MERGE_JOIN_STATE = 99; + HASH_JOIN_STATE = 100; + MATERIAL_STATE = 101; + SORT_STATE = 102; + AGG_STATE = 103; + WINDOW_AGG_STATE = 104; + UNIQUE_STATE = 105; + HASH_STATE = 106; + SET_OP_STATE = 107; + LOCK_ROWS_STATE = 108; + LIMIT_STATE = 109; + MOTION_STATE = 110; + SHARE_INPUT_SCAN_STATE = 111; + REPEAT_STATE = 112; + DML_STATE = 113; + SPLIT_UPDATE_STATE = 114; + ROW_TRIGGER_STATE = 115; + ASSERT_OP_STATE = 116; + PARTITION_SELECTOR_STATE = 117; + TUPLE_DESC_NODE = 118; + SERIALIZED_PARAM_EXTERN_DATA = 119; + ALIAS = 120; + RANGE_VAR = 121; + EXPR = 122; + VAR = 123; + CONST = 124; + PARAM = 125; + AGGREF = 126; + WINDOW_FUNC = 127; + ARRAY_REF = 128; + FUNC_EXPR = 129; + NAMED_ARG_EXPR = 130; + OP_EXPR = 131; + DISTINCT_EXPR = 132; + NULL_IF_EXPR = 133; + SCALAR_ARRAY_OP_EXPR = 134; 
+ BOOL_EXPR = 135; + SUB_LINK = 136; + SUB_PLAN = 137; + ALTERNATIVE_SUB_PLAN = 138; + FIELD_SELECT = 139; + FIELD_STORE = 140; + RELABEL_TYPE = 141; + COERCE_VIA_IO = 142; + ARRAY_COERCE_EXPR = 143; + CONVERT_ROWTYPE_EXPR = 144; + COLLATE_EXPR = 145; + CASE_EXPR = 146; + CASE_WHEN = 147; + CASE_TEST_EXPR = 148; + ARRAY_EXPR = 149; + ROW_EXPR = 150; + ROW_COMPARE_EXPR = 151; + COALESCE_EXPR = 152; + MIN_MAX_EXPR = 153; + XML_EXPR = 154; + NULL_TEST = 155; + BOOLEAN_TEST = 156; + COERCE_TO_DOMAIN = 157; + COERCE_TO_DOMAIN_VALUES = 158; + SET_TO_DEFAULT = 159; + CURRENT_OF_EXPR = 160; + TARGET_ENTRY = 161; + RANGE_TBL_REF = 162; + JOIN_EXPR = 163; + FROM_EXPR = 164; + INTO_CLAUSE = 165; + COPY_INTO_CLAUSE = 166; + REFRESH_CLAUSE = 167; + FLOW = 168; + GROUPING = 169; + GROUP_ID = 170; + DISTRIBUTED_BY = 171; + DML_ACTION_EXPR = 172; + PART_SELECTED_EXPR = 173; + PART_DEFAULT_EXPR = 174; + PART_BOUND_EXPR = 175; + PART_BOUND_INCLUSION_EXPR = 176; + PART_BOUND_OPEN_EXPR = 177; + PART_LIST_RULE_EXPR = 178; + PART_LIST_NULL_TEST_EXPR = 179; + TABLE_OID_INFO = 180; + EXPR_STATE = 181; + GENERIC_EXPR_STATE = 182; + WHOLE_ROW_VAR_EXPR_STATE = 183; + AGGREF_EXPR_STATE = 184; + WINDOW_FUNC_EXPR_STATE = 185; + ARRAY_REF_EXPR_STATE = 186; + FUNC_EXPR_STATE = 187; + SCALAR_ARRAY_OP_EXPR_STATE = 188; + BOOL_EXPR_STATE = 189; + SUB_PLAN_STATE = 190; + ALTERNATIVE_SUB_PLAN_STATE = 191; + FIELD_SELECT_STATE = 192; + FIELD_STORE_STATE = 193; + COERCE_VIA_IO_STATE = 194; + ARRAY_COERCE_EXPR_STATE = 195; + CONVERT_ROWTYPE_EXPR_STATE = 196; + CASE_EXPR_STATE = 197; + CASE_WHEN_STATE = 198; + ARRAY_EXPR_STATE = 199; + ROW_EXPR_STATE = 200; + ROW_COMPARE_EXPR_STATE = 201; + COALESCE_EXPR_STATE = 202; + MIN_MAX_EXPR_STATE = 203; + XML_EXPR_STATE = 204; + NULL_TEST_STATE = 205; + COERCE_TO_DOMAIN_STATE = 206; + DOMAIN_CONSTRAINT_STATE = 207; + GROUPING_FUNC_EXPR_STATE = 208; + PART_SELECTED_EXPR_STATE = 209; + PART_DEFAULT_EXPR_STATE = 210; + PART_BOUND_EXPR_STATE = 211; + 
PART_BOUND_INCLUSION_EXPR_STATE = 212; + PART_BOUND_OPEN_EXPR_STATE = 213; + PART_LIST_RULE_EXPR_STATE = 214; + PART_LIST_NULL_TEST_EXPR_STATE = 215; + PLANNER_INFO = 216; + PLANNER_GLOBAL = 217; + REL_OPT_INFO = 218; + INDEX_OPT_INFO = 219; + PARAM_PATH_INFO = 220; + PATH = 221; + APPEND_ONLY_PATH = 222; + AOCS_PATH = 223; + EXTERNAL_PATH = 224; + INDEX_PATH = 225; + BITMAP_HEAP_PATH = 226; + BITMAP_AND_PATH = 227; + BITMAP_OR_PATH = 228; + NEST_PATH = 229; + MERGE_PATH = 230; + HASH_PATH = 231; + TID_PATH = 232; + FOREIGN_PATH = 233; + APPEND_PATH = 234; + MERGE_APPEND_PATH = 235; + RESULT_PATH = 236; + MATERIAL_PATH = 237; + UNIQUE_PATH = 238; + PROJECTION_PATH = 239; + EQUIVALENCE_CLASS = 240; + EQUIVALENCE_MEMBER = 241; + PATH_KEY = 242; + RESTRICT_INFO = 243; + PLACE_HOLDER_VAR = 244; + SPECIAL_JOIN_INFO = 245; + LATERAL_JOIN_INFO = 246; + APPEND_REL_INFO = 247; + PLACE_HOLDER_INFO = 248; + MIN_MAX_AGG_INFO = 249; + PARTITION = 250; + PARTITION_RULE = 251; + PARTITION_NODE = 252; + PG_PART_RULE = 253; + SEGFILE_MAP_NODE = 254; + PLANNER_PARAM_ITEM = 255; + CDB_MOTION_PATH = 256; + PARTITION_SELECTOR_PATH = 257; + CDB_REL_COLUMN_INFO = 258; + DISTRIBUTION_KEY = 259; + MEMORY_CONTEXT = 260; + ALLOC_SET_CONTEXT = 261; + MEMORY_ACCOUNT = 262; + VALUE = 263; + INTEGER = 264; + FLOAT = 265; + STRING = 266; + BIT_STRING = 267; + NULL_VALUE = 268; + LIST = 269; + INT_LIST = 270; + OID_LIST = 271; + QUERY = 272; + PLANNED_STMT = 273; + INSERT_STMT = 274; + DELETE_STMT = 275; + UPDATE_STMT = 276; + SELECT_STMT = 277; + ALTER_TABLE_STMT = 278; + ALTER_TABLE_CMD = 279; + ALTER_DOMAIN_STMT = 280; + SET_OPERATION_STMT = 281; + GRANT_STMT = 282; + GRANT_ROLE_STMT = 283; + ALTER_DEFAULT_PRIVILEGES_STMT = 284; + CLOSE_PORTAL_STMT = 285; + CLUSTER_STMT = 286; + COPY_STMT = 287; + CREATE_STMT = 288; + SINGLE_ROW_ERROR_DESC = 289; + EXT_TABLE_TYPE_DESC = 290; + CREATE_EXTERNAL_STMT = 291; + DEFINE_STMT = 292; + DROP_STMT = 293; + TRUNCATE_STMT = 294; + COMMENT_STMT = 295; + 
FETCH_STMT = 296; + INDEX_STMT = 297; + CREATE_FUNCTION_STMT = 298; + ALTER_FUNCTION_STMT = 299; + DO_STMT = 300; + RENAME_STMT = 301; + RULE_STMT = 302; + NOTIFY_STMT = 303; + LISTEN_STMT = 304; + UNLISTEN_STMT = 305; + TRANSACTION_STMT = 306; + VIEW_STMT = 307; + LOAD_STMT = 308; + CREATE_DOMAIN_STMT = 309; + CREATEDB_STMT = 310; + DROPDB_STMT = 311; + VACUUM_STMT = 312; + EXPLAIN_STMT = 313; + CREATE_TABLE_AS_STMT = 314; + CREATE_SEQ_STMT = 315; + ALTER_SEQ_STMT = 316; + VARIABLE_SET_STMT = 317; + VARIABLE_SHOW_STMT = 318; + DISCARD_STMT = 319; + CREATE_TRIG_STMT = 320; + CREATE_P_LANG_STMT = 321; + CREATE_ROLE_STMT = 322; + ALTER_ROLE_STMT = 323; + DROP_ROLE_STMT = 324; + CREATE_QUEUE_STMT = 325; + ALTER_QUEUE_STMT = 326; + DROP_QUEUE_STMT = 327; + CREATE_RESOURCE_GROUP_STMT = 328; + DROP_RESOURCE_GROUP_STMT = 329; + ALTER_RESOURCE_GROUP_STMT = 330; + LOCK_STMT = 331; + CONSTRAINTS_SET_STMT = 332; + REINDEX_STMT = 333; + CHECK_POINT_STMT = 334; + CREATE_SCHEMA_STMT = 335; + ALTER_DATABASE_STMT = 336; + ALTER_DATABASE_SET_STMT = 337; + ALTER_ROLE_SET_STMT = 338; + CREATE_CONVERSION_STMT = 339; + CREATE_CAST_STMT = 340; + CREATE_OP_CLASS_STMT = 341; + CREATE_OP_FAMILY_STMT = 342; + ALTER_OP_FAMILY_STMT = 343; + PREPARE_STMT = 344; + EXECUTE_STMT = 345; + DEALLOCATE_STMT = 346; + DECLARE_CURSOR_STMT = 347; + CREATE_TABLE_SPACE_STMT = 348; + DROP_TABLE_SPACE_STMT = 349; + ALTER_OBJECT_SCHEMA_STMT = 350; + ALTER_OWNER_STMT = 351; + DROP_OWNED_STMT = 352; + REASSIGN_OWNED_STMT = 353; + COMPOSITE_TYPE_STMT = 354; + CREATE_ENUM_STMT = 355; + CREATE_RANGE_STMT = 356; + ALTER_ENUM_STMT = 357; + ALTER_TS_DICTIONARY_STMT = 358; + ALTER_TS_CONFIGURATION_STMT = 359; + CREATE_FDW_STMT = 360; + ALTER_FDW_STMT = 361; + CREATE_FOREIGN_SERVER_STMT = 362; + ALTER_FOREIGN_SERVER_STMT = 363; + CREATE_USER_MAPPING_STMT = 364; + ALTER_USER_MAPPING_STMT = 365; + DROP_USER_MAPPING_STMT = 366; + ALTER_TABLE_SPACE_OPTIONS_STMT = 367; + ALTER_TABLE_MOVE_ALL_STMT = 368; + SEC_LABEL_STMT = 
369; + CREATE_FOREIGN_TABLE_STMT = 370; + CREATE_EXTENSION_STMT = 371; + ALTER_EXTENSION_STMT = 372; + ALTER_EXTENSION_CONTENTS_STMT = 373; + CREATE_EVENT_TRIG_STMT = 374; + ALTER_EVENT_TRIG_STMT = 375; + REFRESH_MAT_VIEW_STMT = 376; + REPLICA_IDENTITY_STMT = 377; + ALTER_SYSTEM_STMT = 378; + PARTITION_BY = 379; + PARTITION_ELEM = 380; + PARTITION_RANGE_ITEM = 381; + PARTITION_BOUND_SPEC = 382; + PARTITION_SPEC = 383; + PARTITION_VALUES_SPEC = 384; + ALTER_PARTITION_ID = 385; + ALTER_PARTITION_CMD = 386; + INHERIT_PARTITION_CMD = 387; + CREATE_FILE_SPACE_STMT = 388; + FILE_SPACE_ENTRY = 389; + DROP_FILE_SPACE_STMT = 390; + TABLE_VALUE_EXPR = 391; + DENY_LOGIN_INTERVAL = 392; + DENY_LOGIN_POINT = 393; + ALTER_TYPE_STMT = 394; + SET_DISTRIBUTION_CMD = 395; + EXPAND_STMT_SPEC = 396; + A_EXPR = 397; + COLUMN_REF = 398; + PARAM_REF = 399; + A_CONST = 400; + FUNC_CALL = 401; + A_STAR = 402; + A_INDICES = 403; + A_INDIRECTION = 404; + A_ARRAY_EXPR = 405; + RES_TARGET = 406; + TYPE_CAST = 407; + COLLATE_CLAUSE = 408; + SORT_BY = 409; + WINDOW_DEF = 410; + RANGE_SUBSELECT = 411; + RANGE_FUNCTION = 412; + TYPE_NAME = 413; + COLUMN_DEF = 414; + INDEX_ELEM = 415; + CONSTRAINT = 416; + DEF_ELEM = 417; + RANGE_TBL_ENTRY = 418; + RANGE_TBL_FUNCTION = 419; + WITH_CHECK_OPTION = 420; + GROUPING_CLAUSE = 421; + GROUPING_FUNC = 422; + SORT_GROUP_CLAUSE = 423; + WINDOW_CLAUSE = 424; + PRIV_GRANTEE = 425; + FUNC_WITH_ARGS = 426; + ACCESS_PRIV = 427; + CREATE_OP_CLASS_ITEM = 428; + TABLE_LIKE_CLAUSE = 429; + FUNCTION_PARAMETER = 430; + LOCKING_CLAUSE = 431; + ROW_MARK_CLAUSE = 432; + XML_SERIALIZE = 433; + WITH_CLAUSE = 434; + COMMON_TABLE_EXPR = 435; + COLUMN_REFERENCE_STORAGE_DIRECTIVE = 436; + IDENTIFY_SYSTEM_CMD = 437; + BASE_BACKUP_CMD = 438; + CREATE_REPLICATION_SLOT_CMD = 439; + DROP_REPLICATION_SLOT_CMD = 440; + START_REPLICATION_CMD = 441; + TIME_LINE_HISTORY_CMD = 442; + TRIGGER_DATA = 443; + EVENT_TRIGGER_DATA = 444; + RETURN_SET_INFO = 445; + WINDOW_OBJECT_DATA = 446; + 
TID_BITMAP = 447; + INLINE_CODE_BLOCK = 448; + FDW_ROUTINE = 449; + STREAM_BITMAP = 450; + FORMATTER_DATA = 451; + EXT_PROTOCOL_DATA = 452; + EXT_PROTOCOL_VALIDATOR_DATA = 453; + SELECTED_PARTS = 454; + COOKED_CONSTRAINT = 455; + CDB_EXPLAIN_STAT_HDR = 456; + GP_POLICY = 457; + RETRIEVE_STMT = 458; +} diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto new file mode 100644 index 00000000000..e90fd7953de --- /dev/null +++ b/protos/yagpcc_set_service.proto @@ -0,0 +1,44 @@ +syntax = "proto3"; + +import "google/protobuf/timestamp.proto"; + +import "protos/yagpcc_metrics.proto"; +import "protos/yagpcc_plan.proto"; + +option java_outer_classname = "SegmentYAGPCCAS"; +option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/agent_segment;greenplum"; + +service SetQueryInfo { + rpc SetMetricPlanNode (SetPlanNodeReq) returns (MetricResponse) {} + + rpc SetMetricQuery (SetQueryReq) returns (MetricResponse) {} +} + +message MetricResponse { + MetricResponseStatusCode error_code = 1; + string error_text = 2; +} + +enum MetricResponseStatusCode { + METRIC_RESPONSE_STATUS_CODE_UNSPECIFIED = 0; + METRIC_RESPONSE_STATUS_CODE_SUCCESS = 1; + METRIC_RESPONSE_STATUS_CODE_ERROR = 2; +} + +message SetQueryReq { + QueryStatus query_status = 1; + google.protobuf.Timestamp datetime = 2; + + QueryInfoHeader header = 3; + QueryInfo query_info = 4; + GPMetrics query_metrics = 5; + repeated MetricPlan plan_tree = 6; +} + +message SetPlanNodeReq { + PlanNodeStatus node_status = 1; + google.protobuf.Timestamp datetime = 2; + QueryInfoHeader header = 3; + GPMetrics node_metrics = 4; + MetricPlan plan_node = 5; +} diff --git a/sql/yagp-hooks-collector--1.0.sql b/sql/yagp-hooks-collector--1.0.sql new file mode 100644 index 00000000000..f9ab15fb400 --- /dev/null +++ b/sql/yagp-hooks-collector--1.0.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use '''CREATE EXTENSION "yagp-hooks-collector"''' to load this 
file. \quit diff --git a/sql/yagp-hooks-collector--unpackaged--1.0.sql b/sql/yagp-hooks-collector--unpackaged--1.0.sql new file mode 100644 index 00000000000..0441c97bd84 --- /dev/null +++ b/sql/yagp-hooks-collector--unpackaged--1.0.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use '''CREATE EXTENSION "uuid-cb" FROM unpackaged''' to load this file. \quit diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c new file mode 100644 index 00000000000..c06e3b510d0 --- /dev/null +++ b/src/yagp_hooks_collector.c @@ -0,0 +1,20 @@ +#include "postgres.h" +#include "cdb/cdbvars.h" +#include "fmgr.h" + +PG_MODULE_MAGIC; + +void _PG_init(void); +void _PG_fini(void); + +void _PG_init(void) { + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + //greenplum_hook_init(); + } +} + +void _PG_fini(void) { + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + //greenplum_hook_deinit(); + } +} diff --git a/yagp-hooks-collector.control b/yagp-hooks-collector.control new file mode 100644 index 00000000000..82c189a88fc --- /dev/null +++ b/yagp-hooks-collector.control @@ -0,0 +1,5 @@ +# yagp-hooks-collector extension +comment = 'Intercept query and plan execution hooks and report them to Yandex GPCC agents' +default_version = '1.0' +module_pathname = '$libdir/yagp-hooks-collector' +superuser = true From 7dd86411dcbd17bc0186b4941ddd6ea3d372db11 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 23 Mar 2023 15:20:24 +0300 Subject: [PATCH 002/133] Add .gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..e8dfe855dad --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.o +*.so +src/protos/ +.vscode +compile_commands.json From 57b290435d8af1de1b0888597b02e7442b550186 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 23 Mar 2023 18:13:48 +0300 Subject: [PATCH 003/133] 
Simple Executor{Start,Finish} logging as a foundation to build on --- Makefile | 6 ++++- src/EventSender.cpp | 21 +++++++++++++++ src/EventSender.h | 16 ++++++++++++ src/hook_wrappers.cpp | 52 ++++++++++++++++++++++++++++++++++++++ src/hook_wrappers.h | 12 +++++++++ src/yagp_hooks_collector.c | 6 +++-- 6 files changed, 110 insertions(+), 3 deletions(-) create mode 100644 src/EventSender.cpp create mode 100644 src/EventSender.h create mode 100644 src/hook_wrappers.cpp create mode 100644 src/hook_wrappers.h diff --git a/Makefile b/Makefile index 515c6cbdc85..ac33fa9035c 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmi override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -ggdb -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -fPIC -I/usr/include/libxml2 -I/usr/local/opt/openssl/include -Iinclude -g -DGPBUILD COMMON_CPP_FLAGS := -Isrc -Iinclude PG_CXXFLAGS += $(COMMON_CPP_FLAGS) +SHLIB_LINK += -lprotobuf PROTOC = protoc SRC_DIR = ./src @@ -17,7 +18,10 @@ $(GEN_DIR)/%.pb.cpp : $(PROTO_DIR)/%.proto #$(PROTO_GEN_OBJECTS) : $(GEN_DIR)/%.pb.o : $(GEN_DIR)/%.pb.cpp -OBJS := $(PROTO_GEN_OBJECTS) $(SRC_DIR)/yagp_hooks_collector.o +OBJS := $(PROTO_GEN_OBJECTS) \ + $(SRC_DIR)/EventSender.o \ + $(SRC_DIR)/hook_wrappers.o \ + $(SRC_DIR)/yagp_hooks_collector.o EXTRA_CLEAN := $(GEN_DIR) DATA := $(wildcard sql/*--*.sql) EXTENSION := yagp-hooks-collector diff --git a/src/EventSender.cpp b/src/EventSender.cpp new file mode 100644 index 00000000000..5dffc793723 --- /dev/null +++ b/src/EventSender.cpp @@ -0,0 +1,21 @@ +#include "EventSender.h" + +extern "C" { +#include "postgres.h" +#include "utils/metrics_utils.h" +#include "utils/elog.h" +#include "executor/executor.h" + +#include "cdb/cdbvars.h" 
+#include "cdb/cdbexplain.h" + +#include "tcop/utility.h" +} + +void EventSender::ExecutorStart(QueryDesc *queryDesc, int eflags) { + elog(DEBUG1, "Query %s started", queryDesc->sourceText); +} + +void EventSender::ExecutorFinish(QueryDesc *queryDesc) { + elog(DEBUG1, "Query %s finished", queryDesc->sourceText); +} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h new file mode 100644 index 00000000000..b4fbff598a5 --- /dev/null +++ b/src/EventSender.h @@ -0,0 +1,16 @@ +#pragma once + +struct QueryDesc; + +class EventSender { +public: + void ExecutorStart(QueryDesc *queryDesc, int eflags); + void ExecutorFinish(QueryDesc *queryDesc); + + static EventSender *instance() { + static EventSender sender; + return &sender; + } +private: + EventSender() {} +}; \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp new file mode 100644 index 00000000000..019709d46d8 --- /dev/null +++ b/src/hook_wrappers.cpp @@ -0,0 +1,52 @@ +#include "hook_wrappers.h" +#include "EventSender.h" + +extern "C" { +#include "postgres.h" +#include "utils/metrics_utils.h" +#include "utils/elog.h" +#include "executor/executor.h" + +#include "cdb/cdbvars.h" +#include "cdb/cdbexplain.h" + +#include "tcop/utility.h" +} + +static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; +static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; + +static void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags); +static void ya_ExecutorFinish_hook(QueryDesc *queryDesc); + +#define ReplaceHook(hook_name) \ + previous_##hook_name = hook_name; \ + hook_name = ya_##hook_name; + +void hooks_init() { + ReplaceHook(ExecutorStart_hook); + ReplaceHook(ExecutorFinish_hook); +} + +void hooks_deinit() { + ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorFinish_hook = ExecutorFinish_hook; +} + +void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags) { + EventSender::instance()->ExecutorStart(queryDesc, eflags); + + if 
(previous_ExecutorStart_hook) + (*previous_ExecutorStart_hook)(queryDesc, eflags); + else + standard_ExecutorStart(queryDesc, eflags); +} + +void ya_ExecutorFinish_hook(QueryDesc *queryDesc) { + EventSender::instance()->ExecutorFinish(queryDesc); + + if (previous_ExecutorFinish_hook) + (*previous_ExecutorFinish_hook)(queryDesc); + else + standard_ExecutorFinish(queryDesc); +} \ No newline at end of file diff --git a/src/hook_wrappers.h b/src/hook_wrappers.h new file mode 100644 index 00000000000..815fcb7cd51 --- /dev/null +++ b/src/hook_wrappers.h @@ -0,0 +1,12 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +extern void hooks_init(); +extern void hooks_deinit(); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c index c06e3b510d0..69475ea5079 100644 --- a/src/yagp_hooks_collector.c +++ b/src/yagp_hooks_collector.c @@ -2,6 +2,8 @@ #include "cdb/cdbvars.h" #include "fmgr.h" +#include "hook_wrappers.h" + PG_MODULE_MAGIC; void _PG_init(void); @@ -9,12 +11,12 @@ void _PG_fini(void); void _PG_init(void) { if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { - //greenplum_hook_init(); + hooks_init(); } } void _PG_fini(void) { if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { - //greenplum_hook_deinit(); + hooks_deinit(); } } From c147d97057d473750d92c1a22ef825337a9a68a4 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 24 Mar 2023 18:09:42 +0300 Subject: [PATCH 004/133] Add GRPC into Makefile and some prototype senders --- Makefile | 23 +++++++++++----- protos/yagpcc_metrics.proto | 1 + protos/yagpcc_plan.proto | 1 + protos/yagpcc_set_service.proto | 1 + src/EventSender.cpp | 39 ++++++++++++++++++++++++-- src/EventSender.h | 12 ++++---- src/GrpcConnector.cpp | 49 +++++++++++++++++++++++++++++++++ src/GrpcConnector.h | 14 ++++++++++ src/hook_wrappers.cpp | 40 +++++++++++++++------------ 9 files changed, 148 insertions(+), 32 deletions(-) create 
mode 100644 src/GrpcConnector.cpp create mode 100644 src/GrpcConnector.h diff --git a/Makefile b/Makefile index ac33fa9035c..8d45edd70f2 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,18 @@ override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD -override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -ggdb -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -fPIC -I/usr/include/libxml2 -I/usr/local/opt/openssl/include -Iinclude -g -DGPBUILD +override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -ggdb -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -fPIC -Iinclude -Isrc/protos -Isrc -g -DGPBUILD COMMON_CPP_FLAGS := -Isrc -Iinclude PG_CXXFLAGS += $(COMMON_CPP_FLAGS) -SHLIB_LINK += -lprotobuf +SHLIB_LINK += -lprotobuf -lgrpc++ PROTOC = protoc SRC_DIR = ./src GEN_DIR = ./src/protos PROTO_DIR = ./protos -PROTO_GEN_OBJECTS = $(GEN_DIR)/yagpcc_plan.pb.o $(GEN_DIR)/yagpcc_metrics.pb.o $(GEN_DIR)/yagpcc_set_service.pb.o +PROTO_GEN_OBJECTS = $(GEN_DIR)/yagpcc_plan.pb.o $(GEN_DIR)/yagpcc_metrics.pb.o $(GEN_DIR)/yagpcc_set_service.pb.o \ + $(GEN_DIR)/yagpcc_set_service.grpc.pb.o + +GRPC_CPP_PLUGIN := grpc_cpp_plugin +GRPC_CPP_PLUGIN_PATH ?= `which $(GRPC_CPP_PLUGIN)` $(GEN_DIR)/%.pb.cpp : $(PROTO_DIR)/%.proto sed -i 's/optional //g' $^ @@ -16,11 
+20,16 @@ $(GEN_DIR)/%.pb.cpp : $(PROTO_DIR)/%.proto $(PROTOC) --cpp_out=$(SRC_DIR) $^ mv $(GEN_DIR)/$*.pb.cc $(GEN_DIR)/$*.pb.cpp -#$(PROTO_GEN_OBJECTS) : $(GEN_DIR)/%.pb.o : $(GEN_DIR)/%.pb.cpp -OBJS := $(PROTO_GEN_OBJECTS) \ - $(SRC_DIR)/EventSender.o \ - $(SRC_DIR)/hook_wrappers.o \ + +$(GEN_DIR)/yagpcc_set_service.grpc.pb.cpp : $(PROTO_DIR)/yagpcc_set_service.proto + $(PROTOC) --grpc_out=$(SRC_DIR) --plugin=protoc-gen-grpc=$(GRPC_CPP_PLUGIN_PATH) $^ + mv $(GEN_DIR)/yagpcc_set_service.grpc.pb.cc $(GEN_DIR)/yagpcc_set_service.grpc.pb.cpp + +OBJS := $(PROTO_GEN_OBJECTS) \ + $(SRC_DIR)/GrpcConnector.o \ + $(SRC_DIR)/EventSender.o \ + $(SRC_DIR)/hook_wrappers.o \ $(SRC_DIR)/yagp_hooks_collector.o EXTRA_CLEAN := $(GEN_DIR) DATA := $(wildcard sql/*--*.sql) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index 2c5136429cb..b7e255484c7 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -1,5 +1,6 @@ syntax = "proto3"; +package yagpcc; option java_outer_classname = "SegmentYAGPCCM"; option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; diff --git a/protos/yagpcc_plan.proto b/protos/yagpcc_plan.proto index 4ea3a3db6e9..962fab4bbdd 100644 --- a/protos/yagpcc_plan.proto +++ b/protos/yagpcc_plan.proto @@ -1,5 +1,6 @@ syntax = "proto3"; +package yagpcc; option java_outer_classname = "SegmentYAGPCCP"; option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index e90fd7953de..0bef72891ee 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -5,6 +5,7 @@ import "google/protobuf/timestamp.proto"; import "protos/yagpcc_metrics.proto"; import "protos/yagpcc_plan.proto"; +package yagpcc; option java_outer_classname = "SegmentYAGPCCAS"; option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/agent_segment;greenplum"; diff --git a/src/EventSender.cpp 
b/src/EventSender.cpp index 5dffc793723..e186d6235e1 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,4 +1,6 @@ #include "EventSender.h" +#include "GrpcConnector.h" +#include "protos/yagpcc_set_service.pb.h" extern "C" { #include "postgres.h" @@ -12,10 +14,41 @@ extern "C" { #include "tcop/utility.h" } -void EventSender::ExecutorStart(QueryDesc *queryDesc, int eflags) { - elog(DEBUG1, "Query %s started", queryDesc->sourceText); +void EventSender::ExecutorStart(QueryDesc *queryDesc, int/* eflags*/) { + elog(DEBUG1, "Query %s start recording", queryDesc->sourceText); + yagpcc::SetQueryReq req; + req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + google::protobuf::Timestamp ts; + req.set_allocated_datetime(ts.New()); + auto result = connector->setMetricQuery(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { + elog(WARNING, "Query %s start reporting failed with an error %s", + queryDesc->sourceText, result.error_text().c_str()); + } else { + elog(DEBUG1, "Query %s start successful", queryDesc->sourceText); + } } void EventSender::ExecutorFinish(QueryDesc *queryDesc) { - elog(DEBUG1, "Query %s finished", queryDesc->sourceText); + elog(DEBUG1, "Query %s finish recording", queryDesc->sourceText); + yagpcc::SetQueryReq req; + req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + google::protobuf::Timestamp ts; + req.set_allocated_datetime(ts.New()); + auto result = connector->setMetricQuery(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { + elog(WARNING, "Query %s finish reporting failed with an error %s", + queryDesc->sourceText, result.error_text().c_str()); + } else { + elog(DEBUG1, "Query %s finish successful", queryDesc->sourceText); + } +} + +EventSender* EventSender::instance() { + static EventSender sender; + return &sender; +} + +EventSender::EventSender() { + connector = std::make_unique(); } \ No newline at end of file diff --git a/src/EventSender.h 
b/src/EventSender.h index b4fbff598a5..f3e4cd2ed2b 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,16 +1,18 @@ #pragma once +#include + +class GrpcConnector; + struct QueryDesc; class EventSender { public: void ExecutorStart(QueryDesc *queryDesc, int eflags); void ExecutorFinish(QueryDesc *queryDesc); + static EventSender *instance(); - static EventSender *instance() { - static EventSender sender; - return &sender; - } private: - EventSender() {} + EventSender(); + std::unique_ptr connector; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp new file mode 100644 index 00000000000..9a9157fcd97 --- /dev/null +++ b/src/GrpcConnector.cpp @@ -0,0 +1,49 @@ +#include "GrpcConnector.h" +#include "yagpcc_set_service.grpc.pb.h" + +#include +#include +#include + +class GrpcConnector::Impl { +public: + Impl() { + GOOGLE_PROTOBUF_VERIFY_VERSION; + this->stub = yagpcc::SetQueryInfo::NewStub(grpc::CreateChannel( + SOCKET_FILE, grpc::InsecureChannelCredentials())); + } + + yagpcc::MetricResponse setMetricQuery(yagpcc::SetQueryReq req) { + yagpcc::MetricResponse response; + grpc::ClientContext context; + auto deadline = std::chrono::system_clock::now() + std::chrono::seconds(1); + context.set_deadline(deadline); + context.set_compression_algorithm(grpc_compression_algorithm::GRPC_COMPRESS_GZIP); + + grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); + + if (!status.ok()) { + response.set_error_text("Connection lost"); + response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); + } + + return response; + } + +private: + const std::string SOCKET_FILE = "unix:///tmp/yagpcc_agent.sock"; + const std::string TCP_ADDRESS = "127.0.0.1:1432"; + std::unique_ptr stub; +}; + +GrpcConnector::GrpcConnector() { + impl = new Impl(); +} + +GrpcConnector::~GrpcConnector() { + delete impl; +} + +yagpcc::MetricResponse GrpcConnector::setMetricQuery(yagpcc::SetQueryReq req) { + return impl->setMetricQuery(req); +} \ No 
newline at end of file diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h new file mode 100644 index 00000000000..7d504ba9c2a --- /dev/null +++ b/src/GrpcConnector.h @@ -0,0 +1,14 @@ +#pragma once + +#include "yagpcc_set_service.pb.h" + +class GrpcConnector { +public: + GrpcConnector(); + ~GrpcConnector(); + yagpcc::MetricResponse setMetricQuery(yagpcc::SetQueryReq req); + +private: + class Impl; + Impl* impl; +}; \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 019709d46d8..ba990ab57f1 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -19,13 +19,13 @@ static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags); static void ya_ExecutorFinish_hook(QueryDesc *queryDesc); -#define ReplaceHook(hook_name) \ - previous_##hook_name = hook_name; \ - hook_name = ya_##hook_name; +#define REPLACE_HOOK(hookName) \ + previous_##hookName = hookName; \ + hookName = ya_##hookName; void hooks_init() { - ReplaceHook(ExecutorStart_hook); - ReplaceHook(ExecutorFinish_hook); + REPLACE_HOOK(ExecutorStart_hook); + REPLACE_HOOK(ExecutorFinish_hook); } void hooks_deinit() { @@ -33,20 +33,26 @@ void hooks_deinit() { ExecutorFinish_hook = ExecutorFinish_hook; } -void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags) { - EventSender::instance()->ExecutorStart(queryDesc, eflags); +#define CREATE_HOOK_WRAPPER(hookName, ...) 
\ + PG_TRY(); \ + { \ + EventSender::instance()->hookName(__VA_ARGS__); \ + } \ + PG_CATCH(); \ + { \ + ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ + PG_RE_THROW(); \ + } \ + PG_END_TRY(); \ + if (previous_##hookName##_hook) \ + (*previous_##hookName##_hook)(__VA_ARGS__); \ + else \ + standard_##hookName(__VA_ARGS__); - if (previous_ExecutorStart_hook) - (*previous_ExecutorStart_hook)(queryDesc, eflags); - else - standard_ExecutorStart(queryDesc, eflags); +void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags) { + CREATE_HOOK_WRAPPER(ExecutorStart, queryDesc, eflags); } void ya_ExecutorFinish_hook(QueryDesc *queryDesc) { - EventSender::instance()->ExecutorFinish(queryDesc); - - if (previous_ExecutorFinish_hook) - (*previous_ExecutorFinish_hook)(queryDesc); - else - standard_ExecutorFinish(queryDesc); + CREATE_HOOK_WRAPPER(ExecutorFinish, queryDesc); } \ No newline at end of file From 4f558948e126ed461b72d1b6340bb36dbef0efd6 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Sat, 25 Mar 2023 20:20:29 +0300 Subject: [PATCH 005/133] Make ExecuteStart and ExecuteFinish work (partially) --- src/EventSender.cpp | 43 +++++++++++++++++++++++++++++++++++++++---- src/GrpcConnector.cpp | 5 ++--- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index e186d6235e1..7b54e550032 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,6 +1,7 @@ #include "EventSender.h" #include "GrpcConnector.h" #include "protos/yagpcc_set_service.pb.h" +#include extern "C" { #include "postgres.h" @@ -14,12 +15,45 @@ extern "C" { #include "tcop/utility.h" } +static google::protobuf::Timestamp current_ts() { + google::protobuf::Timestamp current_ts; + struct timeval tv; + gettimeofday(&tv, nullptr); + current_ts.set_seconds(tv.tv_sec); + current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); + return current_ts; +} + +static yagpcc::QueryInfoHeader create_header() { + yagpcc::QueryInfoHeader header; + 
header.set_pid(MyProcPid); + auto gpId = header.mutable_gpidentity(); + gpId->set_dbid(GpIdentity.dbid); + gpId->set_segindex(GpIdentity.segindex); + gpId->set_gp_role(static_cast(Gp_role)); + gpId->set_gp_session_role(static_cast(Gp_session_role)); + header.set_ssid(gp_session_id); + header.set_ccnt(gp_command_count); + header.set_sliceid(0); + int32 tmid = 0; + gpmon_gettmid(&tmid); + header.set_tmid(tmid); + return header; +} + +static yagpcc::QueryInfo create_query_info(QueryDesc *queryDesc) { + yagpcc::QueryInfo qi; + // TODO + return qi; +} + void EventSender::ExecutorStart(QueryDesc *queryDesc, int/* eflags*/) { elog(DEBUG1, "Query %s start recording", queryDesc->sourceText); yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); - google::protobuf::Timestamp ts; - req.set_allocated_datetime(ts.New()); + *req.mutable_datetime() = current_ts(); + *req.mutable_header() = create_header(); + *req.mutable_query_info() = create_query_info(queryDesc); auto result = connector->setMetricQuery(req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { elog(WARNING, "Query %s start reporting failed with an error %s", @@ -33,8 +67,9 @@ void EventSender::ExecutorFinish(QueryDesc *queryDesc) { elog(DEBUG1, "Query %s finish recording", queryDesc->sourceText); yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); - google::protobuf::Timestamp ts; - req.set_allocated_datetime(ts.New()); + *req.mutable_datetime() = current_ts(); + *req.mutable_header() = create_header(); + *req.mutable_query_info() = create_query_info(queryDesc); auto result = connector->setMetricQuery(req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { elog(WARNING, "Query %s finish reporting failed with an error %s", diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 9a9157fcd97..0d200584848 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -16,14 +16,13 @@ class 
GrpcConnector::Impl { yagpcc::MetricResponse setMetricQuery(yagpcc::SetQueryReq req) { yagpcc::MetricResponse response; grpc::ClientContext context; - auto deadline = std::chrono::system_clock::now() + std::chrono::seconds(1); + auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(50); context.set_deadline(deadline); - context.set_compression_algorithm(grpc_compression_algorithm::GRPC_COMPRESS_GZIP); grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); if (!status.ok()) { - response.set_error_text("Connection lost"); + response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); } From f85d17388283bf5b18fe609e3d45616f99262a01 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 28 Mar 2023 12:40:57 +0300 Subject: [PATCH 006/133] Implement QueryInfo generation - borrow GlowByte code to generate plan text and SessionInfo - borrow code from our in-house pg_stat_statements to generate query id and plan id - refactor code to follow common name conventions and identations --- Makefile | 8 +- src/EventSender.cpp | 150 +++- src/EventSender.h | 3 +- src/GrpcConnector.cpp | 23 +- src/GrpcConnector.h | 7 +- src/hook_wrappers.cpp | 49 +- src/stat_statements_parser/README.MD | 1 + .../pg_stat_statements_ya_parser.c | 771 ++++++++++++++++++ .../pg_stat_statements_ya_parser.h | 15 + 9 files changed, 971 insertions(+), 56 deletions(-) create mode 100644 src/stat_statements_parser/README.MD create mode 100644 src/stat_statements_parser/pg_stat_statements_ya_parser.c create mode 100644 src/stat_statements_parser/pg_stat_statements_ya_parser.h diff --git a/Makefile b/Makefile index 8d45edd70f2..89cc54d527d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv 
-fexcess-precision=standard -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -ggdb -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -fPIC -Iinclude -Isrc/protos -Isrc -g -DGPBUILD -COMMON_CPP_FLAGS := -Isrc -Iinclude +COMMON_CPP_FLAGS := -Isrc -Iinclude -Isrc/stat_statements_parser PG_CXXFLAGS += $(COMMON_CPP_FLAGS) SHLIB_LINK += -lprotobuf -lgrpc++ @@ -26,7 +26,11 @@ $(GEN_DIR)/yagpcc_set_service.grpc.pb.cpp : $(PROTO_DIR)/yagpcc_set_service.prot $(PROTOC) --grpc_out=$(SRC_DIR) --plugin=protoc-gen-grpc=$(GRPC_CPP_PLUGIN_PATH) $^ mv $(GEN_DIR)/yagpcc_set_service.grpc.pb.cc $(GEN_DIR)/yagpcc_set_service.grpc.pb.cpp -OBJS := $(PROTO_GEN_OBJECTS) \ +PG_STAT_DIR := $(SRC_DIR)/stat_statements_parser +PG_STAT_OBJS := $(PG_STAT_DIR)/pg_stat_statements_ya_parser.o + +OBJS := $(PG_STAT_OBJS) \ + $(PROTO_GEN_OBJECTS) \ $(SRC_DIR)/GrpcConnector.o \ $(SRC_DIR)/EventSender.o \ $(SRC_DIR)/hook_wrappers.o \ diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 7b54e550032..967612aa22a 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -3,19 +3,70 @@ #include "protos/yagpcc_set_service.pb.h" #include -extern "C" { +extern "C" +{ #include "postgres.h" #include "utils/metrics_utils.h" #include "utils/elog.h" #include "executor/executor.h" +#include "commands/explain.h" +#include "commands/dbcommands.h" +#include "commands/resgroupcmds.h" #include "cdb/cdbvars.h" #include "cdb/cdbexplain.h" #include "tcop/utility.h" +#include "pg_stat_statements_ya_parser.h" } -static google::protobuf::Timestamp current_ts() { +namespace +{ +std::string get_user_name() 
+{ + const char *username = GetConfigOption("session_authorization", false, false); + return username ? "" : std::string(username); +} + +std::string get_db_name() +{ + char *dbname = get_database_name(MyDatabaseId); + std::string result = dbname ? std::string(dbname) : ""; + pfree(dbname); + return result; +} + +std::string get_rg_name() +{ + auto userId = GetUserId(); + if (!OidIsValid(userId)) + return std::string(); + auto groupId = GetResGroupIdForRole(userId); + if (!OidIsValid(groupId)) + return std::string(); + char *rgname = GetResGroupNameForId(groupId); + if (rgname == nullptr) + return std::string(); + pfree(rgname); + return std::string(rgname); +} + +std::string get_app_name() +{ + return application_name ? std::string(application_name) : ""; +} + +int get_cur_slice_id(QueryDesc *desc) +{ + if (!desc->estate) + { + return 0; + } + return LocallyExecutingSliceIndex(desc->estate); +} + +google::protobuf::Timestamp current_ts() +{ google::protobuf::Timestamp current_ts; struct timeval tv; gettimeofday(&tv, nullptr); @@ -24,7 +75,8 @@ static google::protobuf::Timestamp current_ts() { return current_ts; } -static yagpcc::QueryInfoHeader create_header() { +yagpcc::QueryInfoHeader create_header(QueryDesc *queryDesc) +{ yagpcc::QueryInfoHeader header; header.set_pid(MyProcPid); auto gpId = header.mutable_gpidentity(); @@ -34,56 +86,110 @@ static yagpcc::QueryInfoHeader create_header() { gpId->set_gp_session_role(static_cast(Gp_session_role)); header.set_ssid(gp_session_id); header.set_ccnt(gp_command_count); - header.set_sliceid(0); + header.set_sliceid(get_cur_slice_id(queryDesc)); int32 tmid = 0; gpmon_gettmid(&tmid); header.set_tmid(tmid); return header; } -static yagpcc::QueryInfo create_query_info(QueryDesc *queryDesc) { - yagpcc::QueryInfo qi; - // TODO - return qi; +yagpcc::SessionInfo get_session_info(QueryDesc *queryDesc) +{ + yagpcc::SessionInfo si; + if (queryDesc->sourceText) + *si.mutable_sql() = std::string(queryDesc->sourceText); + 
*si.mutable_applicationname() = get_app_name(); + *si.mutable_databasename() = get_db_name(); + *si.mutable_resourcegroup() = get_rg_name(); + *si.mutable_username() = get_user_name(); + return si; +} + +ExplainState get_explain_state(QueryDesc *queryDesc, bool costs) +{ + ExplainState es; + ExplainInitState(&es); + es.costs = costs; + es.verbose = true; + es.format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(&es); + ExplainPrintPlan(&es, queryDesc); + ExplainEndOutput(&es); + return es; +} + +std::string get_plan_text(QueryDesc *queryDesc) +{ + auto es = get_explain_state(queryDesc, true); + return std::string(es.str->data, es.str->len); } -void EventSender::ExecutorStart(QueryDesc *queryDesc, int/* eflags*/) { +yagpcc::QueryInfo create_query_info(QueryDesc *queryDesc) +{ + yagpcc::QueryInfo qi; + *qi.mutable_sessioninfo() = get_session_info(queryDesc); + if (queryDesc->sourceText) + *qi.mutable_querytext() = queryDesc->sourceText; + if (queryDesc->plannedstmt) + { + qi.set_generator(queryDesc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + } + *qi.mutable_plantext() = get_plan_text(queryDesc); + qi.set_plan_id(get_plan_id(queryDesc)); + qi.set_query_id(queryDesc->plannedstmt->queryId); + return qi; +} +} // namespace + +void EventSender::ExecutorStart(QueryDesc *queryDesc, int /* eflags*/) +{ elog(DEBUG1, "Query %s start recording", queryDesc->sourceText); yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); *req.mutable_datetime() = current_ts(); - *req.mutable_header() = create_header(); + *req.mutable_header() = create_header(queryDesc); *req.mutable_query_info() = create_query_info(queryDesc); - auto result = connector->setMetricQuery(req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { + auto result = connector->set_metric_query(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) + { elog(WARNING, "Query %s start reporting failed with an error %s", - queryDesc->sourceText, result.error_text().c_str()); - } else { + queryDesc->sourceText, result.error_text().c_str()); + } + else + { elog(DEBUG1, "Query %s start successful", queryDesc->sourceText); } } -void EventSender::ExecutorFinish(QueryDesc *queryDesc) { +void EventSender::ExecutorFinish(QueryDesc *queryDesc) +{ elog(DEBUG1, "Query %s finish recording", queryDesc->sourceText); yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); *req.mutable_datetime() = current_ts(); - *req.mutable_header() = create_header(); + *req.mutable_header() = create_header(queryDesc); *req.mutable_query_info() = create_query_info(queryDesc); - auto result = connector->setMetricQuery(req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { + auto result = connector->set_metric_query(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) + { elog(WARNING, "Query %s finish reporting failed with an error 
%s", - queryDesc->sourceText, result.error_text().c_str()); - } else { + queryDesc->sourceText, result.error_text().c_str()); + } + else + { elog(DEBUG1, "Query %s finish successful", queryDesc->sourceText); } } -EventSender* EventSender::instance() { +EventSender *EventSender::instance() +{ static EventSender sender; return &sender; } -EventSender::EventSender() { +EventSender::EventSender() +{ connector = std::make_unique(); } \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index f3e4cd2ed2b..70868f6c757 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -6,7 +6,8 @@ class GrpcConnector; struct QueryDesc; -class EventSender { +class EventSender +{ public: void ExecutorStart(QueryDesc *queryDesc, int eflags); void ExecutorFinish(QueryDesc *queryDesc); diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 0d200584848..7329f392010 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -5,15 +5,18 @@ #include #include -class GrpcConnector::Impl { +class GrpcConnector::Impl +{ public: - Impl() { + Impl() + { GOOGLE_PROTOBUF_VERIFY_VERSION; this->stub = yagpcc::SetQueryInfo::NewStub(grpc::CreateChannel( SOCKET_FILE, grpc::InsecureChannelCredentials())); } - yagpcc::MetricResponse setMetricQuery(yagpcc::SetQueryReq req) { + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) + { yagpcc::MetricResponse response; grpc::ClientContext context; auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(50); @@ -21,7 +24,8 @@ class GrpcConnector::Impl { grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - if (!status.ok()) { + if (!status.ok()) + { response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); } @@ -35,14 +39,17 @@ class GrpcConnector::Impl { std::unique_ptr stub; }; -GrpcConnector::GrpcConnector() { +GrpcConnector::GrpcConnector() +{ impl = 
new Impl(); } -GrpcConnector::~GrpcConnector() { +GrpcConnector::~GrpcConnector() +{ delete impl; } -yagpcc::MetricResponse GrpcConnector::setMetricQuery(yagpcc::SetQueryReq req) { - return impl->setMetricQuery(req); +yagpcc::MetricResponse GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) +{ + return impl->set_metric_query(req); } \ No newline at end of file diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h index 7d504ba9c2a..dc0f21706a3 100644 --- a/src/GrpcConnector.h +++ b/src/GrpcConnector.h @@ -2,13 +2,14 @@ #include "yagpcc_set_service.pb.h" -class GrpcConnector { +class GrpcConnector +{ public: GrpcConnector(); ~GrpcConnector(); - yagpcc::MetricResponse setMetricQuery(yagpcc::SetQueryReq req); + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req); private: class Impl; - Impl* impl; + Impl *impl; }; \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index ba990ab57f1..9f3200c006f 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,7 +1,8 @@ #include "hook_wrappers.h" #include "EventSender.h" -extern "C" { +extern "C" +{ #include "postgres.h" #include "utils/metrics_utils.h" #include "utils/elog.h" @@ -13,46 +14,54 @@ extern "C" { #include "tcop/utility.h" } +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" + static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags); static void ya_ExecutorFinish_hook(QueryDesc *queryDesc); -#define REPLACE_HOOK(hookName) \ +#define REPLACE_HOOK(hookName) \ previous_##hookName = hookName; \ hookName = ya_##hookName; -void hooks_init() { +void hooks_init() +{ REPLACE_HOOK(ExecutorStart_hook); REPLACE_HOOK(ExecutorFinish_hook); + stat_statements_parser_init(); } -void hooks_deinit() { +void hooks_deinit() +{ ExecutorStart_hook = previous_ExecutorStart_hook; ExecutorFinish_hook = 
ExecutorFinish_hook; + stat_statements_parser_deinit(); } -#define CREATE_HOOK_WRAPPER(hookName, ...) \ - PG_TRY(); \ - { \ - EventSender::instance()->hookName(__VA_ARGS__); \ - } \ - PG_CATCH(); \ - { \ - ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ - PG_RE_THROW(); \ - } \ - PG_END_TRY(); \ - if (previous_##hookName##_hook) \ - (*previous_##hookName##_hook)(__VA_ARGS__); \ - else \ +#define CREATE_HOOK_WRAPPER(hookName, ...) \ + PG_TRY(); \ + { \ + EventSender::instance()->hookName(__VA_ARGS__); \ + } \ + PG_CATCH(); \ + { \ + ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ + PG_RE_THROW(); \ + } \ + PG_END_TRY(); \ + if (previous_##hookName##_hook) \ + (*previous_##hookName##_hook)(__VA_ARGS__); \ + else \ standard_##hookName(__VA_ARGS__); -void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags) { +void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags) +{ CREATE_HOOK_WRAPPER(ExecutorStart, queryDesc, eflags); } -void ya_ExecutorFinish_hook(QueryDesc *queryDesc) { +void ya_ExecutorFinish_hook(QueryDesc *queryDesc) +{ CREATE_HOOK_WRAPPER(ExecutorFinish, queryDesc); } \ No newline at end of file diff --git a/src/stat_statements_parser/README.MD b/src/stat_statements_parser/README.MD new file mode 100644 index 00000000000..291e31a3099 --- /dev/null +++ b/src/stat_statements_parser/README.MD @@ -0,0 +1 @@ +This directory contains a slightly modified subset of pg_stat_statements for PG v9.4 to be used in query and plan ID generation. 
diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c new file mode 100644 index 00000000000..f14742337bd --- /dev/null +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -0,0 +1,771 @@ +#include "postgres.h" + +#include +#include + +#include "access/hash.h" +#include "executor/instrument.h" +#include "executor/execdesc.h" +#include "funcapi.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "parser/analyze.h" +#include "parser/parsetree.h" +#include "parser/scanner.h" +#include "parser/gram.h" +#include "pgstat.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/spin.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/memutils.h" + +#include "pg_stat_statements_ya_parser.h" + +static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL; + +#define JUMBLE_SIZE 1024 /* query serialization buffer size */ + +/* + * Struct for tracking locations/lengths of constants during normalization + */ +typedef struct pgssLocationLen +{ + int location; /* start offset in query text */ + int length; /* length in bytes, or -1 to ignore */ +} pgssLocationLen; + +/* + * Working state for computing a query jumble and producing a normalized + * query string + */ +typedef struct pgssJumbleState +{ + /* Jumble of current query tree */ + unsigned char *jumble; + + /* Number of bytes used in jumble[] */ + Size jumble_len; + + /* Array of locations of constants that should be removed */ + pgssLocationLen *clocations; + + /* Allocated length of clocations array */ + int clocations_buf_size; + + /* Current number of valid entries in clocations array */ + int clocations_count; + + /* highest Param id we've seen, in order to start normalization correctly */ + int highest_extern_param_id; +} pgssJumbleState; + +static void AppendJumble(pgssJumbleState *jstate, + const unsigned char *item, Size size); +static void 
JumbleQuery(pgssJumbleState *jstate, Query *query); +static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable); +static void JumbleExpr(pgssJumbleState *jstate, Node *node); +static void RecordConstLocation(pgssJumbleState *jstate, int location); + +static StringInfo gen_normplan(const char *execution_plan); + +static bool need_replace(int token); + +void pgss_post_parse_analyze(ParseState *pstate, Query *query); + +void stat_statements_parser_init() +{ + prev_post_parse_analyze_hook = post_parse_analyze_hook; + post_parse_analyze_hook = pgss_post_parse_analyze; +} + +void stat_statements_parser_deinit() +{ + post_parse_analyze_hook = prev_post_parse_analyze_hook; +} + +/* + * AppendJumble: Append a value that is substantive in a given query to + * the current jumble. + */ +static void +AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size) +{ + unsigned char *jumble = jstate->jumble; + Size jumble_len = jstate->jumble_len; + + /* + * Whenever the jumble buffer is full, we hash the current contents and + * reset the buffer to contain just that hash value, thus relying on the + * hash to summarize everything so far. + */ + while (size > 0) + { + Size part_size; + + if (jumble_len >= JUMBLE_SIZE) + { + uint32 start_hash = hash_any(jumble, JUMBLE_SIZE); + + memcpy(jumble, &start_hash, sizeof(start_hash)); + jumble_len = sizeof(start_hash); + } + part_size = Min(size, JUMBLE_SIZE - jumble_len); + memcpy(jumble + jumble_len, item, part_size); + jumble_len += part_size; + item += part_size; + size -= part_size; + } + jstate->jumble_len = jumble_len; +} + +/* + * Wrappers around AppendJumble to encapsulate details of serialization + * of individual local variable elements. 
+ */ +#define APP_JUMB(item) \ + AppendJumble(jstate, (const unsigned char *)&(item), sizeof(item)) +#define APP_JUMB_STRING(str) \ + AppendJumble(jstate, (const unsigned char *)(str), strlen(str) + 1) + +/* + * JumbleQuery: Selectively serialize the query tree, appending significant + * data to the "query jumble" while ignoring nonsignificant data. + * + * Rule of thumb for what to include is that we should ignore anything not + * semantically significant (such as alias names) as well as anything that can + * be deduced from child nodes (else we'd just be double-hashing that piece + * of information). + */ +void JumbleQuery(pgssJumbleState *jstate, Query *query) +{ + Assert(IsA(query, Query)); + Assert(query->utilityStmt == NULL); + + APP_JUMB(query->commandType); + /* resultRelation is usually predictable from commandType */ + JumbleExpr(jstate, (Node *)query->cteList); + JumbleRangeTable(jstate, query->rtable); + JumbleExpr(jstate, (Node *)query->jointree); + JumbleExpr(jstate, (Node *)query->targetList); + JumbleExpr(jstate, (Node *)query->returningList); + JumbleExpr(jstate, (Node *)query->groupClause); + JumbleExpr(jstate, query->havingQual); + JumbleExpr(jstate, (Node *)query->windowClause); + JumbleExpr(jstate, (Node *)query->distinctClause); + JumbleExpr(jstate, (Node *)query->sortClause); + JumbleExpr(jstate, query->limitOffset); + JumbleExpr(jstate, query->limitCount); + /* we ignore rowMarks */ + JumbleExpr(jstate, query->setOperations); +} + +/* + * Jumble a range table + */ +static void +JumbleRangeTable(pgssJumbleState *jstate, List *rtable) +{ + ListCell *lc; + + foreach (lc, rtable) + { + RangeTblEntry *rte = (RangeTblEntry *)lfirst(lc); + + Assert(IsA(rte, RangeTblEntry)); + APP_JUMB(rte->rtekind); + switch (rte->rtekind) + { + case RTE_RELATION: + APP_JUMB(rte->relid); + break; + case RTE_SUBQUERY: + JumbleQuery(jstate, rte->subquery); + break; + case RTE_JOIN: + APP_JUMB(rte->jointype); + break; + case RTE_FUNCTION: + JumbleExpr(jstate, (Node 
*)rte->functions); + break; + case RTE_VALUES: + JumbleExpr(jstate, (Node *)rte->values_lists); + break; + case RTE_CTE: + + /* + * Depending on the CTE name here isn't ideal, but it's the + * only info we have to identify the referenced WITH item. + */ + APP_JUMB_STRING(rte->ctename); + APP_JUMB(rte->ctelevelsup); + break; + default: + elog(ERROR, "unrecognized RTE kind: %d", (int)rte->rtekind); + break; + } + } +} + +/* + * Jumble an expression tree + * + * In general this function should handle all the same node types that + * expression_tree_walker() does, and therefore it's coded to be as parallel + * to that function as possible. However, since we are only invoked on + * queries immediately post-parse-analysis, we need not handle node types + * that only appear in planning. + * + * Note: the reason we don't simply use expression_tree_walker() is that the + * point of that function is to support tree walkers that don't care about + * most tree node types, but here we care about all types. We should complain + * about any unrecognized node type. + */ +static void +JumbleExpr(pgssJumbleState *jstate, Node *node) +{ + ListCell *temp; + + if (node == NULL) + return; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + /* + * We always emit the node's NodeTag, then any additional fields that are + * considered significant, and then we recurse to any child nodes. 
+ */ + APP_JUMB(node->type); + + switch (nodeTag(node)) + { + case T_Var: + { + Var *var = (Var *)node; + + APP_JUMB(var->varno); + APP_JUMB(var->varattno); + APP_JUMB(var->varlevelsup); + } + break; + case T_Const: + { + Const *c = (Const *)node; + + /* We jumble only the constant's type, not its value */ + APP_JUMB(c->consttype); + /* Also, record its parse location for query normalization */ + RecordConstLocation(jstate, c->location); + } + break; + case T_Param: + { + Param *p = (Param *)node; + + APP_JUMB(p->paramkind); + APP_JUMB(p->paramid); + APP_JUMB(p->paramtype); + } + break; + case T_Aggref: + { + Aggref *expr = (Aggref *)node; + + APP_JUMB(expr->aggfnoid); + JumbleExpr(jstate, (Node *)expr->aggdirectargs); + JumbleExpr(jstate, (Node *)expr->args); + JumbleExpr(jstate, (Node *)expr->aggorder); + JumbleExpr(jstate, (Node *)expr->aggdistinct); + JumbleExpr(jstate, (Node *)expr->aggfilter); + } + break; + case T_WindowFunc: + { + WindowFunc *expr = (WindowFunc *)node; + + APP_JUMB(expr->winfnoid); + APP_JUMB(expr->winref); + JumbleExpr(jstate, (Node *)expr->args); + JumbleExpr(jstate, (Node *)expr->aggfilter); + } + break; + case T_ArrayRef: + { + ArrayRef *aref = (ArrayRef *)node; + + JumbleExpr(jstate, (Node *)aref->refupperindexpr); + JumbleExpr(jstate, (Node *)aref->reflowerindexpr); + JumbleExpr(jstate, (Node *)aref->refexpr); + JumbleExpr(jstate, (Node *)aref->refassgnexpr); + } + break; + case T_FuncExpr: + { + FuncExpr *expr = (FuncExpr *)node; + + APP_JUMB(expr->funcid); + JumbleExpr(jstate, (Node *)expr->args); + } + break; + case T_NamedArgExpr: + { + NamedArgExpr *nae = (NamedArgExpr *)node; + + APP_JUMB(nae->argnumber); + JumbleExpr(jstate, (Node *)nae->arg); + } + break; + case T_OpExpr: + case T_DistinctExpr: /* struct-equivalent to OpExpr */ + case T_NullIfExpr: /* struct-equivalent to OpExpr */ + { + OpExpr *expr = (OpExpr *)node; + + APP_JUMB(expr->opno); + JumbleExpr(jstate, (Node *)expr->args); + } + break; + case T_ScalarArrayOpExpr: + 
{ + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *)node; + + APP_JUMB(expr->opno); + APP_JUMB(expr->useOr); + JumbleExpr(jstate, (Node *)expr->args); + } + break; + case T_BoolExpr: + { + BoolExpr *expr = (BoolExpr *)node; + + APP_JUMB(expr->boolop); + JumbleExpr(jstate, (Node *)expr->args); + } + break; + case T_SubLink: + { + SubLink *sublink = (SubLink *)node; + + APP_JUMB(sublink->subLinkType); + JumbleExpr(jstate, (Node *)sublink->testexpr); + JumbleQuery(jstate, (Query *)sublink->subselect); + } + break; + case T_FieldSelect: + { + FieldSelect *fs = (FieldSelect *)node; + + APP_JUMB(fs->fieldnum); + JumbleExpr(jstate, (Node *)fs->arg); + } + break; + case T_FieldStore: + { + FieldStore *fstore = (FieldStore *)node; + + JumbleExpr(jstate, (Node *)fstore->arg); + JumbleExpr(jstate, (Node *)fstore->newvals); + } + break; + case T_RelabelType: + { + RelabelType *rt = (RelabelType *)node; + + APP_JUMB(rt->resulttype); + JumbleExpr(jstate, (Node *)rt->arg); + } + break; + case T_CoerceViaIO: + { + CoerceViaIO *cio = (CoerceViaIO *)node; + + APP_JUMB(cio->resulttype); + JumbleExpr(jstate, (Node *)cio->arg); + } + break; + case T_ArrayCoerceExpr: + { + ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *)node; + + APP_JUMB(acexpr->resulttype); + JumbleExpr(jstate, (Node *)acexpr->arg); + } + break; + case T_ConvertRowtypeExpr: + { + ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *)node; + + APP_JUMB(crexpr->resulttype); + JumbleExpr(jstate, (Node *)crexpr->arg); + } + break; + case T_CollateExpr: + { + CollateExpr *ce = (CollateExpr *)node; + + APP_JUMB(ce->collOid); + JumbleExpr(jstate, (Node *)ce->arg); + } + break; + case T_CaseExpr: + { + CaseExpr *caseexpr = (CaseExpr *)node; + + JumbleExpr(jstate, (Node *)caseexpr->arg); + foreach (temp, caseexpr->args) + { + CaseWhen *when = (CaseWhen *)lfirst(temp); + + Assert(IsA(when, CaseWhen)); + JumbleExpr(jstate, (Node *)when->expr); + JumbleExpr(jstate, (Node *)when->result); + } + JumbleExpr(jstate, (Node 
*)caseexpr->defresult); + } + break; + case T_CaseTestExpr: + { + CaseTestExpr *ct = (CaseTestExpr *)node; + + APP_JUMB(ct->typeId); + } + break; + case T_ArrayExpr: + JumbleExpr(jstate, (Node *)((ArrayExpr *)node)->elements); + break; + case T_RowExpr: + JumbleExpr(jstate, (Node *)((RowExpr *)node)->args); + break; + case T_RowCompareExpr: + { + RowCompareExpr *rcexpr = (RowCompareExpr *)node; + + APP_JUMB(rcexpr->rctype); + JumbleExpr(jstate, (Node *)rcexpr->largs); + JumbleExpr(jstate, (Node *)rcexpr->rargs); + } + break; + case T_CoalesceExpr: + JumbleExpr(jstate, (Node *)((CoalesceExpr *)node)->args); + break; + case T_MinMaxExpr: + { + MinMaxExpr *mmexpr = (MinMaxExpr *)node; + + APP_JUMB(mmexpr->op); + JumbleExpr(jstate, (Node *)mmexpr->args); + } + break; + case T_XmlExpr: + { + XmlExpr *xexpr = (XmlExpr *)node; + + APP_JUMB(xexpr->op); + JumbleExpr(jstate, (Node *)xexpr->named_args); + JumbleExpr(jstate, (Node *)xexpr->args); + } + break; + case T_NullTest: + { + NullTest *nt = (NullTest *)node; + + APP_JUMB(nt->nulltesttype); + JumbleExpr(jstate, (Node *)nt->arg); + } + break; + case T_BooleanTest: + { + BooleanTest *bt = (BooleanTest *)node; + + APP_JUMB(bt->booltesttype); + JumbleExpr(jstate, (Node *)bt->arg); + } + break; + case T_CoerceToDomain: + { + CoerceToDomain *cd = (CoerceToDomain *)node; + + APP_JUMB(cd->resulttype); + JumbleExpr(jstate, (Node *)cd->arg); + } + break; + case T_CoerceToDomainValue: + { + CoerceToDomainValue *cdv = (CoerceToDomainValue *)node; + + APP_JUMB(cdv->typeId); + } + break; + case T_SetToDefault: + { + SetToDefault *sd = (SetToDefault *)node; + + APP_JUMB(sd->typeId); + } + break; + case T_CurrentOfExpr: + { + CurrentOfExpr *ce = (CurrentOfExpr *)node; + + APP_JUMB(ce->cvarno); + if (ce->cursor_name) + APP_JUMB_STRING(ce->cursor_name); + APP_JUMB(ce->cursor_param); + } + break; + case T_TargetEntry: + { + TargetEntry *tle = (TargetEntry *)node; + + APP_JUMB(tle->resno); + APP_JUMB(tle->ressortgroupref); + 
JumbleExpr(jstate, (Node *)tle->expr); + } + break; + case T_RangeTblRef: + { + RangeTblRef *rtr = (RangeTblRef *)node; + + APP_JUMB(rtr->rtindex); + } + break; + case T_JoinExpr: + { + JoinExpr *join = (JoinExpr *)node; + + APP_JUMB(join->jointype); + APP_JUMB(join->isNatural); + APP_JUMB(join->rtindex); + JumbleExpr(jstate, join->larg); + JumbleExpr(jstate, join->rarg); + JumbleExpr(jstate, join->quals); + } + break; + case T_FromExpr: + { + FromExpr *from = (FromExpr *)node; + + JumbleExpr(jstate, (Node *)from->fromlist); + JumbleExpr(jstate, from->quals); + } + break; + case T_List: + foreach (temp, (List *)node) + { + JumbleExpr(jstate, (Node *)lfirst(temp)); + } + break; + case T_SortGroupClause: + { + SortGroupClause *sgc = (SortGroupClause *)node; + + APP_JUMB(sgc->tleSortGroupRef); + APP_JUMB(sgc->eqop); + APP_JUMB(sgc->sortop); + APP_JUMB(sgc->nulls_first); + } + break; + case T_WindowClause: + { + WindowClause *wc = (WindowClause *)node; + + APP_JUMB(wc->winref); + APP_JUMB(wc->frameOptions); + JumbleExpr(jstate, (Node *)wc->partitionClause); + JumbleExpr(jstate, (Node *)wc->orderClause); + JumbleExpr(jstate, wc->startOffset); + JumbleExpr(jstate, wc->endOffset); + } + break; + case T_CommonTableExpr: + { + CommonTableExpr *cte = (CommonTableExpr *)node; + + /* we store the string name because RTE_CTE RTEs need it */ + APP_JUMB_STRING(cte->ctename); + JumbleQuery(jstate, (Query *)cte->ctequery); + } + break; + case T_SetOperationStmt: + { + SetOperationStmt *setop = (SetOperationStmt *)node; + + APP_JUMB(setop->op); + APP_JUMB(setop->all); + JumbleExpr(jstate, setop->larg); + JumbleExpr(jstate, setop->rarg); + } + break; + case T_RangeTblFunction: + { + RangeTblFunction *rtfunc = (RangeTblFunction *)node; + + JumbleExpr(jstate, rtfunc->funcexpr); + } + break; + default: + /* Only a warning, since we can stumble along anyway */ + elog(WARNING, "unrecognized node type: %d", + (int)nodeTag(node)); + break; + } +} + +/* + * Record location of constant within 
query string of query tree + * that is currently being walked. + */ +static void +RecordConstLocation(pgssJumbleState *jstate, int location) +{ + /* -1 indicates unknown or undefined location */ + if (location >= 0) + { + /* enlarge array if needed */ + if (jstate->clocations_count >= jstate->clocations_buf_size) + { + jstate->clocations_buf_size *= 2; + jstate->clocations = (pgssLocationLen *) + repalloc(jstate->clocations, + jstate->clocations_buf_size * + sizeof(pgssLocationLen)); + } + jstate->clocations[jstate->clocations_count].location = location; + /* initialize lengths to -1 to simplify fill_in_constant_lengths */ + jstate->clocations[jstate->clocations_count].length = -1; + jstate->clocations_count++; + } +} + +/* check if token should be replaced by substitute varable */ +static bool +need_replace(int token) +{ + return (token == FCONST) || (token == ICONST) || (token == SCONST) || (token == BCONST) || (token == XCONST); +} + +/* + * gen_normplan - parse execution plan using flex and replace all CONST to + * substitute variables. 
+ */ +static StringInfo +gen_normplan(const char *execution_plan) +{ + core_yyscan_t yyscanner; + core_yy_extra_type yyextra; + core_YYSTYPE yylval; + YYLTYPE yylloc; + int tok; + int bind_prefix = 1; + char *tmp_str; + YYLTYPE last_yylloc = 0; + int last_tok = 0; + StringInfo plan_out = makeStringInfo(); + ; + + yyscanner = scanner_init(execution_plan, + &yyextra, +#if PG_VERSION_NUM >= 120000 + &ScanKeywords, + ScanKeywordTokens +#else + ScanKeywords, + NumScanKeywords +#endif + ); + + for (;;) + { + /* get the next lexem */ + tok = core_yylex(&yylval, &yylloc, yyscanner); + + /* now we store end previsous lexem in yylloc - so could prcess it */ + if (need_replace(last_tok)) + { + /* substitute variable instead of CONST */ + int s_len = asprintf(&tmp_str, "$%i", bind_prefix++); + if (s_len > 0) + { + appendStringInfoString(plan_out, tmp_str); + free(tmp_str); + } + else + { + appendStringInfoString(plan_out, "??"); + } + } + else + { + /* do not change - just copy as-is */ + tmp_str = strndup((char *)execution_plan + last_yylloc, yylloc - last_yylloc); + appendStringInfoString(plan_out, tmp_str); + free(tmp_str); + } + /* check if further parsing not needed */ + if (tok == 0) + break; + last_tok = tok; + last_yylloc = yylloc; + } + + scanner_finish(yyscanner); + + return plan_out; +} + +uint64_t get_plan_id(QueryDesc *queryDesc) +{ + if (!queryDesc->sourceText) + return 0; + StringInfo normalized = gen_normplan(queryDesc->sourceText); + return hash_any((unsigned char *)normalized->data, normalized->len); +} + +/* + * Post-parse-analysis hook: mark query with a queryId + */ +void pgss_post_parse_analyze(ParseState *pstate, Query *query) +{ + pgssJumbleState jstate; + + if (prev_post_parse_analyze_hook) + prev_post_parse_analyze_hook(pstate, query); + + /* Assert we didn't do this already */ + Assert(query->queryId == 0); + + /* + * Utility statements get queryId zero. 
We do this even in cases where + * the statement contains an optimizable statement for which a queryId + * could be derived (such as EXPLAIN or DECLARE CURSOR). For such cases, + * runtime control will first go through ProcessUtility and then the + * executor, and we don't want the executor hooks to do anything, since we + * are already measuring the statement's costs at the utility level. + */ + if (query->utilityStmt) + { + query->queryId = 0; + return; + } + + /* Set up workspace for query jumbling */ + jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); + jstate.jumble_len = 0; + jstate.clocations_buf_size = 32; + jstate.clocations = (pgssLocationLen *) + palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); + jstate.clocations_count = 0; + + /* Compute query ID and mark the Query node with it */ + JumbleQuery(&jstate, query); + query->queryId = hash_any(jstate.jumble, jstate.jumble_len); + + /* + * If we are unlucky enough to get a hash of zero, use 1 instead, to + * prevent confusion with the utility-statement case. 
+ */ + if (query->queryId == 0) + query->queryId = 1; +} \ No newline at end of file diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/src/stat_statements_parser/pg_stat_statements_ya_parser.h new file mode 100644 index 00000000000..274f96aebaf --- /dev/null +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -0,0 +1,15 @@ +#pragma once + +#ifdef __cplusplus +extern "C" +{ +#endif + +extern void stat_statements_parser_init(void); +extern void stat_statements_parser_deinit(void); + +#ifdef __cplusplus +} +#endif + +uint64_t get_plan_id(QueryDesc *queryDesc); \ No newline at end of file From 5aa16ef96033ecfa04c3481faccedaca8cdaca2f Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 28 Mar 2023 13:24:05 +0300 Subject: [PATCH 007/133] Better protobuf filling code --- src/EventSender.cpp | 80 +++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 43 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 967612aa22a..bb4765adeb1 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -22,38 +22,38 @@ extern "C" namespace { -std::string get_user_name() +std::string* get_user_name() { const char *username = GetConfigOption("session_authorization", false, false); - return username ? "" : std::string(username); + return username ? new std::string(username) : nullptr; } -std::string get_db_name() +std::string* get_db_name() { char *dbname = get_database_name(MyDatabaseId); - std::string result = dbname ? std::string(dbname) : ""; + std::string* result = dbname ? 
new std::string(dbname) : nullptr; pfree(dbname); return result; } -std::string get_rg_name() +std::string* get_rg_name() { auto userId = GetUserId(); if (!OidIsValid(userId)) - return std::string(); + return nullptr; auto groupId = GetResGroupIdForRole(userId); if (!OidIsValid(groupId)) - return std::string(); + return nullptr; char *rgname = GetResGroupNameForId(groupId); if (rgname == nullptr) - return std::string(); + return nullptr; pfree(rgname); - return std::string(rgname); + return new std::string(rgname); } -std::string get_app_name() +std::string* get_app_name() { - return application_name ? std::string(application_name) : ""; + return application_name ? new std::string(application_name) : nullptr; } int get_cur_slice_id(QueryDesc *desc) @@ -75,34 +75,30 @@ google::protobuf::Timestamp current_ts() return current_ts; } -yagpcc::QueryInfoHeader create_header(QueryDesc *queryDesc) +void set_header(yagpcc::QueryInfoHeader *header, QueryDesc *queryDesc) { - yagpcc::QueryInfoHeader header; - header.set_pid(MyProcPid); - auto gpId = header.mutable_gpidentity(); + header->set_pid(MyProcPid); + auto gpId = header->mutable_gpidentity(); gpId->set_dbid(GpIdentity.dbid); gpId->set_segindex(GpIdentity.segindex); gpId->set_gp_role(static_cast(Gp_role)); gpId->set_gp_session_role(static_cast(Gp_session_role)); - header.set_ssid(gp_session_id); - header.set_ccnt(gp_command_count); - header.set_sliceid(get_cur_slice_id(queryDesc)); + header->set_ssid(gp_session_id); + header->set_ccnt(gp_command_count); + header->set_sliceid(get_cur_slice_id(queryDesc)); int32 tmid = 0; gpmon_gettmid(&tmid); - header.set_tmid(tmid); - return header; + header->set_tmid(tmid); } -yagpcc::SessionInfo get_session_info(QueryDesc *queryDesc) +void set_session_info(yagpcc::SessionInfo *si, QueryDesc *queryDesc) { - yagpcc::SessionInfo si; if (queryDesc->sourceText) - *si.mutable_sql() = std::string(queryDesc->sourceText); - *si.mutable_applicationname() = get_app_name(); - 
*si.mutable_databasename() = get_db_name(); - *si.mutable_resourcegroup() = get_rg_name(); - *si.mutable_username() = get_user_name(); - return si; + *si->mutable_sql() = std::string(queryDesc->sourceText); + si->set_allocated_applicationname(get_app_name()); + si->set_allocated_databasename(get_db_name()); + si->set_allocated_resourcegroup(get_rg_name()); + si->set_allocated_username(get_user_name()); } ExplainState get_explain_state(QueryDesc *queryDesc, bool costs) @@ -118,28 +114,26 @@ ExplainState get_explain_state(QueryDesc *queryDesc, bool costs) return es; } -std::string get_plan_text(QueryDesc *queryDesc) +void set_plan_text(std::string *plan_text, QueryDesc *queryDesc) { auto es = get_explain_state(queryDesc, true); - return std::string(es.str->data, es.str->len); + *plan_text = std::string(es.str->data, es.str->len); } -yagpcc::QueryInfo create_query_info(QueryDesc *queryDesc) +void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *queryDesc) { - yagpcc::QueryInfo qi; - *qi.mutable_sessioninfo() = get_session_info(queryDesc); + set_session_info(qi->mutable_sessioninfo(), queryDesc); if (queryDesc->sourceText) - *qi.mutable_querytext() = queryDesc->sourceText; + *qi->mutable_querytext() = queryDesc->sourceText; if (queryDesc->plannedstmt) { - qi.set_generator(queryDesc->plannedstmt->planGen == PLANGEN_OPTIMIZER + qi->set_generator(queryDesc->plannedstmt->planGen == PLANGEN_OPTIMIZER ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + set_plan_text(qi->mutable_plantext(), queryDesc); + qi->set_plan_id(get_plan_id(queryDesc)); + qi->set_query_id(queryDesc->plannedstmt->queryId); } - *qi.mutable_plantext() = get_plan_text(queryDesc); - qi.set_plan_id(get_plan_id(queryDesc)); - qi.set_query_id(queryDesc->plannedstmt->queryId); - return qi; } } // namespace @@ -149,8 +143,8 @@ void EventSender::ExecutorStart(QueryDesc *queryDesc, int /* eflags*/) yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); *req.mutable_datetime() = current_ts(); - *req.mutable_header() = create_header(queryDesc); - *req.mutable_query_info() = create_query_info(queryDesc); + set_header(req.mutable_header(), queryDesc); + set_query_info(req.mutable_query_info(), queryDesc); auto result = connector->set_metric_query(req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { @@ -169,8 +163,8 @@ void EventSender::ExecutorFinish(QueryDesc *queryDesc) yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); *req.mutable_datetime() = current_ts(); - *req.mutable_header() = create_header(queryDesc); - *req.mutable_query_info() = create_query_info(queryDesc); + set_header(req.mutable_header(), queryDesc); + set_query_info(req.mutable_query_info(), queryDesc); auto result = connector->set_metric_query(req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { From e3dde52a28b455b8c5b1720c2d3a354f74280f4f Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 28 Mar 2023 17:07:23 +0300 Subject: [PATCH 008/133] Fix segfault in plan text generator --- src/EventSender.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index bb4765adeb1..b1815a22bf8 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -130,10 +130,13 @@ void set_query_info(yagpcc::QueryInfo 
*qi, QueryDesc *queryDesc) qi->set_generator(queryDesc->plannedstmt->planGen == PLANGEN_OPTIMIZER ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - set_plan_text(qi->mutable_plantext(), queryDesc); - qi->set_plan_id(get_plan_id(queryDesc)); - qi->set_query_id(queryDesc->plannedstmt->queryId); + if (queryDesc->planstate) + { + set_plan_text(qi->mutable_plantext(), queryDesc); + qi->set_plan_id(get_plan_id(queryDesc)); + } } + qi->set_query_id(queryDesc->plannedstmt->queryId); } } // namespace From 7211556c871c7ff69a0cff7a318e02505946937b Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 29 Mar 2023 16:10:20 +0300 Subject: [PATCH 009/133] Add support of spill info and - do some minor refactoring to follow common naming convention - add additional message right after ExecutorStart hook --- Makefile | 1 + src/EventSender.cpp | 94 ++++++++++++++++++++++++------------------ src/EventSender.h | 4 +- src/SpillInfoWrapper.c | 21 ++++++++++ src/hook_wrappers.cpp | 22 +++++++--- 5 files changed, 93 insertions(+), 49 deletions(-) create mode 100644 src/SpillInfoWrapper.c diff --git a/Makefile b/Makefile index 89cc54d527d..9f2311888cc 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,7 @@ PG_STAT_OBJS := $(PG_STAT_DIR)/pg_stat_statements_ya_parser.o OBJS := $(PG_STAT_OBJS) \ $(PROTO_GEN_OBJECTS) \ + $(SRC_DIR)/SpillInfoWrapper.o \ $(SRC_DIR)/GrpcConnector.o \ $(SRC_DIR)/EventSender.o \ $(SRC_DIR)/hook_wrappers.o \ diff --git a/src/EventSender.cpp b/src/EventSender.cpp index b1815a22bf8..f0f69977fe0 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -18,6 +18,8 @@ extern "C" #include "tcop/utility.h" #include "pg_stat_statements_ya_parser.h" + +void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes); } namespace @@ -38,13 +40,13 @@ std::string* get_db_name() std::string* get_rg_name() { - auto userId = GetUserId(); - if (!OidIsValid(userId)) + auto user_id = GetUserId(); + if 
(!OidIsValid(user_id)) return nullptr; - auto groupId = GetResGroupIdForRole(userId); - if (!OidIsValid(groupId)) + auto group_id = GetResGroupIdForRole(user_id); + if (!OidIsValid(group_id)) return nullptr; - char *rgname = GetResGroupNameForId(groupId); + char *rgname = GetResGroupNameForId(group_id); if (rgname == nullptr) return nullptr; pfree(rgname); @@ -75,33 +77,33 @@ google::protobuf::Timestamp current_ts() return current_ts; } -void set_header(yagpcc::QueryInfoHeader *header, QueryDesc *queryDesc) +void set_header(yagpcc::QueryInfoHeader *header, QueryDesc *query_desc) { header->set_pid(MyProcPid); - auto gpId = header->mutable_gpidentity(); - gpId->set_dbid(GpIdentity.dbid); - gpId->set_segindex(GpIdentity.segindex); - gpId->set_gp_role(static_cast(Gp_role)); - gpId->set_gp_session_role(static_cast(Gp_session_role)); + auto gpid = header->mutable_gpidentity(); + gpid->set_dbid(GpIdentity.dbid); + gpid->set_segindex(GpIdentity.segindex); + gpid->set_gp_role(static_cast(Gp_role)); + gpid->set_gp_session_role(static_cast(Gp_session_role)); header->set_ssid(gp_session_id); header->set_ccnt(gp_command_count); - header->set_sliceid(get_cur_slice_id(queryDesc)); + header->set_sliceid(get_cur_slice_id(query_desc)); int32 tmid = 0; gpmon_gettmid(&tmid); header->set_tmid(tmid); } -void set_session_info(yagpcc::SessionInfo *si, QueryDesc *queryDesc) +void set_session_info(yagpcc::SessionInfo *si, QueryDesc *query_desc) { - if (queryDesc->sourceText) - *si->mutable_sql() = std::string(queryDesc->sourceText); + if (query_desc->sourceText) + *si->mutable_sql() = std::string(query_desc->sourceText); si->set_allocated_applicationname(get_app_name()); si->set_allocated_databasename(get_db_name()); si->set_allocated_resourcegroup(get_rg_name()); si->set_allocated_username(get_user_name()); } -ExplainState get_explain_state(QueryDesc *queryDesc, bool costs) +ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { ExplainState es; ExplainInitState(&es); @@ 
-109,74 +111,84 @@ ExplainState get_explain_state(QueryDesc *queryDesc, bool costs) es.verbose = true; es.format = EXPLAIN_FORMAT_TEXT; ExplainBeginOutput(&es); - ExplainPrintPlan(&es, queryDesc); + ExplainPrintPlan(&es, query_desc); ExplainEndOutput(&es); return es; } -void set_plan_text(std::string *plan_text, QueryDesc *queryDesc) +void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { - auto es = get_explain_state(queryDesc, true); + auto es = get_explain_state(query_desc, true); *plan_text = std::string(es.str->data, es.str->len); } -void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *queryDesc) +void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { - set_session_info(qi->mutable_sessioninfo(), queryDesc); - if (queryDesc->sourceText) - *qi->mutable_querytext() = queryDesc->sourceText; - if (queryDesc->plannedstmt) + set_session_info(qi->mutable_sessioninfo(), query_desc); + if (query_desc->sourceText) + *qi->mutable_querytext() = query_desc->sourceText; + if (query_desc->plannedstmt) { - qi->set_generator(queryDesc->plannedstmt->planGen == PLANGEN_OPTIMIZER + qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - if (queryDesc->planstate) + if (query_desc->planstate) { - set_plan_text(qi->mutable_plantext(), queryDesc); - qi->set_plan_id(get_plan_id(queryDesc)); + set_plan_text(qi->mutable_plantext(), query_desc); + qi->set_plan_id(get_plan_id(query_desc)); } } - qi->set_query_id(queryDesc->plannedstmt->queryId); + qi->set_query_id(query_desc->plannedstmt->queryId); +} + +void set_gp_metrics(yagpcc::GPMetrics* metrics, QueryDesc *query_desc) +{ + int32_t n_spill_files = 0; + int64_t n_spill_bytes = 0; + get_spill_info(gp_session_id, gp_command_count, &n_spill_files, &n_spill_bytes); + metrics->mutable_spill()->set_filecount(n_spill_files); + metrics->mutable_spill()->set_totalbytes(n_spill_bytes); } } // namespace -void EventSender::ExecutorStart(QueryDesc *queryDesc, int /* eflags*/) +void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) { - elog(DEBUG1, "Query %s start recording", queryDesc->sourceText); + elog(DEBUG1, "Query %s start recording", query_desc->sourceText); yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); *req.mutable_datetime() = current_ts(); - set_header(req.mutable_header(), queryDesc); - set_query_info(req.mutable_query_info(), queryDesc); + set_header(req.mutable_header(), query_desc); + set_query_info(req.mutable_query_info(), query_desc); auto result = connector->set_metric_query(req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { elog(WARNING, "Query %s start reporting failed with an error %s", - queryDesc->sourceText, result.error_text().c_str()); + query_desc->sourceText, result.error_text().c_str()); } else { - elog(DEBUG1, "Query %s start successful", queryDesc->sourceText); + elog(DEBUG1, "Query %s start successful", query_desc->sourceText); } } -void EventSender::ExecutorFinish(QueryDesc *queryDesc) +void EventSender::ExecutorFinish(QueryDesc *query_desc) { - 
elog(DEBUG1, "Query %s finish recording", queryDesc->sourceText); + elog(DEBUG1, "Query %s finish recording", query_desc->sourceText); yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); *req.mutable_datetime() = current_ts(); - set_header(req.mutable_header(), queryDesc); - set_query_info(req.mutable_query_info(), queryDesc); + set_header(req.mutable_header(), query_desc); + set_query_info(req.mutable_query_info(), query_desc); + set_gp_metrics(req.mutable_query_metrics(), query_desc); auto result = connector->set_metric_query(req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { elog(WARNING, "Query %s finish reporting failed with an error %s", - queryDesc->sourceText, result.error_text().c_str()); + query_desc->sourceText, result.error_text().c_str()); } else { - elog(DEBUG1, "Query %s finish successful", queryDesc->sourceText); + elog(DEBUG1, "Query %s finish successful", query_desc->sourceText); } } diff --git a/src/EventSender.h b/src/EventSender.h index 70868f6c757..bd02455ca7e 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -9,8 +9,8 @@ struct QueryDesc; class EventSender { public: - void ExecutorStart(QueryDesc *queryDesc, int eflags); - void ExecutorFinish(QueryDesc *queryDesc); + void ExecutorStart(QueryDesc *query_desc, int eflags); + void ExecutorFinish(QueryDesc *query_desc); static EventSender *instance(); private: diff --git a/src/SpillInfoWrapper.c b/src/SpillInfoWrapper.c new file mode 100644 index 00000000000..c6ace0a693f --- /dev/null +++ b/src/SpillInfoWrapper.c @@ -0,0 +1,21 @@ +#include "postgres.h" +#include "utils/workfile_mgr.h" + +void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes); + +void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes) +{ + int count = 0; + int i = 0; + workfile_set *workfiles = workfile_mgr_cache_entries_get_copy(&count); + workfile_set *wf_iter = workfiles; + for (i = 0; i < count; ++i, 
++wf_iter) + { + if (wf_iter->active && wf_iter->session_id == ssid && wf_iter->command_count == ccid) + { + *file_count += wf_iter->num_files; + *total_bytes += wf_iter->total_bytes; + } + } + pfree(workfiles); +} \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 9f3200c006f..1dabb59ab3f 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -19,8 +19,8 @@ extern "C" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; -static void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags); -static void ya_ExecutorFinish_hook(QueryDesc *queryDesc); +static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); +static void ya_ExecutorFinish_hook(QueryDesc *query_desc); #define REPLACE_HOOK(hookName) \ previous_##hookName = hookName; \ @@ -56,12 +56,22 @@ void hooks_deinit() else \ standard_##hookName(__VA_ARGS__); -void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags) +void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { - CREATE_HOOK_WRAPPER(ExecutorStart, queryDesc, eflags); + CREATE_HOOK_WRAPPER(ExecutorStart, query_desc, eflags); + PG_TRY(); + { + EventSender::instance()->ExecutorStart(query_desc, eflags); + } + PG_CATCH(); + { + ereport(WARNING, (errmsg("EventSender failed in ExecutorStart afterhook"))); + PG_RE_THROW(); + } + PG_END_TRY(); } -void ya_ExecutorFinish_hook(QueryDesc *queryDesc) +void ya_ExecutorFinish_hook(QueryDesc *query_desc) { - CREATE_HOOK_WRAPPER(ExecutorFinish, queryDesc); + CREATE_HOOK_WRAPPER(ExecutorFinish, query_desc); } \ No newline at end of file From b4b20431b3a5f5cf1cb4944f35fa687ca912f0f2 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 30 Mar 2023 15:00:11 +0300 Subject: [PATCH 010/133] Sync with proto changes + generate normalized query and plan texts --- Makefile | 4 +- protos/yagpcc_metrics.proto | 42 +-- protos/yagpcc_set_service.proto | 20 +- 
src/EventSender.cpp | 88 +++---- .../pg_stat_statements_ya_parser.c | 248 +++++++++++++++++- .../pg_stat_statements_ya_parser.h | 3 +- 6 files changed, 295 insertions(+), 110 deletions(-) diff --git a/Makefile b/Makefile index 9f2311888cc..cd5e3917899 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,8 @@ PROTOC = protoc SRC_DIR = ./src GEN_DIR = ./src/protos PROTO_DIR = ./protos -PROTO_GEN_OBJECTS = $(GEN_DIR)/yagpcc_plan.pb.o $(GEN_DIR)/yagpcc_metrics.pb.o $(GEN_DIR)/yagpcc_set_service.pb.o \ - $(GEN_DIR)/yagpcc_set_service.grpc.pb.o +PROTO_GEN_OBJECTS = $(GEN_DIR)/yagpcc_plan.pb.o $(GEN_DIR)/yagpcc_metrics.pb.o \ + $(GEN_DIR)/yagpcc_set_service.pb.o $(GEN_DIR)/yagpcc_set_service.grpc.pb.o GRPC_CPP_PLUGIN := grpc_cpp_plugin GRPC_CPP_PLUGIN_PATH ?= `which $(GRPC_CPP_PLUGIN)` diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index b7e255484c7..f00f329a208 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -27,9 +27,12 @@ message QueryInfo { PlanGenerator generator = 1; uint64 query_id = 2; uint64 plan_id = 3; - string queryText = 4; - string planText = 5; - SessionInfo sessionInfo = 6; + string query_text = 4; + string plan_text = 5; + string temlate_query_text = 6; + string temlate_plan_text = 7; + string userName = 8; + string databaseName = 9; } enum PlanGenerator @@ -45,40 +48,17 @@ message GPMetrics { SpillInfo spill = 3; } -message QueryInfoHeader { - int32 pid = 1; - GpId gpIdentity = 2; - - int32 tmid = 3; /* A time identifier for a particular query. All records associated with the query will have the same tmid. */ - int32 ssid = 4; /* The session id as shown by gp_session_id. All records associated with the query will have the same ssid */ - int32 ccnt = 5; /* The command number within this session as shown by gp_command_count. 
All records associated with the query will have the same ccnt */ - int32 sliceid = 6; /* slice identificator, 0 means general info for the whole query */ +message QueryKey { + int32 tmid = 1; /* A time identifier for a particular query. All records associated with the query will have the same tmid. */ + int32 ssid = 2; /* The session id as shown by gp_session_id. All records associated with the query will have the same ssid */ + int32 ccnt = 3; /* The command number within this session as shown by gp_command_count. All records associated with the query will have the same ccnt */ } -message GpId { +message SegmentKey { int32 dbid = 1; /* the dbid of this database */ int32 segindex = 2; /* content indicator: -1 for entry database, * 0, ..., n-1 for segment database * * a primary and its mirror have the same segIndex */ - GpRole gp_role = 3; - GpRole gp_session_role = 4; -} - -enum GpRole -{ - GP_ROLE_UNSPECIFIED = 0; - GP_ROLE_UTILITY = 1; /* Operating as a simple database engine */ - GP_ROLE_DISPATCH = 2; /* Operating as the parallel query dispatcher */ - GP_ROLE_EXECUTE = 3; /* Operating as a parallel query executor */ - GP_ROLE_UNDEFINED = 4; /* Should never see this role in use */ -} - -message SessionInfo { - string sql = 1; - string userName = 2; - string databaseName = 3; - string resourceGroup = 4; - string applicationName = 5; } message SystemStat { diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index 0bef72891ee..97c5691a6f5 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -27,19 +27,19 @@ enum MetricResponseStatusCode { } message SetQueryReq { - QueryStatus query_status = 1; + QueryStatus query_status = 1; google.protobuf.Timestamp datetime = 2; - - QueryInfoHeader header = 3; - QueryInfo query_info = 4; - GPMetrics query_metrics = 5; - repeated MetricPlan plan_tree = 6; + QueryKey query_key = 3; + QueryInfo query_info = 4; + GPMetrics query_metrics = 5; + repeated MetricPlan plan_tree = 
6; } message SetPlanNodeReq { - PlanNodeStatus node_status = 1; + PlanNodeStatus node_status = 1; google.protobuf.Timestamp datetime = 2; - QueryInfoHeader header = 3; - GPMetrics node_metrics = 4; - MetricPlan plan_node = 5; + QueryKey query_key = 3; + SegmentKey segment_key = 4; + GPMetrics node_metrics = 5; + MetricPlan plan_node = 6; } diff --git a/src/EventSender.cpp b/src/EventSender.cpp index f0f69977fe0..211dea52bab 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -6,6 +6,7 @@ extern "C" { #include "postgres.h" +#include "access/hash.h" #include "utils/metrics_utils.h" #include "utils/elog.h" #include "executor/executor.h" @@ -24,6 +25,7 @@ void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_byte namespace { + std::string* get_user_name() { const char *username = GetConfigOption("session_authorization", false, false); @@ -38,26 +40,6 @@ std::string* get_db_name() return result; } -std::string* get_rg_name() -{ - auto user_id = GetUserId(); - if (!OidIsValid(user_id)) - return nullptr; - auto group_id = GetResGroupIdForRole(user_id); - if (!OidIsValid(group_id)) - return nullptr; - char *rgname = GetResGroupNameForId(group_id); - if (rgname == nullptr) - return nullptr; - pfree(rgname); - return new std::string(rgname); -} - -std::string* get_app_name() -{ - return application_name ? 
new std::string(application_name) : nullptr; -} - int get_cur_slice_id(QueryDesc *desc) { if (!desc->estate) @@ -77,30 +59,19 @@ google::protobuf::Timestamp current_ts() return current_ts; } -void set_header(yagpcc::QueryInfoHeader *header, QueryDesc *query_desc) +void set_query_key(yagpcc::QueryKey *key, QueryDesc *query_desc) { - header->set_pid(MyProcPid); - auto gpid = header->mutable_gpidentity(); - gpid->set_dbid(GpIdentity.dbid); - gpid->set_segindex(GpIdentity.segindex); - gpid->set_gp_role(static_cast(Gp_role)); - gpid->set_gp_session_role(static_cast(Gp_session_role)); - header->set_ssid(gp_session_id); - header->set_ccnt(gp_command_count); - header->set_sliceid(get_cur_slice_id(query_desc)); + key->set_ccnt(gp_command_count); + key->set_ssid(gp_session_id); int32 tmid = 0; gpmon_gettmid(&tmid); - header->set_tmid(tmid); + key->set_tmid(tmid); } -void set_session_info(yagpcc::SessionInfo *si, QueryDesc *query_desc) +void set_segment_key(yagpcc::SegmentKey *key, QueryDesc *query_desc) { - if (query_desc->sourceText) - *si->mutable_sql() = std::string(query_desc->sourceText); - si->set_allocated_applicationname(get_app_name()); - si->set_allocated_databasename(get_db_name()); - si->set_allocated_resourcegroup(get_rg_name()); - si->set_allocated_username(get_user_name()); + key->set_dbid(GpIdentity.dbid); + key->set_segindex(GpIdentity.segindex); } ExplainState get_explain_state(QueryDesc *query_desc, bool costs) @@ -122,23 +93,37 @@ void set_plan_text(std::string *plan_text, QueryDesc *query_desc) *plan_text = std::string(es.str->data, es.str->len); } +void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) +{ + qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + set_plan_text(qi->mutable_plan_text(), query_desc); + StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); + *qi->mutable_temlate_plan_text() = std::string(norm_plan->data); + qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + //TODO: free stringinfo? +} + +void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) +{ + *qi->mutable_query_text() = query_desc->sourceText; + char* norm_query = gen_normquery(query_desc->sourceText); + *qi->mutable_temlate_query_text() = std::string(norm_query); + pfree(norm_query); +} + void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { - set_session_info(qi->mutable_sessioninfo(), query_desc); if (query_desc->sourceText) - *qi->mutable_querytext() = query_desc->sourceText; + set_query_text(qi, query_desc); if (query_desc->plannedstmt) { - qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER - ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER - : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - if (query_desc->planstate) - { - set_plan_text(qi->mutable_plantext(), query_desc); - qi->set_plan_id(get_plan_id(query_desc)); - } + set_query_plan(qi, query_desc); + qi->set_query_id(query_desc->plannedstmt->queryId); } - qi->set_query_id(query_desc->plannedstmt->queryId); + qi->set_allocated_username(get_user_name()); + qi->set_allocated_databasename(get_db_name()); } void set_gp_metrics(yagpcc::GPMetrics* metrics, QueryDesc *query_desc) @@ -157,8 +142,7 @@ void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); *req.mutable_datetime() = current_ts(); - set_header(req.mutable_header(), query_desc); - set_query_info(req.mutable_query_info(), query_desc); + set_query_key(req.mutable_query_key(), query_desc); auto result = connector->set_metric_query(req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { @@ -177,7 +161,7 @@ void EventSender::ExecutorFinish(QueryDesc *query_desc) yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); *req.mutable_datetime() = current_ts(); - set_header(req.mutable_header(), query_desc); + set_query_key(req.mutable_query_key(), query_desc); set_query_info(req.mutable_query_info(), query_desc); set_gp_metrics(req.mutable_query_metrics(), query_desc); auto result = connector->set_metric_query(req); diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index f14742337bd..ae79e7dc40a 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -1,3 +1,6 @@ +// NOTE: this file is just a bunch of code borrowed from pg_stat_statements for PG 9.4 +// and from our own inhouse implementation of pg_stat_statements for managed PG + 
#include "postgres.h" #include @@ -67,14 +70,15 @@ static void JumbleQuery(pgssJumbleState *jstate, Query *query); static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable); static void JumbleExpr(pgssJumbleState *jstate, Node *node); static void RecordConstLocation(pgssJumbleState *jstate, int location); - -static StringInfo gen_normplan(const char *execution_plan); - +static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query); +static int comp_location(const void *a, const void *b); +StringInfo gen_normplan(const char *execution_plan); static bool need_replace(int token); - void pgss_post_parse_analyze(ParseState *pstate, Query *query); +static char *generate_normalized_query(pgssJumbleState *jstate, const char *query, + int *query_len_p, int encoding); -void stat_statements_parser_init() + void stat_statements_parser_init() { prev_post_parse_analyze_hook = post_parse_analyze_hook; post_parse_analyze_hook = pgss_post_parse_analyze; @@ -650,7 +654,7 @@ need_replace(int token) * gen_normplan - parse execution plan using flex and replace all CONST to * substitute variables. 
*/ -static StringInfo +StringInfo gen_normplan(const char *execution_plan) { core_yyscan_t yyscanner; @@ -715,14 +719,6 @@ gen_normplan(const char *execution_plan) return plan_out; } -uint64_t get_plan_id(QueryDesc *queryDesc) -{ - if (!queryDesc->sourceText) - return 0; - StringInfo normalized = gen_normplan(queryDesc->sourceText); - return hash_any((unsigned char *)normalized->data, normalized->len); -} - /* * Post-parse-analysis hook: mark query with a queryId */ @@ -768,4 +764,228 @@ void pgss_post_parse_analyze(ParseState *pstate, Query *query) */ if (query->queryId == 0) query->queryId = 1; +} + +/* + * comp_location: comparator for qsorting pgssLocationLen structs by location + */ +static int +comp_location(const void *a, const void *b) +{ + int l = ((const pgssLocationLen *) a)->location; + int r = ((const pgssLocationLen *) b)->location; + + if (l < r) + return -1; + else if (l > r) + return +1; + else + return 0; +} + +/* + * Given a valid SQL string and an array of constant-location records, + * fill in the textual lengths of those constants. + * + * The constants may use any allowed constant syntax, such as float literals, + * bit-strings, single-quoted strings and dollar-quoted strings. This is + * accomplished by using the public API for the core scanner. + * + * It is the caller's job to ensure that the string is a valid SQL statement + * with constants at the indicated locations. Since in practice the string + * has already been parsed, and the locations that the caller provides will + * have originated from within the authoritative parser, this should not be + * a problem. + * + * Duplicate constant pointers are possible, and will have their lengths + * marked as '-1', so that they are later ignored. (Actually, we assume the + * lengths were initialized as -1 to start with, and don't change them here.) + * + * N.B. There is an assumption that a '-' character at a Const location begins + * a negative numeric constant. 
This precludes there ever being another + * reason for a constant to start with a '-'. + */ +static void +fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) +{ + pgssLocationLen *locs; + core_yyscan_t yyscanner; + core_yy_extra_type yyextra; + core_YYSTYPE yylval; + YYLTYPE yylloc; + int last_loc = -1; + int i; + + /* + * Sort the records by location so that we can process them in order while + * scanning the query text. + */ + if (jstate->clocations_count > 1) + qsort(jstate->clocations, jstate->clocations_count, + sizeof(pgssLocationLen), comp_location); + locs = jstate->clocations; + + /* initialize the flex scanner --- should match raw_parser() */ + yyscanner = scanner_init(query, + &yyextra, + ScanKeywords, + NumScanKeywords); + + /* Search for each constant, in sequence */ + for (i = 0; i < jstate->clocations_count; i++) + { + int loc = locs[i].location; + int tok; + + Assert(loc >= 0); + + if (loc <= last_loc) + continue; /* Duplicate constant, ignore */ + + /* Lex tokens until we find the desired constant */ + for (;;) + { + tok = core_yylex(&yylval, &yylloc, yyscanner); + + /* We should not hit end-of-string, but if we do, behave sanely */ + if (tok == 0) + break; /* out of inner for-loop */ + + /* + * We should find the token position exactly, but if we somehow + * run past it, work with that. + */ + if (yylloc >= loc) + { + if (query[loc] == '-') + { + /* + * It's a negative value - this is the one and only case + * where we replace more than a single token. + * + * Do not compensate for the core system's special-case + * adjustment of location to that of the leading '-' + * operator in the event of a negative constant. It is + * also useful for our purposes to start from the minus + * symbol. In this way, queries like "select * from foo + * where bar = 1" and "select * from foo where bar = -2" + * will have identical normalized query strings. 
+ */ + tok = core_yylex(&yylval, &yylloc, yyscanner); + if (tok == 0) + break; /* out of inner for-loop */ + } + + /* + * We now rely on the assumption that flex has placed a zero + * byte after the text of the current token in scanbuf. + */ + locs[i].length = strlen(yyextra.scanbuf + loc); + break; /* out of inner for-loop */ + } + } + + /* If we hit end-of-string, give up, leaving remaining lengths -1 */ + if (tok == 0) + break; + + last_loc = loc; + } + + scanner_finish(yyscanner); +} + +/* + * Generate a normalized version of the query string that will be used to + * represent all similar queries. + * + * Note that the normalized representation may well vary depending on + * just which "equivalent" query is used to create the hashtable entry. + * We assume this is OK. + * + * *query_len_p contains the input string length, and is updated with + * the result string length (which cannot be longer) on exit. + * + * Returns a palloc'd string. + */ +static char * +generate_normalized_query(pgssJumbleState *jstate, const char *query, + int *query_len_p, int encoding) +{ + char *norm_query; + int query_len = *query_len_p; + int i, + len_to_wrt, /* Length (in bytes) to write */ + quer_loc = 0, /* Source query byte location */ + n_quer_loc = 0, /* Normalized query byte location */ + last_off = 0, /* Offset from start for previous tok */ + last_tok_len = 0; /* Length (in bytes) of that tok */ + + /* + * Get constants' lengths (core system only gives us locations). Note + * this also ensures the items are sorted by location. 
+ */ + fill_in_constant_lengths(jstate, query); + + /* Allocate result buffer */ + norm_query = palloc(query_len + 1); + + for (i = 0; i < jstate->clocations_count; i++) + { + int off, /* Offset from start for cur tok */ + tok_len; /* Length (in bytes) of that tok */ + + off = jstate->clocations[i].location; + tok_len = jstate->clocations[i].length; + + if (tok_len < 0) + continue; /* ignore any duplicates */ + + /* Copy next chunk (what precedes the next constant) */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; + + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* And insert a '?' in place of the constant token */ + norm_query[n_quer_loc++] = '?'; + + quer_loc = off + tok_len; + last_off = off; + last_tok_len = tok_len; + } + + /* + * We've copied up until the last ignorable constant. Copy over the + * remaining bytes of the original query string. + */ + len_to_wrt = query_len - quer_loc; + + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + Assert(n_quer_loc <= query_len); + norm_query[n_quer_loc] = '\0'; + + *query_len_p = n_quer_loc; + return norm_query; +} + +char *gen_normquery(const char *query) +{ + if (!query) { + return NULL; + } + pgssJumbleState jstate; + jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); + jstate.jumble_len = 0; + jstate.clocations_buf_size = 32; + jstate.clocations = (pgssLocationLen *) + palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); + jstate.clocations_count = 0; + int query_len = strlen(query); + return generate_normalized_query(&jstate, query, &query_len, GetDatabaseEncoding()); } \ No newline at end of file diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/src/stat_statements_parser/pg_stat_statements_ya_parser.h index 274f96aebaf..aa9cd217e31 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.h +++ 
b/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -12,4 +12,5 @@ extern void stat_statements_parser_deinit(void); } #endif -uint64_t get_plan_id(QueryDesc *queryDesc); \ No newline at end of file +StringInfo gen_normplan(const char *executionPlan); +char *gen_normquery(const char *query); \ No newline at end of file From 490f1b03cbef0d1e40e461710ecc9b5a6295825e Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Sat, 1 Apr 2023 18:41:49 +0300 Subject: [PATCH 011/133] Implement the rest of statistics 1) Query instrumentation 2) /proc/self/* stats --- Makefile | 1 + src/EventSender.cpp | 36 +++++++++++++- src/ProcStats.cpp | 119 ++++++++++++++++++++++++++++++++++++++++++++ src/ProcStats.h | 7 +++ 4 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 src/ProcStats.cpp create mode 100644 src/ProcStats.h diff --git a/Makefile b/Makefile index cd5e3917899..c6d0ddd87ae 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,7 @@ PG_STAT_OBJS := $(PG_STAT_DIR)/pg_stat_statements_ya_parser.o OBJS := $(PG_STAT_OBJS) \ $(PROTO_GEN_OBJECTS) \ + $(SRC_DIR)/ProcStats.o \ $(SRC_DIR)/SpillInfoWrapper.o \ $(SRC_DIR)/GrpcConnector.o \ $(SRC_DIR)/EventSender.o \ diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 211dea52bab..d8145b811a4 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,5 +1,6 @@ #include "EventSender.h" #include "GrpcConnector.h" +#include "ProcStats.h" #include "protos/yagpcc_set_service.pb.h" #include @@ -126,18 +127,51 @@ void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) qi->set_allocated_databasename(get_db_name()); } -void set_gp_metrics(yagpcc::GPMetrics* metrics, QueryDesc *query_desc) +void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) +{ + auto instrument = query_desc->planstate->instrument; + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + 
metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); +} + +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { int32_t n_spill_files = 0; int64_t n_spill_bytes = 0; get_spill_info(gp_session_id, gp_command_count, &n_spill_files, &n_spill_bytes); metrics->mutable_spill()->set_filecount(n_spill_files); metrics->mutable_spill()->set_totalbytes(n_spill_bytes); + if (query_desc->planstate->instrument) + set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); + fill_self_stats(metrics->mutable_systemstat()); } + + } // namespace void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) { + query_desc->instrument_options |= INSTRUMENT_BUFFERS; + query_desc->instrument_options |= INSTRUMENT_ROWS; + query_desc->instrument_options |= INSTRUMENT_TIMER; + elog(DEBUG1, "Query %s start recording", query_desc->sourceText); yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp new file mode 100644 index 00000000000..34c5d05719e --- /dev/null +++ 
b/src/ProcStats.cpp @@ -0,0 +1,119 @@ +#include "ProcStats.h" +#include "yagpcc_metrics.pb.h" +#include +#include +#include + +extern "C" +{ +#include "postgres.h" +#include "utils/elog.h" +} + +namespace { +#define FILL_IO_STAT(stat_name) \ + uint64_t stat_name; \ + proc_stat >> tmp >> stat_name; \ + stats->set_##stat_name(stat_name); + +void fill_io_stats(yagpcc::SystemStat *stats) +{ + std::ifstream proc_stat("/proc/self/io"); + std::string tmp; + FILL_IO_STAT(rchar); + FILL_IO_STAT(wchar); + FILL_IO_STAT(syscr); + FILL_IO_STAT(syscw); + FILL_IO_STAT(read_bytes); + FILL_IO_STAT(write_bytes); + FILL_IO_STAT(cancelled_write_bytes); +} + +void fill_cpu_stats(yagpcc::SystemStat *stats) +{ + static const int UTIME_ID = 13; + static const int STIME_ID = 14; + static const int STARTTIME_ID = 21; + static const int VSIZE_ID = 22; + static const int RSS_ID = 23; + static const double tps = sysconf(_SC_CLK_TCK); + + double uptime; + { + std::ifstream proc_stat("/proc/uptime"); + proc_stat >> uptime; + } + + std::ifstream proc_stat("/proc/self/stat"); + std::string trash; + double start_time = 0; + for (int i = 0; i <= RSS_ID; ++i) + { + switch (i) + { + case UTIME_ID: + double utime; + proc_stat >> utime; + stats->set_usertimeseconds(utime / tps); + break; + case STIME_ID: + double stime; + proc_stat >> stime; + stats->set_kerneltimeseconds(stime / tps); + break; + case STARTTIME_ID: + uint64_t starttime; + proc_stat >> starttime; + start_time = static_cast(starttime) / tps; + break; + case VSIZE_ID: + uint64_t vsize; + proc_stat >> vsize; + stats->set_vsize(vsize); + break; + case RSS_ID: + uint64_t rss; + proc_stat >> rss; + // NOTE: this is a double AFAIU, need to double-check + stats->set_rss(rss); + break; + default: + proc_stat >> trash; + } + stats->set_runningtimeseconds(uptime - start_time); + } +} + +void fill_status_stats(yagpcc::SystemStat *stats) +{ + std::ifstream proc_stat("/proc/self/status"); + std::string key, measure; + while (proc_stat >> key) + { + if 
(key == "VmPeak:") + { + uint64_t value; + proc_stat >> value; + stats->set_vmpeakkb(value); + proc_stat >> measure; + if (measure != "kB") + elog(FATAL, "Expected memory sizes in kB, but got in %s", measure.c_str()); + } + else if (key == "VmSize:") + { + uint64_t value; + proc_stat >> value; + stats->set_vmsizekb(value); + if (measure != "kB") + elog(FATAL, "Expected memory sizes in kB, but got in %s", measure.c_str()); + } + } +} +} // namespace + +void fill_self_stats(yagpcc::SystemStat *stats) +{ + fill_io_stats(stats); + fill_cpu_stats(stats); + fill_status_stats(stats); +} \ No newline at end of file diff --git a/src/ProcStats.h b/src/ProcStats.h new file mode 100644 index 00000000000..30a90a60519 --- /dev/null +++ b/src/ProcStats.h @@ -0,0 +1,7 @@ +#pragma once + +namespace yagpcc { +class SystemStat; +} + +void fill_self_stats(yagpcc::SystemStat *stats); \ No newline at end of file From ebebc796a4ad660ab2f5f52d203c0acf4679ab85 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 6 Apr 2023 13:24:25 +0300 Subject: [PATCH 012/133] Use llvm code style --- Makefile | 4 +- src/EventSender.cpp | 367 ++++++++++++++++++++---------------------- src/EventSender.h | 13 +- src/GrpcConnector.cpp | 68 ++++---- src/GrpcConnector.h | 13 +- src/ProcStats.cpp | 183 ++++++++++----------- src/hook_wrappers.cpp | 83 +++++----- 7 files changed, 340 insertions(+), 391 deletions(-) diff --git a/Makefile b/Makefile index c6d0ddd87ae..6103e3bebce 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD -override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -ggdb -Wall -Wpointer-arith -Wendif-labels 
-Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fno-aggressive-loop-optimizations -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -fPIC -Iinclude -Isrc/protos -Isrc -g -DGPBUILD +override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD +override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -ggdb -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -fPIC -Iinclude -Isrc/protos -Isrc -g -DGPBUILD COMMON_CPP_FLAGS := -Isrc -Iinclude -Isrc/stat_statements_parser PG_CXXFLAGS += $(COMMON_CPP_FLAGS) SHLIB_LINK += -lprotobuf -lgrpc++ diff --git a/src/EventSender.cpp b/src/EventSender.cpp index d8145b811a4..b7c3cd70b85 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -4,8 +4,7 @@ #include "protos/yagpcc_set_service.pb.h" #include -extern "C" -{ +extern "C" { #include "postgres.h" #include "access/hash.h" #include "utils/metrics_utils.h" @@ -21,202 +20,178 @@ extern "C" #include "tcop/utility.h" #include "pg_stat_statements_ya_parser.h" -void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes); -} - -namespace -{ - -std::string* get_user_name() -{ - const char *username = GetConfigOption("session_authorization", false, false); - return username ? new std::string(username) : nullptr; -} - -std::string* get_db_name() -{ - char *dbname = get_database_name(MyDatabaseId); - std::string* result = dbname ? 
new std::string(dbname) : nullptr; - pfree(dbname); - return result; -} - -int get_cur_slice_id(QueryDesc *desc) -{ - if (!desc->estate) - { - return 0; - } - return LocallyExecutingSliceIndex(desc->estate); -} - -google::protobuf::Timestamp current_ts() -{ - google::protobuf::Timestamp current_ts; - struct timeval tv; - gettimeofday(&tv, nullptr); - current_ts.set_seconds(tv.tv_sec); - current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); - return current_ts; -} - -void set_query_key(yagpcc::QueryKey *key, QueryDesc *query_desc) -{ - key->set_ccnt(gp_command_count); - key->set_ssid(gp_session_id); - int32 tmid = 0; - gpmon_gettmid(&tmid); - key->set_tmid(tmid); -} - -void set_segment_key(yagpcc::SegmentKey *key, QueryDesc *query_desc) -{ - key->set_dbid(GpIdentity.dbid); - key->set_segindex(GpIdentity.segindex); -} - -ExplainState get_explain_state(QueryDesc *query_desc, bool costs) -{ - ExplainState es; - ExplainInitState(&es); - es.costs = costs; - es.verbose = true; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); - ExplainPrintPlan(&es, query_desc); - ExplainEndOutput(&es); - return es; -} - -void set_plan_text(std::string *plan_text, QueryDesc *query_desc) -{ - auto es = get_explain_state(query_desc, true); - *plan_text = std::string(es.str->data, es.str->len); -} - -void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) -{ - qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER - ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER - : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - set_plan_text(qi->mutable_plan_text(), query_desc); - StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); - *qi->mutable_temlate_plan_text() = std::string(norm_plan->data); - qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - //TODO: free stringinfo? 
-} - -void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) -{ - *qi->mutable_query_text() = query_desc->sourceText; - char* norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_temlate_query_text() = std::string(norm_query); - pfree(norm_query); -} - -void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) -{ - if (query_desc->sourceText) - set_query_text(qi, query_desc); - if (query_desc->plannedstmt) - { - set_query_plan(qi, query_desc); - qi->set_query_id(query_desc->plannedstmt->queryId); - } - qi->set_allocated_username(get_user_name()); - qi->set_allocated_databasename(get_db_name()); -} - -void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) -{ - auto instrument = query_desc->planstate->instrument; - metrics->set_ntuples(instrument->ntuples); - metrics->set_nloops(instrument->nloops); - metrics->set_tuplecount(instrument->tuplecount); - metrics->set_firsttuple(instrument->firsttuple); - metrics->set_startup(instrument->startup); - metrics->set_total(instrument->total); - auto &buffusage = instrument->bufusage; - metrics->set_shared_blks_hit(buffusage.shared_blks_hit); - metrics->set_shared_blks_read(buffusage.shared_blks_read); - metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); - metrics->set_shared_blks_written(buffusage.shared_blks_written); - metrics->set_local_blks_hit(buffusage.local_blks_hit); - metrics->set_local_blks_read(buffusage.local_blks_read); - metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); - metrics->set_local_blks_written(buffusage.local_blks_written); - metrics->set_temp_blks_read(buffusage.temp_blks_read); - metrics->set_temp_blks_written(buffusage.temp_blks_written); - metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); - metrics->set_blk_write_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); -} - -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) -{ - int32_t n_spill_files = 0; 
- int64_t n_spill_bytes = 0; - get_spill_info(gp_session_id, gp_command_count, &n_spill_files, &n_spill_bytes); - metrics->mutable_spill()->set_filecount(n_spill_files); - metrics->mutable_spill()->set_totalbytes(n_spill_bytes); - if (query_desc->planstate->instrument) - set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); - fill_self_stats(metrics->mutable_systemstat()); +void get_spill_info(int ssid, int ccid, int32_t *file_count, + int64_t *total_bytes); } +namespace { + +std::string *get_user_name() { + const char *username = GetConfigOption("session_authorization", false, false); + return username ? new std::string(username) : nullptr; +} + +std::string *get_db_name() { + char *dbname = get_database_name(MyDatabaseId); + std::string *result = dbname ? new std::string(dbname) : nullptr; + pfree(dbname); + return result; +} + +int get_cur_slice_id(QueryDesc *desc) { + if (!desc->estate) { + return 0; + } + return LocallyExecutingSliceIndex(desc->estate); +} + +google::protobuf::Timestamp current_ts() { + google::protobuf::Timestamp current_ts; + struct timeval tv; + gettimeofday(&tv, nullptr); + current_ts.set_seconds(tv.tv_sec); + current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); + return current_ts; +} + +void set_query_key(yagpcc::QueryKey *key, QueryDesc *query_desc) { + key->set_ccnt(gp_command_count); + key->set_ssid(gp_session_id); + int32 tmid = 0; + gpmon_gettmid(&tmid); + key->set_tmid(tmid); +} + +void set_segment_key(yagpcc::SegmentKey *key, QueryDesc *query_desc) { + key->set_dbid(GpIdentity.dbid); + key->set_segindex(GpIdentity.segindex); +} + +ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { + ExplainState es; + ExplainInitState(&es); + es.costs = costs; + es.verbose = true; + es.format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(&es); + ExplainPrintPlan(&es, query_desc); + ExplainEndOutput(&es); + return es; +} + +void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { + auto es = 
get_explain_state(query_desc, true); + *plan_text = std::string(es.str->data, es.str->len); +} + +void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { + qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + set_plan_text(qi->mutable_plan_text(), query_desc); + StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); + *qi->mutable_temlate_plan_text() = std::string(norm_plan->data); + qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + // TODO: free stringinfo? +} + +void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { + *qi->mutable_query_text() = query_desc->sourceText; + char *norm_query = gen_normquery(query_desc->sourceText); + *qi->mutable_temlate_query_text() = std::string(norm_query); + pfree(norm_query); +} + +void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { + if (query_desc->sourceText) { + set_query_text(qi, query_desc); + } + if (query_desc->plannedstmt) { + set_query_plan(qi, query_desc); + qi->set_query_id(query_desc->plannedstmt->queryId); + } + qi->set_allocated_username(get_user_name()); + qi->set_allocated_databasename(get_db_name()); +} + +void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, + QueryDesc *query_desc) { + auto instrument = query_desc->planstate->instrument; + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + 
metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); +} + +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { + int32_t n_spill_files = 0; + int64_t n_spill_bytes = 0; + get_spill_info(gp_session_id, gp_command_count, &n_spill_files, + &n_spill_bytes); + metrics->mutable_spill()->set_filecount(n_spill_files); + metrics->mutable_spill()->set_totalbytes(n_spill_bytes); + if (query_desc->planstate->instrument) { + set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); + } + fill_self_stats(metrics->mutable_systemstat()); +} } // namespace -void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) -{ - query_desc->instrument_options |= INSTRUMENT_BUFFERS; - query_desc->instrument_options |= INSTRUMENT_ROWS; - query_desc->instrument_options |= INSTRUMENT_TIMER; - - elog(DEBUG1, "Query %s start recording", query_desc->sourceText); - yagpcc::SetQueryReq req; - req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - auto result = connector->set_metric_query(req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) - { - elog(WARNING, "Query %s start reporting failed with an error %s", - query_desc->sourceText, result.error_text().c_str()); - } - else - { - elog(DEBUG1, "Query %s start successful", query_desc->sourceText); - } -} - -void 
EventSender::ExecutorFinish(QueryDesc *query_desc) -{ - elog(DEBUG1, "Query %s finish recording", query_desc->sourceText); - yagpcc::SetQueryReq req; - req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - set_query_info(req.mutable_query_info(), query_desc); - set_gp_metrics(req.mutable_query_metrics(), query_desc); - auto result = connector->set_metric_query(req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) - { - elog(WARNING, "Query %s finish reporting failed with an error %s", - query_desc->sourceText, result.error_text().c_str()); - } - else - { - elog(DEBUG1, "Query %s finish successful", query_desc->sourceText); - } -} - -EventSender *EventSender::instance() -{ - static EventSender sender; - return &sender; -} - -EventSender::EventSender() -{ - connector = std::make_unique(); -} \ No newline at end of file +void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) { + query_desc->instrument_options |= INSTRUMENT_BUFFERS; + query_desc->instrument_options |= INSTRUMENT_ROWS; + query_desc->instrument_options |= INSTRUMENT_TIMER; + + elog(DEBUG1, "Query %s start recording", query_desc->sourceText); + yagpcc::SetQueryReq req; + req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key(), query_desc); + auto result = connector->set_metric_query(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { + elog(WARNING, "Query %s start reporting failed with an error %s", + query_desc->sourceText, result.error_text().c_str()); + } else { + elog(DEBUG1, "Query %s start successful", query_desc->sourceText); + } +} + +void EventSender::ExecutorFinish(QueryDesc *query_desc) { + elog(DEBUG1, "Query %s finish recording", query_desc->sourceText); + yagpcc::SetQueryReq req; + 
req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key(), query_desc); + set_query_info(req.mutable_query_info(), query_desc); + set_gp_metrics(req.mutable_query_metrics(), query_desc); + auto result = connector->set_metric_query(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { + elog(WARNING, "Query %s finish reporting failed with an error %s", + query_desc->sourceText, result.error_text().c_str()); + } else { + elog(DEBUG1, "Query %s finish successful", query_desc->sourceText); + } +} + +EventSender *EventSender::instance() { + static EventSender sender; + return &sender; +} + +EventSender::EventSender() { connector = std::make_unique(); } \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index bd02455ca7e..d69958db9b0 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -6,14 +6,13 @@ class GrpcConnector; struct QueryDesc; -class EventSender -{ +class EventSender { public: - void ExecutorStart(QueryDesc *query_desc, int eflags); - void ExecutorFinish(QueryDesc *query_desc); - static EventSender *instance(); + void ExecutorStart(QueryDesc *query_desc, int eflags); + void ExecutorFinish(QueryDesc *query_desc); + static EventSender *instance(); private: - EventSender(); - std::unique_ptr connector; + EventSender(); + std::unique_ptr connector; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 7329f392010..1a820404428 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -5,51 +5,43 @@ #include #include -class GrpcConnector::Impl -{ +class GrpcConnector::Impl { public: - Impl() - { - GOOGLE_PROTOBUF_VERIFY_VERSION; - this->stub = yagpcc::SetQueryInfo::NewStub(grpc::CreateChannel( - SOCKET_FILE, grpc::InsecureChannelCredentials())); + Impl() { + GOOGLE_PROTOBUF_VERIFY_VERSION; + this->stub = yagpcc::SetQueryInfo::NewStub( + grpc::CreateChannel(SOCKET_FILE, 
grpc::InsecureChannelCredentials())); + } + + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { + yagpcc::MetricResponse response; + grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::milliseconds(50); + context.set_deadline(deadline); + + grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); + + if (!status.ok()) { + response.set_error_text("Connection lost: " + status.error_message() + + "; " + status.error_details()); + response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); } - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) - { - yagpcc::MetricResponse response; - grpc::ClientContext context; - auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(50); - context.set_deadline(deadline); - - grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - - if (!status.ok()) - { - response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); - response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); - } - - return response; - } + return response; + } private: - const std::string SOCKET_FILE = "unix:///tmp/yagpcc_agent.sock"; - const std::string TCP_ADDRESS = "127.0.0.1:1432"; - std::unique_ptr stub; + const std::string SOCKET_FILE = "unix:///tmp/yagpcc_agent.sock"; + const std::string TCP_ADDRESS = "127.0.0.1:1432"; + std::unique_ptr stub; }; -GrpcConnector::GrpcConnector() -{ - impl = new Impl(); -} +GrpcConnector::GrpcConnector() { impl = new Impl(); } -GrpcConnector::~GrpcConnector() -{ - delete impl; -} +GrpcConnector::~GrpcConnector() { delete impl; } -yagpcc::MetricResponse GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) -{ - return impl->set_metric_query(req); +yagpcc::MetricResponse +GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) { + return impl->set_metric_query(req); } \ No newline at end of file diff --git a/src/GrpcConnector.h 
b/src/GrpcConnector.h index dc0f21706a3..810c0bd3e15 100644 --- a/src/GrpcConnector.h +++ b/src/GrpcConnector.h @@ -2,14 +2,13 @@ #include "yagpcc_set_service.pb.h" -class GrpcConnector -{ +class GrpcConnector { public: - GrpcConnector(); - ~GrpcConnector(); - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req); + GrpcConnector(); + ~GrpcConnector(); + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req); private: - class Impl; - Impl *impl; + class Impl; + Impl *impl; }; \ No newline at end of file diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index 34c5d05719e..5c64f25ec09 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -4,116 +4,109 @@ #include #include -extern "C" -{ +extern "C" { #include "postgres.h" #include "utils/elog.h" } namespace { -#define FILL_IO_STAT(stat_name) \ - uint64_t stat_name; \ - proc_stat >> tmp >> stat_name; \ - stats->set_##stat_name(stat_name); +#define FILL_IO_STAT(stat_name) \ + uint64_t stat_name; \ + proc_stat >> tmp >> stat_name; \ + stats->set_##stat_name(stat_name); -void fill_io_stats(yagpcc::SystemStat *stats) -{ - std::ifstream proc_stat("/proc/self/io"); - std::string tmp; - FILL_IO_STAT(rchar); - FILL_IO_STAT(wchar); - FILL_IO_STAT(syscr); - FILL_IO_STAT(syscw); - FILL_IO_STAT(read_bytes); - FILL_IO_STAT(write_bytes); - FILL_IO_STAT(cancelled_write_bytes); +void fill_io_stats(yagpcc::SystemStat *stats) { + std::ifstream proc_stat("/proc/self/io"); + std::string tmp; + FILL_IO_STAT(rchar); + FILL_IO_STAT(wchar); + FILL_IO_STAT(syscr); + FILL_IO_STAT(syscw); + FILL_IO_STAT(read_bytes); + FILL_IO_STAT(write_bytes); + FILL_IO_STAT(cancelled_write_bytes); } -void fill_cpu_stats(yagpcc::SystemStat *stats) -{ - static const int UTIME_ID = 13; - static const int STIME_ID = 14; - static const int STARTTIME_ID = 21; - static const int VSIZE_ID = 22; - static const int RSS_ID = 23; - static const double tps = sysconf(_SC_CLK_TCK); +void fill_cpu_stats(yagpcc::SystemStat *stats) { + static const 
int UTIME_ID = 13; + static const int STIME_ID = 14; + static const int STARTTIME_ID = 21; + static const int VSIZE_ID = 22; + static const int RSS_ID = 23; + static const double tps = sysconf(_SC_CLK_TCK); - double uptime; - { - std::ifstream proc_stat("/proc/uptime"); - proc_stat >> uptime; - } + double uptime; + { + std::ifstream proc_stat("/proc/uptime"); + proc_stat >> uptime; + } - std::ifstream proc_stat("/proc/self/stat"); - std::string trash; - double start_time = 0; - for (int i = 0; i <= RSS_ID; ++i) - { - switch (i) - { - case UTIME_ID: - double utime; - proc_stat >> utime; - stats->set_usertimeseconds(utime / tps); - break; - case STIME_ID: - double stime; - proc_stat >> stime; - stats->set_kerneltimeseconds(stime / tps); - break; - case STARTTIME_ID: - uint64_t starttime; - proc_stat >> starttime; - start_time = static_cast(starttime) / tps; - break; - case VSIZE_ID: - uint64_t vsize; - proc_stat >> vsize; - stats->set_vsize(vsize); - break; - case RSS_ID: - uint64_t rss; - proc_stat >> rss; - // NOTE: this is a double AFAIU, need to double-check - stats->set_rss(rss); - break; - default: - proc_stat >> trash; - } - stats->set_runningtimeseconds(uptime - start_time); + std::ifstream proc_stat("/proc/self/stat"); + std::string trash; + double start_time = 0; + for (int i = 0; i <= RSS_ID; ++i) { + switch (i) { + case UTIME_ID: + double utime; + proc_stat >> utime; + stats->set_usertimeseconds(utime / tps); + break; + case STIME_ID: + double stime; + proc_stat >> stime; + stats->set_kerneltimeseconds(stime / tps); + break; + case STARTTIME_ID: + uint64_t starttime; + proc_stat >> starttime; + start_time = static_cast(starttime) / tps; + break; + case VSIZE_ID: + uint64_t vsize; + proc_stat >> vsize; + stats->set_vsize(vsize); + break; + case RSS_ID: + uint64_t rss; + proc_stat >> rss; + // NOTE: this is a double AFAIU, need to double-check + stats->set_rss(rss); + break; + default: + proc_stat >> trash; } + stats->set_runningtimeseconds(uptime - 
start_time); + } } -void fill_status_stats(yagpcc::SystemStat *stats) -{ - std::ifstream proc_stat("/proc/self/status"); - std::string key, measure; - while (proc_stat >> key) - { - if (key == "VmPeak:") - { - uint64_t value; - proc_stat >> value; - stats->set_vmpeakkb(value); - proc_stat >> measure; - if (measure != "kB") - elog(FATAL, "Expected memory sizes in kB, but got in %s", measure.c_str()); - } - else if (key == "VmSize:") - { - uint64_t value; - proc_stat >> value; - stats->set_vmsizekb(value); - if (measure != "kB") - elog(FATAL, "Expected memory sizes in kB, but got in %s", measure.c_str()); - } +void fill_status_stats(yagpcc::SystemStat *stats) { + std::ifstream proc_stat("/proc/self/status"); + std::string key, measure; + while (proc_stat >> key) { + if (key == "VmPeak:") { + uint64_t value; + proc_stat >> value; + stats->set_vmpeakkb(value); + proc_stat >> measure; + if (measure != "kB") { + elog(FATAL, "Expected memory sizes in kB, but got in %s", + measure.c_str()); + } + } else if (key == "VmSize:") { + uint64_t value; + proc_stat >> value; + stats->set_vmsizekb(value); + if (measure != "kB") { + elog(FATAL, "Expected memory sizes in kB, but got in %s", + measure.c_str()); + } } + } } } // namespace -void fill_self_stats(yagpcc::SystemStat *stats) -{ - fill_io_stats(stats); - fill_cpu_stats(stats); - fill_status_stats(stats); +void fill_self_stats(yagpcc::SystemStat *stats) { + fill_io_stats(stats); + fill_cpu_stats(stats); + fill_status_stats(stats); } \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 1dabb59ab3f..739cca80f01 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,8 +1,7 @@ #include "hook_wrappers.h" #include "EventSender.h" -extern "C" -{ +extern "C" { #include "postgres.h" #include "utils/metrics_utils.h" #include "utils/elog.h" @@ -22,56 +21,48 @@ static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static void ya_ExecutorStart_hook(QueryDesc 
*query_desc, int eflags); static void ya_ExecutorFinish_hook(QueryDesc *query_desc); -#define REPLACE_HOOK(hookName) \ - previous_##hookName = hookName; \ - hookName = ya_##hookName; +#define REPLACE_HOOK(hookName) \ + previous_##hookName = hookName; \ + hookName = ya_##hookName; -void hooks_init() -{ - REPLACE_HOOK(ExecutorStart_hook); - REPLACE_HOOK(ExecutorFinish_hook); - stat_statements_parser_init(); +void hooks_init() { + REPLACE_HOOK(ExecutorStart_hook); + REPLACE_HOOK(ExecutorFinish_hook); + stat_statements_parser_init(); } -void hooks_deinit() -{ - ExecutorStart_hook = previous_ExecutorStart_hook; - ExecutorFinish_hook = ExecutorFinish_hook; - stat_statements_parser_deinit(); +void hooks_deinit() { + ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorFinish_hook = previous_ExecutorFinish_hook; + stat_statements_parser_deinit(); } -#define CREATE_HOOK_WRAPPER(hookName, ...) \ - PG_TRY(); \ - { \ - EventSender::instance()->hookName(__VA_ARGS__); \ - } \ - PG_CATCH(); \ - { \ - ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ - PG_RE_THROW(); \ - } \ - PG_END_TRY(); \ - if (previous_##hookName##_hook) \ - (*previous_##hookName##_hook)(__VA_ARGS__); \ - else \ - standard_##hookName(__VA_ARGS__); +#define CREATE_HOOK_WRAPPER(hookName, ...) 
\ + PG_TRY(); \ + { EventSender::instance()->hookName(__VA_ARGS__); } \ + PG_CATCH(); \ + { \ + ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ + PG_RE_THROW(); \ + } \ + PG_END_TRY(); \ + if (previous_##hookName##_hook) \ + (*previous_##hookName##_hook)(__VA_ARGS__); \ + else \ + standard_##hookName(__VA_ARGS__); -void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) -{ - CREATE_HOOK_WRAPPER(ExecutorStart, query_desc, eflags); - PG_TRY(); - { - EventSender::instance()->ExecutorStart(query_desc, eflags); - } - PG_CATCH(); - { - ereport(WARNING, (errmsg("EventSender failed in ExecutorStart afterhook"))); - PG_RE_THROW(); - } - PG_END_TRY(); +void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { + CREATE_HOOK_WRAPPER(ExecutorStart, query_desc, eflags); + PG_TRY(); + { EventSender::instance()->ExecutorStart(query_desc, eflags); } + PG_CATCH(); + { + ereport(WARNING, (errmsg("EventSender failed in ExecutorStart afterhook"))); + PG_RE_THROW(); + } + PG_END_TRY(); } -void ya_ExecutorFinish_hook(QueryDesc *query_desc) -{ - CREATE_HOOK_WRAPPER(ExecutorFinish, query_desc); +void ya_ExecutorFinish_hook(QueryDesc *query_desc) { + CREATE_HOOK_WRAPPER(ExecutorFinish, query_desc); } \ No newline at end of file From 3ebd501edcff3af8a91683d6a90f55ac4dbc153e Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 10 Apr 2023 16:01:08 +0300 Subject: [PATCH 013/133] Trace events using query_info_collect_hook It allows finer granularity than executor hooks. 
Also removed some code duplication and data duplication --- src/EventSender.cpp | 124 ++++++++++++++++++++++++++++++------------ src/EventSender.h | 13 ++++- src/GrpcConnector.h | 2 +- src/hook_wrappers.cpp | 65 +++++++++++----------- 4 files changed, 131 insertions(+), 73 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index b7c3cd70b85..0c759760c2d 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,7 +1,5 @@ -#include "EventSender.h" #include "GrpcConnector.h" #include "ProcStats.h" -#include "protos/yagpcc_set_service.pb.h" #include extern "C" { @@ -18,12 +16,14 @@ extern "C" { #include "cdb/cdbexplain.h" #include "tcop/utility.h" -#include "pg_stat_statements_ya_parser.h" +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" void get_spill_info(int ssid, int ccid, int32_t *file_count, int64_t *total_bytes); } +#include "EventSender.h" + namespace { std::string *get_user_name() { @@ -102,16 +102,19 @@ void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { pfree(norm_query); } -void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { - if (query_desc->sourceText) { - set_query_text(qi, query_desc); - } - if (query_desc->plannedstmt) { - set_query_plan(qi, query_desc); - qi->set_query_id(query_desc->plannedstmt->queryId); +void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc, + bool with_text, bool with_plan) { + if (Gp_session_role == GP_ROLE_DISPATCH) { + if (query_desc->sourceText && with_text) { + set_query_text(qi, query_desc); + } + if (query_desc->plannedstmt && with_plan) { + set_query_plan(qi, query_desc); + qi->set_query_id(query_desc->plannedstmt->queryId); + } + qi->set_allocated_username(get_user_name()); + qi->set_allocated_databasename(get_db_name()); } - qi->set_allocated_username(get_user_name()); - qi->set_allocated_databasename(get_db_name()); } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, @@ -151,41 +154,90 @@ void
set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { fill_self_stats(metrics->mutable_systemstat()); } +yagpcc::SetQueryReq get_query_req(QueryDesc *query_desc, + yagpcc::QueryStatus status) { + yagpcc::SetQueryReq req; + req.set_query_status(status); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key(), query_desc); + return req; +} + } // namespace -void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) { +void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { + switch (status) { + case METRICS_PLAN_NODE_INITIALIZE: + case METRICS_PLAN_NODE_EXECUTING: + case METRICS_PLAN_NODE_FINISHED: + // TODO + break; + case METRICS_QUERY_SUBMIT: + collect_query_submit(reinterpret_cast(arg)); + break; + case METRICS_QUERY_START: + // no-op: executor_after_start is enough + break; + case METRICS_QUERY_DONE: + collect_query_done(reinterpret_cast(arg), "done"); + break; + case METRICS_QUERY_ERROR: + collect_query_done(reinterpret_cast(arg), "error"); + break; + case METRICS_QUERY_CANCELING: + collect_query_done(reinterpret_cast(arg), "calcelling"); + break; + case METRICS_QUERY_CANCELED: + collect_query_done(reinterpret_cast(arg), "cancelled"); + break; + case METRICS_INNER_QUERY_DONE: + // TODO + break; + default: + elog(FATAL, "Unknown query status: %d", status); + } +} + +void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { + elog(DEBUG1, "Query %s started event recording", query_desc->sourceText); + auto req = get_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); + set_query_info(req.mutable_query_info(), query_desc, false, true); + send_query_info(&req, "started"); +} + +void EventSender::collect_query_submit(QueryDesc *query_desc) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; - elog(DEBUG1, "Query %s start recording", 
query_desc->sourceText); - yagpcc::SetQueryReq req; - req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - auto result = connector->set_metric_query(req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { - elog(WARNING, "Query %s start reporting failed with an error %s", - query_desc->sourceText, result.error_text().c_str()); - } else { - elog(DEBUG1, "Query %s start successful", query_desc->sourceText); - } + elog(DEBUG1, "Query %s submit event recording", query_desc->sourceText); + auto req = + get_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + set_query_info(req.mutable_query_info(), query_desc, true, false); + send_query_info(&req, "submit"); } -void EventSender::ExecutorFinish(QueryDesc *query_desc) { - elog(DEBUG1, "Query %s finish recording", query_desc->sourceText); - yagpcc::SetQueryReq req; - req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - set_query_info(req.mutable_query_info(), query_desc); +void EventSender::collect_query_done(QueryDesc *query_desc, + const std::string &status) { + elog(DEBUG1, "Query %s %s event recording", query_desc->sourceText, + status.c_str()); + auto req = get_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); + set_query_info(req.mutable_query_info(), query_desc, false, false); set_gp_metrics(req.mutable_query_metrics(), query_desc); - auto result = connector->set_metric_query(req); + send_query_info(&req, status); +} + +void EventSender::send_query_info(yagpcc::SetQueryReq *req, + const std::string &event) { + auto result = connector->set_metric_query(*req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { - elog(WARNING, "Query %s finish reporting failed with an error %s", - query_desc->sourceText, result.error_text().c_str()); + 
elog(WARNING, "Query {%d-%d-%d} %s reporting failed with an error %s", + req->query_key().tmid(), req->query_key().ssid(), + req->query_key().ccnt(), event.c_str(), result.error_text().c_str()); } else { - elog(DEBUG1, "Query %s finish successful", query_desc->sourceText); + elog(DEBUG1, "Query {%d-%d-%d} %s successfully reported", + req->query_key().tmid(), req->query_key().ssid(), + req->query_key().ccnt(), event.c_str()); } } diff --git a/src/EventSender.h b/src/EventSender.h index d69958db9b0..9c574cba9a1 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,18 +1,25 @@ #pragma once #include +#include class GrpcConnector; - struct QueryDesc; +namespace yagpcc { +class SetQueryReq; +} class EventSender { public: - void ExecutorStart(QueryDesc *query_desc, int eflags); - void ExecutorFinish(QueryDesc *query_desc); + void executor_after_start(QueryDesc *query_desc, int eflags); + void query_metrics_collect(QueryMetricsStatus status, void *arg); static EventSender *instance(); private: + void collect_query_submit(QueryDesc *query_desc); + void collect_query_done(QueryDesc *query_desc, const std::string &status); + EventSender(); + void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); std::unique_ptr connector; }; \ No newline at end of file diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h index 810c0bd3e15..4fca6960a4e 100644 --- a/src/GrpcConnector.h +++ b/src/GrpcConnector.h @@ -1,6 +1,6 @@ #pragma once -#include "yagpcc_set_service.pb.h" +#include "protos/yagpcc_set_service.pb.h" class GrpcConnector { public: diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 739cca80f01..be39c953970 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,6 +1,3 @@ -#include "hook_wrappers.h" -#include "EventSender.h" - extern "C" { #include "postgres.h" #include "utils/metrics_utils.h" @@ -14,55 +11,57 @@ extern "C" { } #include "stat_statements_parser/pg_stat_statements_ya_parser.h" +#include "hook_wrappers.h" 
+#include "EventSender.h" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; -static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; - -static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); -static void ya_ExecutorFinish_hook(QueryDesc *query_desc); +static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; -#define REPLACE_HOOK(hookName) \ - previous_##hookName = hookName; \ - hookName = ya_##hookName; +static void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags); +static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); void hooks_init() { - REPLACE_HOOK(ExecutorStart_hook); - REPLACE_HOOK(ExecutorFinish_hook); + previous_ExecutorStart_hook = ExecutorStart_hook; + ExecutorStart_hook = ya_ExecutorAfterStart_hook; + previous_query_info_collect_hook = query_info_collect_hook; + query_info_collect_hook = ya_query_info_collect_hook; stat_statements_parser_init(); } void hooks_deinit() { ExecutorStart_hook = previous_ExecutorStart_hook; - ExecutorFinish_hook = previous_ExecutorFinish_hook; + query_info_collect_hook = previous_query_info_collect_hook; stat_statements_parser_deinit(); } -#define CREATE_HOOK_WRAPPER(hookName, ...) 
\ - PG_TRY(); \ - { EventSender::instance()->hookName(__VA_ARGS__); } \ - PG_CATCH(); \ - { \ - ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ - PG_RE_THROW(); \ - } \ - PG_END_TRY(); \ - if (previous_##hookName##_hook) \ - (*previous_##hookName##_hook)(__VA_ARGS__); \ - else \ - standard_##hookName(__VA_ARGS__); - -void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { - CREATE_HOOK_WRAPPER(ExecutorStart, query_desc, eflags); +void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags) { + if (previous_ExecutorStart_hook) { + (*previous_ExecutorStart_hook)(query_desc, eflags); + } else { + standard_ExecutorStart(query_desc, eflags); + } PG_TRY(); - { EventSender::instance()->ExecutorStart(query_desc, eflags); } + { EventSender::instance()->executor_after_start(query_desc, eflags); } PG_CATCH(); { - ereport(WARNING, (errmsg("EventSender failed in ExecutorStart afterhook"))); + ereport(WARNING, + (errmsg("EventSender failed in ya_ExecutorAfterStart_hook"))); PG_RE_THROW(); } PG_END_TRY(); } -void ya_ExecutorFinish_hook(QueryDesc *query_desc) { - CREATE_HOOK_WRAPPER(ExecutorFinish, query_desc); +void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { + PG_TRY(); + { EventSender::instance()->query_metrics_collect(status, arg); } + PG_CATCH(); + { + ereport(WARNING, + (errmsg("EventSender failed in ya_query_info_collect_hook"))); + PG_RE_THROW(); + } + PG_END_TRY(); + if (previous_query_info_collect_hook) { + (*previous_query_info_collect_hook)(status, arg); + } } \ No newline at end of file From 0283406fefdc7f4713cbfd5a4da6256a40d3e98b Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 12 Apr 2023 12:37:29 +0300 Subject: [PATCH 014/133] Fix two segfaults 1. Initialize query instrumentation to NULL so that it can be properly checked later (temporary solution, need to find a proper fix) 2. Don't collect spillinfo on query end. 
Reason: a) it will always be zero and b) it could crash if we failed to enlarge a spillfile. Seems like we need some cumulative statistics for spillinfo. Need to check what explain analyze uses. --- Makefile | 2 +- src/EventSender.cpp | 88 ++++++++++++++++++++++++++------------------- 2 files changed, 52 insertions(+), 38 deletions(-) diff --git a/Makefile b/Makefile index 6103e3bebce..e95805da601 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD -override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -ggdb -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -fPIC -Iinclude -Isrc/protos -Isrc -g -DGPBUILD +override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -Iinclude -Isrc/protos -Isrc -DGPBUILD COMMON_CPP_FLAGS := -Isrc -Iinclude -Isrc/stat_statements_parser PG_CXXFLAGS += $(COMMON_CPP_FLAGS) SHLIB_LINK += -lprotobuf -lgrpc++ diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 0c759760c2d..0751d9e04e7 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -4,19 +4,20 @@ extern "C" { #include "postgres.h" + #include "access/hash.h" -#include "utils/metrics_utils.h" -#include "utils/elog.h" -#include "executor/executor.h" -#include "commands/explain.h" #include "commands/dbcommands.h" +#include "commands/explain.h"
#include "commands/resgroupcmds.h" +#include "executor/executor.h" +#include "utils/elog.h" +#include "utils/metrics_utils.h" -#include "cdb/cdbvars.h" #include "cdb/cdbexplain.h" +#include "cdb/cdbvars.h" -#include "tcop/utility.h" #include "stat_statements_parser/pg_stat_statements_ya_parser.h" +#include "tcop/utility.h" void get_spill_info(int ssid, int ccid, int32_t *file_count, int64_t *total_bytes); @@ -120,35 +121,41 @@ void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc, void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) { auto instrument = query_desc->planstate->instrument; - metrics->set_ntuples(instrument->ntuples); - metrics->set_nloops(instrument->nloops); - metrics->set_tuplecount(instrument->tuplecount); - metrics->set_firsttuple(instrument->firsttuple); - metrics->set_startup(instrument->startup); - metrics->set_total(instrument->total); - auto &buffusage = instrument->bufusage; - metrics->set_shared_blks_hit(buffusage.shared_blks_hit); - metrics->set_shared_blks_read(buffusage.shared_blks_read); - metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); - metrics->set_shared_blks_written(buffusage.shared_blks_written); - metrics->set_local_blks_hit(buffusage.local_blks_hit); - metrics->set_local_blks_read(buffusage.local_blks_read); - metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); - metrics->set_local_blks_written(buffusage.local_blks_written); - metrics->set_temp_blks_read(buffusage.temp_blks_read); - metrics->set_temp_blks_written(buffusage.temp_blks_written); - metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); - metrics->set_blk_write_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); -} - -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { - int32_t n_spill_files = 0; - int64_t n_spill_bytes = 0; - get_spill_info(gp_session_id, gp_command_count, &n_spill_files, - &n_spill_bytes); - 
metrics->mutable_spill()->set_filecount(n_spill_files); - metrics->mutable_spill()->set_totalbytes(n_spill_bytes); - if (query_desc->planstate->instrument) { + if (instrument) { + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time( + INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); + } +} + +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + bool need_spillinfo) { + if (need_spillinfo) { + int32_t n_spill_files = 0; + int64_t n_spill_bytes = 0; + get_spill_info(gp_session_id, gp_command_count, &n_spill_files, + &n_spill_bytes); + metrics->mutable_spill()->set_filecount(n_spill_files); + metrics->mutable_spill()->set_totalbytes(n_spill_bytes); + } + if (query_desc->planstate && query_desc->planstate->instrument) { set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); } fill_self_stats(metrics->mutable_systemstat()); @@ -200,6 +207,9 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { void 
EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { elog(DEBUG1, "Query %s started event recording", query_desc->sourceText); + if (query_desc->planstate) { + query_desc->planstate->instrument = nullptr; + } auto req = get_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); set_query_info(req.mutable_query_info(), query_desc, false, true); send_query_info(&req, "started"); @@ -219,11 +229,15 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { void EventSender::collect_query_done(QueryDesc *query_desc, const std::string &status) { - elog(DEBUG1, "Query %s %s event recording", query_desc->sourceText, + elog(DEBUG1, "Query %s %s event recording", + query_desc->sourceText ? query_desc->sourceText : "", status.c_str()); auto req = get_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); set_query_info(req.mutable_query_info(), query_desc, false, false); - set_gp_metrics(req.mutable_query_metrics(), query_desc); + // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to gather + // it here. It only makes sense when doing regular stat checks. 
+ set_gp_metrics(req.mutable_query_metrics(), query_desc, + /*need_spillinfo*/ false); send_query_info(&req, status); } From 22c7a2d70c61383d7437cec8605a3dfa4c65204c Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 14 Apr 2023 13:32:38 +0300 Subject: [PATCH 015/133] Don't call hooks from UTILITY mode and increase grpc timeout --- src/EventSender.cpp | 9 ++++++--- src/GrpcConnector.cpp | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 0751d9e04e7..5a68758ab03 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -173,6 +173,9 @@ yagpcc::SetQueryReq get_query_req(QueryDesc *query_desc, } // namespace void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return; + } switch (status) { case METRICS_PLAN_NODE_INITIALIZE: case METRICS_PLAN_NODE_EXECUTING: @@ -206,10 +209,10 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - elog(DEBUG1, "Query %s started event recording", query_desc->sourceText); - if (query_desc->planstate) { - query_desc->planstate->instrument = nullptr; + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return; } + elog(DEBUG1, "Query %s started event recording", query_desc->sourceText); auto req = get_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); set_query_info(req.mutable_query_info(), query_desc, false, true); send_query_info(&req, "started"); diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 1a820404428..bca1acd9ce2 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -16,8 +16,10 @@ class GrpcConnector::Impl { yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { yagpcc::MetricResponse response; grpc::ClientContext context; + // TODO: find a more secure way to send messages than 
relying on a fixed + // timeout auto deadline = - std::chrono::system_clock::now() + std::chrono::milliseconds(50); + std::chrono::system_clock::now() + std::chrono::milliseconds(200); context.set_deadline(deadline); grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); From 4a9c22ec3726ed99ccf94a328dfdffaf73e473e2 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 1 May 2023 18:21:26 +0300 Subject: [PATCH 016/133] More improvements to metrics collector 1. Sync with protobuf changes to collect segment info 2. Remove noisy logging 3. Fix some missing node types in pg_stat_statements --- protos/yagpcc_set_service.proto | 7 +++--- src/EventSender.cpp | 22 +++++++------------ .../pg_stat_statements_ya_parser.c | 21 ++++++++++++++++++ 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index 97c5691a6f5..93c2f5a01d1 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -30,9 +30,10 @@ message SetQueryReq { QueryStatus query_status = 1; google.protobuf.Timestamp datetime = 2; QueryKey query_key = 3; - QueryInfo query_info = 4; - GPMetrics query_metrics = 5; - repeated MetricPlan plan_tree = 6; + SegmentKey segment_key = 4; + QueryInfo query_info = 5; + GPMetrics query_metrics = 6; + repeated MetricPlan plan_tree = 7; } message SetPlanNodeReq { diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 5a68758ab03..5ab6bbd60df 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -161,12 +161,13 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, fill_self_stats(metrics->mutable_systemstat()); } -yagpcc::SetQueryReq get_query_req(QueryDesc *query_desc, - yagpcc::QueryStatus status) { +yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, + yagpcc::QueryStatus status) { yagpcc::SetQueryReq req; req.set_query_status(status); *req.mutable_datetime() = current_ts(); set_query_key(req.mutable_query_key(), 
query_desc); + set_segment_key(req.mutable_segment_key(), query_desc); return req; } @@ -212,8 +213,8 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; } - elog(DEBUG1, "Query %s started event recording", query_desc->sourceText); - auto req = get_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); set_query_info(req.mutable_query_info(), query_desc, false, true); send_query_info(&req, "started"); } @@ -223,19 +224,16 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { query_desc->instrument_options |= INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; - elog(DEBUG1, "Query %s submit event recording", query_desc->sourceText); auto req = - get_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); set_query_info(req.mutable_query_info(), query_desc, true, false); send_query_info(&req, "submit"); } void EventSender::collect_query_done(QueryDesc *query_desc, const std::string &status) { - elog(DEBUG1, "Query %s %s event recording", - query_desc->sourceText ? query_desc->sourceText : "", - status.c_str()); - auto req = get_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); set_query_info(req.mutable_query_info(), query_desc, false, false); // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to gather // it here. It only makes sense when doing regular stat checks. 
@@ -251,10 +249,6 @@ void EventSender::send_query_info(yagpcc::SetQueryReq *req, elog(WARNING, "Query {%d-%d-%d} %s reporting failed with an error %s", req->query_key().tmid(), req->query_key().ssid(), req->query_key().ccnt(), event.c_str(), result.error_text().c_str()); - } else { - elog(DEBUG1, "Query {%d-%d-%d} %s successfully reported", - req->query_key().tmid(), req->query_key().ssid(), - req->query_key().ccnt(), event.c_str()); } } diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index ae79e7dc40a..737e77745df 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -205,6 +205,13 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable) APP_JUMB_STRING(rte->ctename); APP_JUMB(rte->ctelevelsup); break; + /* GPDB RTEs */ + case RTE_VOID: + break; + case RTE_TABLEFUNCTION: + JumbleQuery(jstate, rte->subquery); + JumbleExpr(jstate, (Node *)rte->functions); + break; default: elog(ERROR, "unrecognized RTE kind: %d", (int)rte->rtekind); break; @@ -609,6 +616,20 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) JumbleExpr(jstate, rtfunc->funcexpr); } break; + /* GPDB nodes */ + case T_GroupingFunc: + { + GroupingFunc *grpnode = (GroupingFunc *)node; + + JumbleExpr(jstate, (Node *)grpnode->args); + } + break; + case T_Grouping: + case T_GroupId: + case T_Integer: + case T_Value: + // TODO: no idea what to do with those + break; default: /* Only a warning, since we can stumble along anyway */ elog(WARNING, "unrecognized node type: %d", From 1b667f6a2c7b5dec76b1294ebf5e660af1a48cdf Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 1 May 2023 18:44:53 +0300 Subject: [PATCH 017/133] Add debian package configuration --- debian/compat | 1 + debian/control | 11 +++++++++++ debian/postinst | 8 ++++++++ debian/rules | 10 ++++++++++ 4 files changed, 30 insertions(+) create mode 100644 debian/compat create 
mode 100644 debian/control create mode 100644 debian/postinst create mode 100644 debian/rules diff --git a/debian/compat b/debian/compat new file mode 100644 index 00000000000..ec635144f60 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +9 diff --git a/debian/control b/debian/control new file mode 100644 index 00000000000..600dd4d602e --- /dev/null +++ b/debian/control @@ -0,0 +1,11 @@ +Source: greenplum-6-yagpcc-hooks-collector-1 +Section: misc +Priority: optional +Maintainer: Maxim Smyatkin +Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), protobuf-compiler, protobuf-compiler-grpc +Standards-Version: 3.9.8 + +Package: greenplum-6-yagpcc-hooks-collector-1 +Architecture: any +Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3) +Description: Greenplum extension to send query execution metrics to yandex command center agent diff --git a/debian/postinst b/debian/postinst new file mode 100644 index 00000000000..27ddfc06a7d --- /dev/null +++ b/debian/postinst @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +GPADMIN=gpadmin +GPHOME=/opt/greenplum-db-6 + +chown -R ${GPADMIN}:${GPADMIN} ${GPHOME} diff --git a/debian/rules b/debian/rules new file mode 100644 index 00000000000..6c2c7491067 --- /dev/null +++ b/debian/rules @@ -0,0 +1,10 @@ +#!/usr/bin/make -f +# You must remove unused comment lines for the released package. 
+export DH_VERBOSE = 1 + + +export GPHOME := /opt/greenplum-db-6 +export PATH := $(GPHOME)/bin:$(PATH) + +%: + dh $@ From 804b59673bfd65d91bf483c885f062cb1ddc50d6 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 2 May 2023 12:55:18 +0300 Subject: [PATCH 018/133] Try older grpcpp headers for bionic builds --- src/GrpcConnector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index bca1acd9ce2..5a24d576de1 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -1,8 +1,8 @@ #include "GrpcConnector.h" #include "yagpcc_set_service.grpc.pb.h" -#include -#include +#include +#include #include class GrpcConnector::Impl { From 92023f06139a1550936f64faf94b6698d99e05ca Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 4 May 2023 14:34:42 +0300 Subject: [PATCH 019/133] Add missing greenplum nodes to pg_stat_statements --- .../pg_stat_statements_ya_parser.c | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index 737e77745df..a37ac0ef0bf 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -617,6 +617,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) } break; /* GPDB nodes */ + case T_GroupingClause: + { + GroupingClause *grpnode = (GroupingClause *)node; + + JumbleExpr(jstate, (Node *)grpnode->groupsets); + } + break; case T_GroupingFunc: { GroupingFunc *grpnode = (GroupingFunc *)node; @@ -628,7 +635,27 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) case T_GroupId: case T_Integer: case T_Value: - // TODO: no idea what to do with those + // TODO:seems like nothing to do with it + break; + /* GPDB-only additions, nothing to do */ + case T_PartitionBy: + case T_PartitionElem: + case T_PartitionRangeItem: + case T_PartitionBoundSpec: + case 
T_PartitionSpec: + case T_PartitionValuesSpec: + case T_AlterPartitionId: + case T_AlterPartitionCmd: + case T_InheritPartitionCmd: + case T_CreateFileSpaceStmt: + case T_FileSpaceEntry: + case T_DropFileSpaceStmt: + case T_TableValueExpr: + case T_DenyLoginInterval: + case T_DenyLoginPoint: + case T_AlterTypeStmt: + case T_SetDistributionCmd: + case T_ExpandStmtSpec: break; default: /* Only a warning, since we can stumble along anyway */ From afc3bca464ba0102178a86abdc52279f2c972518 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 4 May 2023 14:36:49 +0300 Subject: [PATCH 020/133] Move query stats reporting to ExecutorEnd hook Reason: when query info hook is called with status 'DONE' planstate is already deallocated by ExecutorEnd --- src/EventSender.cpp | 18 ++++++++++++++---- src/EventSender.h | 1 + src/hook_wrappers.cpp | 21 +++++++++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 5ab6bbd60df..2c7d102e01f 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -219,6 +219,20 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { send_query_info(&req, "started"); } +void EventSender::executor_end(QueryDesc *query_desc) { + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return; + } + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); + set_query_info(req.mutable_query_info(), query_desc, false, false); + // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to gather + // it here. It only makes sense when doing regular stat checks. 
+ set_gp_metrics(req.mutable_query_metrics(), query_desc, + /*need_spillinfo*/ false); + send_query_info(&req, "ended"); +} + void EventSender::collect_query_submit(QueryDesc *query_desc) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; @@ -235,10 +249,6 @@ void EventSender::collect_query_done(QueryDesc *query_desc, auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); set_query_info(req.mutable_query_info(), query_desc, false, false); - // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to gather - // it here. It only makes sense when doing regular stat checks. - set_gp_metrics(req.mutable_query_metrics(), query_desc, - /*need_spillinfo*/ false); send_query_info(&req, status); } diff --git a/src/EventSender.h b/src/EventSender.h index 9c574cba9a1..7a5458a0e22 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -12,6 +12,7 @@ class SetQueryReq; class EventSender { public: void executor_after_start(QueryDesc *query_desc, int eflags); + void executor_end(QueryDesc *query_desc); void query_metrics_collect(QueryMetricsStatus status, void *arg); static EventSender *instance(); diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index be39c953970..8ad1056254f 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -15,14 +15,18 @@ extern "C" { #include "EventSender.h" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; +static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; static void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags); +static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); void hooks_init() { previous_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = ya_ExecutorAfterStart_hook; + previous_ExecutorEnd_hook = 
ExecutorEnd_hook; + ExecutorEnd_hook = ya_ExecutorEnd_hook; previous_query_info_collect_hook = query_info_collect_hook; query_info_collect_hook = ya_query_info_collect_hook; stat_statements_parser_init(); @@ -30,6 +34,7 @@ void hooks_init() { void hooks_deinit() { ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorEnd_hook = previous_ExecutorEnd_hook; query_info_collect_hook = previous_query_info_collect_hook; stat_statements_parser_deinit(); } @@ -51,6 +56,22 @@ void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags) { PG_END_TRY(); } +void ya_ExecutorEnd_hook(QueryDesc *query_desc) { + PG_TRY(); + { EventSender::instance()->executor_end(query_desc); } + PG_CATCH(); + { + ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorEnd_hook"))); + PG_RE_THROW(); + } + PG_END_TRY(); + if (previous_ExecutorEnd_hook) { + (*previous_ExecutorEnd_hook)(query_desc); + } else { + standard_ExecutorEnd(query_desc); + } +} + void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { PG_TRY(); { EventSender::instance()->query_metrics_collect(status, arg); } From 8702297fd3465f5c427706f887ecd2c89faae361 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 5 May 2023 12:46:56 +0300 Subject: [PATCH 021/133] Change GRPC failure handling 1) Give higher gRPC timeouts to query dispatcher as losing messages there is more critical 2) If we've failed to send a message via gRPC we notify a background thread about it and refuse sending any new message until this thread re-establishes the lost connection --- src/GrpcConnector.cpp | 59 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 5a24d576de1..9080ec6fffe 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -4,30 +4,54 @@ #include #include #include +#include +#include +#include +#include + +extern "C" { +#include "postgres.h" +#include "cdb/cdbvars.h" +} class GrpcConnector::Impl { 
public: Impl() { GOOGLE_PROTOBUF_VERIFY_VERSION; - this->stub = yagpcc::SetQueryInfo::NewStub( - grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials())); + channel = + grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); + stub = yagpcc::SetQueryInfo::NewStub(channel); + connected = true; + done = false; + reconnect_thread = std::thread(&Impl::reconnect, this); + } + + ~Impl() { + done = true; + cv.notify_one(); + reconnect_thread.join(); } yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { yagpcc::MetricResponse response; + if (!connected) { + response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); + response.set_error_text( + "Not tracing this query connection to agent has been lost"); + return response; + } grpc::ClientContext context; - // TODO: find a more secure way to send messages than relying on a fixed - // timeout + int timeout = Gp_role == GP_ROLE_DISPATCH ? 500 : 250; auto deadline = - std::chrono::system_clock::now() + std::chrono::milliseconds(200); + std::chrono::system_clock::now() + std::chrono::milliseconds(timeout); context.set_deadline(deadline); - grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - if (!status.ok()) { response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); + connected = false; + cv.notify_one(); } return response; @@ -35,8 +59,27 @@ class GrpcConnector::Impl { private: const std::string SOCKET_FILE = "unix:///tmp/yagpcc_agent.sock"; - const std::string TCP_ADDRESS = "127.0.0.1:1432"; std::unique_ptr stub; + std::shared_ptr channel; + std::atomic_bool connected; + std::thread reconnect_thread; + std::condition_variable cv; + std::mutex mtx; + bool done; + + void reconnect() { + while (!done) { + { + std::unique_lock lock(mtx); + cv.wait(lock); + } + while (!connected && !done) { + auto deadline = + std::chrono::system_clock::now() + 
std::chrono::milliseconds(100); + connected = channel->WaitForConnected(deadline); + } + } + } }; GrpcConnector::GrpcConnector() { impl = new Impl(); } From a321053cac7ca2694634a0ccf27c764d69153663 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 12 May 2023 13:56:32 +0300 Subject: [PATCH 022/133] Track CDB metrics and track query nesting --- src/EventSender.cpp | 60 ++++++++++++++++++++++++++++-------------- src/EventSender.h | 5 ++++ src/hook_wrappers.cpp | 61 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 103 insertions(+), 23 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 2c7d102e01f..769e3e24289 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -13,6 +13,7 @@ extern "C" { #include "utils/elog.h" #include "utils/metrics_utils.h" +#include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" @@ -25,6 +26,8 @@ void get_spill_info(int ssid, int ccid, int32_t *file_count, #include "EventSender.h" +#define need_collect_metrics() (Gp_role == GP_ROLE_DISPATCH && nesting_level == 0) + namespace { std::string *get_user_name() { @@ -209,35 +212,52 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { } } +void EventSender::executor_before_start(QueryDesc *query_desc, + int /* eflags*/) { + if (need_collect_metrics()) { + instr_time starttime; + query_desc->instrument_options |= INSTRUMENT_BUFFERS; + query_desc->instrument_options |= INSTRUMENT_ROWS; + query_desc->instrument_options |= INSTRUMENT_TIMER; + query_desc->instrument_options |= INSTRUMENT_CDB; + + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + cdbexplain_showExecStatsBegin(query_desc, starttime); + } +} + void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { - return; + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + auto req = + create_query_req(query_desc, 
yagpcc::QueryStatus::QUERY_STATUS_START); + set_query_info(req.mutable_query_info(), query_desc, false, true); + send_query_info(&req, "started"); } - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); - set_query_info(req.mutable_query_info(), query_desc, false, true); - send_query_info(&req, "started"); } void EventSender::executor_end(QueryDesc *query_desc) { - if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { - return; + if (need_collect_metrics() && query_desc->totaltime) { + if (query_desc->estate->dispatcherState && + query_desc->estate->dispatcherState->primaryResults) { + cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, + DISPATCH_WAIT_NONE); + } + InstrEndLoop(query_desc->totaltime); + } + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); + set_query_info(req.mutable_query_info(), query_desc, false, false); + // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to + // gather it here. It only makes sense when doing regular stat checks. + set_gp_metrics(req.mutable_query_metrics(), query_desc, + /*need_spillinfo*/ false); + send_query_info(&req, "ended"); } - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); - set_query_info(req.mutable_query_info(), query_desc, false, false); - // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to gather - // it here. It only makes sense when doing regular stat checks. 
- set_gp_metrics(req.mutable_query_metrics(), query_desc, - /*need_spillinfo*/ false); - send_query_info(&req, "ended"); } void EventSender::collect_query_submit(QueryDesc *query_desc) { - query_desc->instrument_options |= INSTRUMENT_BUFFERS; - query_desc->instrument_options |= INSTRUMENT_ROWS; - query_desc->instrument_options |= INSTRUMENT_TIMER; - auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); set_query_info(req.mutable_query_info(), query_desc, true, false); diff --git a/src/EventSender.h b/src/EventSender.h index 7a5458a0e22..9e2ef992f81 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -11,9 +11,12 @@ class SetQueryReq; class EventSender { public: + void executor_before_start(QueryDesc *query_desc, int eflags); void executor_after_start(QueryDesc *query_desc, int eflags); void executor_end(QueryDesc *query_desc); void query_metrics_collect(QueryMetricsStatus status, void *arg); + void incr_depth() { nesting_level++; } + void decr_depth() { nesting_level--; } static EventSender *instance(); private: @@ -23,4 +26,6 @@ class EventSender { EventSender(); void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); std::unique_ptr connector; + + int nesting_level = 0; }; \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 8ad1056254f..e7a0002f3b1 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -15,16 +15,25 @@ extern "C" { #include "EventSender.h" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; +static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; +static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; -static void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags); +static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); +static 
void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, + long count); +static void ya_ExecutorFinish_hook(QueryDesc *query_desc); static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); void hooks_init() { previous_ExecutorStart_hook = ExecutorStart_hook; - ExecutorStart_hook = ya_ExecutorAfterStart_hook; + ExecutorStart_hook = ya_ExecutorStart_hook; + previous_ExecutorRun_hook = ExecutorRun_hook; + ExecutorRun_hook = ya_ExecutorRun_hook; + previous_ExecutorFinish_hook = ExecutorFinish_hook; + ExecutorFinish_hook = ya_ExecutorFinish_hook; previous_ExecutorEnd_hook = ExecutorEnd_hook; ExecutorEnd_hook = ya_ExecutorEnd_hook; previous_query_info_collect_hook = query_info_collect_hook; @@ -39,7 +48,16 @@ void hooks_deinit() { stat_statements_parser_deinit(); } -void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags) { +void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { + PG_TRY(); + { EventSender::instance()->executor_before_start(query_desc, eflags); } + PG_CATCH(); + { + ereport(WARNING, + (errmsg("EventSender failed in ya_ExecutorBeforeStart_hook"))); + PG_RE_THROW(); + } + PG_END_TRY(); if (previous_ExecutorStart_hook) { (*previous_ExecutorStart_hook)(query_desc, eflags); } else { @@ -56,6 +74,43 @@ void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags) { PG_END_TRY(); } +void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, + long count) { + EventSender::instance()->incr_depth(); + PG_TRY(); + { + if (previous_ExecutorRun_hook) + previous_ExecutorRun_hook(query_desc, direction, count); + else + standard_ExecutorRun(query_desc, direction, count); + EventSender::instance()->decr_depth(); + } + PG_CATCH(); + { + EventSender::instance()->decr_depth(); + PG_RE_THROW(); + } + PG_END_TRY(); +} + +void ya_ExecutorFinish_hook(QueryDesc *query_desc) { + EventSender::instance()->incr_depth(); + PG_TRY(); + { + if 
(previous_ExecutorFinish_hook) + previous_ExecutorFinish_hook(query_desc); + else + standard_ExecutorFinish(query_desc); + EventSender::instance()->decr_depth(); + } + PG_CATCH(); + { + EventSender::instance()->decr_depth(); + PG_RE_THROW(); + } + PG_END_TRY(); +} + void ya_ExecutorEnd_hook(QueryDesc *query_desc) { PG_TRY(); { EventSender::instance()->executor_end(query_desc); } From 21c658e8f0fd144bf6de097f1c09279ff8809075 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 15 May 2023 15:44:07 +0300 Subject: [PATCH 023/133] Change package name --- debian/control | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/control b/debian/control index 600dd4d602e..14324c37030 100644 --- a/debian/control +++ b/debian/control @@ -1,11 +1,11 @@ -Source: greenplum-6-yagpcc-hooks-collector-1 +Source: greenplum-6-yagpcc-hooks Section: misc Priority: optional Maintainer: Maxim Smyatkin Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), protobuf-compiler, protobuf-compiler-grpc Standards-Version: 3.9.8 -Package: greenplum-6-yagpcc-hooks-collector-1 +Package: greenplum-6-yagpcc-hooks Architecture: any Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3) Description: Greenplum extension to send query execution metrics to yandex command center agent From c51be234d83ac55186b0d6eeeb2a0c17fa911751 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 17 May 2023 15:02:31 +0300 Subject: [PATCH 024/133] Skip nested and utility statements --- debian/control | 2 +- src/EventSender.cpp | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 14324c37030..c740a8590ca 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: greenplum-6-yagpcc-hooks Section: misc Priority: optional Maintainer: Maxim Smyatkin -Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), protobuf-compiler, protobuf-compiler-grpc +Build-Depends: 
make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), protobuf-compiler, protobuf-compiler-grpc, libgrpc++1, libgrpc++-dev Standards-Version: 3.9.8 Package: greenplum-6-yagpcc-hooks diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 769e3e24289..73d3a39e086 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -228,6 +228,9 @@ void EventSender::executor_before_start(QueryDesc *query_desc, } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { + if (nesting_level != 0 || query_desc->utilitystmt) { + return; + } if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); @@ -237,6 +240,9 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { } void EventSender::executor_end(QueryDesc *query_desc) { + if (nesting_level != 0 || query_desc->utilitystmt) { + return; + } if (need_collect_metrics() && query_desc->totaltime) { if (query_desc->estate->dispatcherState && query_desc->estate->dispatcherState->primaryResults) { @@ -258,6 +264,9 @@ void EventSender::executor_end(QueryDesc *query_desc) { } void EventSender::collect_query_submit(QueryDesc *query_desc) { + if (nesting_level != 0 || query_desc->utilitystmt) { + return; + } auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); set_query_info(req.mutable_query_info(), query_desc, true, false); @@ -266,6 +275,9 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { void EventSender::collect_query_done(QueryDesc *query_desc, const std::string &status) { + if (nesting_level != 0 || query_desc->utilitystmt) { + return; + } auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); set_query_info(req.mutable_query_info(), query_desc, false, false); From d68c7c6a88bc7b5d15b78c1bc02f0052f87bcae0 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 2 Jun 2023 14:16:19 +0300 Subject: [PATCH 
025/133] Slightly rework query skipping Don't collect system queries with empty query text and ccnt == 0 --- src/EventSender.cpp | 62 ++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 73d3a39e086..d32f1d571e9 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -26,7 +26,9 @@ void get_spill_info(int ssid, int ccid, int32_t *file_count, #include "EventSender.h" -#define need_collect_metrics() (Gp_role == GP_ROLE_DISPATCH && nesting_level == 0) +#define need_collect() \ + (nesting_level == 0 && gp_command_count != 0 && \ + query_desc->sourceText != nullptr) namespace { @@ -106,9 +108,10 @@ void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { pfree(norm_query); } -void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc, +void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc, bool with_text, bool with_plan) { if (Gp_session_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); if (query_desc->sourceText && with_text) { set_query_text(qi, query_desc); } @@ -214,7 +217,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { void EventSender::executor_before_start(QueryDesc *query_desc, int /* eflags*/) { - if (need_collect_metrics()) { + if (Gp_role == GP_ROLE_DISPATCH && need_collect()) { instr_time starttime; query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; @@ -228,22 +231,21 @@ void EventSender::executor_before_start(QueryDesc *query_desc, } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (nesting_level != 0 || query_desc->utilitystmt) { - return; - } - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + if (Gp_role == GP_ROLE_DISPATCH || + Gp_role == GP_ROLE_EXECUTE && need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); 
- set_query_info(req.mutable_query_info(), query_desc, false, true); + set_query_info(&req, query_desc, false, true); send_query_info(&req, "started"); } } void EventSender::executor_end(QueryDesc *query_desc) { - if (nesting_level != 0 || query_desc->utilitystmt) { + if (!need_collect() || + (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { return; } - if (need_collect_metrics() && query_desc->totaltime) { + if (query_desc->totaltime) { if (query_desc->estate->dispatcherState && query_desc->estate->dispatcherState->primaryResults) { cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, @@ -251,37 +253,33 @@ void EventSender::executor_end(QueryDesc *query_desc) { } InstrEndLoop(query_desc->totaltime); } - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); - set_query_info(req.mutable_query_info(), query_desc, false, false); - // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to - // gather it here. It only makes sense when doing regular stat checks. - set_gp_metrics(req.mutable_query_metrics(), query_desc, - /*need_spillinfo*/ false); - send_query_info(&req, "ended"); - } + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); + set_query_info(&req, query_desc, false, false); + // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to + // gather it here. It only makes sense when doing regular stat checks. 
+ set_gp_metrics(req.mutable_query_metrics(), query_desc, + /*need_spillinfo*/ false); + send_query_info(&req, "ended"); } void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (nesting_level != 0 || query_desc->utilitystmt) { - return; + if (need_collect()) { + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + set_query_info(&req, query_desc, true, false); + send_query_info(&req, "submit"); } - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); - set_query_info(req.mutable_query_info(), query_desc, true, false); - send_query_info(&req, "submit"); } void EventSender::collect_query_done(QueryDesc *query_desc, const std::string &status) { - if (nesting_level != 0 || query_desc->utilitystmt) { - return; + if (need_collect()) { + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); + set_query_info(&req, query_desc, false, false); + send_query_info(&req, status); } - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - set_query_info(req.mutable_query_info(), query_desc, false, false); - send_query_info(&req, status); } void EventSender::send_query_info(yagpcc::SetQueryReq *req, From e401f140ddcf43bb5ebe82c0b15b43333f3dbd21 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 2 Jun 2023 16:22:00 +0300 Subject: [PATCH 026/133] Get resource groups back into query info --- protos/yagpcc_metrics.proto | 1 + src/EventSender.cpp | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index f00f329a208..26e0a496460 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -33,6 +33,7 @@ message QueryInfo { string temlate_plan_text = 7; string userName = 8; string databaseName = 9; + string rsgname = 10; } enum PlanGenerator diff --git a/src/EventSender.cpp b/src/EventSender.cpp index d32f1d571e9..9ad40b13b57 100644 
--- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -44,6 +44,21 @@ std::string *get_db_name() { return result; } +std::string *get_rg_name() { + auto userId = GetUserId(); + if (!OidIsValid(userId)) + return nullptr; + auto groupId = GetResGroupIdForRole(userId); + if (!OidIsValid(groupId)) + return nullptr; + char *rgname = GetResGroupNameForId(groupId); + if (rgname == nullptr) + return nullptr; + auto result = new std::string(rgname); + pfree(rgname); + return result; +} + int get_cur_slice_id(QueryDesc *desc) { if (!desc->estate) { return 0; @@ -121,6 +136,7 @@ void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc, } qi->set_allocated_username(get_user_name()); qi->set_allocated_databasename(get_db_name()); + qi->set_allocated_rsgname(get_rg_name()); } } @@ -224,6 +240,10 @@ void EventSender::executor_before_start(QueryDesc *query_desc, query_desc->instrument_options |= INSTRUMENT_TIMER; query_desc->instrument_options |= INSTRUMENT_CDB; + // TODO: there is a PR resolving some memory leak around auto-explain: + // https://github.com/greenplum-db/gpdb/pull/15164 + // Need to check if the memory leak applies here as well and fix it + Assert(query_desc->showstatctx == NULL); INSTR_TIME_SET_CURRENT(starttime); query_desc->showstatctx = cdbexplain_showExecStatsBegin(query_desc, starttime); @@ -232,7 +252,7 @@ void EventSender::executor_before_start(QueryDesc *query_desc, void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { if (Gp_role == GP_ROLE_DISPATCH || - Gp_role == GP_ROLE_EXECUTE && need_collect()) { + (Gp_role == GP_ROLE_EXECUTE && need_collect())) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); set_query_info(&req, query_desc, false, true); From b96b5e93605dd523d44de99fa8719d722a62b753 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 6 Jun 2023 17:39:32 +0300 Subject: [PATCH 027/133] Add some configuration parameters --- Makefile | 1 + src/Config.cpp | 38 
++++++++++++++++++++++++++++++++++++++ src/Config.h | 12 ++++++++++++ src/EventSender.cpp | 38 +++++++++++++++++++++++--------------- src/GrpcConnector.cpp | 13 +++++++------ src/hook_wrappers.cpp | 11 ++++++----- 6 files changed, 87 insertions(+), 26 deletions(-) create mode 100644 src/Config.cpp create mode 100644 src/Config.h diff --git a/Makefile b/Makefile index e95805da601..91bcec3203e 100644 --- a/Makefile +++ b/Makefile @@ -33,6 +33,7 @@ OBJS := $(PG_STAT_OBJS) \ $(PROTO_GEN_OBJECTS) \ $(SRC_DIR)/ProcStats.o \ $(SRC_DIR)/SpillInfoWrapper.o \ + $(SRC_DIR)/Config.o \ $(SRC_DIR)/GrpcConnector.o \ $(SRC_DIR)/EventSender.o \ $(SRC_DIR)/hook_wrappers.o \ diff --git a/src/Config.cpp b/src/Config.cpp new file mode 100644 index 00000000000..d97e5d45984 --- /dev/null +++ b/src/Config.cpp @@ -0,0 +1,38 @@ +#include "Config.h" + +extern "C" { +#include "postgres.h" +#include "utils/builtins.h" +#include "utils/guc.h" +} + +static char *guc_uds_path = nullptr; +static bool guc_enable_analyze = true; +static bool guc_enable_cdbstats = true; +static bool guc_enable_collector = true; + +void Config::init() { + DefineCustomStringVariable( + "yagpcc.uds_path", "Sets filesystem path of the agent socket", 0LL, + &guc_uds_path, "/tmp/yagpcc_agent.sock", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "yagpcc.enable", "Enable metrics collector", 0LL, &guc_enable_collector, + true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "yagpcc.enable_analyze", "Collect analyze metrics in yagpcc", 0LL, + &guc_enable_analyze, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "yagpcc.enable_cdbstats", "Collect CDB metrics in yagpcc", 0LL, + &guc_enable_cdbstats, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); +} + +std::string Config::uds_path() { return guc_uds_path; } +bool Config::enable_analyze() { 
return guc_enable_analyze; } +bool Config::enable_cdbstats() { return guc_enable_cdbstats; } +bool Config::enable_collector() { return guc_enable_collector; } diff --git a/src/Config.h b/src/Config.h new file mode 100644 index 00000000000..117481f219b --- /dev/null +++ b/src/Config.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +class Config { +public: + static void init(); + static std::string uds_path(); + static bool enable_analyze(); + static bool enable_cdbstats(); + static bool enable_collector(); +}; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 9ad40b13b57..55858ed5183 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,3 +1,4 @@ +#include "Config.h" #include "GrpcConnector.h" #include "ProcStats.h" #include @@ -28,7 +29,7 @@ void get_spill_info(int ssid, int ccid, int32_t *file_count, #define need_collect() \ (nesting_level == 0 && gp_command_count != 0 && \ - query_desc->sourceText != nullptr) + query_desc->sourceText != nullptr && Config::enable_collector()) namespace { @@ -233,26 +234,29 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { void EventSender::executor_before_start(QueryDesc *query_desc, int /* eflags*/) { - if (Gp_role == GP_ROLE_DISPATCH && need_collect()) { + if (Gp_role == GP_ROLE_DISPATCH && need_collect() && + Config::enable_analyze()) { instr_time starttime; query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; - query_desc->instrument_options |= INSTRUMENT_CDB; - - // TODO: there is a PR resolving some memory leak around auto-explain: - // https://github.com/greenplum-db/gpdb/pull/15164 - // Need to check if the memory leak applies here as well and fix it - Assert(query_desc->showstatctx == NULL); - INSTR_TIME_SET_CURRENT(starttime); - query_desc->showstatctx = - cdbexplain_showExecStatsBegin(query_desc, starttime); + if 
(Config::enable_cdbstats()) { + query_desc->instrument_options |= INSTRUMENT_CDB; + + // TODO: there is a PR resolving some memory leak around auto-explain: + // https://github.com/greenplum-db/gpdb/pull/15164 + // Need to check if the memory leak applies here as well and fix it + Assert(query_desc->showstatctx == NULL); + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + cdbexplain_showExecStatsBegin(query_desc, starttime); + } } } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (Gp_role == GP_ROLE_DISPATCH || - (Gp_role == GP_ROLE_EXECUTE && need_collect())) { + if ((Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) && + need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); set_query_info(&req, query_desc, false, true); @@ -265,7 +269,8 @@ void EventSender::executor_end(QueryDesc *query_desc) { (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { return; } - if (query_desc->totaltime) { + if (query_desc->totaltime && Config::enable_analyze() && + Config::enable_cdbstats()) { if (query_desc->estate->dispatcherState && query_desc->estate->dispatcherState->primaryResults) { cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, @@ -317,4 +322,7 @@ EventSender *EventSender::instance() { return &sender; } -EventSender::EventSender() { connector = std::make_unique(); } \ No newline at end of file +EventSender::EventSender() { + Config::init(); + connector = std::make_unique(); +} \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 9080ec6fffe..276c9ceb8a8 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -1,13 +1,14 @@ #include "GrpcConnector.h" +#include "Config.h" #include "yagpcc_set_service.grpc.pb.h" -#include +#include +#include #include +#include +#include #include -#include #include -#include -#include extern "C" { #include "postgres.h" @@ -16,7 +17,7 @@ extern "C" { class 
GrpcConnector::Impl { public: - Impl() { + Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { GOOGLE_PROTOBUF_VERIFY_VERSION; channel = grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); @@ -58,7 +59,7 @@ class GrpcConnector::Impl { } private: - const std::string SOCKET_FILE = "unix:///tmp/yagpcc_agent.sock"; + const std::string SOCKET_FILE; std::unique_ptr stub; std::shared_ptr channel; std::atomic_bool connected; diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index e7a0002f3b1..edad5798e44 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,18 +1,19 @@ extern "C" { #include "postgres.h" -#include "utils/metrics_utils.h" -#include "utils/elog.h" #include "executor/executor.h" +#include "utils/elog.h" +#include "utils/metrics_utils.h" -#include "cdb/cdbvars.h" #include "cdb/cdbexplain.h" +#include "cdb/cdbvars.h" #include "tcop/utility.h" } -#include "stat_statements_parser/pg_stat_statements_ya_parser.h" -#include "hook_wrappers.h" +#include "Config.h" #include "EventSender.h" +#include "hook_wrappers.h" +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; From f96e044d9a4c897f64d88987d6d3f096c0a07e57 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 7 Jun 2023 14:58:57 +0300 Subject: [PATCH 028/133] Capture query execution system stats instead of whole process lifetime --- src/EventSender.cpp | 14 ++++++++++++-- src/ProcStats.cpp | 28 ++++++++-------------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 55858ed5183..d47bd678da0 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -2,6 +2,7 @@ #include "GrpcConnector.h" #include "ProcStats.h" #include +#include extern "C" { #include "postgres.h" @@ -168,6 +169,8 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation 
*metrics, } } +decltype(std::chrono::high_resolution_clock::now()) query_start_time; + void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, bool need_spillinfo) { if (need_spillinfo) { @@ -182,6 +185,10 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); } fill_self_stats(metrics->mutable_systemstat()); + std::chrono::duration elapsed_seconds = + std::chrono::high_resolution_clock::now() - query_start_time; + metrics->mutable_systemstat()->set_runningtimeseconds( + elapsed_seconds.count()); } yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, @@ -234,8 +241,11 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { void EventSender::executor_before_start(QueryDesc *query_desc, int /* eflags*/) { - if (Gp_role == GP_ROLE_DISPATCH && need_collect() && - Config::enable_analyze()) { + if (!need_collect()) { + return; + } + query_start_time = std::chrono::high_resolution_clock::now(); + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { instr_time starttime; query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index 5c64f25ec09..08133b6a65f 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -13,7 +13,7 @@ namespace { #define FILL_IO_STAT(stat_name) \ uint64_t stat_name; \ proc_stat >> tmp >> stat_name; \ - stats->set_##stat_name(stat_name); + stats->set_##stat_name(stat_name - stats->stat_name()); void fill_io_stats(yagpcc::SystemStat *stats) { std::ifstream proc_stat("/proc/self/io"); @@ -30,36 +30,23 @@ void fill_io_stats(yagpcc::SystemStat *stats) { void fill_cpu_stats(yagpcc::SystemStat *stats) { static const int UTIME_ID = 13; static const int STIME_ID = 14; - static const int STARTTIME_ID = 21; static const int VSIZE_ID = 22; static const int RSS_ID = 23; static const double tps = 
sysconf(_SC_CLK_TCK); - double uptime; - { - std::ifstream proc_stat("/proc/uptime"); - proc_stat >> uptime; - } - std::ifstream proc_stat("/proc/self/stat"); std::string trash; - double start_time = 0; for (int i = 0; i <= RSS_ID; ++i) { switch (i) { case UTIME_ID: double utime; proc_stat >> utime; - stats->set_usertimeseconds(utime / tps); + stats->set_usertimeseconds(utime / tps - stats->usertimeseconds()); break; case STIME_ID: double stime; proc_stat >> stime; - stats->set_kerneltimeseconds(stime / tps); - break; - case STARTTIME_ID: - uint64_t starttime; - proc_stat >> starttime; - start_time = static_cast(starttime) / tps; + stats->set_kerneltimeseconds(stime / tps - stats->kerneltimeseconds()); break; case VSIZE_ID: uint64_t vsize; @@ -75,7 +62,6 @@ void fill_cpu_stats(yagpcc::SystemStat *stats) { default: proc_stat >> trash; } - stats->set_runningtimeseconds(uptime - start_time); } } @@ -106,7 +92,9 @@ void fill_status_stats(yagpcc::SystemStat *stats) { } // namespace void fill_self_stats(yagpcc::SystemStat *stats) { - fill_io_stats(stats); - fill_cpu_stats(stats); - fill_status_stats(stats); + static yagpcc::SystemStat prev_stats; + fill_io_stats(&prev_stats); + fill_cpu_stats(&prev_stats); + fill_status_stats(&prev_stats); + *stats = prev_stats; } \ No newline at end of file From c63a680870a1ce3c2dc6e833683a2dbd75935004 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 9 Jun 2023 11:41:17 +0300 Subject: [PATCH 029/133] Don't rethrow errors from metrics collector Rethrowing them might break other extensions and even query execution pipeline itself --- src/hook_wrappers.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index edad5798e44..0ae8359d820 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -56,7 +56,6 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { { ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorBeforeStart_hook"))); - 
PG_RE_THROW(); } PG_END_TRY(); if (previous_ExecutorStart_hook) { @@ -70,7 +69,6 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { { ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorAfterStart_hook"))); - PG_RE_THROW(); } PG_END_TRY(); } @@ -116,10 +114,7 @@ void ya_ExecutorEnd_hook(QueryDesc *query_desc) { PG_TRY(); { EventSender::instance()->executor_end(query_desc); } PG_CATCH(); - { - ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorEnd_hook"))); - PG_RE_THROW(); - } + { ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorEnd_hook"))); } PG_END_TRY(); if (previous_ExecutorEnd_hook) { (*previous_ExecutorEnd_hook)(query_desc); @@ -135,7 +130,6 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { { ereport(WARNING, (errmsg("EventSender failed in ya_query_info_collect_hook"))); - PG_RE_THROW(); } PG_END_TRY(); if (previous_query_info_collect_hook) { From bfefe9f77f2d25f727879c33e8efc3a3e09eb64d Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Sat, 10 Jun 2023 12:18:28 +0300 Subject: [PATCH 030/133] Add forgotten hooks deinitialization --- src/hook_wrappers.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 0ae8359d820..a904dc9bafd 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -44,6 +44,8 @@ void hooks_init() { void hooks_deinit() { ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorRun_hook = previous_ExecutorRun_hook; + ExecutorFinish_hook = previous_ExecutorFinish_hook; ExecutorEnd_hook = previous_ExecutorEnd_hook; query_info_collect_hook = previous_query_info_collect_hook; stat_statements_parser_deinit(); From 2a214243b6d8f75888fd5991f1f457302257add7 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 13 Jun 2023 08:58:03 +0300 Subject: [PATCH 031/133] Misc: use modern style error messages --- src/EventSender.cpp | 10 ++++++---- src/ProcStats.cpp | 8 ++++---- .../pg_stat_statements_ya_parser.c | 6 +++--- 
3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index d47bd678da0..b1f85cf9f1e 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -235,7 +235,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // TODO break; default: - elog(FATAL, "Unknown query status: %d", status); + ereport(FATAL, (errmsg("Unknown query status: %d", status))); } } @@ -321,9 +321,11 @@ void EventSender::send_query_info(yagpcc::SetQueryReq *req, const std::string &event) { auto result = connector->set_metric_query(*req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { - elog(WARNING, "Query {%d-%d-%d} %s reporting failed with an error %s", - req->query_key().tmid(), req->query_key().ssid(), - req->query_key().ccnt(), event.c_str(), result.error_text().c_str()); + ereport(WARNING, + (errmsg("Query {%d-%d-%d} %s reporting failed with an error %s", + req->query_key().tmid(), req->query_key().ssid(), + req->query_key().ccnt(), event.c_str(), + result.error_text().c_str()))); } } diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index 08133b6a65f..668173a0f7e 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -75,16 +75,16 @@ void fill_status_stats(yagpcc::SystemStat *stats) { stats->set_vmpeakkb(value); proc_stat >> measure; if (measure != "kB") { - elog(FATAL, "Expected memory sizes in kB, but got in %s", - measure.c_str()); + ereport(FATAL, (errmsg("Expected memory sizes in kB, but got in %s", + measure.c_str()))); } } else if (key == "VmSize:") { uint64_t value; proc_stat >> value; stats->set_vmsizekb(value); if (measure != "kB") { - elog(FATAL, "Expected memory sizes in kB, but got in %s", - measure.c_str()); + ereport(FATAL, (errmsg("Expected memory sizes in kB, but got in %s", + measure.c_str()))); } } } diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index a37ac0ef0bf..1c58d936093 
100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -213,7 +213,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable) JumbleExpr(jstate, (Node *)rte->functions); break; default: - elog(ERROR, "unrecognized RTE kind: %d", (int)rte->rtekind); + ereport(ERROR, (errmsg("unrecognized RTE kind: %d", (int)rte->rtekind))); break; } } @@ -659,8 +659,8 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) break; default: /* Only a warning, since we can stumble along anyway */ - elog(WARNING, "unrecognized node type: %d", - (int)nodeTag(node)); + ereport(WARNING, (errmsg("unrecognized node type: %d", + (int)nodeTag(node)))); break; } } From 43fa27ea0d5a0f704fb397323638698e8ab24aaa Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 13 Jun 2023 16:51:40 +0300 Subject: [PATCH 032/133] Fix EventSender and GrpcConnector in forks The problem is both static variable singleton and Grpc don't work well with forked processes. Delayed their initialization to the actual query handling time. Should fix the problem. 
--- src/EventSender.cpp | 10 ++-------- src/EventSender.h | 6 ++---- src/GrpcConnector.cpp | 33 ++++++++++++++++++++++----------- src/hook_wrappers.cpp | 33 +++++++++++++++++++++++---------- 4 files changed, 49 insertions(+), 33 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index b1f85cf9f1e..ec966e8686c 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -329,12 +329,6 @@ void EventSender::send_query_info(yagpcc::SetQueryReq *req, } } -EventSender *EventSender::instance() { - static EventSender sender; - return &sender; -} +EventSender::EventSender() { connector = std::make_unique(); } -EventSender::EventSender() { - Config::init(); - connector = std::make_unique(); -} \ No newline at end of file +EventSender::~EventSender() { connector.release(); } \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 9e2ef992f81..92e6937a690 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -17,15 +17,13 @@ class EventSender { void query_metrics_collect(QueryMetricsStatus status, void *arg); void incr_depth() { nesting_level++; } void decr_depth() { nesting_level--; } - static EventSender *instance(); + EventSender(); + ~EventSender(); private: void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); - - EventSender(); void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); std::unique_ptr connector; - int nesting_level = 0; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 276c9ceb8a8..966bfb4a780 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -10,14 +10,17 @@ #include #include -extern "C" { +extern "C" +{ #include "postgres.h" #include "cdb/cdbvars.h" } -class GrpcConnector::Impl { +class GrpcConnector::Impl +{ public: - Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { + Impl() : SOCKET_FILE("unix://" + Config::uds_path()) + { 
GOOGLE_PROTOBUF_VERIFY_VERSION; channel = grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); @@ -27,15 +30,18 @@ class GrpcConnector::Impl { reconnect_thread = std::thread(&Impl::reconnect, this); } - ~Impl() { + ~Impl() + { done = true; cv.notify_one(); reconnect_thread.join(); } - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) + { yagpcc::MetricResponse response; - if (!connected) { + if (!connected) + { response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); response.set_error_text( "Not tracing this query connection to agent has been lost"); @@ -47,7 +53,8 @@ class GrpcConnector::Impl { std::chrono::system_clock::now() + std::chrono::milliseconds(timeout); context.set_deadline(deadline); grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - if (!status.ok()) { + if (!status.ok()) + { response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); @@ -68,13 +75,16 @@ class GrpcConnector::Impl { std::mutex mtx; bool done; - void reconnect() { - while (!done) { + void reconnect() + { + while (!done) + { { std::unique_lock lock(mtx); cv.wait(lock); } - while (!connected && !done) { + while (!connected && !done) + { auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100); connected = channel->WaitForConnected(deadline); @@ -88,6 +98,7 @@ GrpcConnector::GrpcConnector() { impl = new Impl(); } GrpcConnector::~GrpcConnector() { delete impl; } yagpcc::MetricResponse -GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) { +GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) +{ return impl->set_metric_query(req); } \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index a904dc9bafd..66ba6547ce2 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ 
-28,7 +28,17 @@ static void ya_ExecutorFinish_hook(QueryDesc *query_desc); static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); +static EventSender *sender = nullptr; + +static inline EventSender *get_sender() { + if (!sender) { + sender = new EventSender(); + } + return sender; +} + void hooks_init() { + Config::init(); previous_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = ya_ExecutorStart_hook; previous_ExecutorRun_hook = ExecutorRun_hook; @@ -49,11 +59,14 @@ void hooks_deinit() { ExecutorEnd_hook = previous_ExecutorEnd_hook; query_info_collect_hook = previous_query_info_collect_hook; stat_statements_parser_deinit(); + if (sender) { + delete sender; + } } void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { PG_TRY(); - { EventSender::instance()->executor_before_start(query_desc, eflags); } + { get_sender()->executor_before_start(query_desc, eflags); } PG_CATCH(); { ereport(WARNING, @@ -66,7 +79,7 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { standard_ExecutorStart(query_desc, eflags); } PG_TRY(); - { EventSender::instance()->executor_after_start(query_desc, eflags); } + { get_sender()->executor_after_start(query_desc, eflags); } PG_CATCH(); { ereport(WARNING, @@ -77,36 +90,36 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, long count) { - EventSender::instance()->incr_depth(); + get_sender()->incr_depth(); PG_TRY(); { if (previous_ExecutorRun_hook) previous_ExecutorRun_hook(query_desc, direction, count); else standard_ExecutorRun(query_desc, direction, count); - EventSender::instance()->decr_depth(); + get_sender()->decr_depth(); } PG_CATCH(); { - EventSender::instance()->decr_depth(); + get_sender()->decr_depth(); PG_RE_THROW(); } PG_END_TRY(); } void ya_ExecutorFinish_hook(QueryDesc *query_desc) { - EventSender::instance()->incr_depth(); + 
get_sender()->incr_depth(); PG_TRY(); { if (previous_ExecutorFinish_hook) previous_ExecutorFinish_hook(query_desc); else standard_ExecutorFinish(query_desc); - EventSender::instance()->decr_depth(); + get_sender()->decr_depth(); } PG_CATCH(); { - EventSender::instance()->decr_depth(); + get_sender()->decr_depth(); PG_RE_THROW(); } PG_END_TRY(); @@ -114,7 +127,7 @@ void ya_ExecutorFinish_hook(QueryDesc *query_desc) { void ya_ExecutorEnd_hook(QueryDesc *query_desc) { PG_TRY(); - { EventSender::instance()->executor_end(query_desc); } + { get_sender()->executor_end(query_desc); } PG_CATCH(); { ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorEnd_hook"))); } PG_END_TRY(); @@ -127,7 +140,7 @@ void ya_ExecutorEnd_hook(QueryDesc *query_desc) { void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { PG_TRY(); - { EventSender::instance()->query_metrics_collect(status, arg); } + { get_sender()->query_metrics_collect(status, arg); } PG_CATCH(); { ereport(WARNING, From 218c6366658ac864a7338201cd00d30904553e7c Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 19 Jun 2023 19:16:38 +0300 Subject: [PATCH 033/133] Set ya-grpc as a dependency --- debian/control | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/control b/debian/control index c740a8590ca..07176e94be5 100644 --- a/debian/control +++ b/debian/control @@ -2,10 +2,10 @@ Source: greenplum-6-yagpcc-hooks Section: misc Priority: optional Maintainer: Maxim Smyatkin -Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), protobuf-compiler, protobuf-compiler-grpc, libgrpc++1, libgrpc++-dev +Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), ya-grpc (=1.46-57-50820-02384e3918-yandex) Standards-Version: 3.9.8 Package: greenplum-6-yagpcc-hooks Architecture: any -Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3) +Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3), ya-grpc 
(=1.46-57-50820-02384e3918-yandex) Description: Greenplum extension to send query execution metrics to yandex command center agent From 1f5e166a8cbfc8e0c6ac9e60aa4c4fbd1b353b50 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 16 Aug 2023 13:23:00 +0300 Subject: [PATCH 034/133] Fix memory leak in analyze code --- protos/yagpcc_metrics.proto | 4 ++-- src/EventSender.cpp | 25 ++++++++----------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index 26e0a496460..bc128a22f17 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -29,8 +29,8 @@ message QueryInfo { uint64 plan_id = 3; string query_text = 4; string plan_text = 5; - string temlate_query_text = 6; - string temlate_plan_text = 7; + string template_query_text = 6; + string template_plan_text = 7; string userName = 8; string databaseName = 9; string rsgname = 10; diff --git a/src/EventSender.cpp b/src/EventSender.cpp index ec966e8686c..6d2ff4afd47 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -61,13 +61,6 @@ std::string *get_rg_name() { return result; } -int get_cur_slice_id(QueryDesc *desc) { - if (!desc->estate) { - return 0; - } - return LocallyExecutingSliceIndex(desc->estate); -} - google::protobuf::Timestamp current_ts() { google::protobuf::Timestamp current_ts; struct timeval tv; @@ -113,7 +106,7 @@ void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); set_plan_text(qi->mutable_plan_text(), query_desc); StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); - *qi->mutable_temlate_plan_text() = std::string(norm_plan->data); + *qi->mutable_template_plan_text() = std::string(norm_plan->data); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); // TODO: free stringinfo? 
} @@ -121,7 +114,7 @@ void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { *qi->mutable_query_text() = query_desc->sourceText; char *norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_temlate_query_text() = std::string(norm_query); + *qi->mutable_template_query_text() = std::string(norm_query); pfree(norm_query); } @@ -246,20 +239,18 @@ void EventSender::executor_before_start(QueryDesc *query_desc, } query_start_time = std::chrono::high_resolution_clock::now(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { - instr_time starttime; query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; if (Config::enable_cdbstats()) { query_desc->instrument_options |= INSTRUMENT_CDB; - // TODO: there is a PR resolving some memory leak around auto-explain: - // https://github.com/greenplum-db/gpdb/pull/15164 - // Need to check if the memory leak applies here as well and fix it - Assert(query_desc->showstatctx == NULL); - INSTR_TIME_SET_CURRENT(starttime); - query_desc->showstatctx = - cdbexplain_showExecStatsBegin(query_desc, starttime); + if (!query_desc->showstatctx) { + instr_time starttime; + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + cdbexplain_showExecStatsBegin(query_desc, starttime); + } } } } From 43fb9c9a6cfee386a56af9f8d2af0294d7029c81 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 6 Sep 2023 16:10:04 +0300 Subject: [PATCH 035/133] Add motion and workfile stats Also fix some compilation issues and minor bugs --- protos/yagpcc_metrics.proto | 8 ++++++++ src/EventSender.cpp | 41 ++++++++++++++++++++++++++++--------- src/EventSender.h | 2 +- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index bc128a22f17..2d20d3c46d9 100644 --- a/protos/yagpcc_metrics.proto 
+++ b/protos/yagpcc_metrics.proto @@ -84,6 +84,12 @@ message SystemStat { uint64 cancelled_write_bytes = 14; } +message NetworkStat { + uint32 total_bytes = 1; + uint32 tuple_bytes = 2; + uint32 chunks = 3; +} + message MetricInstrumentation { uint64 ntuples = 1; /* Total tuples produced */ uint64 nloops = 2; /* # of run cycles for this node */ @@ -103,6 +109,8 @@ message MetricInstrumentation { uint64 temp_blks_written = 16; double blk_read_time = 17; /* measured read/write time */ double blk_write_time = 18; + NetworkStat sent = 19; + NetworkStat received = 20; } message SpillInfo { diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 6d2ff4afd47..2810e581313 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -4,6 +4,8 @@ #include #include +#define typeid __typeid +#define operator __operator extern "C" { #include "postgres.h" @@ -14,10 +16,12 @@ extern "C" { #include "executor/executor.h" #include "utils/elog.h" #include "utils/metrics_utils.h" +#include "utils/workfile_mgr.h" #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" +#include "cdb/cdbinterconnect.h" #include "stat_statements_parser/pg_stat_statements_ya_parser.h" #include "tcop/utility.h" @@ -25,6 +29,8 @@ extern "C" { void get_spill_info(int ssid, int ccid, int32_t *file_count, int64_t *total_bytes); } +#undef typeid +#undef operator #include "EventSender.h" @@ -160,6 +166,18 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, metrics->set_blk_write_time( INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); } + if (query_desc->estate && query_desc->estate->motionlayer_context) { + MotionLayerState *mlstate = + (MotionLayerState *)query_desc->estate->motionlayer_context; + metrics->mutable_sent()->set_total_bytes(mlstate->stat_total_bytes_sent); + metrics->mutable_sent()->set_tuple_bytes(mlstate->stat_tuple_bytes_sent); + metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); + 
metrics->mutable_received()->set_total_bytes( + mlstate->stat_total_bytes_recvd); + metrics->mutable_received()->set_tuple_bytes( + mlstate->stat_tuple_bytes_recvd); + metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); + } } decltype(std::chrono::high_resolution_clock::now()) query_start_time; @@ -182,6 +200,8 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, std::chrono::high_resolution_clock::now() - query_start_time; metrics->mutable_systemstat()->set_runningtimeseconds( elapsed_seconds.count()); + metrics->mutable_spill()->set_filecount(WorkfileTotalFilesCreated()); + metrics->mutable_spill()->set_totalbytes(WorkfileTotalBytesWritten()); } yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, @@ -238,6 +258,7 @@ void EventSender::executor_before_start(QueryDesc *query_desc, return; } query_start_time = std::chrono::high_resolution_clock::now(); + WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; @@ -245,12 +266,10 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (Config::enable_cdbstats()) { query_desc->instrument_options |= INSTRUMENT_CDB; - if (!query_desc->showstatctx) { - instr_time starttime; - INSTR_TIME_SET_CURRENT(starttime); - query_desc->showstatctx = - cdbexplain_showExecStatsBegin(query_desc, starttime); - } + instr_time starttime; + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + cdbexplain_showExecStatsBegin(query_desc, starttime); } } } @@ -281,7 +300,6 @@ void EventSender::executor_end(QueryDesc *query_desc) { } auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); - set_query_info(&req, query_desc, false, false); // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to // gather it here. It only makes sense when doing regular stat checks. 
set_gp_metrics(req.mutable_query_metrics(), query_desc, @@ -303,7 +321,6 @@ void EventSender::collect_query_done(QueryDesc *query_desc, if (need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - set_query_info(&req, query_desc, false, false); send_query_info(&req, status); } } @@ -320,6 +337,10 @@ void EventSender::send_query_info(yagpcc::SetQueryReq *req, } } -EventSender::EventSender() { connector = std::make_unique(); } +EventSender::EventSender() { + if (Config::enable_collector()) { + connector = new GrpcConnector(); + } +} -EventSender::~EventSender() { connector.release(); } \ No newline at end of file +EventSender::~EventSender() { delete connector; } \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 92e6937a690..f53648bed36 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -24,6 +24,6 @@ class EventSender { void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); - std::unique_ptr connector; + GrpcConnector *connector; int nesting_level = 0; }; \ No newline at end of file From 01f44ac816f3b84a2e0f7f876aaa07b546c0fc5c Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 6 Sep 2023 16:11:06 +0300 Subject: [PATCH 036/133] Mute all PG-related signals in reconnection thread Reason: background thread could receive signals, which are supposed to be handled by the main thread. And only main pg thread knows how to handle those properly. 
--- src/GrpcConnector.cpp | 62 ++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 22 deletions(-) diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 966bfb4a780..d06692ec3d2 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -9,18 +9,43 @@ #include #include #include +#include +#include -extern "C" -{ +extern "C" { #include "postgres.h" #include "cdb/cdbvars.h" } -class GrpcConnector::Impl -{ +/* + * Set up the thread signal mask, we don't want to run our signal handlers + * in downloading and uploading threads. + */ +static void MaskThreadSignals() { + sigset_t sigs; + + if (pthread_equal(main_tid, pthread_self())) { + ereport(ERROR, (errmsg("thread_mask is called from main thread!"))); + return; + } + + sigemptyset(&sigs); + + /* make our thread to ignore these signals (which should allow that they be + * delivered to the main thread) */ + sigaddset(&sigs, SIGHUP); + sigaddset(&sigs, SIGINT); + sigaddset(&sigs, SIGTERM); + sigaddset(&sigs, SIGALRM); + sigaddset(&sigs, SIGUSR1); + sigaddset(&sigs, SIGUSR2); + + pthread_sigmask(SIG_BLOCK, &sigs, NULL); +} + +class GrpcConnector::Impl { public: - Impl() : SOCKET_FILE("unix://" + Config::uds_path()) - { + Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { GOOGLE_PROTOBUF_VERIFY_VERSION; channel = grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); @@ -30,18 +55,15 @@ class GrpcConnector::Impl reconnect_thread = std::thread(&Impl::reconnect, this); } - ~Impl() - { + ~Impl() { done = true; cv.notify_one(); reconnect_thread.join(); } - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) - { + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { yagpcc::MetricResponse response; - if (!connected) - { + if (!connected) { response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); response.set_error_text( "Not tracing this query connection to agent has been lost"); @@ -53,8 +75,7 @@ class GrpcConnector::Impl 
std::chrono::system_clock::now() + std::chrono::milliseconds(timeout); context.set_deadline(deadline); grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - if (!status.ok()) - { + if (!status.ok()) { response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); @@ -75,16 +96,14 @@ class GrpcConnector::Impl std::mutex mtx; bool done; - void reconnect() - { - while (!done) - { + void reconnect() { + MaskThreadSignals(); + while (!done) { { std::unique_lock lock(mtx); cv.wait(lock); } - while (!connected && !done) - { + while (!connected && !done) { auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100); connected = channel->WaitForConnected(deadline); @@ -98,7 +117,6 @@ GrpcConnector::GrpcConnector() { impl = new Impl(); } GrpcConnector::~GrpcConnector() { delete impl; } yagpcc::MetricResponse -GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) -{ +GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) { return impl->set_metric_query(req); } \ No newline at end of file From aaa0d49fc00b57372361cc9515d26bedc16c0d15 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 7 Sep 2023 11:50:04 +0300 Subject: [PATCH 037/133] Move debian configuration to teamcity --- debian/compat | 1 - debian/control | 11 ----------- debian/postinst | 8 -------- debian/rules | 10 ---------- 4 files changed, 30 deletions(-) delete mode 100644 debian/compat delete mode 100644 debian/control delete mode 100644 debian/postinst delete mode 100644 debian/rules diff --git a/debian/compat b/debian/compat deleted file mode 100644 index ec635144f60..00000000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/debian/control b/debian/control deleted file mode 100644 index 07176e94be5..00000000000 --- a/debian/control +++ /dev/null @@ -1,11 +0,0 @@ -Source: greenplum-6-yagpcc-hooks -Section: misc -Priority: optional 
-Maintainer: Maxim Smyatkin -Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), ya-grpc (=1.46-57-50820-02384e3918-yandex) -Standards-Version: 3.9.8 - -Package: greenplum-6-yagpcc-hooks -Architecture: any -Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3), ya-grpc (=1.46-57-50820-02384e3918-yandex) -Description: Greenplum extension to send query execution metrics to yandex command center agent diff --git a/debian/postinst b/debian/postinst deleted file mode 100644 index 27ddfc06a7d..00000000000 --- a/debian/postinst +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -e - -GPADMIN=gpadmin -GPHOME=/opt/greenplum-db-6 - -chown -R ${GPADMIN}:${GPADMIN} ${GPHOME} diff --git a/debian/rules b/debian/rules deleted file mode 100644 index 6c2c7491067..00000000000 --- a/debian/rules +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/make -f -# You must remove unused comment lines for the released package. -export DH_VERBOSE = 1 - - -export GPHOME := /opt/greenplum-db-6 -export PATH := $(GPHOME)/bin:$(PATH) - -%: - dh $@ From f143460db26efff75d4a4276076e231a506ff01a Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 15 Sep 2023 12:21:19 +0300 Subject: [PATCH 038/133] Move logs from psql to log file also make it more informative with less noise --- src/EventSender.cpp | 20 ++++---------------- src/EventSender.h | 1 - src/GrpcConnector.cpp | 21 +++++++++++++++------ src/GrpcConnector.h | 3 ++- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 2810e581313..fabdb8003a3 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -280,7 +280,7 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); set_query_info(&req, query_desc, false, true); - send_query_info(&req, "started"); + connector->set_metric_query(req, "started"); } } @@ -304,7 +304,7 @@ void
EventSender::executor_end(QueryDesc *query_desc) { // gather it here. It only makes sense when doing regular stat checks. set_gp_metrics(req.mutable_query_metrics(), query_desc, /*need_spillinfo*/ false); - send_query_info(&req, "ended"); + connector->set_metric_query(req, "ended"); } void EventSender::collect_query_submit(QueryDesc *query_desc) { @@ -312,7 +312,7 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); set_query_info(&req, query_desc, true, false); - send_query_info(&req, "submit"); + connector->set_metric_query(req, "submit"); } } @@ -321,19 +321,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, if (need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - send_query_info(&req, status); - } -} - -void EventSender::send_query_info(yagpcc::SetQueryReq *req, - const std::string &event) { - auto result = connector->set_metric_query(*req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { - ereport(WARNING, - (errmsg("Query {%d-%d-%d} %s reporting failed with an error %s", - req->query_key().tmid(), req->query_key().ssid(), - req->query_key().ccnt(), event.c_str(), - result.error_text().c_str()))); + connector->set_metric_query(req, status); } } diff --git a/src/EventSender.h b/src/EventSender.h index f53648bed36..ee0db2f0938 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -23,7 +23,6 @@ class EventSender { private: void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); - void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); GrpcConnector *connector; int nesting_level = 0; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index d06692ec3d2..9afe9e3ead5 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -61,12 +61,13 @@ class 
GrpcConnector::Impl { reconnect_thread.join(); } - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { + yagpcc::MetricResponse set_metric_query(const yagpcc::SetQueryReq &req, + const std::string &event) { yagpcc::MetricResponse response; if (!connected) { response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); response.set_error_text( - "Not tracing this query connection to agent has been lost"); + "Not tracing this query because grpc connection has been lost"); return response; } grpc::ClientContext context; @@ -76,9 +77,13 @@ class GrpcConnector::Impl { context.set_deadline(deadline); grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); if (!status.ok()) { - response.set_error_text("Connection lost: " + status.error_message() + - "; " + status.error_details()); + response.set_error_text("GRPC error: " + status.error_message() + "; " + + status.error_details()); response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); + ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %s", + req.query_key().tmid(), req.query_key().ssid(), + req.query_key().ccnt(), event.c_str(), + response.error_text().c_str()))); connected = false; cv.notify_one(); } @@ -108,6 +113,9 @@ class GrpcConnector::Impl { std::chrono::system_clock::now() + std::chrono::milliseconds(100); connected = channel->WaitForConnected(deadline); } + if (connected && !done) { + ereport(LOG, (errmsg("GRPC connection is restored"))); + } } } }; @@ -117,6 +125,7 @@ GrpcConnector::GrpcConnector() { impl = new Impl(); } GrpcConnector::~GrpcConnector() { delete impl; } yagpcc::MetricResponse -GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) { - return impl->set_metric_query(req); +GrpcConnector::set_metric_query(const yagpcc::SetQueryReq &req, + const std::string &event) { + return impl->set_metric_query(req, event); } \ No newline at end of file diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h index 4fca6960a4e..6571c626dfd 
100644 --- a/src/GrpcConnector.h +++ b/src/GrpcConnector.h @@ -6,7 +6,8 @@ class GrpcConnector { public: GrpcConnector(); ~GrpcConnector(); - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req); + yagpcc::MetricResponse set_metric_query(const yagpcc::SetQueryReq &req, + const std::string &event); private: class Impl; From 379fa0f2dc1bfb0a1c850ea86611c91987c64fa7 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 20 Sep 2023 15:18:38 +0300 Subject: [PATCH 039/133] Some hardening around memory --- src/EventSender.cpp | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index fabdb8003a3..5c7a9fdb4bf 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -48,7 +48,6 @@ std::string *get_user_name() { std::string *get_db_name() { char *dbname = get_database_name(MyDatabaseId); std::string *result = dbname ? new std::string(dbname) : nullptr; - pfree(dbname); return result; } @@ -63,7 +62,6 @@ std::string *get_rg_name() { if (rgname == nullptr) return nullptr; auto result = new std::string(rgname); - pfree(rgname); return result; } @@ -114,14 +112,12 @@ void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); *qi->mutable_template_plan_text() = std::string(norm_plan->data); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - // TODO: free stringinfo? 
} void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { *qi->mutable_query_text() = query_desc->sourceText; char *norm_query = gen_normquery(query_desc->sourceText); *qi->mutable_template_query_text() = std::string(norm_query); - pfree(norm_query); } void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc, @@ -289,15 +285,16 @@ void EventSender::executor_end(QueryDesc *query_desc) { (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { return; } - if (query_desc->totaltime && Config::enable_analyze() && - Config::enable_cdbstats()) { - if (query_desc->estate->dispatcherState && - query_desc->estate->dispatcherState->primaryResults) { - cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, - DISPATCH_WAIT_NONE); - } - InstrEndLoop(query_desc->totaltime); - } + /* TODO: when querying via CURSOR this call freezes. Need to investigate. + To reproduce - uncomment it and run installchecks. It will freeze around join test. + Needs investigation + + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && + Config::enable_cdbstats() && query_desc->estate->dispatcherState && + query_desc->estate->dispatcherState->primaryResults) { + cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, + DISPATCH_WAIT_NONE); + }*/ auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to From 475e1018a95e06c02ebe0bc5266d6c19275f9b5e Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 20 Sep 2023 20:25:48 +0300 Subject: [PATCH 040/133] Remove thread unsafe logging --- src/GrpcConnector.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 9afe9e3ead5..a71ec95f1ba 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -51,6 +51,7 @@ class GrpcConnector::Impl { grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); stub = 
yagpcc::SetQueryInfo::NewStub(channel); connected = true; + reconnected = false; done = false; reconnect_thread = std::thread(&Impl::reconnect, this); } @@ -69,6 +70,9 @@ class GrpcConnector::Impl { response.set_error_text( "Not tracing this query because grpc connection has been lost"); return response; + } else if (reconnected) { + reconnected = false; + ereport(LOG, (errmsg("GRPC connection is restored"))); } grpc::ClientContext context; int timeout = Gp_role == GP_ROLE_DISPATCH ? 500 : 250; @@ -85,6 +89,7 @@ class GrpcConnector::Impl { req.query_key().ccnt(), event.c_str(), response.error_text().c_str()))); connected = false; + reconnected = false; cv.notify_one(); } @@ -95,11 +100,10 @@ class GrpcConnector::Impl { const std::string SOCKET_FILE; std::unique_ptr stub; std::shared_ptr channel; - std::atomic_bool connected; + std::atomic_bool connected, reconnected, done; std::thread reconnect_thread; std::condition_variable cv; std::mutex mtx; - bool done; void reconnect() { MaskThreadSignals(); @@ -112,9 +116,7 @@ class GrpcConnector::Impl { auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100); connected = channel->WaitForConnected(deadline); - } - if (connected && !done) { - ereport(LOG, (errmsg("GRPC connection is restored"))); + reconnected = true; } } } From ac9c5489da181665276326071d59030c2e0d2047 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 21 Sep 2023 11:18:32 +0300 Subject: [PATCH 041/133] Dirty hack: make queryid eq planid --- src/EventSender.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 5c7a9fdb4bf..0721f6b0bc0 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -129,7 +129,13 @@ void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc, } if (query_desc->plannedstmt && with_plan) { set_query_plan(qi, query_desc); - qi->set_query_id(query_desc->plannedstmt->queryId); + // TODO: For now assume queryid equal 
to planid, which is wrong. The + // reason for doing so this bug + // https://github.com/greenplum-db/gpdb/pull/15385 (ORCA loses + // pg_stat_statements` queryid during planning phase). Need to fix it + // upstream, cherry-pick and bump gp + // qi->set_query_id(query_desc->plannedstmt->queryId); + qi->set_query_id(qi->plan_id()); } qi->set_allocated_username(get_user_name()); qi->set_allocated_databasename(get_db_name()); @@ -286,9 +292,9 @@ void EventSender::executor_end(QueryDesc *query_desc) { return; } /* TODO: when querying via CURSOR this call freezes. Need to investigate. - To reproduce - uncomment it and run installchecks. It will freeze around join test. - Needs investigation - + To reproduce - uncomment it and run installchecks. It will freeze around + join test. Needs investigation + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && Config::enable_cdbstats() && query_desc->estate->dispatcherState && query_desc->estate->dispatcherState->primaryResults) { From 5f1a523bb092b3d0b096a0e3766bbf42aed23690 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 22 Sep 2023 13:35:26 +0300 Subject: [PATCH 042/133] Remove dead code and catch thread exception --- Makefile | 1 - src/EventSender.cpp | 42 ++++++++++++++++++++++++------------------ src/GrpcConnector.cpp | 6 +++--- src/SpillInfoWrapper.c | 21 --------------------- 4 files changed, 27 insertions(+), 43 deletions(-) delete mode 100644 src/SpillInfoWrapper.c diff --git a/Makefile b/Makefile index 91bcec3203e..9ebe1210bf4 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,6 @@ PG_STAT_OBJS := $(PG_STAT_DIR)/pg_stat_statements_ya_parser.o OBJS := $(PG_STAT_OBJS) \ $(PROTO_GEN_OBJECTS) \ $(SRC_DIR)/ProcStats.o \ - $(SRC_DIR)/SpillInfoWrapper.o \ $(SRC_DIR)/Config.o \ $(SRC_DIR)/GrpcConnector.o \ $(SRC_DIR)/EventSender.o \ diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 0721f6b0bc0..b4990bb9829 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,8 +1,8 @@ #include 
"Config.h" #include "GrpcConnector.h" #include "ProcStats.h" -#include #include +#include #define typeid __typeid #define operator __operator @@ -20,14 +20,11 @@ extern "C" { #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" -#include "cdb/cdbvars.h" #include "cdb/cdbinterconnect.h" +#include "cdb/cdbvars.h" #include "stat_statements_parser/pg_stat_statements_ya_parser.h" #include "tcop/utility.h" - -void get_spill_info(int ssid, int ccid, int32_t *file_count, - int64_t *total_bytes); } #undef typeid #undef operator @@ -184,16 +181,7 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, decltype(std::chrono::high_resolution_clock::now()) query_start_time; -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, - bool need_spillinfo) { - if (need_spillinfo) { - int32_t n_spill_files = 0; - int64_t n_spill_bytes = 0; - get_spill_info(gp_session_id, gp_command_count, &n_spill_files, - &n_spill_bytes); - metrics->mutable_spill()->set_filecount(n_spill_files); - metrics->mutable_spill()->set_totalbytes(n_spill_bytes); - } +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { if (query_desc->planstate && query_desc->planstate->instrument) { set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); } @@ -256,6 +244,9 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { void EventSender::executor_before_start(QueryDesc *query_desc, int /* eflags*/) { + if (!connector) { + return; + } if (!need_collect()) { return; } @@ -277,6 +268,9 @@ void EventSender::executor_before_start(QueryDesc *query_desc, } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { + if (!connector) { + return; + } if ((Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) && need_collect()) { auto req = @@ -287,6 +281,9 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { } void EventSender::executor_end(QueryDesc *query_desc) { + if 
(!connector) { + return; + } if (!need_collect() || (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { return; @@ -305,12 +302,14 @@ void EventSender::executor_end(QueryDesc *query_desc) { create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to // gather it here. It only makes sense when doing regular stat checks. - set_gp_metrics(req.mutable_query_metrics(), query_desc, - /*need_spillinfo*/ false); + set_gp_metrics(req.mutable_query_metrics(), query_desc); connector->set_metric_query(req, "ended"); } void EventSender::collect_query_submit(QueryDesc *query_desc) { + if (!connector) { + return; + } if (need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); @@ -321,6 +320,9 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { void EventSender::collect_query_done(QueryDesc *query_desc, const std::string &status) { + if (!connector) { + return; + } if (need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); @@ -330,7 +332,11 @@ void EventSender::collect_query_done(QueryDesc *query_desc, EventSender::EventSender() { if (Config::enable_collector()) { - connector = new GrpcConnector(); + try { + connector = new GrpcConnector(); + } catch (const std::exception &e) { + ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); + } } } diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index a71ec95f1ba..73c1944fa04 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -7,10 +7,10 @@ #include #include #include +#include +#include #include #include -#include -#include extern "C" { #include "postgres.h" @@ -116,7 +116,7 @@ class GrpcConnector::Impl { auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100); connected = channel->WaitForConnected(deadline); - reconnected = true; + reconnected = connected.load(); } } } diff 
--git a/src/SpillInfoWrapper.c b/src/SpillInfoWrapper.c deleted file mode 100644 index c6ace0a693f..00000000000 --- a/src/SpillInfoWrapper.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "postgres.h" -#include "utils/workfile_mgr.h" - -void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes); - -void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes) -{ - int count = 0; - int i = 0; - workfile_set *workfiles = workfile_mgr_cache_entries_get_copy(&count); - workfile_set *wf_iter = workfiles; - for (i = 0; i < count; ++i, ++wf_iter) - { - if (wf_iter->active && wf_iter->session_id == ssid && wf_iter->command_count == ccid) - { - *file_count += wf_iter->num_files; - *total_bytes += wf_iter->total_bytes; - } - } - pfree(workfiles); -} \ No newline at end of file From 80cd6d06281b92f4a64f616c653cdb07dbb5598d Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 21 Sep 2023 15:16:35 +0300 Subject: [PATCH 043/133] Add GUC to filter specific users --- src/Config.cpp | 43 +++++++++++++++++++++++++++++++++++++++++++ src/Config.h | 1 + src/EventSender.cpp | 3 ++- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/Config.cpp b/src/Config.cpp index d97e5d45984..ee59e5511c8 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -1,4 +1,7 @@ #include "Config.h" +#include +#include +#include extern "C" { #include "postgres.h" @@ -10,6 +13,8 @@ static char *guc_uds_path = nullptr; static bool guc_enable_analyze = true; static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; +static char *guc_ignored_users = nullptr; +static std::unique_ptr> ignored_users = nullptr; void Config::init() { DefineCustomStringVariable( @@ -30,9 +35,47 @@ void Config::init() { "yagpcc.enable_cdbstats", "Collect CDB metrics in yagpcc", 0LL, &guc_enable_cdbstats, true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomStringVariable( + "yagpcc.ignored_users_list", + "Make yagpcc ignore 
queries issued by given users", 0LL, + &guc_ignored_users, "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); } std::string Config::uds_path() { return guc_uds_path; } bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } + +bool Config::filter_user(const std::string &username) { + if (!ignored_users) { + ignored_users.reset(new std::unordered_set()); + if (guc_ignored_users == nullptr || guc_ignored_users[0] == '0') { + return false; + } + /* Need a modifiable copy of string */ + char *rawstring = pstrdup(guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) { + /* syntax error in list */ + pfree(rawstring); + list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter yagpcc.ignored_users_list"))); + return false; + } + foreach (l, elemlist) { + ignored_users->insert((char *)lfirst(l)); + } + pfree(rawstring); + list_free(elemlist); + } + return ignored_users->find(username) != ignored_users->end(); +} diff --git a/src/Config.h b/src/Config.h index 117481f219b..0a07306b0f8 100644 --- a/src/Config.h +++ b/src/Config.h @@ -9,4 +9,5 @@ class Config { static bool enable_analyze(); static bool enable_cdbstats(); static bool enable_collector(); + static bool filter_user(const std::string &username); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index b4990bb9829..632977fe2d4 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -33,7 +33,8 @@ extern "C" { #define need_collect() \ (nesting_level == 0 && gp_command_count != 0 && \ - query_desc->sourceText != nullptr && Config::enable_collector()) + query_desc->sourceText != nullptr && Config::enable_collector() && \ + 
!Config::filter_user(*get_user_name())) namespace { From 0f98cad29380927d620420671b8b51b5e3477e22 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 28 Sep 2023 11:41:16 +0300 Subject: [PATCH 044/133] Several fixes to user filtering * handle nullptr username properly * don't start grpc if username is filtered out * initialize grpc connector with nullptr by default --- src/Config.cpp | 4 ++-- src/Config.h | 2 +- src/EventSender.cpp | 4 ++-- src/EventSender.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Config.cpp b/src/Config.cpp index ee59e5511c8..c5c2c15f7e9 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -48,7 +48,7 @@ bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } -bool Config::filter_user(const std::string &username) { +bool Config::filter_user(const std::string *username) { if (!ignored_users) { ignored_users.reset(new std::unordered_set()); if (guc_ignored_users == nullptr || guc_ignored_users[0] == '0') { @@ -77,5 +77,5 @@ bool Config::filter_user(const std::string &username) { pfree(rawstring); list_free(elemlist); } - return ignored_users->find(username) != ignored_users->end(); + return !username || ignored_users->find(*username) != ignored_users->end(); } diff --git a/src/Config.h b/src/Config.h index 0a07306b0f8..999d0300640 100644 --- a/src/Config.h +++ b/src/Config.h @@ -9,5 +9,5 @@ class Config { static bool enable_analyze(); static bool enable_cdbstats(); static bool enable_collector(); - static bool filter_user(const std::string &username); + static bool filter_user(const std::string *username); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 632977fe2d4..bfff3a6179f 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -34,7 +34,7 @@ extern "C" { #define need_collect() \ (nesting_level == 0 && gp_command_count != 0 && 
\ query_desc->sourceText != nullptr && Config::enable_collector() && \ - !Config::filter_user(*get_user_name())) + !Config::filter_user(get_user_name())) namespace { @@ -332,7 +332,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } EventSender::EventSender() { - if (Config::enable_collector()) { + if (Config::enable_collector() && !Config::filter_user(get_user_name())) { try { connector = new GrpcConnector(); } catch (const std::exception &e) { diff --git a/src/EventSender.h b/src/EventSender.h index ee0db2f0938..2af8b7ffa03 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -23,6 +23,6 @@ class EventSender { private: void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); - GrpcConnector *connector; + GrpcConnector *connector = nullptr; int nesting_level = 0; }; \ No newline at end of file From ef5d362d971679cbc7c00c5471ae87fc36cfdb81 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 2 Oct 2023 12:54:32 +0300 Subject: [PATCH 045/133] Minor naming improvement --- src/EventSender.cpp | 8 ++++---- src/GrpcConnector.cpp | 10 +++++----- src/GrpcConnector.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index bfff3a6179f..9ddbe9e315a 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -277,7 +277,7 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); set_query_info(&req, query_desc, false, true); - connector->set_metric_query(req, "started"); + connector->report_query(req, "started"); } } @@ -304,7 +304,7 @@ void EventSender::executor_end(QueryDesc *query_desc) { // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to // gather it here. It only makes sense when doing regular stat checks. 
set_gp_metrics(req.mutable_query_metrics(), query_desc); - connector->set_metric_query(req, "ended"); + connector->report_query(req, "ended"); } void EventSender::collect_query_submit(QueryDesc *query_desc) { @@ -315,7 +315,7 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); set_query_info(&req, query_desc, true, false); - connector->set_metric_query(req, "submit"); + connector->report_query(req, "submit"); } } @@ -327,7 +327,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, if (need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - connector->set_metric_query(req, status); + connector->report_query(req, status); } } diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 73c1944fa04..955553fe60e 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -62,8 +62,8 @@ class GrpcConnector::Impl { reconnect_thread.join(); } - yagpcc::MetricResponse set_metric_query(const yagpcc::SetQueryReq &req, - const std::string &event) { + yagpcc::MetricResponse report_query(const yagpcc::SetQueryReq &req, + const std::string &event) { yagpcc::MetricResponse response; if (!connected) { response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); @@ -127,7 +127,7 @@ GrpcConnector::GrpcConnector() { impl = new Impl(); } GrpcConnector::~GrpcConnector() { delete impl; } yagpcc::MetricResponse -GrpcConnector::set_metric_query(const yagpcc::SetQueryReq &req, - const std::string &event) { - return impl->set_metric_query(req, event); +GrpcConnector::report_query(const yagpcc::SetQueryReq &req, + const std::string &event) { + return impl->report_query(req, event); } \ No newline at end of file diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h index 6571c626dfd..a7a70ee86c8 100644 --- a/src/GrpcConnector.h +++ b/src/GrpcConnector.h @@ -6,8 +6,8 @@ class GrpcConnector { public: GrpcConnector(); 
~GrpcConnector(); - yagpcc::MetricResponse set_metric_query(const yagpcc::SetQueryReq &req, - const std::string &event); + yagpcc::MetricResponse report_query(const yagpcc::SetQueryReq &req, + const std::string &event); private: class Impl; From 9db45ada73a48ff8095110c205badeaa3419b937 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 2 Oct 2023 14:32:46 +0300 Subject: [PATCH 046/133] Add server-side message queueing --- src/EventSender.cpp | 20 ++++++++++++++------ src/EventSender.h | 9 +++++++++ src/GrpcConnector.cpp | 17 +++++++++++++++-- src/GrpcConnector.h | 3 ++- 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 9ddbe9e315a..71bd3c1739c 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,6 +1,6 @@ #include "Config.h" -#include "GrpcConnector.h" #include "ProcStats.h" +#include "GrpcConnector.h" #include #include @@ -15,7 +15,6 @@ extern "C" { #include "commands/resgroupcmds.h" #include "executor/executor.h" #include "utils/elog.h" -#include "utils/metrics_utils.h" #include "utils/workfile_mgr.h" #include "cdb/cdbdisp.h" @@ -251,6 +250,11 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (!need_collect()) { return; } + { + connector->report_query(msg_queue, "previous query"); + std::queue empty; + std::swap(msg_queue, empty); + } query_start_time = std::chrono::high_resolution_clock::now(); WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { @@ -277,7 +281,8 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); set_query_info(&req, query_desc, false, true); - connector->report_query(req, "started"); + msg_queue.push(std::move(req)); + connector->report_query(msg_queue, "started"); } } @@ -304,7 +309,8 @@ void EventSender::executor_end(QueryDesc *query_desc) { // NOTE: there are no cummulative spillinfo stats 
AFAIU, so no need to // gather it here. It only makes sense when doing regular stat checks. set_gp_metrics(req.mutable_query_metrics(), query_desc); - connector->report_query(req, "ended"); + msg_queue.push(std::move(req)); + connector->report_query(msg_queue, "ended"); } void EventSender::collect_query_submit(QueryDesc *query_desc) { @@ -315,7 +321,8 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); set_query_info(&req, query_desc, true, false); - connector->report_query(req, "submit"); + msg_queue.push(std::move(req)); + connector->report_query(msg_queue, "submit"); } } @@ -327,7 +334,8 @@ void EventSender::collect_query_done(QueryDesc *query_desc, if (need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - connector->report_query(req, status); + msg_queue.push(std::move(req)); + connector->report_query(msg_queue, status); } } diff --git a/src/EventSender.h b/src/EventSender.h index 2af8b7ffa03..f07b7c84759 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,8 +1,13 @@ #pragma once #include +#include #include +extern "C" { +#include "utils/metrics_utils.h" +} + class GrpcConnector; struct QueryDesc; namespace yagpcc { @@ -25,4 +30,8 @@ class EventSender { void collect_query_done(QueryDesc *query_desc, const std::string &status); GrpcConnector *connector = nullptr; int nesting_level = 0; + // TODO: instead of having a queue here we can make the message incremental in + // case of GRPC failures. 
It would requires adding submit_time, start_time and + // end_time fields to protobuf + std::queue msg_queue; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 955553fe60e..594656b2503 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -91,6 +91,8 @@ class GrpcConnector::Impl { connected = false; reconnected = false; cv.notify_one(); + } else { + response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_SUCCESS); } return response; @@ -127,7 +129,18 @@ GrpcConnector::GrpcConnector() { impl = new Impl(); } GrpcConnector::~GrpcConnector() { delete impl; } yagpcc::MetricResponse -GrpcConnector::report_query(const yagpcc::SetQueryReq &req, +GrpcConnector::report_query(std::queue &reqs, const std::string &event) { - return impl->report_query(req, event); + while (!reqs.empty()) { + const auto &req = reqs.front(); + auto response = impl->report_query(req, event); + if (response.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_SUCCESS) { + reqs.pop(); + } else { + return response; + } + } + yagpcc::MetricResponse response; + response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_SUCCESS); + return response; } \ No newline at end of file diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h index a7a70ee86c8..9ee8679342a 100644 --- a/src/GrpcConnector.h +++ b/src/GrpcConnector.h @@ -1,12 +1,13 @@ #pragma once #include "protos/yagpcc_set_service.pb.h" +#include class GrpcConnector { public: GrpcConnector(); ~GrpcConnector(); - yagpcc::MetricResponse report_query(const yagpcc::SetQueryReq &req, + yagpcc::MetricResponse report_query(std::queue &reqs, const std::string &event); private: From 78408d0e5100b8b4992984ccfe201485fad47c8d Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 31 Oct 2023 15:20:33 +0300 Subject: [PATCH 047/133] Moving to protobuf, part 1: remove GRPC --- Makefile | 15 +--- protos/yagpcc_set_service.proto | 17 ---- src/EventSender.cpp | 4 +- src/EventSender.h | 4 +- 
src/GrpcConnector.cpp | 146 -------------------------------- src/GrpcConnector.h | 16 ---- src/UDSConnector.cpp | 39 +++++++++ src/UDSConnector.h | 16 ++++ 8 files changed, 62 insertions(+), 195 deletions(-) delete mode 100644 src/GrpcConnector.cpp delete mode 100644 src/GrpcConnector.h create mode 100644 src/UDSConnector.cpp create mode 100644 src/UDSConnector.h diff --git a/Makefile b/Makefile index 9ebe1210bf4..d6d72c2bda1 100644 --- a/Makefile +++ b/Makefile @@ -2,17 +2,14 @@ override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmi override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -Iinclude -Isrc/protos -Isrc -DGPBUILD COMMON_CPP_FLAGS := -Isrc -Iinclude -Isrc/stat_statements_parser PG_CXXFLAGS += $(COMMON_CPP_FLAGS) -SHLIB_LINK += -lprotobuf -lgrpc++ +SHLIB_LINK += -lprotobuf PROTOC = protoc SRC_DIR = ./src GEN_DIR = ./src/protos PROTO_DIR = ./protos PROTO_GEN_OBJECTS = $(GEN_DIR)/yagpcc_plan.pb.o $(GEN_DIR)/yagpcc_metrics.pb.o \ - $(GEN_DIR)/yagpcc_set_service.pb.o $(GEN_DIR)/yagpcc_set_service.grpc.pb.o - -GRPC_CPP_PLUGIN := grpc_cpp_plugin -GRPC_CPP_PLUGIN_PATH ?= `which $(GRPC_CPP_PLUGIN)` + $(GEN_DIR)/yagpcc_set_service.pb.o $(GEN_DIR)/%.pb.cpp : $(PROTO_DIR)/%.proto sed -i 's/optional //g' $^ @@ -20,12 +17,6 @@ $(GEN_DIR)/%.pb.cpp : $(PROTO_DIR)/%.proto $(PROTOC) --cpp_out=$(SRC_DIR) $^ mv $(GEN_DIR)/$*.pb.cc $(GEN_DIR)/$*.pb.cpp - - -$(GEN_DIR)/yagpcc_set_service.grpc.pb.cpp : $(PROTO_DIR)/yagpcc_set_service.proto - $(PROTOC) --grpc_out=$(SRC_DIR) --plugin=protoc-gen-grpc=$(GRPC_CPP_PLUGIN_PATH) $^ - mv $(GEN_DIR)/yagpcc_set_service.grpc.pb.cc $(GEN_DIR)/yagpcc_set_service.grpc.pb.cpp - PG_STAT_DIR := $(SRC_DIR)/stat_statements_parser PG_STAT_OBJS := $(PG_STAT_DIR)/pg_stat_statements_ya_parser.o @@ -33,7 +24,7 @@ 
OBJS := $(PG_STAT_OBJS) \ $(PROTO_GEN_OBJECTS) \ $(SRC_DIR)/ProcStats.o \ $(SRC_DIR)/Config.o \ - $(SRC_DIR)/GrpcConnector.o \ + $(SRC_DIR)/UDSConnector.o \ $(SRC_DIR)/EventSender.o \ $(SRC_DIR)/hook_wrappers.o \ $(SRC_DIR)/yagp_hooks_collector.o diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index 93c2f5a01d1..08b8e064ece 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -9,23 +9,6 @@ package yagpcc; option java_outer_classname = "SegmentYAGPCCAS"; option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/agent_segment;greenplum"; -service SetQueryInfo { - rpc SetMetricPlanNode (SetPlanNodeReq) returns (MetricResponse) {} - - rpc SetMetricQuery (SetQueryReq) returns (MetricResponse) {} -} - -message MetricResponse { - MetricResponseStatusCode error_code = 1; - string error_text = 2; -} - -enum MetricResponseStatusCode { - METRIC_RESPONSE_STATUS_CODE_UNSPECIFIED = 0; - METRIC_RESPONSE_STATUS_CODE_SUCCESS = 1; - METRIC_RESPONSE_STATUS_CODE_ERROR = 2; -} - message SetQueryReq { QueryStatus query_status = 1; google.protobuf.Timestamp datetime = 2; diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 71bd3c1739c..9d6d5b7d7fd 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,6 +1,6 @@ #include "Config.h" #include "ProcStats.h" -#include "GrpcConnector.h" +#include "UDSConnector.h" #include #include @@ -342,7 +342,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, EventSender::EventSender() { if (Config::enable_collector() && !Config::filter_user(get_user_name())) { try { - connector = new GrpcConnector(); + connector = new UDSConnector(); } catch (const std::exception &e) { ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); } diff --git a/src/EventSender.h b/src/EventSender.h index f07b7c84759..42cd56e1590 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -8,7 +8,7 @@ extern "C" { #include "utils/metrics_utils.h" } -class 
GrpcConnector; +class UDSConnector; struct QueryDesc; namespace yagpcc { class SetQueryReq; @@ -28,7 +28,7 @@ class EventSender { private: void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); - GrpcConnector *connector = nullptr; + UDSConnector *connector = nullptr; int nesting_level = 0; // TODO: instead of having a queue here we can make the message incremental in // case of GRPC failures. It would requires adding submit_time, start_time and diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp deleted file mode 100644 index 594656b2503..00000000000 --- a/src/GrpcConnector.cpp +++ /dev/null @@ -1,146 +0,0 @@ -#include "GrpcConnector.h" -#include "Config.h" -#include "yagpcc_set_service.grpc.pb.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern "C" { -#include "postgres.h" -#include "cdb/cdbvars.h" -} - -/* - * Set up the thread signal mask, we don't want to run our signal handlers - * in downloading and uploading threads. 
- */ -static void MaskThreadSignals() { - sigset_t sigs; - - if (pthread_equal(main_tid, pthread_self())) { - ereport(ERROR, (errmsg("thread_mask is called from main thread!"))); - return; - } - - sigemptyset(&sigs); - - /* make our thread to ignore these signals (which should allow that they be - * delivered to the main thread) */ - sigaddset(&sigs, SIGHUP); - sigaddset(&sigs, SIGINT); - sigaddset(&sigs, SIGTERM); - sigaddset(&sigs, SIGALRM); - sigaddset(&sigs, SIGUSR1); - sigaddset(&sigs, SIGUSR2); - - pthread_sigmask(SIG_BLOCK, &sigs, NULL); -} - -class GrpcConnector::Impl { -public: - Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { - GOOGLE_PROTOBUF_VERIFY_VERSION; - channel = - grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); - stub = yagpcc::SetQueryInfo::NewStub(channel); - connected = true; - reconnected = false; - done = false; - reconnect_thread = std::thread(&Impl::reconnect, this); - } - - ~Impl() { - done = true; - cv.notify_one(); - reconnect_thread.join(); - } - - yagpcc::MetricResponse report_query(const yagpcc::SetQueryReq &req, - const std::string &event) { - yagpcc::MetricResponse response; - if (!connected) { - response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); - response.set_error_text( - "Not tracing this query because grpc connection has been lost"); - return response; - } else if (reconnected) { - reconnected = false; - ereport(LOG, (errmsg("GRPC connection is restored"))); - } - grpc::ClientContext context; - int timeout = Gp_role == GP_ROLE_DISPATCH ? 
500 : 250; - auto deadline = - std::chrono::system_clock::now() + std::chrono::milliseconds(timeout); - context.set_deadline(deadline); - grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - if (!status.ok()) { - response.set_error_text("GRPC error: " + status.error_message() + "; " + - status.error_details()); - response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); - ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %s", - req.query_key().tmid(), req.query_key().ssid(), - req.query_key().ccnt(), event.c_str(), - response.error_text().c_str()))); - connected = false; - reconnected = false; - cv.notify_one(); - } else { - response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_SUCCESS); - } - - return response; - } - -private: - const std::string SOCKET_FILE; - std::unique_ptr stub; - std::shared_ptr channel; - std::atomic_bool connected, reconnected, done; - std::thread reconnect_thread; - std::condition_variable cv; - std::mutex mtx; - - void reconnect() { - MaskThreadSignals(); - while (!done) { - { - std::unique_lock lock(mtx); - cv.wait(lock); - } - while (!connected && !done) { - auto deadline = - std::chrono::system_clock::now() + std::chrono::milliseconds(100); - connected = channel->WaitForConnected(deadline); - reconnected = connected.load(); - } - } - } -}; - -GrpcConnector::GrpcConnector() { impl = new Impl(); } - -GrpcConnector::~GrpcConnector() { delete impl; } - -yagpcc::MetricResponse -GrpcConnector::report_query(std::queue &reqs, - const std::string &event) { - while (!reqs.empty()) { - const auto &req = reqs.front(); - auto response = impl->report_query(req, event); - if (response.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_SUCCESS) { - reqs.pop(); - } else { - return response; - } - } - yagpcc::MetricResponse response; - response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_SUCCESS); - return response; -} \ No newline at end of file diff --git a/src/GrpcConnector.h 
b/src/GrpcConnector.h deleted file mode 100644 index 9ee8679342a..00000000000 --- a/src/GrpcConnector.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include "protos/yagpcc_set_service.pb.h" -#include - -class GrpcConnector { -public: - GrpcConnector(); - ~GrpcConnector(); - yagpcc::MetricResponse report_query(std::queue &reqs, - const std::string &event); - -private: - class Impl; - Impl *impl; -}; \ No newline at end of file diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp new file mode 100644 index 00000000000..58362724e9d --- /dev/null +++ b/src/UDSConnector.cpp @@ -0,0 +1,39 @@ +#include "UDSConnector.h" +#include "Config.h" + +#include + +extern "C" { +#include "postgres.h" +#include "cdb/cdbvars.h" +} + +class UDSConnector::Impl { +public: + Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { + GOOGLE_PROTOBUF_VERIFY_VERSION; + } + + bool report_query(const yagpcc::SetQueryReq &req, const std::string &event) { + return true; + } + +private: + const std::string SOCKET_FILE; +}; + +UDSConnector::UDSConnector() { impl = new Impl(); } + +UDSConnector::~UDSConnector() { delete impl; } + +void UDSConnector::report_query(std::queue &reqs, + const std::string &event) { + while (!reqs.empty()) { + const auto &req = reqs.front(); + if (impl->report_query(req, event)) { + reqs.pop(); + } else { + break; + } + } +} \ No newline at end of file diff --git a/src/UDSConnector.h b/src/UDSConnector.h new file mode 100644 index 00000000000..a60c15d6f19 --- /dev/null +++ b/src/UDSConnector.h @@ -0,0 +1,16 @@ +#pragma once + +#include "protos/yagpcc_set_service.pb.h" +#include + +class UDSConnector { +public: + UDSConnector(); + ~UDSConnector(); + void report_query(std::queue &reqs, + const std::string &event); + +private: + class Impl; + Impl *impl; +}; \ No newline at end of file From b1e0ed5aed1257dc402b44289317971c447a6819 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 31 Oct 2023 15:49:02 +0300 Subject: [PATCH 048/133] Add clang-format --- 
.clang-format | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000000..99130575c9a --- /dev/null +++ b/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM +SortIncludes: false From e68c521e5c082a9021ab214767678dec7efe3793 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 31 Oct 2023 17:43:18 +0300 Subject: [PATCH 049/133] Moving to protobuf, part2: initial implementation --- src/UDSConnector.cpp | 92 +++++++++++++++++++++++++++++++++++--------- src/UDSConnector.h | 5 +-- 2 files changed, 76 insertions(+), 21 deletions(-) diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index 58362724e9d..f1cbc9fb8f5 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -2,38 +2,94 @@ #include "Config.h" #include +#include +#include +#include +#include +#include +#include +#include extern "C" { #include "postgres.h" #include "cdb/cdbvars.h" } -class UDSConnector::Impl { -public: - Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { - GOOGLE_PROTOBUF_VERIFY_VERSION; - } - - bool report_query(const yagpcc::SetQueryReq &req, const std::string &event) { - return true; - } - -private: - const std::string SOCKET_FILE; -}; - -UDSConnector::UDSConnector() { impl = new Impl(); } - -UDSConnector::~UDSConnector() { delete impl; } +UDSConnector::UDSConnector() : uds_path("unix://" + Config::uds_path()) { + GOOGLE_PROTOBUF_VERIFY_VERSION; +} void UDSConnector::report_query(std::queue &reqs, const std::string &event) { while (!reqs.empty()) { const auto &req = reqs.front(); - if (impl->report_query(req, event)) { + if (report_query(req, event)) { reqs.pop(); } else { break; } } +} + +static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, + const std::string &event) { + ereport(LOG, + (errmsg("Query {%d-%d-%d} %s tracing failed with error %s", + req.query_key().tmid(), req.query_key().ssid(), + req.query_key().ccnt(), event.c_str(), strerror(errno)))); +} + 
+bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, + const std::string &event) { + sockaddr_un address; + address.sun_family = AF_UNIX; + strcpy(address.sun_path, uds_path.c_str()); + bool success = true; + auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd != -1) { + if (fcntl(sockfd, F_SETFL, O_NONBLOCK) != -1) { + if (connect(sockfd, (sockaddr *)&address, sizeof(address)) != -1) { + auto data_size = req.ByteSizeLong(); + auto total_size = data_size + sizeof(uint32_t); + uint8_t *buf = (uint8_t *)palloc(total_size); + uint32_t *size_payload = (uint32_t *)buf; + *size_payload = data_size; + req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); + int64_t sent = 0, sent_total = 0; + do { + sent = send(sockfd, buf + sent_total, total_size - sent_total, + MSG_DONTWAIT); + sent_total += sent; + } while ( + sent > 0 && size_t(sent_total) != total_size && + // the line below is a small throttling hack: + // if a message does not fit a single packet, we take a nap + // before sending the next one. + // Otherwise, MSG_DONTWAIT send might overflow the UDS + (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true)); + if (sent < 0) { + log_tracing_failure(req, event); + success = false; + } + pfree(buf); + } else { + // log the error and go on + log_tracing_failure(req, event); + success = false; + } + } else { + // That's a very important error that should never happen, so make it + // visible to an end-user and admins. 
+ ereport(WARNING, + (errmsg("Unable to create non-blocking socket connection %s", + strerror(errno)))); + success = false; + } + close(sockfd); + } else { + // log the error and go on + log_tracing_failure(req, event); + success = false; + } + return success; } \ No newline at end of file diff --git a/src/UDSConnector.h b/src/UDSConnector.h index a60c15d6f19..c30a01e3a50 100644 --- a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -6,11 +6,10 @@ class UDSConnector { public: UDSConnector(); - ~UDSConnector(); void report_query(std::queue &reqs, const std::string &event); private: - class Impl; - Impl *impl; + bool report_query(const yagpcc::SetQueryReq &req, const std::string &event); + const std::string uds_path; }; \ No newline at end of file From baec27ee144e1f1c950407e921f98874f573bdb2 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 1 Nov 2023 14:11:23 +0300 Subject: [PATCH 050/133] Use deprecated protobuf API for bionic compatibility --- src/UDSConnector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index f1cbc9fb8f5..58d3a9af150 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -49,7 +49,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, if (sockfd != -1) { if (fcntl(sockfd, F_SETFL, O_NONBLOCK) != -1) { if (connect(sockfd, (sockaddr *)&address, sizeof(address)) != -1) { - auto data_size = req.ByteSizeLong(); + auto data_size = req.ByteSize(); auto total_size = data_size + sizeof(uint32_t); uint8_t *buf = (uint8_t *)palloc(total_size); uint32_t *size_payload = (uint32_t *)buf; From 93d8bd460132e8668a59a957cd34795bb4b5c0ca Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 1 Nov 2023 17:42:17 +0300 Subject: [PATCH 051/133] Replace message queue with incremental message Should save us some memory footprint, allocations and sends. 
--- protos/yagpcc_set_service.proto | 6 ++ src/EventSender.cpp | 117 ++++++++++++++++++-------------- src/EventSender.h | 5 +- src/UDSConnector.cpp | 12 ---- src/UDSConnector.h | 4 +- 5 files changed, 73 insertions(+), 71 deletions(-) diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index 08b8e064ece..e8fc7aaa99d 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -17,6 +17,9 @@ message SetQueryReq { QueryInfo query_info = 5; GPMetrics query_metrics = 6; repeated MetricPlan plan_tree = 7; + google.protobuf.Timestamp submit_time = 8; + google.protobuf.Timestamp start_time = 9; + google.protobuf.Timestamp end_time = 10; } message SetPlanNodeReq { @@ -26,4 +29,7 @@ message SetPlanNodeReq { SegmentKey segment_key = 4; GPMetrics node_metrics = 5; MetricPlan plan_node = 6; + google.protobuf.Timestamp submit_time = 7; + google.protobuf.Timestamp start_time = 8; + google.protobuf.Timestamp end_time = 9; } diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 9d6d5b7d7fd..834553a6187 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -101,39 +101,46 @@ void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { *plan_text = std::string(es.str->data, es.str->len); } -void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { - qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER - ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER - : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - set_plan_text(qi->mutable_plan_text(), query_desc); - StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); - *qi->mutable_template_plan_text() = std::string(norm_plan->data); - qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { + if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { + auto qi = req->mutable_query_info(); + qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + set_plan_text(qi->mutable_plan_text(), query_desc); + StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); + *qi->mutable_template_plan_text() = std::string(norm_plan->data); + qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + // TODO: For now assume queryid equal to planid, which is wrong. The + // reason for doing so this bug + // https://github.com/greenplum-db/gpdb/pull/15385 (ORCA loses + // pg_stat_statements` queryid during planning phase). 
Need to fix it + // upstream, cherry-pick and bump gp + // qi->set_query_id(query_desc->plannedstmt->queryId); + qi->set_query_id(qi->plan_id()); + } } -void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { - *qi->mutable_query_text() = query_desc->sourceText; - char *norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = std::string(norm_query); +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { + if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { + auto qi = req->mutable_query_info(); + *qi->mutable_query_text() = query_desc->sourceText; + char *norm_query = gen_normquery(query_desc->sourceText); + *qi->mutable_template_query_text() = std::string(norm_query); + } } -void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc, - bool with_text, bool with_plan) { +void clear_big_fields(yagpcc::SetQueryReq *req) { + if (Gp_session_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); + qi->clear_plan_text(); + qi->clear_query_text(); + } +} + +void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); - if (query_desc->sourceText && with_text) { - set_query_text(qi, query_desc); - } - if (query_desc->plannedstmt && with_plan) { - set_query_plan(qi, query_desc); - // TODO: For now assume queryid equal to planid, which is wrong. The - // reason for doing so this bug - // https://github.com/greenplum-db/gpdb/pull/15385 (ORCA loses - // pg_stat_statements` queryid during planning phase). 
Need to fix it - // upstream, cherry-pick and bump gp - // qi->set_query_id(query_desc->plannedstmt->queryId); - qi->set_query_id(qi->plan_id()); - } qi->set_allocated_username(get_user_name()); qi->set_allocated_databasename(get_db_name()); qi->set_allocated_rsgname(get_rg_name()); @@ -250,10 +257,9 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (!need_collect()) { return; } - { - connector->report_query(msg_queue, "previous query"); - std::queue empty; - std::swap(msg_queue, empty); + if (query_msg->has_query_key()) { + connector->report_query(*query_msg, "previous query"); + query_msg->Clear(); } query_start_time = std::chrono::high_resolution_clock::now(); WorkfileResetBackendStats(); @@ -278,11 +284,12 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { } if ((Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) && need_collect()) { - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); - set_query_info(&req, query_desc, false, true); - msg_queue.push(std::move(req)); - connector->report_query(msg_queue, "started"); + query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + *query_msg->mutable_start_time() = current_ts(); + set_query_plan(query_msg, query_desc); + if (connector->report_query(*query_msg, "started")) { + clear_big_fields(query_msg); + } } } @@ -304,13 +311,12 @@ void EventSender::executor_end(QueryDesc *query_desc) { cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, DISPATCH_WAIT_NONE); }*/ - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); - // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to - // gather it here. It only makes sense when doing regular stat checks. 
- set_gp_metrics(req.mutable_query_metrics(), query_desc); - msg_queue.push(std::move(req)); - connector->report_query(msg_queue, "ended"); + query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + *query_msg->mutable_end_time() = current_ts(); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); + if (connector->report_query(*query_msg, "ended")) { + query_msg->Clear(); + } } void EventSender::collect_query_submit(QueryDesc *query_desc) { @@ -318,11 +324,14 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { return; } if (need_collect()) { - auto req = + *query_msg = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); - set_query_info(&req, query_desc, true, false); - msg_queue.push(std::move(req)); - connector->report_query(msg_queue, "submit"); + *query_msg->mutable_submit_time() = current_ts(); + set_query_info(query_msg, query_desc); + set_query_text(query_msg, query_desc); + if (connector->report_query(*query_msg, "submit")) { + clear_big_fields(query_msg); + } } } @@ -332,15 +341,16 @@ void EventSender::collect_query_done(QueryDesc *query_desc, return; } if (need_collect()) { - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - msg_queue.push(std::move(req)); - connector->report_query(msg_queue, status); + query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + if (connector->report_query(*query_msg, status)) { + clear_big_fields(query_msg); + } } } EventSender::EventSender() { if (Config::enable_collector() && !Config::filter_user(get_user_name())) { + query_msg = new yagpcc::SetQueryReq(); try { connector = new UDSConnector(); } catch (const std::exception &e) { @@ -349,4 +359,7 @@ EventSender::EventSender() { } } -EventSender::~EventSender() { delete connector; } \ No newline at end of file +EventSender::~EventSender() { + delete query_msg; + delete connector; +} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 
42cd56e1590..161bf6ce037 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -30,8 +30,5 @@ class EventSender { void collect_query_done(QueryDesc *query_desc, const std::string &status); UDSConnector *connector = nullptr; int nesting_level = 0; - // TODO: instead of having a queue here we can make the message incremental in - // case of GRPC failures. It would requires adding submit_time, start_time and - // end_time fields to protobuf - std::queue msg_queue; + yagpcc::SetQueryReq *query_msg; }; \ No newline at end of file diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index 58d3a9af150..339a5d4f374 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -19,18 +19,6 @@ UDSConnector::UDSConnector() : uds_path("unix://" + Config::uds_path()) { GOOGLE_PROTOBUF_VERIFY_VERSION; } -void UDSConnector::report_query(std::queue &reqs, - const std::string &event) { - while (!reqs.empty()) { - const auto &req = reqs.front(); - if (report_query(req, event)) { - reqs.pop(); - } else { - break; - } - } -} - static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, const std::string &event) { ereport(LOG, diff --git a/src/UDSConnector.h b/src/UDSConnector.h index c30a01e3a50..574653023e6 100644 --- a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -6,10 +6,8 @@ class UDSConnector { public: UDSConnector(); - void report_query(std::queue &reqs, - const std::string &event); + bool report_query(const yagpcc::SetQueryReq &req, const std::string &event); private: - bool report_query(const yagpcc::SetQueryReq &req, const std::string &event); const std::string uds_path; }; \ No newline at end of file From bff38294a3290368f4a9f242a9e4aa733cc4eb8e Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 2 Nov 2023 14:38:24 +0300 Subject: [PATCH 052/133] Fix bug with missing query statuses --- src/EventSender.cpp | 47 ++++++++++++++++++++++++++++----------------- src/EventSender.h | 2 +- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git 
a/src/EventSender.cpp b/src/EventSender.cpp index 834553a6187..45d72b93e48 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -230,16 +230,10 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // no-op: executor_after_start is enough break; case METRICS_QUERY_DONE: - collect_query_done(reinterpret_cast(arg), "done"); - break; case METRICS_QUERY_ERROR: - collect_query_done(reinterpret_cast(arg), "error"); - break; case METRICS_QUERY_CANCELING: - collect_query_done(reinterpret_cast(arg), "calcelling"); - break; case METRICS_QUERY_CANCELED: - collect_query_done(reinterpret_cast(arg), "cancelled"); + collect_query_done(reinterpret_cast(arg), status); break; case METRICS_INNER_QUERY_DONE: // TODO @@ -320,10 +314,7 @@ void EventSender::executor_end(QueryDesc *query_desc) { } void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (!connector) { - return; - } - if (need_collect()) { + if (connector && need_collect()) { *query_msg = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); *query_msg->mutable_submit_time() = current_ts(); @@ -336,13 +327,33 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { } void EventSender::collect_query_done(QueryDesc *query_desc, - const std::string &status) { - if (!connector) { - return; - } - if (need_collect()) { - query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); - if (connector->report_query(*query_msg, status)) { + QueryMetricsStatus status) { + if (connector && need_collect()) { + yagpcc::QueryStatus query_status; + std::string msg; + switch (status) { + case METRICS_QUERY_DONE: + query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; + msg = "done"; + break; + case METRICS_QUERY_ERROR: + query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; + msg = "error"; + break; + case METRICS_QUERY_CANCELING: + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; + msg = "cancelling"; + break; + case METRICS_QUERY_CANCELED: 
+ query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; + msg = "cancelled"; + break; + default: + ereport(FATAL, (errmsg("Unexpected query status in query_done hook: %d", + status))); + } + query_msg->set_query_status(query_status); + if (connector->report_query(*query_msg, msg)) { clear_big_fields(query_msg); } } diff --git a/src/EventSender.h b/src/EventSender.h index 161bf6ce037..0e8985873b6 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -27,7 +27,7 @@ class EventSender { private: void collect_query_submit(QueryDesc *query_desc); - void collect_query_done(QueryDesc *query_desc, const std::string &status); + void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); UDSConnector *connector = nullptr; int nesting_level = 0; yagpcc::SetQueryReq *query_msg; From 680a658cf13a4fb105c9ce32d661e8f08576903e Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 13 Nov 2023 15:38:31 +0300 Subject: [PATCH 053/133] Add runtime statistics on UDS messages This commit adds a view to see cumulative statistics on UDS messages sent: total messages, different kinds of errors, max message size. As a necessary collateral it also renames the extension: yagp-hooks-collector -> yagp_hooks_collector. And fixes a bug in UDS socket path resolution. 
--- Makefile | 5 +- sql/yagp-hooks-collector--1.0.sql | 2 - sql/yagp-hooks-collector--unpackaged--1.0.sql | 2 - sql/yagp_hooks_collector--1.0.sql | 55 +++++++++++ src/UDSConnector.cpp | 13 ++- src/UDSConnector.h | 3 - src/YagpStat.cpp | 91 +++++++++++++++++++ src/YagpStat.h | 21 +++++ src/hook_wrappers.cpp | 52 ++++++++++- src/hook_wrappers.h | 2 + src/yagp_hooks_collector.c | 13 ++- ...or.control => yagp_hooks_collector.control | 4 +- 12 files changed, 245 insertions(+), 18 deletions(-) delete mode 100644 sql/yagp-hooks-collector--1.0.sql delete mode 100644 sql/yagp-hooks-collector--unpackaged--1.0.sql create mode 100644 sql/yagp_hooks_collector--1.0.sql create mode 100644 src/YagpStat.cpp create mode 100644 src/YagpStat.h rename yagp-hooks-collector.control => yagp_hooks_collector.control (61%) diff --git a/Makefile b/Makefile index d6d72c2bda1..cee4c05c2e2 100644 --- a/Makefile +++ b/Makefile @@ -24,16 +24,17 @@ OBJS := $(PG_STAT_OBJS) \ $(PROTO_GEN_OBJECTS) \ $(SRC_DIR)/ProcStats.o \ $(SRC_DIR)/Config.o \ + $(SRC_DIR)/YagpStat.o \ $(SRC_DIR)/UDSConnector.o \ $(SRC_DIR)/EventSender.o \ $(SRC_DIR)/hook_wrappers.o \ $(SRC_DIR)/yagp_hooks_collector.o EXTRA_CLEAN := $(GEN_DIR) DATA := $(wildcard sql/*--*.sql) -EXTENSION := yagp-hooks-collector +EXTENSION := yagp_hooks_collector EXTVERSION := $(shell grep default_version $(EXTENSION).control | \ sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/") -MODULE_big := yagp-hooks-collector +MODULE_big := yagp_hooks_collector PG_CONFIG := pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) diff --git a/sql/yagp-hooks-collector--1.0.sql b/sql/yagp-hooks-collector--1.0.sql deleted file mode 100644 index f9ab15fb400..00000000000 --- a/sql/yagp-hooks-collector--1.0.sql +++ /dev/null @@ -1,2 +0,0 @@ --- complain if script is sourced in psql, rather than via CREATE EXTENSION -\echo Use '''CREATE EXTENSION "yagp-hooks-collector"''' to load this file. 
\quit diff --git a/sql/yagp-hooks-collector--unpackaged--1.0.sql b/sql/yagp-hooks-collector--unpackaged--1.0.sql deleted file mode 100644 index 0441c97bd84..00000000000 --- a/sql/yagp-hooks-collector--unpackaged--1.0.sql +++ /dev/null @@ -1,2 +0,0 @@ --- complain if script is sourced in psql, rather than via CREATE EXTENSION -\echo Use '''CREATE EXTENSION "uuid-cb" FROM unpackaged''' to load this file. \quit diff --git a/sql/yagp_hooks_collector--1.0.sql b/sql/yagp_hooks_collector--1.0.sql new file mode 100644 index 00000000000..88bbe4e0dc7 --- /dev/null +++ b/sql/yagp_hooks_collector--1.0.sql @@ -0,0 +1,55 @@ +/* yagp_hooks_collector--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION yagp_hooks_collector" to load this file. \quit + +CREATE FUNCTION __yagp_stat_messages_reset_f_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION __yagp_stat_messages_reset_f_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagp_stat_messages_reset() +RETURNS void +AS +$$ + SELECT __yagp_stat_messages_reset_f_on_master(); + SELECT __yagp_stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION __yagp_stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION __yagp_stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW yagp_stat_messages AS + SELECT C.* + FROM __yagp_stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM __yagp_stat_messages_f_on_segments() as C ( + 
segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index 339a5d4f374..b9088205250 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -1,5 +1,6 @@ #include "UDSConnector.h" #include "Config.h" +#include "YagpStat.h" #include #include @@ -15,9 +16,7 @@ extern "C" { #include "cdb/cdbvars.h" } -UDSConnector::UDSConnector() : uds_path("unix://" + Config::uds_path()) { - GOOGLE_PROTOBUF_VERIFY_VERSION; -} +UDSConnector::UDSConnector() { GOOGLE_PROTOBUF_VERIFY_VERSION; } static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, const std::string &event) { @@ -31,7 +30,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, const std::string &event) { sockaddr_un address; address.sun_family = AF_UNIX; - strcpy(address.sun_path, uds_path.c_str()); + strcpy(address.sun_path, Config::uds_path().c_str()); bool success = true; auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); if (sockfd != -1) { @@ -58,12 +57,16 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, if (sent < 0) { log_tracing_failure(req, event); success = false; + YagpStat::report_bad_send(total_size); + } else { + YagpStat::report_send(total_size); } pfree(buf); } else { // log the error and go on log_tracing_failure(req, event); success = false; + YagpStat::report_bad_connection(); } } else { // That's a very important error that should never happen, so make it @@ -72,12 +75,14 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, (errmsg("Unable to create non-blocking socket connection %s", strerror(errno)))); success = false; + YagpStat::report_error(); } close(sockfd); } else { // log the error and go on log_tracing_failure(req, event); success = false; + YagpStat::report_error(); } return success; } \ No newline at end of file diff --git a/src/UDSConnector.h b/src/UDSConnector.h 
index 574653023e6..42e0aa20968 100644 --- a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -7,7 +7,4 @@ class UDSConnector { public: UDSConnector(); bool report_query(const yagpcc::SetQueryReq &req, const std::string &event); - -private: - const std::string uds_path; }; \ No newline at end of file diff --git a/src/YagpStat.cpp b/src/YagpStat.cpp new file mode 100644 index 00000000000..879cde85212 --- /dev/null +++ b/src/YagpStat.cpp @@ -0,0 +1,91 @@ +#include "YagpStat.h" + +#include + +extern "C" { +#include "postgres.h" +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "storage/spin.h" +} + +namespace { +struct ProtectedData { + slock_t mutex; + YagpStat::Data data; +}; +shmem_startup_hook_type prev_shmem_startup_hook = NULL; +ProtectedData *data = nullptr; + +void yagp_shmem_startup() { + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + bool found; + data = reinterpret_cast( + ShmemInitStruct("yagp_stat_messages", sizeof(ProtectedData), &found)); + if (!found) { + SpinLockInit(&data->mutex); + data->data = YagpStat::Data(); + } + LWLockRelease(AddinShmemInitLock); +} + +class LockGuard { +public: + LockGuard(slock_t *mutex) : mutex_(mutex) { SpinLockAcquire(mutex_); } + ~LockGuard() { SpinLockRelease(mutex_); } + +private: + slock_t *mutex_; +}; +} // namespace + +void YagpStat::init() { + if (!process_shared_preload_libraries_in_progress) + return; + RequestAddinShmemSpace(sizeof(ProtectedData)); + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = yagp_shmem_startup; +} + +void YagpStat::deinit() { shmem_startup_hook = prev_shmem_startup_hook; } + +void YagpStat::reset() { + LockGuard lg(&data->mutex); + data->data = YagpStat::Data(); +} + +void YagpStat::report_send(int32_t msg_size) { + LockGuard lg(&data->mutex); + data->data.total++; + data->data.max_message_size = std::max(msg_size, 
data->data.max_message_size); +} + +void YagpStat::report_bad_connection() { + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_connects++; +} + +void YagpStat::report_bad_send(int32_t msg_size) { + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_sends++; + data->data.max_message_size = std::max(msg_size, data->data.max_message_size); +} + +void YagpStat::report_error() { + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_other++; +} + +YagpStat::Data YagpStat::get_stats() { + LockGuard lg(&data->mutex); + return data->data; +} + +bool YagpStat::loaded() { return data != nullptr; } diff --git a/src/YagpStat.h b/src/YagpStat.h new file mode 100644 index 00000000000..110b1fdcbb1 --- /dev/null +++ b/src/YagpStat.h @@ -0,0 +1,21 @@ +#pragma once + +#include + +class YagpStat { +public: + struct Data { + int64_t total, failed_sends, failed_connects, failed_other; + int32_t max_message_size; + }; + + static void init(); + static void deinit(); + static void reset(); + static void report_send(int32_t msg_size); + static void report_bad_connection(); + static void report_bad_send(int32_t msg_size); + static void report_error(); + static Data get_stats(); + static bool loaded(); +}; \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 66ba6547ce2..37f80385a6b 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,16 +1,17 @@ extern "C" { #include "postgres.h" +#include "funcapi.h" #include "executor/executor.h" #include "utils/elog.h" +#include "utils/builtins.h" #include "utils/metrics_utils.h" - #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" - #include "tcop/utility.h" } #include "Config.h" +#include "YagpStat.h" #include "EventSender.h" #include "hook_wrappers.h" #include "stat_statements_parser/pg_stat_statements_ya_parser.h" @@ -39,6 +40,7 @@ static inline EventSender *get_sender() { void hooks_init() { Config::init(); + YagpStat::init(); 
previous_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = ya_ExecutorStart_hook; previous_ExecutorRun_hook = ExecutorRun_hook; @@ -62,6 +64,7 @@ void hooks_deinit() { if (sender) { delete sender; } + YagpStat::deinit(); } void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { @@ -150,4 +153,49 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { if (previous_query_info_collect_hook) { (*previous_query_info_collect_hook)(status, arg); } +} + +static void check_stats_loaded() { + if (!YagpStat::loaded()) { + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("yagp_hooks_collector must be loaded via " + "shared_preload_libraries"))); + } +} + +void yagp_functions_reset() { + check_stats_loaded(); + YagpStat::reset(); +} + +Datum yagp_functions_get(FunctionCallInfo fcinfo) { + const int ATTNUM = 6; + check_stats_loaded(); + auto stats = YagpStat::get_stats(); + TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM, false); + TupleDescInitEntry(tupdesc, (AttrNumber)1, "segid", INT4OID, -1 /* typmod */, + 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)2, "total_messages", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)3, "send_failures", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)4, "connection_failures", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)5, "other_errors", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)6, "max_message_size", INT4OID, + -1 /* typmod */, 0 /* attdim */); + tupdesc = BlessTupleDesc(tupdesc); + Datum values[ATTNUM]; + bool nulls[ATTNUM]; + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int32GetDatum(GpIdentity.segindex); + values[1] = Int64GetDatum(stats.total); + values[2] = Int64GetDatum(stats.failed_sends); + values[3] = Int64GetDatum(stats.failed_connects); + values[4] = 
Int64GetDatum(stats.failed_other); + values[5] = Int32GetDatum(stats.max_message_size); + HeapTuple tuple = heap_form_tuple(tupdesc, values, nulls); + Datum result = HeapTupleGetDatum(tuple); + PG_RETURN_DATUM(result); } \ No newline at end of file diff --git a/src/hook_wrappers.h b/src/hook_wrappers.h index 815fcb7cd51..c158f42cf1d 100644 --- a/src/hook_wrappers.h +++ b/src/hook_wrappers.h @@ -6,6 +6,8 @@ extern "C" { extern void hooks_init(); extern void hooks_deinit(); +extern void yagp_functions_reset(); +extern Datum yagp_functions_get(FunctionCallInfo fcinfo); #ifdef __cplusplus } diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c index 69475ea5079..2a9e7328e6d 100644 --- a/src/yagp_hooks_collector.c +++ b/src/yagp_hooks_collector.c @@ -1,6 +1,6 @@ #include "postgres.h" #include "cdb/cdbvars.h" -#include "fmgr.h" +#include "utils/builtins.h" #include "hook_wrappers.h" @@ -8,6 +8,8 @@ PG_MODULE_MAGIC; void _PG_init(void); void _PG_fini(void); +PG_FUNCTION_INFO_V1(yagp_stat_messages_reset); +PG_FUNCTION_INFO_V1(yagp_stat_messages); void _PG_init(void) { if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { @@ -20,3 +22,12 @@ void _PG_fini(void) { hooks_deinit(); } } + +Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { + yagp_functions_reset(); + PG_RETURN_VOID(); +} + +Datum yagp_stat_messages(PG_FUNCTION_ARGS) { + return yagp_functions_get(fcinfo); +} \ No newline at end of file diff --git a/yagp-hooks-collector.control b/yagp_hooks_collector.control similarity index 61% rename from yagp-hooks-collector.control rename to yagp_hooks_collector.control index 82c189a88fc..b5539dd6462 100644 --- a/yagp-hooks-collector.control +++ b/yagp_hooks_collector.control @@ -1,5 +1,5 @@ -# yagp-hooks-collector extension +# yagp_hooks_collector extension comment = 'Intercept query and plan execution hooks and report them to Yandex GPCC agents' default_version = '1.0' -module_pathname = '$libdir/yagp-hooks-collector' +module_pathname = 
'$libdir/yagp_hooks_collector' superuser = true From e31f9165cc3e962b3b24e66a2eb95031e3184096 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 15 Nov 2023 13:37:10 +0300 Subject: [PATCH 054/133] Move query msg cleanup to the right place --- src/EventSender.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 45d72b93e48..0a26c9b85ed 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -251,10 +251,6 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (!need_collect()) { return; } - if (query_msg->has_query_key()) { - connector->report_query(*query_msg, "previous query"); - query_msg->Clear(); - } query_start_time = std::chrono::high_resolution_clock::now(); WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { @@ -315,6 +311,10 @@ void EventSender::executor_end(QueryDesc *query_desc) { void EventSender::collect_query_submit(QueryDesc *query_desc) { if (connector && need_collect()) { + if (query_msg && query_msg->has_query_key()) { + connector->report_query(*query_msg, "previous query"); + query_msg->Clear(); + } *query_msg = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); *query_msg->mutable_submit_time() = current_ts(); From fd8a3f8472074f43d4f3b92e0540e65ebe64b28f Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 16 Nov 2023 12:46:23 +0300 Subject: [PATCH 055/133] Finalize query msg in DONE hook --- src/EventSender.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 0a26c9b85ed..62de0e36bb2 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -305,7 +305,7 @@ void EventSender::executor_end(QueryDesc *query_desc) { *query_msg->mutable_end_time() = current_ts(); set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); if (connector->report_query(*query_msg, "ended")) { - query_msg->Clear(); + 
clear_big_fields(query_msg); } } @@ -354,7 +354,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } query_msg->set_query_status(query_status); if (connector->report_query(*query_msg, msg)) { - clear_big_fields(query_msg); + query_msg->Clear(); } } } From 052531af3c67552998716b91915fdcc0a1a5292c Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 26 Dec 2023 16:35:12 +0300 Subject: [PATCH 056/133] Fix some memory leaks --- src/EventSender.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 62de0e36bb2..e3be58b194e 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -39,12 +39,17 @@ namespace { std::string *get_user_name() { const char *username = GetConfigOption("session_authorization", false, false); + // username is not to be freed return username ? new std::string(username) : nullptr; } std::string *get_db_name() { char *dbname = get_database_name(MyDatabaseId); - std::string *result = dbname ? 
new std::string(dbname) : nullptr; + std::string *result = nullptr; + if (dbname) { + result = new std::string(dbname); + pfree(dbname); + } return result; } @@ -58,8 +63,7 @@ std::string *get_rg_name() { char *rgname = GetResGroupNameForId(groupId); if (rgname == nullptr) return nullptr; - auto result = new std::string(rgname); - return result; + return new std::string(rgname); } google::protobuf::Timestamp current_ts() { @@ -97,8 +101,12 @@ ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { } void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { + MemoryContext oldcxt = + MemoryContextSwitchTo(query_desc->estate->es_query_cxt); auto es = get_explain_state(query_desc, true); *plan_text = std::string(es.str->data, es.str->len); + pfree(es.str->data); + MemoryContextSwitchTo(oldcxt); } void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { @@ -259,11 +267,12 @@ void EventSender::executor_before_start(QueryDesc *query_desc, query_desc->instrument_options |= INSTRUMENT_TIMER; if (Config::enable_cdbstats()) { query_desc->instrument_options |= INSTRUMENT_CDB; - - instr_time starttime; - INSTR_TIME_SET_CURRENT(starttime); - query_desc->showstatctx = - cdbexplain_showExecStatsBegin(query_desc, starttime); + if (!query_desc->showstatctx) { + instr_time starttime; + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + cdbexplain_showExecStatsBegin(query_desc, starttime); + } } } } From f0bf05e3231fe45f4886a671c4ff1b469b4bfc93 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 26 Dec 2023 16:36:26 +0300 Subject: [PATCH 057/133] Enable honest query_id collection --- src/EventSender.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index e3be58b194e..5ac84bc423d 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -119,13 +119,7 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { StringInfo norm_plan = 
gen_normplan(qi->plan_text().c_str()); *qi->mutable_template_plan_text() = std::string(norm_plan->data); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - // TODO: For now assume queryid equal to planid, which is wrong. The - // reason for doing so this bug - // https://github.com/greenplum-db/gpdb/pull/15385 (ORCA loses - // pg_stat_statements` queryid during planning phase). Need to fix it - // upstream, cherry-pick and bump gp - // qi->set_query_id(query_desc->plannedstmt->queryId); - qi->set_query_id(qi->plan_id()); + qi->set_query_id(query_desc->plannedstmt->queryId); } } From 7331046c21b2c4827b1312a669e93e481a726e0c Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 24 Apr 2024 12:11:40 +0300 Subject: [PATCH 058/133] Get resource group from current session Instead of getting default resgroup for current role. Reason: resource group can be reassigned in another extension via resgroup_assign_hook. --- src/EventSender.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 5ac84bc423d..21c2e2117a3 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -54,10 +54,7 @@ std::string *get_db_name() { } std::string *get_rg_name() { - auto userId = GetUserId(); - if (!OidIsValid(userId)) - return nullptr; - auto groupId = GetResGroupIdForRole(userId); + auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); if (!OidIsValid(groupId)) return nullptr; char *rgname = GetResGroupNameForId(groupId); From 0411dd0eab04f28e0ae8cc00ddf5d8cf1c742df9 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 17 May 2024 15:55:27 +0300 Subject: [PATCH 059/133] Added support for nested queries - this commit allows tracking or not tracking nested queries - presumably fixes some bad bugs when we get a wrong message sequence. For example, sending q1's END state for q2 instead. 
There have been multiple bugs like this --- protos/yagpcc_metrics.proto | 10 ++- protos/yagpcc_set_service.proto | 32 ++++++-- src/Config.cpp | 7 ++ src/Config.h | 1 + src/EventSender.cpp | 138 ++++++++++++++++++++++++++------ src/EventSender.h | 26 +++++- src/hook_wrappers.cpp | 2 +- 7 files changed, 178 insertions(+), 38 deletions(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index 2d20d3c46d9..68492732ece 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -36,6 +36,11 @@ message QueryInfo { string rsgname = 10; } +message AdditionalQueryInfo { + int64 nested_level = 1; + string error_message = 2; +} + enum PlanGenerator { PLAN_GENERATOR_UNSPECIFIED = 0; @@ -95,7 +100,7 @@ message MetricInstrumentation { uint64 nloops = 2; /* # of run cycles for this node */ uint64 tuplecount = 3; /* Tuples emitted so far this cycle */ double firsttuple = 4; /* Time for first tuple of this cycle */ - double startup = 5; /* Total startup time (in seconds) */ + double startup = 5; /* Total startup time (in seconds) (optimiser's cost estimation) */ double total = 6; /* Total total time (in seconds) */ uint64 shared_blks_hit = 7; /* shared blocks stats*/ uint64 shared_blks_read = 8; @@ -105,12 +110,13 @@ message MetricInstrumentation { uint64 local_blks_read = 12; uint64 local_blks_dirtied = 13; uint64 local_blks_written = 14; - uint64 temp_blks_read = 15; /* temporary tables read stat */ + uint64 temp_blks_read = 15; /* temporary tables read stat */ uint64 temp_blks_written = 16; double blk_read_time = 17; /* measured read/write time */ double blk_write_time = 18; NetworkStat sent = 19; NetworkStat received = 20; + double startup_time = 21; /* real query startup time (planning + queue time) */ } message SpillInfo { diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index e8fc7aaa99d..0b9e34df49d 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -9,17 +9,35 @@ 
package yagpcc; option java_outer_classname = "SegmentYAGPCCAS"; option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/agent_segment;greenplum"; +service SetQueryInfo { + rpc SetMetricPlanNode (SetPlanNodeReq) returns (MetricResponse) {} + + rpc SetMetricQuery (SetQueryReq) returns (MetricResponse) {} +} + +message MetricResponse { + MetricResponseStatusCode error_code = 1; + string error_text = 2; +} + +enum MetricResponseStatusCode { + METRIC_RESPONSE_STATUS_CODE_UNSPECIFIED = 0; + METRIC_RESPONSE_STATUS_CODE_SUCCESS = 1; + METRIC_RESPONSE_STATUS_CODE_ERROR = 2; +} + message SetQueryReq { - QueryStatus query_status = 1; - google.protobuf.Timestamp datetime = 2; - QueryKey query_key = 3; - SegmentKey segment_key = 4; - QueryInfo query_info = 5; - GPMetrics query_metrics = 6; - repeated MetricPlan plan_tree = 7; + QueryStatus query_status = 1; + google.protobuf.Timestamp datetime = 2; + QueryKey query_key = 3; + SegmentKey segment_key = 4; + QueryInfo query_info = 5; + GPMetrics query_metrics = 6; + repeated MetricPlan plan_tree = 7; google.protobuf.Timestamp submit_time = 8; google.protobuf.Timestamp start_time = 9; google.protobuf.Timestamp end_time = 10; + AdditionalQueryInfo add_info = 11; } message SetPlanNodeReq { diff --git a/src/Config.cpp b/src/Config.cpp index c5c2c15f7e9..1bbad9a6ea3 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -13,6 +13,7 @@ static char *guc_uds_path = nullptr; static bool guc_enable_analyze = true; static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; +static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; static std::unique_ptr> ignored_users = nullptr; @@ -36,6 +37,11 @@ void Config::init() { &guc_enable_cdbstats, true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + DefineCustomBoolVariable( + "yagpcc.report_nested_queries", "Collect stats on nested queries", 0LL, + &guc_report_nested_queries, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | 
GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + DefineCustomStringVariable( "yagpcc.ignored_users_list", "Make yagpcc ignore queries issued by given users", 0LL, @@ -47,6 +53,7 @@ std::string Config::uds_path() { return guc_uds_path; } bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } +bool Config::report_nested_queries() { return guc_report_nested_queries; } bool Config::filter_user(const std::string *username) { if (!ignored_users) { diff --git a/src/Config.h b/src/Config.h index 999d0300640..15f425be67c 100644 --- a/src/Config.h +++ b/src/Config.h @@ -10,4 +10,5 @@ class Config { static bool enable_cdbstats(); static bool enable_collector(); static bool filter_user(const std::string *username); + static bool report_nested_queries(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 21c2e2117a3..116805d0646 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -10,6 +10,7 @@ extern "C" { #include "postgres.h" #include "access/hash.h" +#include "access/xact.h" #include "commands/dbcommands.h" #include "commands/explain.h" #include "commands/resgroupcmds.h" @@ -30,11 +31,6 @@ extern "C" { #include "EventSender.h" -#define need_collect() \ - (nesting_level == 0 && gp_command_count != 0 && \ - query_desc->sourceText != nullptr && Config::enable_collector() && \ - !Config::filter_user(get_user_name())) - namespace { std::string *get_user_name() { @@ -146,6 +142,11 @@ void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { } } +void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { + auto aqi = req->mutable_add_info(); + aqi->set_nested_level(nesting_level); +} + void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) { auto instrument = query_desc->planstate->instrument; @@ -210,6 +211,19 @@ yagpcc::SetQueryReq 
create_query_req(QueryDesc *query_desc, return req; } +inline bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { + return (query_desc->gpmon_pkt && + query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || + nesting_level == 0; +} + +inline bool need_collect(QueryDesc *query_desc, int nesting_level) { + return (Config::report_nested_queries() || + is_top_level_query(query_desc, nesting_level)) && + gp_command_count != 0 && query_desc->sourceText != nullptr && + Config::enable_collector() && !Config::filter_user(get_user_name()); +} + } // namespace void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { @@ -223,7 +237,8 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // TODO break; case METRICS_QUERY_SUBMIT: - collect_query_submit(reinterpret_cast(arg)); + // don't collect anything here. We will fake this call in ExecutorStart as + // it really makes no difference. Just complicates things break; case METRICS_QUERY_START: // no-op: executor_after_start is enough @@ -232,10 +247,8 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { case METRICS_QUERY_ERROR: case METRICS_QUERY_CANCELING: case METRICS_QUERY_CANCELED: - collect_query_done(reinterpret_cast(arg), status); - break; case METRICS_INNER_QUERY_DONE: - // TODO + collect_query_done(reinterpret_cast(arg), status); break; default: ereport(FATAL, (errmsg("Unknown query status: %d", status))); @@ -247,9 +260,10 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (!connector) { return; } - if (!need_collect()) { + if (!need_collect(query_desc, nesting_level)) { return; } + collect_query_submit(query_desc); query_start_time = std::chrono::high_resolution_clock::now(); WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { @@ -273,8 +287,10 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { return; } if ((Gp_role == GP_ROLE_DISPATCH || 
Gp_role == GP_ROLE_EXECUTE) && - need_collect()) { - query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + need_collect(query_desc, nesting_level)) { + auto *query = get_query_message(query_desc); + update_query_state(query_desc, query, QueryState::START); + auto query_msg = query->message; *query_msg->mutable_start_time() = current_ts(); set_query_plan(query_msg, query_desc); if (connector->report_query(*query_msg, "started")) { @@ -287,7 +303,7 @@ void EventSender::executor_end(QueryDesc *query_desc) { if (!connector) { return; } - if (!need_collect() || + if (!need_collect(query_desc, nesting_level) || (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { return; } @@ -301,7 +317,13 @@ void EventSender::executor_end(QueryDesc *query_desc) { cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, DISPATCH_WAIT_NONE); }*/ - query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + auto *query = get_query_message(query_desc); + if (query->state == UNKNOWN && !Config::report_nested_queries()) { + // COMMIT/ROLLBACK of a nested query. 
Happens in top-level + return; + } + update_query_state(query_desc, query, QueryState::END); + auto query_msg = query->message; *query_msg->mutable_end_time() = current_ts(); set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); if (connector->report_query(*query_msg, "ended")) { @@ -310,15 +332,15 @@ void EventSender::executor_end(QueryDesc *query_desc) { } void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (connector && need_collect()) { - if (query_msg && query_msg->has_query_key()) { - connector->report_query(*query_msg, "previous query"); - query_msg->Clear(); - } + if (connector && need_collect(query_desc, nesting_level)) { + auto *query = get_query_message(query_desc); + query->state = QueryState::SUBMIT; + auto query_msg = query->message; *query_msg = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); *query_msg->mutable_submit_time() = current_ts(); set_query_info(query_msg, query_desc); + set_qi_nesting_level(query_msg, query_desc->gpmon_pkt->u.qexec.key.tmid); set_query_text(query_msg, query_desc); if (connector->report_query(*query_msg, "submit")) { clear_big_fields(query_msg); @@ -328,11 +350,12 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { void EventSender::collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status) { - if (connector && need_collect()) { + if (connector && need_collect(query_desc, nesting_level)) { yagpcc::QueryStatus query_status; std::string msg; switch (status) { case METRICS_QUERY_DONE: + case METRICS_INNER_QUERY_DONE: query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; msg = "done"; break; @@ -352,16 +375,26 @@ void EventSender::collect_query_done(QueryDesc *query_desc, ereport(FATAL, (errmsg("Unexpected query status in query_done hook: %d", status))); } - query_msg->set_query_status(query_status); - if (connector->report_query(*query_msg, msg)) { - query_msg->Clear(); + auto *query = get_query_message(query_desc); + if (query->state != UNKNOWN || 
Config::report_nested_queries()) { + update_query_state(query_desc, query, QueryState::DONE, + query_status == + yagpcc::QueryStatus::QUERY_STATUS_DONE); + auto query_msg = query->message; + query_msg->set_query_status(query_status); + connector->report_query(*query_msg, msg); + } else { + // otherwise it`s a nested query being committed/aborted at top level + // and we should ignore it } + query_msgs.erase({query_desc->gpmon_pkt->u.qexec.key.ccnt, + query_desc->gpmon_pkt->u.qexec.key.tmid}); + pfree(query_desc->gpmon_pkt); } } EventSender::EventSender() { if (Config::enable_collector() && !Config::filter_user(get_user_name())) { - query_msg = new yagpcc::SetQueryReq(); try { connector = new UDSConnector(); } catch (const std::exception &e) { @@ -371,6 +404,59 @@ EventSender::EventSender() { } EventSender::~EventSender() { - delete query_msg; delete connector; -} \ No newline at end of file + for (auto iter = query_msgs.begin(); iter != query_msgs.end(); ++iter) { + delete iter->second.message; + } +} + +// That's basically a very simplistic state machine to fix or highlight any bugs +// coming from GP +void EventSender::update_query_state(QueryDesc *query_desc, QueryItem *query, + QueryState new_state, bool success) { + if (query->state == UNKNOWN) { + collect_query_submit(query_desc); + } + switch (new_state) { + case QueryState::SUBMIT: + Assert(false); + break; + case QueryState::START: + if (query->state == QueryState::SUBMIT) { + query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + } else { + Assert(false); + } + break; + case QueryState::END: + Assert(query->state == QueryState::START || IsAbortInProgress()); + query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + break; + case QueryState::DONE: + Assert(query->state == QueryState::END || !success); + query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + break; + default: + Assert(false); + } + query->state = new_state; +} + 
+EventSender::QueryItem *EventSender::get_query_message(QueryDesc *query_desc) { + if (query_desc->gpmon_pkt == nullptr || + query_msgs.find({query_desc->gpmon_pkt->u.qexec.key.ccnt, + query_desc->gpmon_pkt->u.qexec.key.tmid}) == + query_msgs.end()) { + query_desc->gpmon_pkt = (gpmon_packet_t *)palloc0(sizeof(gpmon_packet_t)); + query_desc->gpmon_pkt->u.qexec.key.ccnt = gp_command_count; + query_desc->gpmon_pkt->u.qexec.key.tmid = nesting_level; + query_msgs.insert({{gp_command_count, nesting_level}, + QueryItem(UNKNOWN, new yagpcc::SetQueryReq())}); + } + return &query_msgs.at({query_desc->gpmon_pkt->u.qexec.key.ccnt, + query_desc->gpmon_pkt->u.qexec.key.tmid}); +} + +EventSender::QueryItem::QueryItem(EventSender::QueryState st, + yagpcc::SetQueryReq *msg) + : state(st), message(msg) {} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 0e8985873b6..55b8daf9a91 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include extern "C" { @@ -26,9 +26,31 @@ class EventSender { ~EventSender(); private: + enum QueryState { UNKNOWN, SUBMIT, START, END, DONE }; + + struct QueryItem { + QueryState state = QueryState::UNKNOWN; + yagpcc::SetQueryReq *message = nullptr; + + QueryItem(QueryState st, yagpcc::SetQueryReq *msg); + }; + + struct pair_hash { + std::size_t operator()(const std::pair &p) const { + auto h1 = std::hash{}(p.first); + auto h2 = std::hash{}(p.second); + return h1 ^ h2; + } + }; + + void update_query_state(QueryDesc *query_desc, QueryItem *query, + QueryState new_state, bool success = true); + QueryItem *get_query_message(QueryDesc *query_desc); void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); + void cleanup_messages(); + UDSConnector *connector = nullptr; int nesting_level = 0; - yagpcc::SetQueryReq *query_msg; + std::unordered_map, QueryItem, pair_hash> query_msgs; }; \ No newline at end 
of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 37f80385a6b..caf38a10f6e 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -56,9 +56,9 @@ void hooks_init() { void hooks_deinit() { ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorEnd_hook = previous_ExecutorEnd_hook; ExecutorRun_hook = previous_ExecutorRun_hook; ExecutorFinish_hook = previous_ExecutorFinish_hook; - ExecutorEnd_hook = previous_ExecutorEnd_hook; query_info_collect_hook = previous_query_info_collect_hook; stat_statements_parser_deinit(); if (sender) { From 90aa85a4e81eb1ca307dd2fb202b133e748f6449 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 28 May 2024 15:25:35 +0300 Subject: [PATCH 060/133] Trim long text fields --- src/Config.cpp | 11 ++++++++++- src/Config.h | 1 + src/EventSender.cpp | 12 +++++++++--- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/Config.cpp b/src/Config.cpp index 1bbad9a6ea3..c07a6948694 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -1,7 +1,8 @@ #include "Config.h" -#include +#include #include #include +#include extern "C" { #include "postgres.h" @@ -15,6 +16,7 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; +static int guc_max_text_size = 1024; // in KB static std::unique_ptr> ignored_users = nullptr; void Config::init() { @@ -47,6 +49,12 @@ void Config::init() { "Make yagpcc ignore queries issued by given users", 0LL, &guc_ignored_users, "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomIntVariable( + "yagpcc.max_text_size", + "Make yagpcc trim plan and query texts longer than configured size", NULL, + &guc_max_text_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); } std::string Config::uds_path() { return guc_uds_path; } @@ -54,6 +62,7 
@@ bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } bool Config::report_nested_queries() { return guc_report_nested_queries; } +size_t Config::max_text_size() { return guc_max_text_size * 1024; } bool Config::filter_user(const std::string *username) { if (!ignored_users) { diff --git a/src/Config.h b/src/Config.h index 15f425be67c..f806bc0dbf5 100644 --- a/src/Config.h +++ b/src/Config.h @@ -11,4 +11,5 @@ class Config { static bool enable_collector(); static bool filter_user(const std::string *username); static bool report_nested_queries(); + static size_t max_text_size(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 116805d0646..4de5564533b 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -93,11 +93,15 @@ ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { return es; } +inline std::string char_to_trimmed_str(const char *str, size_t len) { + return std::string(str, std::min(len, Config::max_text_size())); +} + void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { MemoryContext oldcxt = MemoryContextSwitchTo(query_desc->estate->es_query_cxt); auto es = get_explain_state(query_desc, true); - *plan_text = std::string(es.str->data, es.str->len); + *plan_text = char_to_trimmed_str(es.str->data, es.str->len); pfree(es.str->data); MemoryContextSwitchTo(oldcxt); } @@ -119,9 +123,11 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); - *qi->mutable_query_text() = query_desc->sourceText; + *qi->mutable_query_text() = char_to_trimmed_str( + query_desc->sourceText, strlen(query_desc->sourceText)); char *norm_query = gen_normquery(query_desc->sourceText); - 
*qi->mutable_template_query_text() = std::string(norm_query); + *qi->mutable_template_query_text() = + char_to_trimmed_str(norm_query, strlen(norm_query)); } } From fe766dbfe0a91ae9dc24f0223c68d2534f41556e Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 28 May 2024 16:19:58 +0300 Subject: [PATCH 061/133] Report error messages for failed queries --- src/EventSender.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 4de5564533b..84f2e41cc5d 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -153,6 +153,12 @@ void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { aqi->set_nested_level(nesting_level); } +void set_qi_error_message(yagpcc::SetQueryReq *req) { + auto aqi = req->mutable_add_info(); + auto error = elog_message(); + *aqi->mutable_error_message() = char_to_trimmed_str(error, strlen(error)); +} + void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) { auto instrument = query_desc->planstate->instrument; @@ -388,6 +394,9 @@ void EventSender::collect_query_done(QueryDesc *query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); auto query_msg = query->message; query_msg->set_query_status(query_status); + if (status == METRICS_QUERY_ERROR) { + set_qi_error_message(query_msg); + } connector->report_query(*query_msg, msg); } else { // otherwise it`s a nested query being committed/aborted at top level From cedcb73fdbac0fb653d95065c48f1c78b84d02c5 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 30 May 2024 15:09:40 +0300 Subject: [PATCH 062/133] Don't track the CANCELLING event --- src/EventSender.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 84f2e41cc5d..6faede07376 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -255,9 +255,13 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { case 
METRICS_QUERY_START: // no-op: executor_after_start is enough break; + case METRICS_QUERY_CANCELING: + // it appears we're unly interested in the actual CANCELED event. + // for now we will ignore CANCELING state unless otherwise requested from + // end users + break; case METRICS_QUERY_DONE: case METRICS_QUERY_ERROR: - case METRICS_QUERY_CANCELING: case METRICS_QUERY_CANCELED: case METRICS_INNER_QUERY_DONE: collect_query_done(reinterpret_cast(arg), status); @@ -376,6 +380,9 @@ void EventSender::collect_query_done(QueryDesc *query_desc, msg = "error"; break; case METRICS_QUERY_CANCELING: + // at the moment we don't track this event, but I`ll leave this code here + // just in case + Assert(false); query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; msg = "cancelling"; break; From 1fe0b823098dd833fd5776435054a8a7f8322fc2 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 3 Jun 2024 17:09:54 +0300 Subject: [PATCH 063/133] Properly send runtime metrics on CANCEL/ERROR --- src/EventSender.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 6faede07376..8d202991986 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -395,6 +395,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, status))); } auto *query = get_query_message(query_desc); + auto prev_state = query->state; if (query->state != UNKNOWN || Config::report_nested_queries()) { update_query_state(query_desc, query, QueryState::DONE, query_status == @@ -404,6 +405,11 @@ void EventSender::collect_query_done(QueryDesc *query_desc, if (status == METRICS_QUERY_ERROR) { set_qi_error_message(query_msg); } + if (prev_state == START) { + // We've missed ExecutorEnd call due to query cancel or error. 
It's + // fine, but now we need to collect and report execution stats + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); + } connector->report_query(*query_msg, msg); } else { // otherwise it`s a nested query being committed/aborted at top level @@ -451,7 +457,9 @@ void EventSender::update_query_state(QueryDesc *query_desc, QueryItem *query, } break; case QueryState::END: - Assert(query->state == QueryState::START || IsAbortInProgress()); + // Example of below assert triggering: CURSOR closes before ever being + // executed Assert(query->state == QueryState::START || + // IsAbortInProgress()); query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); break; case QueryState::DONE: From 0a866ba1852214bbd3e1be6a67fc47aa3b9d5354 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 3 Jun 2024 18:22:00 +0300 Subject: [PATCH 064/133] Allow anyone to disable report_nested_queries --- src/Config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Config.cpp b/src/Config.cpp index c07a6948694..42fa4b2fb12 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -41,7 +41,7 @@ void Config::init() { DefineCustomBoolVariable( "yagpcc.report_nested_queries", "Collect stats on nested queries", 0LL, - &guc_report_nested_queries, true, PGC_SUSET, + &guc_report_nested_queries, true, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); DefineCustomStringVariable( From 861afdc1e042f39184fd205ccce498e67a5452b2 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 13 Jun 2024 10:59:46 +0300 Subject: [PATCH 065/133] Diff stats between executor start and end The reason is that for nested statements any incremental stats (spills, proc stats, execution time) have to be calculated separately. 
--- src/EventSender.cpp | 21 +++++++++++---------- src/ProcStats.cpp | 8 +++----- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 8d202991986..60f21818d00 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,7 +1,6 @@ #include "Config.h" #include "ProcStats.h" #include "UDSConnector.h" -#include #include #define typeid __typeid @@ -198,19 +197,17 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, } } -decltype(std::chrono::high_resolution_clock::now()) query_start_time; - void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { if (query_desc->planstate && query_desc->planstate->instrument) { set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); } fill_self_stats(metrics->mutable_systemstat()); - std::chrono::duration elapsed_seconds = - std::chrono::high_resolution_clock::now() - query_start_time; metrics->mutable_systemstat()->set_runningtimeseconds( - elapsed_seconds.count()); - metrics->mutable_spill()->set_filecount(WorkfileTotalFilesCreated()); - metrics->mutable_spill()->set_totalbytes(WorkfileTotalBytesWritten()); + time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); + metrics->mutable_spill()->set_filecount( + WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); + metrics->mutable_spill()->set_totalbytes( + WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); } yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, @@ -280,8 +277,6 @@ void EventSender::executor_before_start(QueryDesc *query_desc, return; } collect_query_submit(query_desc); - query_start_time = std::chrono::high_resolution_clock::now(); - WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; @@ -309,9 +304,12 @@ void EventSender::executor_after_start(QueryDesc 
*query_desc, int /* eflags*/) { auto query_msg = query->message; *query_msg->mutable_start_time() = current_ts(); set_query_plan(query_msg, query_desc); + yagpcc::GPMetrics stats; + std::swap(stats, *query_msg->mutable_query_metrics()); if (connector->report_query(*query_msg, "started")) { clear_big_fields(query_msg); } + std::swap(stats, *query_msg->mutable_query_metrics()); } } @@ -361,6 +359,9 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { if (connector->report_query(*query_msg, "submit")) { clear_big_fields(query_msg); } + // take initial metrics snapshot so that we can safely take diff afterwards + // in END or DONE events. + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); } } diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index 668173a0f7e..a557a20cbb0 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -92,9 +92,7 @@ void fill_status_stats(yagpcc::SystemStat *stats) { } // namespace void fill_self_stats(yagpcc::SystemStat *stats) { - static yagpcc::SystemStat prev_stats; - fill_io_stats(&prev_stats); - fill_cpu_stats(&prev_stats); - fill_status_stats(&prev_stats); - *stats = prev_stats; + fill_io_stats(stats); + fill_cpu_stats(stats); + fill_status_stats(stats); } \ No newline at end of file From cbaf85702769177f19cc1290849dc181e926f52d Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 7 Aug 2024 14:28:57 +0300 Subject: [PATCH 066/133] Fix try/catch block when calling cpp code --- src/hook_wrappers.cpp | 44 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index caf38a10f6e..93faaa0bf8f 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -38,6 +38,15 @@ static inline EventSender *get_sender() { return sender; } +template +R cpp_call(T *obj, R (T::*func)(Args...), Args... 
args) { + try { + return (obj->*func)(args...); + } catch (const std::exception &e) { + ereport(FATAL, (errmsg("Unexpected exception in yagpcc %s", e.what()))); + } +} + void hooks_init() { Config::init(); YagpStat::init(); @@ -68,27 +77,15 @@ void hooks_deinit() { } void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { - PG_TRY(); - { get_sender()->executor_before_start(query_desc, eflags); } - PG_CATCH(); - { - ereport(WARNING, - (errmsg("EventSender failed in ya_ExecutorBeforeStart_hook"))); - } - PG_END_TRY(); + cpp_call(get_sender(), &EventSender::executor_before_start, query_desc, + eflags); if (previous_ExecutorStart_hook) { (*previous_ExecutorStart_hook)(query_desc, eflags); } else { standard_ExecutorStart(query_desc, eflags); } - PG_TRY(); - { get_sender()->executor_after_start(query_desc, eflags); } - PG_CATCH(); - { - ereport(WARNING, - (errmsg("EventSender failed in ya_ExecutorAfterStart_hook"))); - } - PG_END_TRY(); + cpp_call(get_sender(), &EventSender::executor_after_start, query_desc, + eflags); } void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, @@ -129,11 +126,7 @@ void ya_ExecutorFinish_hook(QueryDesc *query_desc) { } void ya_ExecutorEnd_hook(QueryDesc *query_desc) { - PG_TRY(); - { get_sender()->executor_end(query_desc); } - PG_CATCH(); - { ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorEnd_hook"))); } - PG_END_TRY(); + cpp_call(get_sender(), &EventSender::executor_end, query_desc); if (previous_ExecutorEnd_hook) { (*previous_ExecutorEnd_hook)(query_desc); } else { @@ -142,14 +135,7 @@ void ya_ExecutorEnd_hook(QueryDesc *query_desc) { } void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { - PG_TRY(); - { get_sender()->query_metrics_collect(status, arg); } - PG_CATCH(); - { - ereport(WARNING, - (errmsg("EventSender failed in ya_query_info_collect_hook"))); - } - PG_END_TRY(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, status, arg); if 
(previous_query_info_collect_hook) { (*previous_query_info_collect_hook)(status, arg); } From bcc0e29d36eaefc1b501922efcc3aead9501274c Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 12 Sep 2024 16:15:26 +0300 Subject: [PATCH 067/133] Don't normalize trimmed plans --- src/EventSender.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 60f21818d00..c7f08d8e1f0 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -96,26 +96,24 @@ inline std::string char_to_trimmed_str(const char *str, size_t len) { return std::string(str, std::min(len, Config::max_text_size())); } -void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { - MemoryContext oldcxt = - MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - auto es = get_explain_state(query_desc, true); - *plan_text = char_to_trimmed_str(es.str->data, es.str->len); - pfree(es.str->data); - MemoryContextSwitchTo(oldcxt); -} - void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { auto qi = req->mutable_query_info(); qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - set_plan_text(qi->mutable_plan_text(), query_desc); - StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); - *qi->mutable_template_plan_text() = std::string(norm_plan->data); + MemoryContext oldcxt = + MemoryContextSwitchTo(query_desc->estate->es_query_cxt); + auto es = get_explain_state(query_desc, true); + MemoryContextSwitchTo(oldcxt); + *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len); + StringInfo norm_plan = gen_normplan(es.str->data); + *qi->mutable_template_plan_text() = + char_to_trimmed_str(norm_plan->data, norm_plan->len); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); qi->set_query_id(query_desc->plannedstmt->queryId); + pfree(es.str->data); + pfree(norm_plan->data); } } From ef5b2a61f962773790b732516d67949e7208c97c Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 7 Oct 2024 16:24:07 +0300 Subject: [PATCH 068/133] Clean up forgotten text fields --- src/EventSender.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index c7f08d8e1f0..e3cb0bd67d6 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -132,7 +132,9 @@ void clear_big_fields(yagpcc::SetQueryReq *req) { if (Gp_session_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->clear_plan_text(); + qi->clear_template_plan_text(); qi->clear_query_text(); + qi->clear_template_query_text(); } } From ed07cc758ddd8a6729dcd2210564b857f0ee6c4f Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 29 Oct 2024 17:42:04 +0300 Subject: [PATCH 069/133] [MDB-31938] Send nested queries only from master --- src/EventSender.cpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index e3cb0bd67d6..7f0b841a1d5 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -32,6 +32,31 @@ 
extern "C" { namespace { +/** + * Things get tricky with nested queries. + * a) A nested query on master is a real query optimized and executed from + * master. An example would be `select some_insert_function();`, where + * some_insert_function does something like `insert into tbl values (1)`. Master + * will create two statements. Outer select statement and inner insert statement + * with nesting level 1. + * For segments both statements are top-level statements with nesting level 0. + * b) A nested query on segment is something executed as sub-statement on + * segment. An example would be `select a from tbl where is_good_value(b);`. In + * this case master will issue one top-level statement, but segments will change + * contexts for UDF execution and execute is_good_value(b) once for each tuple + * as a nested query. Creating massive load on gpcc agent. + * + * Hence, here is a decision: + * 1) ignore all queries that are nested on segments + * 2) record (if enabled) all queries that are nested on master + * NOTE: The truth is, we can't really ignore nested master queries, because + * segment sees those as top-level. We will deprecate disabling nested queries + * soon. 
+ */ +bool need_report_nested_query() { + return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; +} + std::string *get_user_name() { const char *username = GetConfigOption("session_authorization", false, false); // username is not to be freed @@ -227,7 +252,7 @@ inline bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { } inline bool need_collect(QueryDesc *query_desc, int nesting_level) { - return (Config::report_nested_queries() || + return (need_report_nested_query() || is_top_level_query(query_desc, nesting_level)) && gp_command_count != 0 && query_desc->sourceText != nullptr && Config::enable_collector() && !Config::filter_user(get_user_name()); @@ -332,7 +357,7 @@ void EventSender::executor_end(QueryDesc *query_desc) { DISPATCH_WAIT_NONE); }*/ auto *query = get_query_message(query_desc); - if (query->state == UNKNOWN && !Config::report_nested_queries()) { + if (query->state == UNKNOWN && !need_report_nested_query()) { // COMMIT/ROLLBACK of a nested query. 
Happens in top-level return; } From d6823d5f137c9688fa8aa1bc4357a159edce7f02 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 30 Oct 2024 15:31:17 +0300 Subject: [PATCH 070/133] [MDB-31936] Add slice info --- protos/yagpcc_metrics.proto | 3 +++ src/EventSender.cpp | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index 68492732ece..a9afb7078c6 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -39,6 +39,7 @@ message QueryInfo { message AdditionalQueryInfo { int64 nested_level = 1; string error_message = 2; + int64 slice_id = 3; } enum PlanGenerator @@ -117,6 +118,8 @@ message MetricInstrumentation { NetworkStat sent = 19; NetworkStat received = 20; double startup_time = 21; /* real query startup time (planning + queue time) */ + uint64 inherited_calls = 22; /* the number of executed sub-queries */ + uint64 inherited_time = 23; /* total time spent on inherited execution */ } message SpillInfo { diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 7f0b841a1d5..483b74c9fcd 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -177,6 +177,11 @@ void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { aqi->set_nested_level(nesting_level); } +void set_qi_slice_id(yagpcc::SetQueryReq *req) { + auto aqi = req->mutable_add_info(); + aqi->set_slice_id(currentSliceId); +} + void set_qi_error_message(yagpcc::SetQueryReq *req) { auto aqi = req->mutable_add_info(); auto error = elog_message(); @@ -380,6 +385,7 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { *query_msg->mutable_submit_time() = current_ts(); set_query_info(query_msg, query_desc); set_qi_nesting_level(query_msg, query_desc->gpmon_pkt->u.qexec.key.tmid); + set_qi_slice_id(query_msg); set_query_text(query_msg, query_desc); if (connector->report_query(*query_msg, "submit")) { clear_big_fields(query_msg); From 385769586190ee8fe2cc54e30c6f3e29b7a9d35d Mon Sep 17 00:00:00 
2001 From: Maxim Smyatkin Date: Tue, 5 Nov 2024 19:01:24 +0300 Subject: [PATCH 071/133] Send nested queries summary stats from segments --- protos/yagpcc_metrics.proto | 2 +- src/EventSender.cpp | 288 +++++++++++++++++++++--------------- src/EventSender.h | 3 + 3 files changed, 170 insertions(+), 123 deletions(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index a9afb7078c6..fc85386c6b0 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -119,7 +119,7 @@ message MetricInstrumentation { NetworkStat received = 20; double startup_time = 21; /* real query startup time (planning + queue time) */ uint64 inherited_calls = 22; /* the number of executed sub-queries */ - uint64 inherited_time = 23; /* total time spend on inherited execution */ + double inherited_time = 23; /* total time spend on inherited execution */ } message SpillInfo { diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 483b74c9fcd..7d2d5a1a2c2 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -32,6 +32,32 @@ extern "C" { namespace { +std::string *get_user_name() { + const char *username = GetConfigOption("session_authorization", false, false); + // username is not to be freed + return username ? new std::string(username) : nullptr; +} + +std::string *get_db_name() { + char *dbname = get_database_name(MyDatabaseId); + std::string *result = nullptr; + if (dbname) { + result = new std::string(dbname); + pfree(dbname); + } + return result; +} + +std::string *get_rg_name() { + auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); + if (!OidIsValid(groupId)) + return nullptr; + char *rgname = GetResGroupNameForId(groupId); + if (rgname == nullptr) + return nullptr; + return new std::string(rgname); +} + /** * Things get tricky with nested queries. 
* a) A nested query on master is a real query optimized and executed from @@ -50,37 +76,33 @@ namespace { * 1) ignore all queries that are nested on segments * 2) record (if enabled) all queries that are nested on master * NODE: The truth is, we can't really ignore nested master queries, because - * segment sees those as top-level. We will deprecate disabling nested queries - * soon. + * segment sees those as top-level. */ -bool need_report_nested_query() { - return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; + +inline bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { + return (query_desc->gpmon_pkt && + query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || + nesting_level == 0; } -std::string *get_user_name() { - const char *username = GetConfigOption("session_authorization", false, false); - // username is not to be freed - return username ? new std::string(username) : nullptr; +inline bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { + return (Gp_session_role == GP_ROLE_DISPATCH && + Config::report_nested_queries()) || + is_top_level_query(query_desc, nesting_level); } -std::string *get_db_name() { - char *dbname = get_database_name(MyDatabaseId); - std::string *result = nullptr; - if (dbname) { - result = new std::string(dbname); - pfree(dbname); - } - return result; +bool need_report_nested_query() { + return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; } -std::string *get_rg_name() { - auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); - if (!OidIsValid(groupId)) - return nullptr; - char *rgname = GetResGroupNameForId(groupId); - if (rgname == nullptr) - return nullptr; - return new std::string(rgname); +inline bool filter_query(QueryDesc *query_desc) { + return gp_command_count == 0 || query_desc->sourceText == nullptr || + !Config::enable_collector() || Config::filter_user(get_user_name()); +} + +inline bool need_collect(QueryDesc *query_desc, int 
nesting_level) { + return !filter_query(query_desc) && + nesting_is_valid(query_desc, nesting_level); } google::protobuf::Timestamp current_ts() { @@ -189,7 +211,8 @@ void set_qi_error_message(yagpcc::SetQueryReq *req) { } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, - QueryDesc *query_desc) { + QueryDesc *query_desc, int nested_calls, + double nested_time) { auto instrument = query_desc->planstate->instrument; if (instrument) { metrics->set_ntuples(instrument->ntuples); @@ -225,11 +248,15 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, mlstate->stat_tuple_bytes_recvd); metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); } + metrics->set_inherited_calls(nested_calls); + metrics->set_inherited_time(nested_time); } -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time) { if (query_desc->planstate && query_desc->planstate->instrument) { - set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); + set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc, + nested_calls, nested_time); } fill_self_stats(metrics->mutable_systemstat()); metrics->mutable_systemstat()->set_runningtimeseconds( @@ -250,17 +277,8 @@ yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, return req; } -inline bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { - return (query_desc->gpmon_pkt && - query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || - nesting_level == 0; -} - -inline bool need_collect(QueryDesc *query_desc, int nesting_level) { - return (need_report_nested_query() || - is_top_level_query(query_desc, nesting_level)) && - gp_command_count != 0 && query_desc->sourceText != nullptr && - Config::enable_collector() && !Config::filter_user(get_user_name()); +double protots_to_double(const google::protobuf::Timestamp &ts) { + 
return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; } } // namespace @@ -303,6 +321,10 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (!connector) { return; } + if (is_top_level_query(query_desc, nesting_level)) { + nested_timing = 0; + nested_calls = 0; + } if (!need_collect(query_desc, nesting_level)) { return; } @@ -327,51 +349,53 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { if (!connector) { return; } - if ((Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) && - need_collect(query_desc, nesting_level)) { - auto *query = get_query_message(query_desc); - update_query_state(query_desc, query, QueryState::START); - auto query_msg = query->message; - *query_msg->mutable_start_time() = current_ts(); - set_query_plan(query_msg, query_desc); - yagpcc::GPMetrics stats; - std::swap(stats, *query_msg->mutable_query_metrics()); - if (connector->report_query(*query_msg, "started")) { - clear_big_fields(query_msg); + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + if (!filter_query(query_desc)) { + auto *query = get_query_message(query_desc); + auto query_msg = query->message; + *query_msg->mutable_start_time() = current_ts(); + if (!nesting_is_valid(query_desc, nesting_level)) { + return; + } + update_query_state(query_desc, query, QueryState::START); + set_query_plan(query_msg, query_desc); + yagpcc::GPMetrics stats; + std::swap(stats, *query_msg->mutable_query_metrics()); + if (connector->report_query(*query_msg, "started")) { + clear_big_fields(query_msg); + } + std::swap(stats, *query_msg->mutable_query_metrics()); } - std::swap(stats, *query_msg->mutable_query_metrics()); } } void EventSender::executor_end(QueryDesc *query_desc) { - if (!connector) { - return; - } - if (!need_collect(query_desc, nesting_level) || + if (!connector || (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { return; } - /* TODO: when querying via CURSOR this call freezes. 
Need to investigate. - To reproduce - uncomment it and run installchecks. It will freeze around - join test. Needs investigation - - if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && - Config::enable_cdbstats() && query_desc->estate->dispatcherState && - query_desc->estate->dispatcherState->primaryResults) { - cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, - DISPATCH_WAIT_NONE); - }*/ - auto *query = get_query_message(query_desc); - if (query->state == UNKNOWN && !need_report_nested_query()) { - // COMMIT/ROLLBACK of a nested query. Happens in top-level - return; - } - update_query_state(query_desc, query, QueryState::END); - auto query_msg = query->message; - *query_msg->mutable_end_time() = current_ts(); - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); - if (connector->report_query(*query_msg, "ended")) { - clear_big_fields(query_msg); + if (!filter_query(query_desc)) { + auto *query = get_query_message(query_desc); + auto query_msg = query->message; + *query_msg->mutable_end_time() = current_ts(); + if (nesting_is_valid(query_desc, nesting_level)) { + if (query->state == UNKNOWN && + // Yet another greenplum weirdness: that's actually a nested query + // which is being committed/rolled back. Treat it accordingly. + !need_report_nested_query()) { + return; + } + update_query_state(query_desc, query, QueryState::END); + if (is_top_level_query(query_desc, nesting_level)) { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, + nested_calls, nested_timing); + } else { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + } + if (connector->report_query(*query_msg, "ended")) { + clear_big_fields(query_msg); + } + } } } @@ -392,60 +416,63 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { } // take initial metrics snapshot so that we can safely take diff afterwards // in END or DONE events. 
- set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); } } void EventSender::collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status) { - if (connector && need_collect(query_desc, nesting_level)) { - yagpcc::QueryStatus query_status; - std::string msg; - switch (status) { - case METRICS_QUERY_DONE: - case METRICS_INNER_QUERY_DONE: - query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; - msg = "done"; - break; - case METRICS_QUERY_ERROR: - query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; - msg = "error"; - break; - case METRICS_QUERY_CANCELING: - // at the moment we don't track this event, but I`ll leave this code here - // just in case - Assert(false); - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; - msg = "cancelling"; - break; - case METRICS_QUERY_CANCELED: - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; - msg = "cancelled"; - break; - default: - ereport(FATAL, (errmsg("Unexpected query status in query_done hook: %d", - status))); - } + if (connector && !filter_query(query_desc)) { auto *query = get_query_message(query_desc); - auto prev_state = query->state; - if (query->state != UNKNOWN || Config::report_nested_queries()) { - update_query_state(query_desc, query, QueryState::DONE, - query_status == - yagpcc::QueryStatus::QUERY_STATUS_DONE); - auto query_msg = query->message; - query_msg->set_query_status(query_status); - if (status == METRICS_QUERY_ERROR) { - set_qi_error_message(query_msg); - } - if (prev_state == START) { - // We've missed ExecutorEnd call due to query cancel or error. 
It's - // fine, but now we need to collect and report execution stats - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); + if (query->state != UNKNOWN || need_report_nested_query()) { + if (nesting_is_valid(query_desc, nesting_level)) { + yagpcc::QueryStatus query_status; + std::string msg; + switch (status) { + case METRICS_QUERY_DONE: + case METRICS_INNER_QUERY_DONE: + query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; + msg = "done"; + break; + case METRICS_QUERY_ERROR: + query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; + msg = "error"; + break; + case METRICS_QUERY_CANCELING: + // at the moment we don't track this event, but I`ll leave this code + // here just in case + Assert(false); + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; + msg = "cancelling"; + break; + case METRICS_QUERY_CANCELED: + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; + msg = "cancelled"; + break; + default: + ereport(FATAL, + (errmsg("Unexpected query status in query_done hook: %d", + status))); + } + auto prev_state = query->state; + update_query_state(query_desc, query, QueryState::DONE, + query_status == + yagpcc::QueryStatus::QUERY_STATUS_DONE); + auto query_msg = query->message; + query_msg->set_query_status(query_status); + if (status == METRICS_QUERY_ERROR) { + set_qi_error_message(query_msg); + } + if (prev_state == START) { + // We've missed ExecutorEnd call due to query cancel or error. 
It's + // fine, but now we need to collect and report execution stats + *query_msg->mutable_end_time() = current_ts(); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, + nested_calls, nested_timing); + } + connector->report_query(*query_msg, msg); } - connector->report_query(*query_msg, msg); - } else { - // otherwise it`s a nested query being committed/aborted at top level - // and we should ignore it + update_nested_counters(query_desc); } query_msgs.erase({query_desc->gpmon_pkt->u.qexec.key.ccnt, query_desc->gpmon_pkt->u.qexec.key.tmid}); @@ -519,6 +546,23 @@ EventSender::QueryItem *EventSender::get_query_message(QueryDesc *query_desc) { query_desc->gpmon_pkt->u.qexec.key.tmid}); } +void EventSender::update_nested_counters(QueryDesc *query_desc) { + if (!is_top_level_query(query_desc, nesting_level)) { + auto query_msg = get_query_message(query_desc); + nested_calls++; + double end_time = protots_to_double(query_msg->message->end_time()); + double start_time = protots_to_double(query_msg->message->start_time()); + if (end_time >= start_time) { + nested_timing += end_time - start_time; + } else { + ereport(WARNING, (errmsg("YAGPCC query start_time > end_time (%f > %f)", + start_time, end_time))); + ereport(DEBUG3, + (errmsg("YAGPCC nested query text %s", query_desc->sourceText))); + } + } +} + EventSender::QueryItem::QueryItem(EventSender::QueryState st, yagpcc::SetQueryReq *msg) : state(st), message(msg) {} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 55b8daf9a91..9470cbf1f98 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -49,8 +49,11 @@ class EventSender { void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); void cleanup_messages(); + void update_nested_counters(QueryDesc *query_desc); UDSConnector *connector = nullptr; int nesting_level = 0; + int64_t nested_calls = 0; + double nested_timing = 0; std::unordered_map, 
QueryItem, pair_hash> query_msgs; }; \ No newline at end of file From 18b981d2848b2a6b1a57a30c079a9e06cea8bbd7 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 7 Nov 2024 13:09:44 +0300 Subject: [PATCH 072/133] [Refactoring] Split EventSender into submodules --- Makefile | 2 + src/EventSender.cpp | 275 +------------------------------------------- src/PgUtils.cpp | 94 +++++++++++++++ src/PgUtils.h | 16 +++ src/ProtoUtils.cpp | 185 +++++++++++++++++++++++++++++ src/ProtoUtils.h | 16 +++ 6 files changed, 317 insertions(+), 271 deletions(-) create mode 100644 src/PgUtils.cpp create mode 100644 src/PgUtils.h create mode 100644 src/ProtoUtils.cpp create mode 100644 src/ProtoUtils.h diff --git a/Makefile b/Makefile index cee4c05c2e2..88c76bbda76 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,8 @@ OBJS := $(PG_STAT_OBJS) \ $(PROTO_GEN_OBJECTS) \ $(SRC_DIR)/ProcStats.o \ $(SRC_DIR)/Config.o \ + $(SRC_DIR)/PgUtils.o \ + $(SRC_DIR)/ProtoUtils.o \ $(SRC_DIR)/YagpStat.o \ $(SRC_DIR)/UDSConnector.o \ $(SRC_DIR)/EventSender.o \ diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 7d2d5a1a2c2..cdb21ef7aa6 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,287 +1,21 @@ #include "Config.h" -#include "ProcStats.h" #include "UDSConnector.h" -#include -#define typeid __typeid -#define operator __operator extern "C" { #include "postgres.h" #include "access/hash.h" -#include "access/xact.h" -#include "commands/dbcommands.h" -#include "commands/explain.h" -#include "commands/resgroupcmds.h" #include "executor/executor.h" #include "utils/elog.h" -#include "utils/workfile_mgr.h" #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" -#include "cdb/cdbinterconnect.h" #include "cdb/cdbvars.h" - -#include "stat_statements_parser/pg_stat_statements_ya_parser.h" -#include "tcop/utility.h" } -#undef typeid -#undef operator #include "EventSender.h" - -namespace { - -std::string *get_user_name() { - const char *username = GetConfigOption("session_authorization", 
false, false); - // username is not to be freed - return username ? new std::string(username) : nullptr; -} - -std::string *get_db_name() { - char *dbname = get_database_name(MyDatabaseId); - std::string *result = nullptr; - if (dbname) { - result = new std::string(dbname); - pfree(dbname); - } - return result; -} - -std::string *get_rg_name() { - auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); - if (!OidIsValid(groupId)) - return nullptr; - char *rgname = GetResGroupNameForId(groupId); - if (rgname == nullptr) - return nullptr; - return new std::string(rgname); -} - -/** - * Things get tricky with nested queries. - * a) A nested query on master is a real query optimized and executed from - * master. An example would be `select some_insert_function();`, where - * some_insert_function does something like `insert into tbl values (1)`. Master - * will create two statements. Outer select statement and inner insert statement - * with nesting level 1. - * For segments both statements are top-level statements with nesting level 0. - * b) A nested query on segment is something executed as sub-statement on - * segment. An example would be `select a from tbl where is_good_value(b);`. In - * this case master will issue one top-level statement, but segments will change - * contexts for UDF execution and execute is_good_value(b) once for each tuple - * as a nested query. Creating massive load on gpcc agent. - * - * Hence, here is a decision: - * 1) ignore all queries that are nested on segments - * 2) record (if enabled) all queries that are nested on master - * NODE: The truth is, we can't really ignore nested master queries, because - * segment sees those as top-level. 
- */ - -inline bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { - return (query_desc->gpmon_pkt && - query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || - nesting_level == 0; -} - -inline bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { - return (Gp_session_role == GP_ROLE_DISPATCH && - Config::report_nested_queries()) || - is_top_level_query(query_desc, nesting_level); -} - -bool need_report_nested_query() { - return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; -} - -inline bool filter_query(QueryDesc *query_desc) { - return gp_command_count == 0 || query_desc->sourceText == nullptr || - !Config::enable_collector() || Config::filter_user(get_user_name()); -} - -inline bool need_collect(QueryDesc *query_desc, int nesting_level) { - return !filter_query(query_desc) && - nesting_is_valid(query_desc, nesting_level); -} - -google::protobuf::Timestamp current_ts() { - google::protobuf::Timestamp current_ts; - struct timeval tv; - gettimeofday(&tv, nullptr); - current_ts.set_seconds(tv.tv_sec); - current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); - return current_ts; -} - -void set_query_key(yagpcc::QueryKey *key, QueryDesc *query_desc) { - key->set_ccnt(gp_command_count); - key->set_ssid(gp_session_id); - int32 tmid = 0; - gpmon_gettmid(&tmid); - key->set_tmid(tmid); -} - -void set_segment_key(yagpcc::SegmentKey *key, QueryDesc *query_desc) { - key->set_dbid(GpIdentity.dbid); - key->set_segindex(GpIdentity.segindex); -} - -ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { - ExplainState es; - ExplainInitState(&es); - es.costs = costs; - es.verbose = true; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); - ExplainPrintPlan(&es, query_desc); - ExplainEndOutput(&es); - return es; -} - -inline std::string char_to_trimmed_str(const char *str, size_t len) { - return std::string(str, std::min(len, Config::max_text_size())); -} - -void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc 
*query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { - auto qi = req->mutable_query_info(); - qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER - ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER - : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - MemoryContext oldcxt = - MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - auto es = get_explain_state(query_desc, true); - MemoryContextSwitchTo(oldcxt); - *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len); - StringInfo norm_plan = gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = - char_to_trimmed_str(norm_plan->data, norm_plan->len); - qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - qi->set_query_id(query_desc->plannedstmt->queryId); - pfree(es.str->data); - pfree(norm_plan->data); - } -} - -void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { - auto qi = req->mutable_query_info(); - *qi->mutable_query_text() = char_to_trimmed_str( - query_desc->sourceText, strlen(query_desc->sourceText)); - char *norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = - char_to_trimmed_str(norm_query, strlen(norm_query)); - } -} - -void clear_big_fields(yagpcc::SetQueryReq *req) { - if (Gp_session_role == GP_ROLE_DISPATCH) { - auto qi = req->mutable_query_info(); - qi->clear_plan_text(); - qi->clear_template_plan_text(); - qi->clear_query_text(); - qi->clear_template_query_text(); - } -} - -void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH) { - auto qi = req->mutable_query_info(); - qi->set_allocated_username(get_user_name()); - qi->set_allocated_databasename(get_db_name()); - qi->set_allocated_rsgname(get_rg_name()); - } -} - -void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { - auto aqi = 
req->mutable_add_info(); - aqi->set_nested_level(nesting_level); -} - -void set_qi_slice_id(yagpcc::SetQueryReq *req) { - auto aqi = req->mutable_add_info(); - aqi->set_slice_id(currentSliceId); -} - -void set_qi_error_message(yagpcc::SetQueryReq *req) { - auto aqi = req->mutable_add_info(); - auto error = elog_message(); - *aqi->mutable_error_message() = char_to_trimmed_str(error, strlen(error)); -} - -void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, - QueryDesc *query_desc, int nested_calls, - double nested_time) { - auto instrument = query_desc->planstate->instrument; - if (instrument) { - metrics->set_ntuples(instrument->ntuples); - metrics->set_nloops(instrument->nloops); - metrics->set_tuplecount(instrument->tuplecount); - metrics->set_firsttuple(instrument->firsttuple); - metrics->set_startup(instrument->startup); - metrics->set_total(instrument->total); - auto &buffusage = instrument->bufusage; - metrics->set_shared_blks_hit(buffusage.shared_blks_hit); - metrics->set_shared_blks_read(buffusage.shared_blks_read); - metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); - metrics->set_shared_blks_written(buffusage.shared_blks_written); - metrics->set_local_blks_hit(buffusage.local_blks_hit); - metrics->set_local_blks_read(buffusage.local_blks_read); - metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); - metrics->set_local_blks_written(buffusage.local_blks_written); - metrics->set_temp_blks_read(buffusage.temp_blks_read); - metrics->set_temp_blks_written(buffusage.temp_blks_written); - metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); - metrics->set_blk_write_time( - INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); - } - if (query_desc->estate && query_desc->estate->motionlayer_context) { - MotionLayerState *mlstate = - (MotionLayerState *)query_desc->estate->motionlayer_context; - metrics->mutable_sent()->set_total_bytes(mlstate->stat_total_bytes_sent); - 
metrics->mutable_sent()->set_tuple_bytes(mlstate->stat_tuple_bytes_sent); - metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); - metrics->mutable_received()->set_total_bytes( - mlstate->stat_total_bytes_recvd); - metrics->mutable_received()->set_tuple_bytes( - mlstate->stat_tuple_bytes_recvd); - metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); - } - metrics->set_inherited_calls(nested_calls); - metrics->set_inherited_time(nested_time); -} - -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, - int nested_calls, double nested_time) { - if (query_desc->planstate && query_desc->planstate->instrument) { - set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc, - nested_calls, nested_time); - } - fill_self_stats(metrics->mutable_systemstat()); - metrics->mutable_systemstat()->set_runningtimeseconds( - time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); - metrics->mutable_spill()->set_filecount( - WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); - metrics->mutable_spill()->set_totalbytes( - WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); -} - -yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, - yagpcc::QueryStatus status) { - yagpcc::SetQueryReq req; - req.set_query_status(status); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - set_segment_key(req.mutable_segment_key(), query_desc); - return req; -} - -double protots_to_double(const google::protobuf::Timestamp &ts) { - return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; -} - -} // namespace +#include "PgUtils.h" +#include "ProtoUtils.h" void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { @@ -404,10 +138,9 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { auto *query = get_query_message(query_desc); 
query->state = QueryState::SUBMIT; auto query_msg = query->message; - *query_msg = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + *query_msg = create_query_req(yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); *query_msg->mutable_submit_time() = current_ts(); - set_query_info(query_msg, query_desc); + set_query_info(query_msg); set_qi_nesting_level(query_msg, query_desc->gpmon_pkt->u.qexec.key.tmid); set_qi_slice_id(query_msg); set_query_text(query_msg, query_desc); diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp new file mode 100644 index 00000000000..528426e6c64 --- /dev/null +++ b/src/PgUtils.cpp @@ -0,0 +1,94 @@ +#include "PgUtils.h" +#include "Config.h" + +extern "C" { +#include "utils/guc.h" +#include "commands/dbcommands.h" +#include "commands/resgroupcmds.h" +#include "cdb/cdbvars.h" +} + +std::string *get_user_name() { + const char *username = GetConfigOption("session_authorization", false, false); + // username is not to be freed + return username ? new std::string(username) : nullptr; +} + +std::string *get_db_name() { + char *dbname = get_database_name(MyDatabaseId); + std::string *result = nullptr; + if (dbname) { + result = new std::string(dbname); + pfree(dbname); + } + return result; +} + +std::string *get_rg_name() { + auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); + if (!OidIsValid(groupId)) + return nullptr; + char *rgname = GetResGroupNameForId(groupId); + if (rgname == nullptr) + return nullptr; + return new std::string(rgname); +} + +/** + * Things get tricky with nested queries. + * a) A nested query on master is a real query optimized and executed from + * master. An example would be `select some_insert_function();`, where + * some_insert_function does something like `insert into tbl values (1)`. Master + * will create two statements. Outer select statement and inner insert statement + * with nesting level 1. + * For segments both statements are top-level statements with nesting level 0. 
+ * b) A nested query on segment is something executed as sub-statement on + * segment. An example would be `select a from tbl where is_good_value(b);`. In + * this case master will issue one top-level statement, but segments will change + * contexts for UDF execution and execute is_good_value(b) once for each tuple + * as a nested query, creating massive load on the gpcc agent. + * + * Hence, here is a decision: + * 1) ignore all queries that are nested on segments + * 2) record (if enabled) all queries that are nested on master + * NOTE: The truth is, we can't really ignore nested master queries, because + * segment sees those as top-level. + */ + +bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { + return (query_desc->gpmon_pkt && + query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || + nesting_level == 0; +} + +bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { + return (Gp_session_role == GP_ROLE_DISPATCH && + Config::report_nested_queries()) || + is_top_level_query(query_desc, nesting_level); +} + +bool need_report_nested_query() { + return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; +} + +bool filter_query(QueryDesc *query_desc) { + return gp_command_count == 0 || query_desc->sourceText == nullptr || + !Config::enable_collector() || Config::filter_user(get_user_name()); +} + +bool need_collect(QueryDesc *query_desc, int nesting_level) { + return !filter_query(query_desc) && + nesting_is_valid(query_desc, nesting_level); +} + +ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { + ExplainState es; + ExplainInitState(&es); + es.costs = costs; + es.verbose = true; + es.format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(&es); + ExplainPrintPlan(&es, query_desc); + ExplainEndOutput(&es); + return es; +} diff --git a/src/PgUtils.h b/src/PgUtils.h new file mode 100644 index 00000000000..85b1eb833cd --- /dev/null +++ b/src/PgUtils.h @@ -0,0 +1,16 @@ +extern "C" { +#include 
"commands/explain.h" +} + +#include + +std::string *get_user_name(); +std::string *get_db_name(); +std::string *get_rg_name(); +bool is_top_level_query(QueryDesc *query_desc, int nesting_level); +bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); +bool need_report_nested_query(); +bool filter_query(QueryDesc *query_desc); +bool need_collect(QueryDesc *query_desc, int nesting_level); +ExplainState get_explain_state(QueryDesc *query_desc, bool costs); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp new file mode 100644 index 00000000000..e1be25b8b1e --- /dev/null +++ b/src/ProtoUtils.cpp @@ -0,0 +1,185 @@ +#include "ProtoUtils.h" +#include "PgUtils.h" +#include "ProcStats.h" +#include "Config.h" + +#define typeid __typeid +#define operator __operator +extern "C" { +#include "postgres.h" +#include "access/hash.h" +#include "cdb/cdbinterconnect.h" +#include "cdb/cdbvars.h" +#include "gpmon/gpmon.h" +#include "utils/workfile_mgr.h" + +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" +} +#undef typeid +#undef operator + +#include +#include + +google::protobuf::Timestamp current_ts() { + google::protobuf::Timestamp current_ts; + struct timeval tv; + gettimeofday(&tv, nullptr); + current_ts.set_seconds(tv.tv_sec); + current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); + return current_ts; +} + +void set_query_key(yagpcc::QueryKey *key) { + key->set_ccnt(gp_command_count); + key->set_ssid(gp_session_id); + int32 tmid = 0; + gpmon_gettmid(&tmid); + key->set_tmid(tmid); +} + +void set_segment_key(yagpcc::SegmentKey *key) { + key->set_dbid(GpIdentity.dbid); + key->set_segindex(GpIdentity.segindex); +} + +inline std::string char_to_trimmed_str(const char *str, size_t len) { + return std::string(str, std::min(len, Config::max_text_size())); +} + +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { + if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { + auto qi = req->mutable_query_info(); + 
qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + MemoryContext oldcxt = + MemoryContextSwitchTo(query_desc->estate->es_query_cxt); + auto es = get_explain_state(query_desc, true); + MemoryContextSwitchTo(oldcxt); + *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len); + StringInfo norm_plan = gen_normplan(es.str->data); + *qi->mutable_template_plan_text() = + char_to_trimmed_str(norm_plan->data, norm_plan->len); + qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + qi->set_query_id(query_desc->plannedstmt->queryId); + pfree(es.str->data); + pfree(norm_plan->data); + } +} + +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { + if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { + auto qi = req->mutable_query_info(); + *qi->mutable_query_text() = char_to_trimmed_str( + query_desc->sourceText, strlen(query_desc->sourceText)); + char *norm_query = gen_normquery(query_desc->sourceText); + *qi->mutable_template_query_text() = + char_to_trimmed_str(norm_query, strlen(norm_query)); + } +} + +void clear_big_fields(yagpcc::SetQueryReq *req) { + if (Gp_session_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); + qi->clear_plan_text(); + qi->clear_template_plan_text(); + qi->clear_query_text(); + qi->clear_template_query_text(); + } +} + +void set_query_info(yagpcc::SetQueryReq *req) { + if (Gp_session_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); + qi->set_allocated_username(get_user_name()); + qi->set_allocated_databasename(get_db_name()); + qi->set_allocated_rsgname(get_rg_name()); + } +} + +void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { + auto aqi = req->mutable_add_info(); + aqi->set_nested_level(nesting_level); +} + +void set_qi_slice_id(yagpcc::SetQueryReq *req) { + auto aqi = 
req->mutable_add_info(); + aqi->set_slice_id(currentSliceId); +} + +void set_qi_error_message(yagpcc::SetQueryReq *req) { + auto aqi = req->mutable_add_info(); + auto error = elog_message(); + *aqi->mutable_error_message() = char_to_trimmed_str(error, strlen(error)); +} + +void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, + QueryDesc *query_desc, int nested_calls, + double nested_time) { + auto instrument = query_desc->planstate->instrument; + if (instrument) { + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time( + INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); + } + if (query_desc->estate && query_desc->estate->motionlayer_context) { + MotionLayerState *mlstate = + (MotionLayerState *)query_desc->estate->motionlayer_context; + metrics->mutable_sent()->set_total_bytes(mlstate->stat_total_bytes_sent); + metrics->mutable_sent()->set_tuple_bytes(mlstate->stat_tuple_bytes_sent); + metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); + 
metrics->mutable_received()->set_total_bytes( + mlstate->stat_total_bytes_recvd); + metrics->mutable_received()->set_tuple_bytes( + mlstate->stat_tuple_bytes_recvd); + metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); + } + metrics->set_inherited_calls(nested_calls); + metrics->set_inherited_time(nested_time); +} + +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time) { + if (query_desc->planstate && query_desc->planstate->instrument) { + set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc, + nested_calls, nested_time); + } + fill_self_stats(metrics->mutable_systemstat()); + metrics->mutable_systemstat()->set_runningtimeseconds( + time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); + metrics->mutable_spill()->set_filecount( + WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); + metrics->mutable_spill()->set_totalbytes( + WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); +} + +yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status) { + yagpcc::SetQueryReq req; + req.set_query_status(status); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key()); + set_segment_key(req.mutable_segment_key()); + return req; +} + +double protots_to_double(const google::protobuf::Timestamp &ts) { + return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; +} \ No newline at end of file diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h new file mode 100644 index 00000000000..38aa75611b2 --- /dev/null +++ b/src/ProtoUtils.h @@ -0,0 +1,16 @@ +#include "protos/yagpcc_set_service.pb.h" + +struct QueryDesc; + +google::protobuf::Timestamp current_ts(); +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc); +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc); +void clear_big_fields(yagpcc::SetQueryReq *req); +void set_query_info(yagpcc::SetQueryReq *req); +void 
set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level); +void set_qi_slice_id(yagpcc::SetQueryReq *req); +void set_qi_error_message(yagpcc::SetQueryReq *req); +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time); +yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); +double protots_to_double(const google::protobuf::Timestamp &ts); \ No newline at end of file From a133b578109854e25117e5eb2493f5c6a7908ee9 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 7 Apr 2025 14:15:39 +0300 Subject: [PATCH 073/133] Ignore EXPLAIN VERBOSE errors --- src/PgUtils.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 528426e6c64..5982ff77c1c 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -88,7 +88,24 @@ ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { es.verbose = true; es.format = EXPLAIN_FORMAT_TEXT; ExplainBeginOutput(&es); - ExplainPrintPlan(&es, query_desc); + PG_TRY(); + { ExplainPrintPlan(&es, query_desc); } + PG_CATCH(); + { + // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE + // implementation. We don't want any queries to fail due to those bugs, so + // we report the bug here for future investigatin and continue collecting + // metrics w/o reporting any plans + resetStringInfo(es.str); + appendStringInfo( + es.str, + "Unable to restore query plan due to PostgreSQL internal error. 
" + "See logs for more information"); + ereport(INFO, + (errmsg("YAGPCC failed to reconstruct explain text for query: %s", + query_desc->sourceText))); + } + PG_END_TRY(); ExplainEndOutput(&es); return es; } From 402bad2cfbf0e27ace72a524d33e54544aec41cf Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 18 Apr 2025 14:58:52 +0300 Subject: [PATCH 074/133] Add support for per-slice interconnect statistics This change requires a patched version of gp code which provides interconnect teardown hook --- protos/yagpcc_metrics.proto | 56 +++++++++++++++++++++++++++++++++++++ src/EventSender.cpp | 42 +++++++++++++++++++++++++++- src/EventSender.h | 5 ++++ src/ProtoUtils.cpp | 30 ++++++++++++++++++++ src/ProtoUtils.h | 3 ++ src/hook_wrappers.cpp | 16 +++++++++++ 6 files changed, 151 insertions(+), 1 deletion(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index fc85386c6b0..086f3e63379 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -42,6 +42,11 @@ message AdditionalQueryInfo { int64 slice_id = 3; } +message AdditionalQueryStat { + string error_message = 1; + repeated int64 slices = 2; +} + enum PlanGenerator { PLAN_GENERATOR_UNSPECIFIED = 0; @@ -96,6 +101,56 @@ message NetworkStat { uint32 chunks = 3; } +message InterconnectStat { + // Receive queue size sum when main thread is trying to get a packet + uint64 total_recv_queue_size = 1; + // Counting times when computing total_recv_queue_size + uint64 recv_queue_size_counting_time = 2; + + // The capacity sum when packets are tried to be sent + uint64 total_capacity = 3; + // Counting times used to compute total_capacity + uint64 capacity_counting_time = 4; + + // Total buffers available when sending packets + uint64 total_buffers = 5; + // Counting times when compute total_buffers + uint64 buffer_counting_time = 6; + + // The number of active connections + uint64 active_connections_num = 7; + + // The number of packet retransmits + int64 retransmits = 8; + + // 
The number of cached future packets + int64 startup_cached_pkt_num = 9; + + // The number of mismatched packets received + int64 mismatch_num = 10; + + // The number of crc errors + int64 crc_errors = 11; + + // The number of packets sent by sender + int64 snd_pkt_num = 12; + + // The number of packets received by receiver + int64 recv_pkt_num = 13; + + // Disordered packet number + int64 disordered_pkt_num = 14; + + // Duplicate packet number + int64 duplicated_pkt_num = 15; + + // The number of Acks received + int64 recv_ack_num = 16; + + // The number of status query messages sent + int64 status_query_msg_num = 17; +} + message MetricInstrumentation { uint64 ntuples = 1; /* Total tuples produced */ uint64 nloops = 2; /* # of run cycles for this node */ @@ -120,6 +175,7 @@ message MetricInstrumentation { double startup_time = 21; /* real query startup time (planning + queue time) */ uint64 inherited_calls = 22; /* the number of executed sub-queries */ double inherited_time = 23; /* total time spend on inherited execution */ + InterconnectStat interconnect = 24; } message SpillInfo { diff --git a/src/EventSender.cpp b/src/EventSender.cpp index cdb21ef7aa6..f8bede654d8 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -35,7 +35,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // no-op: executor_after_start is enough break; case METRICS_QUERY_CANCELING: - // it appears we're unly interested in the actual CANCELED event. + // it appears we're only interested in the actual CANCELED event. // for now we will ignore CANCELING state unless otherwise requested from // end users break; @@ -150,6 +150,10 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { // take initial metrics snapshot so that we can safely take diff afterwards // in END or DONE events. 
set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + // same for interconnect statistics + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); } } @@ -203,6 +207,10 @@ void EventSender::collect_query_done(QueryDesc *query_desc, set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, nested_timing); } + ic_metrics_collect(); + set_ic_stats( + query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); connector->report_query(*query_msg, msg); } update_nested_counters(query_desc); @@ -213,6 +221,37 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } } +void EventSender::ic_metrics_collect() { + if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) { + return; + } + if (!connector || gp_command_count == 0 || !Config::enable_collector() || + Config::filter_user(get_user_name())) { + return; + } + // we also would like to know nesting level here and filter queries BUT we + // don't have this kind of information from this callback. 
Will have to + // collect stats anyways and throw it away later, if necessary + auto metrics = UDPIFCGetICStats(); + ic_statistics.totalRecvQueueSize += metrics.totalRecvQueueSize; + ic_statistics.recvQueueSizeCountingTime += metrics.recvQueueSizeCountingTime; + ic_statistics.totalCapacity += metrics.totalCapacity; + ic_statistics.capacityCountingTime += metrics.capacityCountingTime; + ic_statistics.totalBuffers += metrics.totalBuffers; + ic_statistics.bufferCountingTime += metrics.bufferCountingTime; + ic_statistics.activeConnectionsNum += metrics.activeConnectionsNum; + ic_statistics.retransmits += metrics.retransmits; + ic_statistics.startupCachedPktNum += metrics.startupCachedPktNum; + ic_statistics.mismatchNum += metrics.mismatchNum; + ic_statistics.crcErrors += metrics.crcErrors; + ic_statistics.sndPktNum += metrics.sndPktNum; + ic_statistics.recvPktNum += metrics.recvPktNum; + ic_statistics.disorderedPktNum += metrics.disorderedPktNum; + ic_statistics.duplicatedPktNum += metrics.duplicatedPktNum; + ic_statistics.recvAckNum += metrics.recvAckNum; + ic_statistics.statusQueryMsgNum += metrics.statusQueryMsgNum; +} + EventSender::EventSender() { if (Config::enable_collector() && !Config::filter_user(get_user_name())) { try { @@ -221,6 +260,7 @@ EventSender::EventSender() { ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); } } + memset(&ic_statistics, 0, sizeof(ICStatistics)); } EventSender::~EventSender() { diff --git a/src/EventSender.h b/src/EventSender.h index 9470cbf1f98..5627a540b9f 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -4,9 +4,12 @@ #include #include +#define typeid __typeid extern "C" { #include "utils/metrics_utils.h" +#include "cdb/ic_udpifc.h" } +#undef typeid class UDSConnector; struct QueryDesc; @@ -20,6 +23,7 @@ class EventSender { void executor_after_start(QueryDesc *query_desc, int eflags); void executor_end(QueryDesc *query_desc); void query_metrics_collect(QueryMetricsStatus status, void *arg); + void 
ic_metrics_collect(); void incr_depth() { nesting_level++; } void decr_depth() { nesting_level--; } EventSender(); @@ -55,5 +59,6 @@ class EventSender { int nesting_level = 0; int64_t nested_calls = 0; double nested_timing = 0; + ICStatistics ic_statistics; std::unordered_map, QueryItem, pair_hash> query_msgs; }; \ No newline at end of file diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index e1be25b8b1e..30f5b4fef4b 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -10,6 +10,7 @@ extern "C" { #include "access/hash.h" #include "cdb/cdbinterconnect.h" #include "cdb/cdbvars.h" +#include "cdb/ic_udpifc.h" #include "gpmon/gpmon.h" #include "utils/workfile_mgr.h" @@ -171,6 +172,35 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); } +#define UPDATE_IC_STATS(proto_name, stat_name) \ + metrics->mutable_interconnect()->set_##proto_name( \ + ic_statistics->stat_name - \ + metrics->mutable_interconnect()->proto_name()); \ + Assert(metrics->mutable_interconnect()->proto_name() >= 0 && \ + metrics->mutable_interconnect()->proto_name() <= \ + ic_statistics->stat_name) + +void set_ic_stats(yagpcc::MetricInstrumentation *metrics, + const ICStatistics *ic_statistics) { + UPDATE_IC_STATS(total_recv_queue_size, totalRecvQueueSize); + UPDATE_IC_STATS(recv_queue_size_counting_time, recvQueueSizeCountingTime); + UPDATE_IC_STATS(total_capacity, totalCapacity); + UPDATE_IC_STATS(capacity_counting_time, capacityCountingTime); + UPDATE_IC_STATS(total_buffers, totalBuffers); + UPDATE_IC_STATS(buffer_counting_time, bufferCountingTime); + UPDATE_IC_STATS(active_connections_num, activeConnectionsNum); + UPDATE_IC_STATS(retransmits, retransmits); + UPDATE_IC_STATS(startup_cached_pkt_num, startupCachedPktNum); + UPDATE_IC_STATS(mismatch_num, mismatchNum); + UPDATE_IC_STATS(crc_errors, crcErrors); + UPDATE_IC_STATS(snd_pkt_num, sndPktNum); + UPDATE_IC_STATS(recv_pkt_num, recvPktNum); + 
UPDATE_IC_STATS(disordered_pkt_num, disorderedPktNum); + UPDATE_IC_STATS(duplicated_pkt_num, duplicatedPktNum); + UPDATE_IC_STATS(recv_ack_num, recvAckNum); + UPDATE_IC_STATS(status_query_msg_num, statusQueryMsgNum); +} + yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status) { yagpcc::SetQueryReq req; req.set_query_status(status); diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 38aa75611b2..4e4ed5e76a3 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -1,6 +1,7 @@ #include "protos/yagpcc_set_service.pb.h" struct QueryDesc; +struct ICStatistics; google::protobuf::Timestamp current_ts(); void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc); @@ -12,5 +13,7 @@ void set_qi_slice_id(yagpcc::SetQueryReq *req); void set_qi_error_message(yagpcc::SetQueryReq *req); void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, int nested_calls, double nested_time); +void set_ic_stats(yagpcc::MetricInstrumentation *metrics, + const ICStatistics *ic_statistics); yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); double protots_to_double(const google::protobuf::Timestamp &ts); \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 93faaa0bf8f..b72d5b05a5c 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,3 +1,4 @@ +#define typeid __typeid extern "C" { #include "postgres.h" #include "funcapi.h" @@ -7,8 +8,10 @@ extern "C" { #include "utils/metrics_utils.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" #include "tcop/utility.h" } +#undef typeid #include "Config.h" #include "YagpStat.h" @@ -21,6 +24,7 @@ static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; +static ic_teardown_hook_type 
previous_ic_teardown_hook = nullptr; static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, @@ -28,6 +32,8 @@ static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, static void ya_ExecutorFinish_hook(QueryDesc *query_desc); static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); +static void ya_ic_teardown_hook(ChunkTransportState *transportStates, + bool hasErrors); static EventSender *sender = nullptr; @@ -60,6 +66,8 @@ void hooks_init() { ExecutorEnd_hook = ya_ExecutorEnd_hook; previous_query_info_collect_hook = query_info_collect_hook; query_info_collect_hook = ya_query_info_collect_hook; + previous_ic_teardown_hook = ic_teardown_hook; + ic_teardown_hook = ya_ic_teardown_hook; stat_statements_parser_init(); } @@ -69,6 +77,7 @@ void hooks_deinit() { ExecutorRun_hook = previous_ExecutorRun_hook; ExecutorFinish_hook = previous_ExecutorFinish_hook; query_info_collect_hook = previous_query_info_collect_hook; + ic_teardown_hook = previous_ic_teardown_hook; stat_statements_parser_deinit(); if (sender) { delete sender; @@ -141,6 +150,13 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { } } +void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { + cpp_call(get_sender(), &EventSender::ic_metrics_collect); + if (previous_ic_teardown_hook) { + (*previous_ic_teardown_hook)(transportStates, hasErrors); + } +} + static void check_stats_loaded() { if (!YagpStat::loaded()) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), From 592abd4800abd7cdb1e11b0fc70c72f29f8867ea Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 6 May 2025 17:02:11 +0300 Subject: [PATCH 075/133] Don't rely on IC hook for compilation --- src/EventSender.cpp | 11 +++++++++++ src/EventSender.h | 5 +++++ src/ProtoUtils.cpp | 5 +++++ 
src/hook_wrappers.cpp | 8 ++++++++ 4 files changed, 29 insertions(+) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index f8bede654d8..2ba34d1e4cc 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,6 +1,7 @@ #include "Config.h" #include "UDSConnector.h" +#define typeid __typeid extern "C" { #include "postgres.h" @@ -11,7 +12,9 @@ extern "C" { #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" } +#undef typeid #include "EventSender.h" #include "PgUtils.h" @@ -150,10 +153,12 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { // take initial metrics snapshot so that we can safely take diff afterwards // in END or DONE events. set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); +#ifdef IC_TEARDOWN_HOOK // same for interconnect statistics ic_metrics_collect(); set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), &ic_statistics); +#endif } } @@ -207,10 +212,12 @@ void EventSender::collect_query_done(QueryDesc *query_desc, set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, nested_timing); } +#ifdef IC_TEARDOWN_HOOK ic_metrics_collect(); set_ic_stats( query_msg->mutable_query_metrics()->mutable_instrumentation(), &ic_statistics); +#endif connector->report_query(*query_msg, msg); } update_nested_counters(query_desc); @@ -222,6 +229,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } void EventSender::ic_metrics_collect() { +#ifdef IC_TEARDOWN_HOOK if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) { return; } @@ -250,6 +258,7 @@ void EventSender::ic_metrics_collect() { ic_statistics.duplicatedPktNum += metrics.duplicatedPktNum; ic_statistics.recvAckNum += metrics.recvAckNum; ic_statistics.statusQueryMsgNum += metrics.statusQueryMsgNum; +#endif } EventSender::EventSender() { @@ -260,7 +269,9 @@ EventSender::EventSender() { ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); } } 
+#ifdef IC_TEARDOWN_HOOK memset(&ic_statistics, 0, sizeof(ICStatistics)); +#endif } EventSender::~EventSender() { diff --git a/src/EventSender.h b/src/EventSender.h index 5627a540b9f..99f7b24753d 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -7,7 +7,10 @@ #define typeid __typeid extern "C" { #include "utils/metrics_utils.h" +#include "cdb/ml_ipc.h" +#ifdef IC_TEARDOWN_HOOK #include "cdb/ic_udpifc.h" +#endif } #undef typeid @@ -59,6 +62,8 @@ class EventSender { int nesting_level = 0; int64_t nested_calls = 0; double nested_timing = 0; +#ifdef IC_TEARDOWN_HOOK ICStatistics ic_statistics; +#endif std::unordered_map, QueryItem, pair_hash> query_msgs; }; \ No newline at end of file diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 30f5b4fef4b..c37cefb72d6 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -10,7 +10,10 @@ extern "C" { #include "access/hash.h" #include "cdb/cdbinterconnect.h" #include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" +#ifdef IC_TEARDOWN_HOOK #include "cdb/ic_udpifc.h" +#endif #include "gpmon/gpmon.h" #include "utils/workfile_mgr.h" @@ -182,6 +185,7 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, void set_ic_stats(yagpcc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics) { +#ifdef IC_TEARDOWN_HOOK UPDATE_IC_STATS(total_recv_queue_size, totalRecvQueueSize); UPDATE_IC_STATS(recv_queue_size_counting_time, recvQueueSizeCountingTime); UPDATE_IC_STATS(total_capacity, totalCapacity); @@ -199,6 +203,7 @@ void set_ic_stats(yagpcc::MetricInstrumentation *metrics, UPDATE_IC_STATS(duplicated_pkt_num, duplicatedPktNum); UPDATE_IC_STATS(recv_ack_num, recvAckNum); UPDATE_IC_STATS(status_query_msg_num, statusQueryMsgNum); +#endif } yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status) { diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index b72d5b05a5c..f1d403b82f1 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -24,7 +24,9 @@ static 
ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; +#ifdef IC_TEARDOWN_HOOK static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; +#endif static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, @@ -66,8 +68,10 @@ void hooks_init() { ExecutorEnd_hook = ya_ExecutorEnd_hook; previous_query_info_collect_hook = query_info_collect_hook; query_info_collect_hook = ya_query_info_collect_hook; +#ifdef IC_TEARDOWN_HOOK previous_ic_teardown_hook = ic_teardown_hook; ic_teardown_hook = ya_ic_teardown_hook; +#endif stat_statements_parser_init(); } @@ -77,7 +81,9 @@ void hooks_deinit() { ExecutorRun_hook = previous_ExecutorRun_hook; ExecutorFinish_hook = previous_ExecutorFinish_hook; query_info_collect_hook = previous_query_info_collect_hook; +#ifdef IC_TEARDOWN_HOOK ic_teardown_hook = previous_ic_teardown_hook; +#endif stat_statements_parser_deinit(); if (sender) { delete sender; @@ -152,9 +158,11 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { cpp_call(get_sender(), &EventSender::ic_metrics_collect); +#ifdef IC_TEARDOWN_HOOK if (previous_ic_teardown_hook) { (*previous_ic_teardown_hook)(transportStates, hasErrors); } +#endif } static void check_stats_loaded() { From e3101f8ec9ee7857f55e3022765310600e81163b Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 9 Jun 2025 16:59:13 +0300 Subject: [PATCH 076/133] fix: propagate ignored users on update --- src/Config.cpp | 75 +++++++++++++++++++++++++-------------------- src/EventSender.cpp | 2 +- 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/Config.cpp b/src/Config.cpp index 
42fa4b2fb12..0644dd444ff 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -19,6 +19,39 @@ static char *guc_ignored_users = nullptr; static int guc_max_text_size = 1024; // in KB static std::unique_ptr> ignored_users = nullptr; +extern "C" void update_ignored_users(const char *new_guc_ignored_users) { + auto new_ignored_users = std::make_unique>(); + if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { + /* Need a modifiable copy of string */ + char *rawstring = pstrdup(new_guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) { + /* syntax error in list */ + pfree(rawstring); + list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter yagpcc.ignored_users_list"))); + return; + } + foreach (l, elemlist) { + new_ignored_users->insert((char *)lfirst(l)); + } + pfree(rawstring); + list_free(elemlist); + } + ignored_users = std::move(new_ignored_users); +} + +static void assign_ignored_users_hook(const char *newval, void *extra) { + update_ignored_users(newval); +} + void Config::init() { DefineCustomStringVariable( "yagpcc.uds_path", "Sets filesystem path of the agent socket", 0LL, @@ -44,11 +77,12 @@ void Config::init() { &guc_report_nested_queries, true, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); - DefineCustomStringVariable( - "yagpcc.ignored_users_list", - "Make yagpcc ignore queries issued by given users", 0LL, - &guc_ignored_users, "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + DefineCustomStringVariable("yagpcc.ignored_users_list", + "Make yagpcc ignore queries issued by given users", + 0LL, &guc_ignored_users, + "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, + assign_ignored_users_hook, 0LL); DefineCustomIntVariable( "yagpcc.max_text_size", @@ 
-65,33 +99,8 @@ bool Config::report_nested_queries() { return guc_report_nested_queries; } size_t Config::max_text_size() { return guc_max_text_size * 1024; } bool Config::filter_user(const std::string *username) { - if (!ignored_users) { - ignored_users.reset(new std::unordered_set()); - if (guc_ignored_users == nullptr || guc_ignored_users[0] == '0') { - return false; - } - /* Need a modifiable copy of string */ - char *rawstring = pstrdup(guc_ignored_users); - List *elemlist; - ListCell *l; - - /* Parse string into list of identifiers */ - if (!SplitIdentifierString(rawstring, ',', &elemlist)) { - /* syntax error in list */ - pfree(rawstring); - list_free(elemlist); - ereport( - LOG, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg( - "invalid list syntax in parameter yagpcc.ignored_users_list"))); - return false; - } - foreach (l, elemlist) { - ignored_users->insert((char *)lfirst(l)); - } - pfree(rawstring); - list_free(elemlist); + if (!username || !ignored_users) { + return true; } - return !username || ignored_users->find(*username) != ignored_users->end(); + return ignored_users->find(*username) != ignored_users->end(); } diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 2ba34d1e4cc..30cdff20644 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -262,7 +262,7 @@ void EventSender::ic_metrics_collect() { } EventSender::EventSender() { - if (Config::enable_collector() && !Config::filter_user(get_user_name())) { + if (Config::enable_collector()) { try { connector = new UDSConnector(); } catch (const std::exception &e) { From b6bb94fd0fccbdffa74c12c1d141a48dc5aae2b7 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Wed, 11 Jun 2025 16:43:54 +0300 Subject: [PATCH 077/133] propagate ignored users only when executor starts --- src/Config.cpp | 42 +++++++----------------------------------- src/Config.h | 1 + src/EventSender.cpp | 38 ++++++++++++++++++++++++++++++++++++++ src/EventSender.h | 1 + 4 files changed, 47 insertions(+), 35 deletions(-) diff --git 
a/src/Config.cpp b/src/Config.cpp index 0644dd444ff..19aa37d1b9d 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -6,7 +6,6 @@ extern "C" { #include "postgres.h" -#include "utils/builtins.h" #include "utils/guc.h" } @@ -17,39 +16,11 @@ static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; static int guc_max_text_size = 1024; // in KB -static std::unique_ptr> ignored_users = nullptr; +std::unique_ptr> ignored_users_set = nullptr; +bool ignored_users_guc_dirty = false; -extern "C" void update_ignored_users(const char *new_guc_ignored_users) { - auto new_ignored_users = std::make_unique>(); - if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { - /* Need a modifiable copy of string */ - char *rawstring = pstrdup(new_guc_ignored_users); - List *elemlist; - ListCell *l; - - /* Parse string into list of identifiers */ - if (!SplitIdentifierString(rawstring, ',', &elemlist)) { - /* syntax error in list */ - pfree(rawstring); - list_free(elemlist); - ereport( - LOG, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg( - "invalid list syntax in parameter yagpcc.ignored_users_list"))); - return; - } - foreach (l, elemlist) { - new_ignored_users->insert((char *)lfirst(l)); - } - pfree(rawstring); - list_free(elemlist); - } - ignored_users = std::move(new_ignored_users); -} - -static void assign_ignored_users_hook(const char *newval, void *extra) { - update_ignored_users(newval); +static void assign_ignored_users_hook(const char *, void *) { + ignored_users_guc_dirty = true; } void Config::init() { @@ -96,11 +67,12 @@ bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } bool Config::report_nested_queries() { return guc_report_nested_queries; } +const char *Config::ignored_users() { return guc_ignored_users; } size_t Config::max_text_size() { return 
guc_max_text_size * 1024; } bool Config::filter_user(const std::string *username) { - if (!username || !ignored_users) { + if (!username || !ignored_users_set) { return true; } - return ignored_users->find(*username) != ignored_users->end(); + return ignored_users_set->find(*username) != ignored_users_set->end(); } diff --git a/src/Config.h b/src/Config.h index f806bc0dbf5..9dd33c68321 100644 --- a/src/Config.h +++ b/src/Config.h @@ -11,5 +11,6 @@ class Config { static bool enable_collector(); static bool filter_user(const std::string *username); static bool report_nested_queries(); + static const char *ignored_users(); static size_t max_text_size(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 30cdff20644..fed9b69911f 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -8,6 +8,7 @@ extern "C" { #include "access/hash.h" #include "executor/executor.h" #include "utils/elog.h" +#include "utils/builtins.h" #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" @@ -20,6 +21,9 @@ extern "C" { #include "PgUtils.h" #include "ProtoUtils.h" +extern std::unique_ptr> ignored_users_set; +extern bool ignored_users_guc_dirty; + void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; @@ -62,6 +66,10 @@ void EventSender::executor_before_start(QueryDesc *query_desc, nested_timing = 0; nested_calls = 0; } + if (ignored_users_guc_dirty) { + update_ignored_users(Config::ignored_users()); + ignored_users_guc_dirty = false; + } if (!need_collect(query_desc, nesting_level)) { return; } @@ -347,6 +355,36 @@ void EventSender::update_nested_counters(QueryDesc *query_desc) { } } +void EventSender::update_ignored_users(const char *new_guc_ignored_users) { + auto new_ignored_users_set = + std::make_unique>(); + if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { + /* Need a modifiable copy of string */ + char *rawstring = 
pstrdup(new_guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) { + /* syntax error in list */ + pfree(rawstring); + list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter yagpcc.ignored_users_list"))); + return; + } + foreach (l, elemlist) { + new_ignored_users_set->insert((char *)lfirst(l)); + } + pfree(rawstring); + list_free(elemlist); + } + ignored_users_set = std::move(new_ignored_users_set); +} + EventSender::QueryItem::QueryItem(EventSender::QueryState st, yagpcc::SetQueryReq *msg) : state(st), message(msg) {} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 99f7b24753d..6919defbbb3 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -57,6 +57,7 @@ class EventSender { void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); void cleanup_messages(); void update_nested_counters(QueryDesc *query_desc); + void update_ignored_users(const char *new_guc_ignored_users); UDSConnector *connector = nullptr; int nesting_level = 0; From 65882d54668d3c1d7c689cbfb2cbe31ac74cf993 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 16 Jun 2025 11:31:28 +0300 Subject: [PATCH 078/133] fix ub in strcpy --- src/UDSConnector.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index b9088205250..8a5f754f3b4 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -30,7 +30,13 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, const std::string &event) { sockaddr_un address; address.sun_family = AF_UNIX; - strcpy(address.sun_path, Config::uds_path().c_str()); + std::string uds_path = Config::uds_path(); + if (uds_path.size() >= sizeof(address.sun_path)) { + ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); + YagpStat::report_error(); + return 
false; + } + strcpy(address.sun_path, uds_path.c_str()); bool success = true; auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); if (sockfd != -1) { From d67dbc71259b4f509678a791da48aa061f58e138 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 16 Jun 2025 13:07:59 +0300 Subject: [PATCH 079/133] refactor --- src/Config.cpp | 44 +++++++++++++++++++++++++++++++++++++++++--- src/Config.h | 2 +- src/EventSender.cpp | 39 +-------------------------------------- src/EventSender.h | 1 - 4 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/Config.cpp b/src/Config.cpp index 19aa37d1b9d..5e0749f171d 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -6,6 +6,7 @@ extern "C" { #include "postgres.h" +#include "utils/builtins.h" #include "utils/guc.h" } @@ -16,8 +17,39 @@ static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; static int guc_max_text_size = 1024; // in KB -std::unique_ptr> ignored_users_set = nullptr; -bool ignored_users_guc_dirty = false; +static std::unique_ptr> ignored_users_set = + nullptr; +static bool ignored_users_guc_dirty = false; + +static void update_ignored_users(const char *new_guc_ignored_users) { + auto new_ignored_users_set = + std::make_unique>(); + if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { + /* Need a modifiable copy of string */ + char *rawstring = pstrdup(new_guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) { + /* syntax error in list */ + pfree(rawstring); + list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter yagpcc.ignored_users_list"))); + return; + } + foreach (l, elemlist) { + new_ignored_users_set->insert((char *)lfirst(l)); + } + pfree(rawstring); + list_free(elemlist); + } + ignored_users_set = std::move(new_ignored_users_set); +} static void 
assign_ignored_users_hook(const char *, void *) { ignored_users_guc_dirty = true; @@ -67,7 +99,6 @@ bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } bool Config::report_nested_queries() { return guc_report_nested_queries; } -const char *Config::ignored_users() { return guc_ignored_users; } size_t Config::max_text_size() { return guc_max_text_size * 1024; } bool Config::filter_user(const std::string *username) { @@ -76,3 +107,10 @@ bool Config::filter_user(const std::string *username) { } return ignored_users_set->find(*username) != ignored_users_set->end(); } + +void Config::sync() { + if (ignored_users_guc_dirty) { + update_ignored_users(guc_ignored_users); + ignored_users_guc_dirty = false; + } +} \ No newline at end of file diff --git a/src/Config.h b/src/Config.h index 9dd33c68321..3caa0c78339 100644 --- a/src/Config.h +++ b/src/Config.h @@ -11,6 +11,6 @@ class Config { static bool enable_collector(); static bool filter_user(const std::string *username); static bool report_nested_queries(); - static const char *ignored_users(); static size_t max_text_size(); + static void sync(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index fed9b69911f..fc0f7e1aa07 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -8,7 +8,6 @@ extern "C" { #include "access/hash.h" #include "executor/executor.h" #include "utils/elog.h" -#include "utils/builtins.h" #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" @@ -21,9 +20,6 @@ extern "C" { #include "PgUtils.h" #include "ProtoUtils.h" -extern std::unique_ptr> ignored_users_set; -extern bool ignored_users_guc_dirty; - void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; @@ -66,10 +62,7 @@ void EventSender::executor_before_start(QueryDesc 
*query_desc, nested_timing = 0; nested_calls = 0; } - if (ignored_users_guc_dirty) { - update_ignored_users(Config::ignored_users()); - ignored_users_guc_dirty = false; - } + Config::sync(); if (!need_collect(query_desc, nesting_level)) { return; } @@ -355,36 +348,6 @@ void EventSender::update_nested_counters(QueryDesc *query_desc) { } } -void EventSender::update_ignored_users(const char *new_guc_ignored_users) { - auto new_ignored_users_set = - std::make_unique>(); - if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { - /* Need a modifiable copy of string */ - char *rawstring = pstrdup(new_guc_ignored_users); - List *elemlist; - ListCell *l; - - /* Parse string into list of identifiers */ - if (!SplitIdentifierString(rawstring, ',', &elemlist)) { - /* syntax error in list */ - pfree(rawstring); - list_free(elemlist); - ereport( - LOG, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg( - "invalid list syntax in parameter yagpcc.ignored_users_list"))); - return; - } - foreach (l, elemlist) { - new_ignored_users_set->insert((char *)lfirst(l)); - } - pfree(rawstring); - list_free(elemlist); - } - ignored_users_set = std::move(new_ignored_users_set); -} - EventSender::QueryItem::QueryItem(EventSender::QueryState st, yagpcc::SetQueryReq *msg) : state(st), message(msg) {} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 6919defbbb3..99f7b24753d 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -57,7 +57,6 @@ class EventSender { void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); void cleanup_messages(); void update_nested_counters(QueryDesc *query_desc); - void update_ignored_users(const char *new_guc_ignored_users); UDSConnector *connector = nullptr; int nesting_level = 0; From 848c3b07c11630f3b9df87012de1a07a1c9a0232 Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Tue, 24 Jun 2025 14:41:03 +0300 Subject: [PATCH 080/133] Add EXPLAIN ANALYZE metrics 
collection This commit depends on a new hook in gpdb (but can be compiled w/o it) and allows sending analyze output for long-running queries. --- protos/yagpcc_metrics.proto | 1 + src/Config.cpp | 22 +++++++++++++-- src/Config.h | 2 ++ src/EventSender.cpp | 51 ++++++++++++++++++++++++++++++++--- src/EventSender.h | 1 + src/PgUtils.cpp | 37 ++++++++++++++++++++++++++ src/PgUtils.h | 1 + src/ProtoUtils.cpp | 53 ++++++++++++++++++++++++++++++------- src/ProtoUtils.h | 4 ++- src/hook_wrappers.cpp | 24 +++++++++++++++++ 10 files changed, 180 insertions(+), 16 deletions(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index 086f3e63379..91ac0c4941a 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -34,6 +34,7 @@ message QueryInfo { string userName = 8; string databaseName = 9; string rsgname = 10; + string analyze_text = 11; } message AdditionalQueryInfo { diff --git a/src/Config.cpp b/src/Config.cpp index 5e0749f171d..ac274a1e218 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -16,7 +16,10 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; -static int guc_max_text_size = 1024; // in KB +static int guc_max_text_size = 1024; // in KB +static int guc_max_plan_size = 1024; // in KB +static int guc_min_analyze_time = -1; // uninitialized state + static std::unique_ptr> ignored_users_set = nullptr; static bool ignored_users_guc_dirty = false; @@ -89,9 +92,22 @@ void Config::init() { DefineCustomIntVariable( "yagpcc.max_text_size", - "Make yagpcc trim plan and query texts longer than configured size", NULL, + "Make yagpcc trim query texts longer than configured size", NULL, &guc_max_text_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); + + DefineCustomIntVariable( + "yagpcc.max_plan_size", + "Make yagpcc trim plan longer than configured 
size", NULL, + &guc_max_plan_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); + + DefineCustomIntVariable( + "yagpcc.min_analyze_time", + "Sets the minimum execution time above which plans will be logged.", + "Zero prints all plans. -1 turns this feature off.", + &guc_min_analyze_time, -1, -1, INT_MAX, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL); } std::string Config::uds_path() { return guc_uds_path; } @@ -100,6 +116,8 @@ bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } bool Config::report_nested_queries() { return guc_report_nested_queries; } size_t Config::max_text_size() { return guc_max_text_size * 1024; } +size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } +int Config::min_analyze_time() { return guc_min_analyze_time; }; bool Config::filter_user(const std::string *username) { if (!username || !ignored_users_set) { diff --git a/src/Config.h b/src/Config.h index 3caa0c78339..dd081c41dd6 100644 --- a/src/Config.h +++ b/src/Config.h @@ -12,5 +12,7 @@ class Config { static bool filter_user(const std::string *username); static bool report_nested_queries(); static size_t max_text_size(); + static size_t max_plan_size(); + static int min_analyze_time(); static void sync(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index fc0f7e1aa07..19787fe0db0 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -20,6 +20,10 @@ extern "C" { #include "PgUtils.h" #include "ProtoUtils.h" +#define need_collect_analyze() \ + (Gp_role == GP_ROLE_DISPATCH && Config::min_analyze_time() >= 0 && \ + Config::enable_analyze()) + void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; @@ -53,8 +57,7 @@ void 
EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { } } -void EventSender::executor_before_start(QueryDesc *query_desc, - int /* eflags*/) { +void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { if (!connector) { return; } @@ -67,7 +70,8 @@ void EventSender::executor_before_start(QueryDesc *query_desc, return; } collect_query_submit(query_desc); - if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && + (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; @@ -97,6 +101,17 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { } update_query_state(query_desc, query, QueryState::START); set_query_plan(query_msg, query_desc); + if (need_collect_analyze()) { + // Set up to track total elapsed time during query run. + // Make sure the space is allocated in the per-query + // context so it will go away at executor_end. + if (query_desc->totaltime == NULL) { + MemoryContext oldcxt; + oldcxt = MemoryContextSwitchTo(query_desc->estate->es_query_cxt); + query_desc->totaltime = InstrAlloc(1, INSTRUMENT_ALL); + MemoryContextSwitchTo(oldcxt); + } + } yagpcc::GPMetrics stats; std::swap(stats, *query_msg->mutable_query_metrics()); if (connector->report_query(*query_msg, "started")) { @@ -262,6 +277,34 @@ void EventSender::ic_metrics_collect() { #endif } +void EventSender::analyze_stats_collect(QueryDesc *query_desc) { + if (!connector || Gp_role != GP_ROLE_DISPATCH) { + return; + } + if (!need_collect(query_desc, nesting_level)) { + return; + } + auto query = get_query_message(query_desc); + auto query_msg = query->message; + *query_msg->mutable_end_time() = current_ts(); + // Yet another greenplum weirdness: thats actually a nested query + // which is being committed/rollbacked. 
Treat it accordingly. + if (query->state == UNKNOWN && !need_report_nested_query()) { + return; + } + if (!query_desc->totaltime || !need_collect_analyze()) { + return; + } + // Make sure stats accumulation is done. + // (Note: it's okay if several levels of hook all do this.) + InstrEndLoop(query_desc->totaltime); + + double ms = query_desc->totaltime->total * 1000.0; + if (ms >= Config::min_analyze_time()) { + set_analyze_plan_text_json(query_desc, query_msg); + } +} + EventSender::EventSender() { if (Config::enable_collector()) { try { @@ -350,4 +393,4 @@ void EventSender::update_nested_counters(QueryDesc *query_desc) { EventSender::QueryItem::QueryItem(EventSender::QueryState st, yagpcc::SetQueryReq *msg) - : state(st), message(msg) {} \ No newline at end of file + : state(st), message(msg) {} diff --git a/src/EventSender.h b/src/EventSender.h index 99f7b24753d..4d09b429fc8 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -27,6 +27,7 @@ class EventSender { void executor_end(QueryDesc *query_desc); void query_metrics_collect(QueryMetricsStatus status, void *arg); void ic_metrics_collect(); + void analyze_stats_collect(QueryDesc *query_desc); void incr_depth() { nesting_level++; } void decr_depth() { nesting_level--; } EventSender(); diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 5982ff77c1c..ed3e69c6d44 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -109,3 +109,40 @@ ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { ExplainEndOutput(&es); return es; } + +ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze) { + ExplainState es; + ExplainInitState(&es); + es.analyze = analyze; + es.verbose = true; + es.buffers = es.analyze; + es.timing = es.analyze; + es.summary = es.analyze; + es.format = EXPLAIN_FORMAT_JSON; + ExplainBeginOutput(&es); + if (analyze) { + PG_TRY(); + { + ExplainPrintPlan(&es, query_desc); + ExplainPrintExecStatsEnd(&es, query_desc); + } + PG_CATCH(); + { + // PG and GP both have known and 
yet unknown bugs in EXPLAIN VERBOSE + // implementation. We don't want any queries to fail due to those bugs, so + // we report the bug here for future investigatin and continue collecting + // metrics w/o reporting any plans + resetStringInfo(es.str); + appendStringInfo( + es.str, + "Unable to restore analyze plan due to PostgreSQL internal error. " + "See logs for more information"); + ereport(INFO, + (errmsg("YAGPCC failed to reconstruct analyze text for query: %s", + query_desc->sourceText))); + } + PG_END_TRY(); + } + ExplainEndOutput(&es); + return es; +} diff --git a/src/PgUtils.h b/src/PgUtils.h index 85b1eb833cd..81282a473a8 100644 --- a/src/PgUtils.h +++ b/src/PgUtils.h @@ -14,3 +14,4 @@ bool need_report_nested_query(); bool filter_query(QueryDesc *query_desc); bool need_collect(QueryDesc *query_desc, int nesting_level); ExplainState get_explain_state(QueryDesc *query_desc, bool costs); +ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index c37cefb72d6..6e9fa6bd5c5 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -8,6 +8,7 @@ extern "C" { #include "postgres.h" #include "access/hash.h" +#include "access/xact.h" #include "cdb/cdbinterconnect.h" #include "cdb/cdbvars.h" #include "cdb/ml_ipc.h" @@ -47,8 +48,9 @@ void set_segment_key(yagpcc::SegmentKey *key) { key->set_segindex(GpIdentity.segindex); } -inline std::string char_to_trimmed_str(const char *str, size_t len) { - return std::string(str, std::min(len, Config::max_text_size())); +inline std::string char_to_trimmed_str(const char *str, size_t len, + size_t lim) { + return std::string(str, std::min(len, lim)); } void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { @@ -61,10 +63,11 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { MemoryContextSwitchTo(query_desc->estate->es_query_cxt); auto es = get_explain_state(query_desc, true); MemoryContextSwitchTo(oldcxt); - 
*qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len); + *qi->mutable_plan_text() = + char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); StringInfo norm_plan = gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = - char_to_trimmed_str(norm_plan->data, norm_plan->len); + *qi->mutable_template_plan_text() = char_to_trimmed_str( + norm_plan->data, norm_plan->len, Config::max_plan_size()); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); qi->set_query_id(query_desc->plannedstmt->queryId); pfree(es.str->data); @@ -76,10 +79,11 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); *qi->mutable_query_text() = char_to_trimmed_str( - query_desc->sourceText, strlen(query_desc->sourceText)); + query_desc->sourceText, strlen(query_desc->sourceText), + Config::max_text_size()); char *norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = - char_to_trimmed_str(norm_query, strlen(norm_query)); + *qi->mutable_template_query_text() = char_to_trimmed_str( + norm_query, strlen(norm_query), Config::max_text_size()); } } @@ -90,6 +94,7 @@ void clear_big_fields(yagpcc::SetQueryReq *req) { qi->clear_template_plan_text(); qi->clear_query_text(); qi->clear_template_query_text(); + qi->clear_analyze_text(); } } @@ -115,7 +120,8 @@ void set_qi_slice_id(yagpcc::SetQueryReq *req) { void set_qi_error_message(yagpcc::SetQueryReq *req) { auto aqi = req->mutable_add_info(); auto error = elog_message(); - *aqi->mutable_error_message() = char_to_trimmed_str(error, strlen(error)); + *aqi->mutable_error_message() = + char_to_trimmed_str(error, strlen(error), Config::max_text_size()); } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, @@ -217,4 +223,33 @@ yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status) { double 
protots_to_double(const google::protobuf::Timestamp &ts) { return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; +} + +void set_analyze_plan_text_json(QueryDesc *query_desc, + yagpcc::SetQueryReq *req) { + // Make sure it is a valid txn and it is not an utility + // statement for ExplainPrintPlan() later. + if (!IsTransactionState() || !query_desc->plannedstmt) { + return; + } + MemoryContext oldcxt = + MemoryContextSwitchTo(query_desc->estate->es_query_cxt); + + ExplainState es = get_analyze_state_json( + query_desc, query_desc->instrument_options && Config::enable_analyze()); + // Remove last line break. + if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { + es.str->data[--es.str->len] = '\0'; + } + // Convert JSON array to JSON object. + if (es.str->len > 0) { + es.str->data[0] = '{'; + es.str->data[es.str->len - 1] = '}'; + } + auto trimmed_analyze = + char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); + req->mutable_query_info()->set_analyze_text(trimmed_analyze); + + pfree(es.str->data); + MemoryContextSwitchTo(oldcxt); } \ No newline at end of file diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 4e4ed5e76a3..6fb880c2eb8 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -16,4 +16,6 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, void set_ic_stats(yagpcc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics); yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); -double protots_to_double(const google::protobuf::Timestamp &ts); \ No newline at end of file +double protots_to_double(const google::protobuf::Timestamp &ts); +void set_analyze_plan_text_json(QueryDesc *query_desc, + yagpcc::SetQueryReq *message); \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index f1d403b82f1..79d3ec45881 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -3,6 +3,7 @@ extern "C" { #include "postgres.h" #include 
"funcapi.h" #include "executor/executor.h" +#include "executor/execUtils.h" #include "utils/elog.h" #include "utils/builtins.h" #include "utils/metrics_utils.h" @@ -24,6 +25,10 @@ static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; +#ifdef ANALYZE_STATS_COLLECT_HOOK +static analyze_stats_collect_hook_type previous_analyze_stats_collect_hook = + nullptr; +#endif #ifdef IC_TEARDOWN_HOOK static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; #endif @@ -36,6 +41,9 @@ static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); static void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors); +#ifdef ANALYZE_STATS_COLLECT_HOOK +static void ya_analyze_stats_collect_hook(QueryDesc *query_desc); +#endif static EventSender *sender = nullptr; @@ -71,6 +79,10 @@ void hooks_init() { #ifdef IC_TEARDOWN_HOOK previous_ic_teardown_hook = ic_teardown_hook; ic_teardown_hook = ya_ic_teardown_hook; +#endif +#ifdef ANALYZE_STATS_COLLECT_HOOK + previous_analyze_stats_collect_hook = analyze_stats_collect_hook; + analyze_stats_collect_hook = ya_analyze_stats_collect_hook; #endif stat_statements_parser_init(); } @@ -83,6 +95,9 @@ void hooks_deinit() { query_info_collect_hook = previous_query_info_collect_hook; #ifdef IC_TEARDOWN_HOOK ic_teardown_hook = previous_ic_teardown_hook; +#endif +#ifdef ANALYZE_STATS_COLLECT_HOOK + analyze_stats_collect_hook = previous_analyze_stats_collect_hook; #endif stat_statements_parser_deinit(); if (sender) { @@ -165,6 +180,15 @@ void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { #endif } +#ifdef ANALYZE_STATS_COLLECT_HOOK +void ya_analyze_stats_collect_hook(QueryDesc *query_desc) { + 
cpp_call(get_sender(), &EventSender::analyze_stats_collect, query_desc); + if (previous_analyze_stats_collect_hook) { + (*previous_analyze_stats_collect_hook)(query_desc); + } +} +#endif + static void check_stats_loaded() { if (!YagpStat::loaded()) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), From 405d8430da5dbdedfb74f2b8687092cb747139c9 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Fri, 27 Jun 2025 12:22:49 +0300 Subject: [PATCH 081/133] parallel makefile & move link flags --- Makefile | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 88c76bbda76..74baeb3e78a 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD -override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -Iinclude -Isrc/protos -Isrc -DGPBUILD +override CXXFLAGS = -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -Iinclude -Isrc/protos -Isrc -DGPBUILD COMMON_CPP_FLAGS := -Isrc -Iinclude -Isrc/stat_statements_parser PG_CXXFLAGS += $(COMMON_CPP_FLAGS) -SHLIB_LINK += -lprotobuf +SHLIB_LINK += -lprotobuf -lpthread -lstdc++ PROTOC = protoc SRC_DIR = ./src @@ -11,7 +11,7 @@ PROTO_DIR = ./protos PROTO_GEN_OBJECTS = $(GEN_DIR)/yagpcc_plan.pb.o $(GEN_DIR)/yagpcc_metrics.pb.o \ 
$(GEN_DIR)/yagpcc_set_service.pb.o -$(GEN_DIR)/%.pb.cpp : $(PROTO_DIR)/%.proto +$(GEN_DIR)/%.pb.cpp $(GEN_DIR)/%.pb.h: $(PROTO_DIR)/%.proto sed -i 's/optional //g' $^ sed -i 's/cloud\/mdb\/yagpcc\/api\/proto\/common\//\protos\//g' $^ $(PROTOC) --cpp_out=$(SRC_DIR) $^ @@ -41,6 +41,14 @@ PG_CONFIG := pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +$(GEN_DIR)/yagpcc_set_service.pb.o: $(GEN_DIR)/yagpcc_metrics.pb.h + +PROTO_INCLUDES = $(GEN_DIR)/yagpcc_set_service.pb.h $(GEN_DIR)/yagpcc_metrics.pb.h $(GEN_DIR)/yagpcc_plan.pb.h +$(SRC_DIR)/UDSConnector.o: PROTO_INCLUDES +$(SRC_DIR)/ProtoUtils.o: PROTO_INCLUDES +$(SRC_DIR)/EventSender.o: PROTO_INCLUDES +$(SRC_DIR)/ProcStats.o: $(GEN_DIR)/yagpcc_metrics.pb.h + gen: $(PROTO_GEN_OBJECTS) .DEFAULT_GOAL := all From fa33aca4bf82660918166c524edb90a12f25f988 Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Fri, 27 Jun 2025 12:29:53 +0300 Subject: [PATCH 082/133] parallel makefile & move link flags (#8) --- Makefile | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 88c76bbda76..74baeb3e78a 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD -override CXXFLAGS = -fPIC -lstdc++ -lpthread -g3 -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -Iinclude -Isrc/protos -Isrc -DGPBUILD +override CXXFLAGS = -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security 
-fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -Iinclude -Isrc/protos -Isrc -DGPBUILD COMMON_CPP_FLAGS := -Isrc -Iinclude -Isrc/stat_statements_parser PG_CXXFLAGS += $(COMMON_CPP_FLAGS) -SHLIB_LINK += -lprotobuf +SHLIB_LINK += -lprotobuf -lpthread -lstdc++ PROTOC = protoc SRC_DIR = ./src @@ -11,7 +11,7 @@ PROTO_DIR = ./protos PROTO_GEN_OBJECTS = $(GEN_DIR)/yagpcc_plan.pb.o $(GEN_DIR)/yagpcc_metrics.pb.o \ $(GEN_DIR)/yagpcc_set_service.pb.o -$(GEN_DIR)/%.pb.cpp : $(PROTO_DIR)/%.proto +$(GEN_DIR)/%.pb.cpp $(GEN_DIR)/%.pb.h: $(PROTO_DIR)/%.proto sed -i 's/optional //g' $^ sed -i 's/cloud\/mdb\/yagpcc\/api\/proto\/common\//\protos\//g' $^ $(PROTOC) --cpp_out=$(SRC_DIR) $^ @@ -41,6 +41,14 @@ PG_CONFIG := pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +$(GEN_DIR)/yagpcc_set_service.pb.o: $(GEN_DIR)/yagpcc_metrics.pb.h + +PROTO_INCLUDES = $(GEN_DIR)/yagpcc_set_service.pb.h $(GEN_DIR)/yagpcc_metrics.pb.h $(GEN_DIR)/yagpcc_plan.pb.h +$(SRC_DIR)/UDSConnector.o: PROTO_INCLUDES +$(SRC_DIR)/ProtoUtils.o: PROTO_INCLUDES +$(SRC_DIR)/EventSender.o: PROTO_INCLUDES +$(SRC_DIR)/ProcStats.o: $(GEN_DIR)/yagpcc_metrics.pb.h + gen: $(PROTO_GEN_OBJECTS) .DEFAULT_GOAL := all From 631d73f5d496de2ee0e9217dcbaa4d96445b7fcb Mon Sep 17 00:00:00 2001 From: NJrslv Date: Fri, 27 Jun 2025 13:31:34 +0300 Subject: [PATCH 083/133] correct expand var --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 74baeb3e78a..0df06818ff8 100644 --- a/Makefile +++ b/Makefile @@ -44,9 +44,9 @@ include $(PGXS) $(GEN_DIR)/yagpcc_set_service.pb.o: $(GEN_DIR)/yagpcc_metrics.pb.h PROTO_INCLUDES = $(GEN_DIR)/yagpcc_set_service.pb.h $(GEN_DIR)/yagpcc_metrics.pb.h $(GEN_DIR)/yagpcc_plan.pb.h -$(SRC_DIR)/UDSConnector.o: PROTO_INCLUDES -$(SRC_DIR)/ProtoUtils.o: PROTO_INCLUDES -$(SRC_DIR)/EventSender.o: PROTO_INCLUDES +$(SRC_DIR)/UDSConnector.o: $(PROTO_INCLUDES) 
+$(SRC_DIR)/ProtoUtils.o: $(PROTO_INCLUDES) +$(SRC_DIR)/EventSender.o: $(PROTO_INCLUDES) $(SRC_DIR)/ProcStats.o: $(GEN_DIR)/yagpcc_metrics.pb.h gen: $(PROTO_GEN_OBJECTS) From 306a5f7020a01bf1a5b6a807835db9beb4f00e16 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Fri, 4 Jul 2025 19:31:19 +0300 Subject: [PATCH 084/133] add safe cpp wrappers around pg funcs --- Makefile | 1 + src/Config.cpp | 16 +- src/EventSender.cpp | 20 +- src/EventSender.h | 3 - src/PgUtils.cpp | 74 +--- src/ProtoUtils.cpp | 67 +-- src/ProtoUtils.h | 2 + src/UDSConnector.cpp | 6 +- src/UDSConnector.h | 1 - src/hook_wrappers.cpp | 6 +- src/memory/gpdbwrappers.cpp | 412 ++++++++++++++++++ src/memory/gpdbwrappers.h | 38 ++ .../pg_stat_statements_ya_parser.h | 6 +- 13 files changed, 519 insertions(+), 133 deletions(-) create mode 100644 src/memory/gpdbwrappers.cpp create mode 100644 src/memory/gpdbwrappers.h diff --git a/Makefile b/Makefile index 0df06818ff8..dedbec9a5ae 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,7 @@ OBJS := $(PG_STAT_OBJS) \ $(SRC_DIR)/UDSConnector.o \ $(SRC_DIR)/EventSender.o \ $(SRC_DIR)/hook_wrappers.o \ + $(SRC_DIR)/memory/gpdbwrappers.o \ $(SRC_DIR)/yagp_hooks_collector.o EXTRA_CLEAN := $(GEN_DIR) DATA := $(wildcard sql/*--*.sql) diff --git a/src/Config.cpp b/src/Config.cpp index ac274a1e218..e9564ef8959 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -1,4 +1,5 @@ #include "Config.h" +#include "memory/gpdbwrappers.h" #include #include #include @@ -6,7 +7,6 @@ extern "C" { #include "postgres.h" -#include "utils/builtins.h" #include "utils/guc.h" } @@ -29,15 +29,15 @@ static void update_ignored_users(const char *new_guc_ignored_users) { std::make_unique>(); if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { /* Need a modifiable copy of string */ - char *rawstring = pstrdup(new_guc_ignored_users); + char *rawstring = gpdb::pstrdup(new_guc_ignored_users); List *elemlist; ListCell *l; /* Parse string into list of identifiers */ - if 
(!SplitIdentifierString(rawstring, ',', &elemlist)) { + if (!gpdb::split_identifier_string(rawstring, ',', &elemlist)) { /* syntax error in list */ - pfree(rawstring); - list_free(elemlist); + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); ereport( LOG, (errcode(ERRCODE_SYNTAX_ERROR), @@ -48,8 +48,8 @@ static void update_ignored_users(const char *new_guc_ignored_users) { foreach (l, elemlist) { new_ignored_users_set->insert((char *)lfirst(l)); } - pfree(rawstring); - list_free(elemlist); + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); } ignored_users_set = std::move(new_ignored_users_set); } @@ -131,4 +131,4 @@ void Config::sync() { update_ignored_users(guc_ignored_users); ignored_users_guc_dirty = false; } -} \ No newline at end of file +} diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 19787fe0db0..8711c4cbd4f 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,15 +1,14 @@ #include "Config.h" #include "UDSConnector.h" +#include "memory/gpdbwrappers.h" #define typeid __typeid extern "C" { #include "postgres.h" -#include "access/hash.h" #include "executor/executor.h" #include "utils/elog.h" -#include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" #include "cdb/ml_ipc.h" @@ -81,7 +80,7 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { instr_time starttime; INSTR_TIME_SET_CURRENT(starttime); query_desc->showstatctx = - cdbexplain_showExecStatsBegin(query_desc, starttime); + gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); } } } @@ -106,10 +105,10 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { // Make sure the space is allocated in the per-query // context so it will go away at executor_end. 
if (query_desc->totaltime == NULL) { - MemoryContext oldcxt; - oldcxt = MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - query_desc->totaltime = InstrAlloc(1, INSTRUMENT_ALL); - MemoryContextSwitchTo(oldcxt); + MemoryContext oldcxt = + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + query_desc->totaltime = gpdb::instr_alloc(1, INSTRUMENT_ALL); + gpdb::mem_ctx_switch_to(oldcxt); } } yagpcc::GPMetrics stats; @@ -240,7 +239,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } query_msgs.erase({query_desc->gpmon_pkt->u.qexec.key.ccnt, query_desc->gpmon_pkt->u.qexec.key.tmid}); - pfree(query_desc->gpmon_pkt); + gpdb::pfree(query_desc->gpmon_pkt); } } @@ -297,7 +296,7 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { } // Make sure stats accumulation is done. // (Note: it's okay if several levels of hook all do this.) - InstrEndLoop(query_desc->totaltime); + gpdb::instr_end_loop(query_desc->totaltime); double ms = query_desc->totaltime->total * 1000.0; if (ms >= Config::min_analyze_time()) { @@ -364,7 +363,8 @@ EventSender::QueryItem *EventSender::get_query_message(QueryDesc *query_desc) { query_msgs.find({query_desc->gpmon_pkt->u.qexec.key.ccnt, query_desc->gpmon_pkt->u.qexec.key.tmid}) == query_msgs.end()) { - query_desc->gpmon_pkt = (gpmon_packet_t *)palloc0(sizeof(gpmon_packet_t)); + query_desc->gpmon_pkt = + (gpmon_packet_t *)gpdb::palloc0(sizeof(gpmon_packet_t)); query_desc->gpmon_pkt->u.qexec.key.ccnt = gp_command_count; query_desc->gpmon_pkt->u.qexec.key.tmid = nesting_level; query_msgs.insert({{gp_command_count, nesting_level}, diff --git a/src/EventSender.h b/src/EventSender.h index 4d09b429fc8..f3dd1d2a528 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,13 +1,10 @@ #pragma once -#include #include -#include #define typeid __typeid extern "C" { #include "utils/metrics_utils.h" -#include "cdb/ml_ipc.h" #ifdef IC_TEARDOWN_HOOK #include "cdb/ic_udpifc.h" #endif diff --git a/src/PgUtils.cpp 
b/src/PgUtils.cpp index ed3e69c6d44..69a520aef6a 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -1,5 +1,6 @@ #include "PgUtils.h" #include "Config.h" +#include "memory/gpdbwrappers.h" extern "C" { #include "utils/guc.h" @@ -9,17 +10,18 @@ extern "C" { } std::string *get_user_name() { - const char *username = GetConfigOption("session_authorization", false, false); + const char *username = + gpdb::get_config_option("session_authorization", false, false); // username is not to be freed return username ? new std::string(username) : nullptr; } std::string *get_db_name() { - char *dbname = get_database_name(MyDatabaseId); + char *dbname = gpdb::get_database_name(MyDatabaseId); std::string *result = nullptr; if (dbname) { result = new std::string(dbname); - pfree(dbname); + gpdb::pfree(dbname); } return result; } @@ -80,69 +82,3 @@ bool need_collect(QueryDesc *query_desc, int nesting_level) { return !filter_query(query_desc) && nesting_is_valid(query_desc, nesting_level); } - -ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { - ExplainState es; - ExplainInitState(&es); - es.costs = costs; - es.verbose = true; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); - PG_TRY(); - { ExplainPrintPlan(&es, query_desc); } - PG_CATCH(); - { - // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE - // implementation. We don't want any queries to fail due to those bugs, so - // we report the bug here for future investigatin and continue collecting - // metrics w/o reporting any plans - resetStringInfo(es.str); - appendStringInfo( - es.str, - "Unable to restore query plan due to PostgreSQL internal error. 
" - "See logs for more information"); - ereport(INFO, - (errmsg("YAGPCC failed to reconstruct explain text for query: %s", - query_desc->sourceText))); - } - PG_END_TRY(); - ExplainEndOutput(&es); - return es; -} - -ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze) { - ExplainState es; - ExplainInitState(&es); - es.analyze = analyze; - es.verbose = true; - es.buffers = es.analyze; - es.timing = es.analyze; - es.summary = es.analyze; - es.format = EXPLAIN_FORMAT_JSON; - ExplainBeginOutput(&es); - if (analyze) { - PG_TRY(); - { - ExplainPrintPlan(&es, query_desc); - ExplainPrintExecStatsEnd(&es, query_desc); - } - PG_CATCH(); - { - // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE - // implementation. We don't want any queries to fail due to those bugs, so - // we report the bug here for future investigatin and continue collecting - // metrics w/o reporting any plans - resetStringInfo(es.str); - appendStringInfo( - es.str, - "Unable to restore analyze plan due to PostgreSQL internal error. " - "See logs for more information"); - ereport(INFO, - (errmsg("YAGPCC failed to reconstruct analyze text for query: %s", - query_desc->sourceText))); - } - PG_END_TRY(); - } - ExplainEndOutput(&es); - return es; -} diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 6e9fa6bd5c5..1c7ca1598f1 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -2,6 +2,7 @@ #include "PgUtils.h" #include "ProcStats.h" #include "Config.h" +#include "memory/gpdbwrappers.h" #define typeid __typeid #define operator __operator @@ -15,10 +16,7 @@ extern "C" { #ifdef IC_TEARDOWN_HOOK #include "cdb/ic_udpifc.h" #endif -#include "gpmon/gpmon.h" #include "utils/workfile_mgr.h" - -#include "stat_statements_parser/pg_stat_statements_ya_parser.h" } #undef typeid #undef operator @@ -60,18 +58,21 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); MemoryContext oldcxt = - MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - auto es = get_explain_state(query_desc, true); - MemoryContextSwitchTo(oldcxt); - *qi->mutable_plan_text() = - char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); - StringInfo norm_plan = gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = char_to_trimmed_str( - norm_plan->data, norm_plan->len, Config::max_plan_size()); - qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - qi->set_query_id(query_desc->plannedstmt->queryId); - pfree(es.str->data); - pfree(norm_plan->data); + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + auto es = gpdb::get_explain_state(query_desc, true); + if (es.str) { + *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len, + Config::max_plan_size()); + StringInfo norm_plan = gpdb::gen_normplan(es.str->data); + *qi->mutable_template_plan_text() = char_to_trimmed_str( + norm_plan->data, norm_plan->len, Config::max_plan_size()); + qi->set_plan_id( + hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + qi->set_query_id(query_desc->plannedstmt->queryId); + pfree(es.str->data); + pfree(norm_plan->data); + } + gpdb::mem_ctx_switch_to(oldcxt); } } @@ -81,7 +82,7 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { *qi->mutable_query_text() = char_to_trimmed_str( query_desc->sourceText, strlen(query_desc->sourceText), Config::max_text_size()); - char *norm_query = gen_normquery(query_desc->sourceText); + char *norm_query = gpdb::gen_normquery(query_desc->sourceText); *qi->mutable_template_query_text() = char_to_trimmed_str( norm_query, strlen(norm_query), Config::max_text_size()); } @@ -233,23 +234,23 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, return; } MemoryContext oldcxt = - MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - 
- ExplainState es = get_analyze_state_json( + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = gpdb::get_analyze_state_json( query_desc, query_desc->instrument_options && Config::enable_analyze()); - // Remove last line break. - if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { - es.str->data[--es.str->len] = '\0'; - } - // Convert JSON array to JSON object. - if (es.str->len > 0) { - es.str->data[0] = '{'; - es.str->data[es.str->len - 1] = '}'; + gpdb::mem_ctx_switch_to(oldcxt); + if (es.str) { + // Remove last line break. + if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { + es.str->data[--es.str->len] = '\0'; + } + // Convert JSON array to JSON object. + if (es.str->len > 0) { + es.str->data[0] = '{'; + es.str->data[es.str->len - 1] = '}'; + } + auto trimmed_analyze = + char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); + req->mutable_query_info()->set_analyze_text(trimmed_analyze); + gpdb::pfree(es.str->data); } - auto trimmed_analyze = - char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); - req->mutable_query_info()->set_analyze_text(trimmed_analyze); - - pfree(es.str->data); - MemoryContextSwitchTo(oldcxt); } \ No newline at end of file diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 6fb880c2eb8..8287b3de7ea 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -1,3 +1,5 @@ +#pragma once + #include "protos/yagpcc_set_service.pb.h" struct QueryDesc; diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index 8a5f754f3b4..b5b70836db4 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -1,6 +1,7 @@ #include "UDSConnector.h" #include "Config.h" #include "YagpStat.h" +#include "memory/gpdbwrappers.h" #include #include @@ -13,7 +14,6 @@ extern "C" { #include "postgres.h" -#include "cdb/cdbvars.h" } UDSConnector::UDSConnector() { GOOGLE_PROTOBUF_VERIFY_VERSION; } @@ -44,7 +44,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, 
if (connect(sockfd, (sockaddr *)&address, sizeof(address)) != -1) { auto data_size = req.ByteSize(); auto total_size = data_size + sizeof(uint32_t); - uint8_t *buf = (uint8_t *)palloc(total_size); + uint8_t *buf = (uint8_t *)gpdb::palloc(total_size); uint32_t *size_payload = (uint32_t *)buf; *size_payload = data_size; req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); @@ -67,7 +67,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, } else { YagpStat::report_send(total_size); } - pfree(buf); + gpdb::pfree(buf); } else { // log the error and go on log_tracing_failure(req, event); diff --git a/src/UDSConnector.h b/src/UDSConnector.h index 42e0aa20968..67504fc8529 100644 --- a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -1,7 +1,6 @@ #pragma once #include "protos/yagpcc_set_service.pb.h" -#include class UDSConnector { public: diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 79d3ec45881..25a85f086d1 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -7,10 +7,10 @@ extern "C" { #include "utils/elog.h" #include "utils/builtins.h" #include "utils/metrics_utils.h" -#include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" #include "cdb/ml_ipc.h" #include "tcop/utility.h" +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" } #undef typeid @@ -18,7 +18,7 @@ extern "C" { #include "YagpStat.h" #include "EventSender.h" #include "hook_wrappers.h" -#include "stat_statements_parser/pg_stat_statements_ya_parser.h" +#include "memory/gpdbwrappers.h" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; @@ -229,7 +229,7 @@ Datum yagp_functions_get(FunctionCallInfo fcinfo) { values[3] = Int64GetDatum(stats.failed_connects); values[4] = Int64GetDatum(stats.failed_other); values[5] = Int32GetDatum(stats.max_message_size); - HeapTuple tuple = heap_form_tuple(tupdesc, values, nulls); + HeapTuple tuple = gpdb::heap_form_tuple(tupdesc, values, 
nulls); Datum result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); } \ No newline at end of file diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp new file mode 100644 index 00000000000..c7df182738a --- /dev/null +++ b/src/memory/gpdbwrappers.cpp @@ -0,0 +1,412 @@ +#include "gpdbwrappers.h" + +extern "C" { +#include "postgres.h" +#include "utils/memutils.h" +#include "utils/guc.h" +#include "commands/dbcommands.h" +#include "utils/builtins.h" +#include "nodes/pg_list.h" +#include "commands/explain.h" +#include "executor/instrument.h" +#include "access/tupdesc.h" +#include "access/htup.h" +#include "utils/elog.h" +#include "cdb/cdbexplain.h" +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" +} + +#include + +void *gpdb::palloc(Size size) { + void *result = nullptr; + bool success; + + PG_TRY(); + { + result = ::palloc(size); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("Memory allocation failed"); + + return result; +} + +void *gpdb::palloc0(Size size) { + void *result = nullptr; + bool success; + + PG_TRY(); + { + result = ::palloc0(size); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("Zero init memory allocation failed"); + + return result; +} + +char *gpdb::pstrdup(const char *str) { + char *result = nullptr; + bool success; + + PG_TRY(); + { + result = ::pstrdup(str); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("String duplication failed"); + + return result; +} + +char *gpdb::get_database_name(Oid dbid) noexcept { + char *result = nullptr; + + PG_TRY(); + { + result = ::get_database_name(dbid); + } + PG_CATCH(); + { + FlushErrorState(); + } + PG_END_TRY(); + + return result; +} + +bool gpdb::split_identifier_string(char 
*rawstring, char separator, + List **namelist) noexcept { + bool result = false; + + PG_TRY(); + { + result = SplitIdentifierString(rawstring, separator, namelist); + } + PG_CATCH(); + { + FlushErrorState(); + } + PG_END_TRY(); + + return result; +} + +ExplainState gpdb::get_explain_state(QueryDesc *query_desc, bool costs) noexcept { + ExplainState es = {0}; + + PG_TRY(); + { + ExplainInitState(&es); + es.costs = costs; + es.verbose = true; + es.format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(&es); + ExplainPrintPlan(&es, query_desc); + ExplainEndOutput(&es); + } + PG_CATCH(); + { + // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE + // implementation. We don't want any queries to fail due to those bugs, so + // we report the bug here for future investigatin and continue collecting + // metrics w/o reporting any plans + if (es.str && es.str->data) { + resetStringInfo(es.str); + } + // appendStringInfo() can ereport(ERROR), do not call it in PG_CATCH(). + // appendStringInfo( + // es.str, + // "Unable to restore query plan due to PostgreSQL internal error. " + // "See logs for more information"); + ereport(INFO, + (errmsg("YAGPCC failed to reconstruct explain text for query: %s", + query_desc->sourceText))); + FlushErrorState(); + } + PG_END_TRY(); + + return es; +} + +ExplainState gpdb::get_analyze_state_json(QueryDesc *query_desc, + bool analyze) noexcept { + ExplainState es = {0}; + + PG_TRY(); + { + ExplainInitState(&es); + es.analyze = analyze; + es.verbose = true; + es.buffers = es.analyze; + es.timing = es.analyze; + es.summary = es.analyze; + es.format = EXPLAIN_FORMAT_JSON; + ExplainBeginOutput(&es); + if (analyze) { + ExplainPrintPlan(&es, query_desc); + ExplainPrintExecStatsEnd(&es, query_desc); + } + ExplainEndOutput(&es); + } + PG_CATCH(); + { + // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE + // implementation. 
We don't want any queries to fail due to those bugs, so + // we report the bug here for future investigatin and continue collecting + // metrics w/o reporting any plans + if (es.str && es.str->data) { + resetStringInfo(es.str); + } + // appendStringInfo() can ereport(ERROR), do not call it in PG_CATCH(). + // appendStringInfo( + // es.str, + // "Unable to restore analyze plan due to PostgreSQL internal error. " + // "See logs for more information"); + ereport(INFO, + (errmsg("YAGPCC failed to reconstruct analyze text for query: %s", + query_desc->sourceText))); + FlushErrorState(); + } + PG_END_TRY(); + + return es; +} + +Instrumentation *gpdb::instr_alloc(size_t n, int instrument_options) { + Instrumentation *result = nullptr; + bool success; + + PG_TRY(); + { + result = InstrAlloc(n, instrument_options); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("Instrumentation allocation failed"); + + return result; +} + +HeapTuple gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, + bool *isnull) { + if (!tupleDescriptor || !values || !isnull) + throw std::runtime_error( + "Invalid input parameters for heap tuple formation"); + + HeapTuple result = nullptr; + bool success; + + PG_TRY(); + { + result = ::heap_form_tuple(tupleDescriptor, values, isnull); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("Heap tuple formation failed"); + + return result; +} + +void gpdb::pfree(void *pointer) { + if (!pointer) + return; + + bool success; + + PG_TRY(); + { + ::pfree(pointer); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("Memory deallocation failed"); +} + +MemoryContext gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { + return MemoryContextSwitchTo(context); +} + 
+const char *gpdb::get_config_option(const char *name, bool missing_ok, + bool restrict_superuser) noexcept { + if (!name) + return nullptr; + + const char *result = nullptr; + + PG_TRY(); + { + result = GetConfigOption(name, missing_ok, restrict_superuser); + } + PG_CATCH(); + { + FlushErrorState(); + } + PG_END_TRY(); + + return result; +} + +void gpdb::list_free(List *list) { + if (!list) + return; + + bool success; + + PG_TRY(); + { + ::list_free(list); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("List deallocation failed"); +} + +CdbExplain_ShowStatCtx * +gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, + instr_time starttime) { + if (!query_desc) + throw std::runtime_error("Invalid query descriptor"); + + CdbExplain_ShowStatCtx *result = nullptr; + bool success; + + PG_TRY(); + { + result = ::cdbexplain_showExecStatsBegin(query_desc, starttime); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("CdbExplain ShowExecStatsBegin failed"); + + return result; +} + +void gpdb::instr_end_loop(Instrumentation *instr) { + if (!instr) + throw std::runtime_error("Invalid instrumentation pointer"); + + bool success; + + PG_TRY(); + { + ::InstrEndLoop(instr); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("InstrEndLoop failed"); +} + +char *gpdb::gen_normquery(const char *query) { + char *result = nullptr; + bool success; + + PG_TRY(); + { + result = ::gen_normquery(query); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("gen_normquery failed"); + + return result; +} + +StringInfo gpdb::gen_normplan(const char *exec_plan) { + if (!exec_plan) + throw std::runtime_error("Invalid 
execution plan string"); + + StringInfo result = nullptr; + bool success; + + PG_TRY(); + { + result = ::gen_normplan(exec_plan); + success = true; + } + PG_CATCH(); + { + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + throw std::runtime_error("gen_normplan failed"); + + return result; +} \ No newline at end of file diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h new file mode 100644 index 00000000000..c37a98f96db --- /dev/null +++ b/src/memory/gpdbwrappers.h @@ -0,0 +1,38 @@ +#pragma once + +extern "C" { +#include "postgres.h" +#include "nodes/pg_list.h" +#include "commands/explain.h" +#include "executor/instrument.h" +#include "access/htup.h" +} + +namespace gpdb { + +// Functions that call palloc(). +// Make sure correct memory context is set. +void *palloc(Size size); +void *palloc0(Size size); +char *pstrdup(const char *str); +char *get_database_name(Oid dbid) noexcept; +bool split_identifier_string(char *rawstring, char separator, + List **namelist) noexcept; +ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; +ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze) noexcept; +Instrumentation *instr_alloc(size_t n, int instrument_options); +HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull); +CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc, + instr_time starttime); +void instr_end_loop(Instrumentation *instr); +char *gen_normquery(const char *query); +StringInfo gen_normplan(const char *executionPlan); + +// Palloc-free functions. 
+void pfree(void *pointer); +MemoryContext mem_ctx_switch_to(MemoryContext context) noexcept; +const char *get_config_option(const char *name, bool missing_ok, + bool restrict_superuser) noexcept; +void list_free(List *list); + +} // namespace gpdb diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/src/stat_statements_parser/pg_stat_statements_ya_parser.h index aa9cd217e31..b08e8533992 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.h +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -8,9 +8,9 @@ extern "C" extern void stat_statements_parser_init(void); extern void stat_statements_parser_deinit(void); +StringInfo gen_normplan(const char *executionPlan); +char *gen_normquery(const char *query); + #ifdef __cplusplus } #endif - -StringInfo gen_normplan(const char *executionPlan); -char *gen_normquery(const char *query); \ No newline at end of file From 193c672b5bcb31e30c2849a8d60a7114ea8ad299 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Wed, 9 Jul 2025 16:19:05 +0300 Subject: [PATCH 085/133] add minimal readme --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000000..0313fc41c96 --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +## YAGP Hooks Collector + +An extension for collecting greenplum query execution metrics and reporting then to an external agent + +### Collected Statistics + +#### 1. Query Lifecycle +- **What:** Captures query text, normalized query text, timestamps (submit, start, end, done), and user/database info. +- **GUC:** `yagpcc.enable`. + +#### 2. `EXPLAIN` data +- **What:** Triggers generation of the `EXPLAIN (TEXT, COSTS, VERBOSE)` and captures it. +- **GUC:** `yagpcc.enable`. + +#### 3. `EXPLAIN ANALYZE` data +- **What:** Triggers generation of the `EXPLAIN (JSON, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. 
+- **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats`(ANALYZE), `yagpcc.enable_analyze`(BUFFERS, TIMING, VERBOSE). + +#### 4. Other Metrics +- **What:** Captures Instrument, Greenplum, System, Network, Interconnect, Spill metrics. +- **GUC:** `yagpcc.enable`. + +### General Configuration +- **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. +- **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. + From 422cdf9961003f76f368696b58aa8f21076129fa Mon Sep 17 00:00:00 2001 From: NJrslv Date: Thu, 10 Jul 2025 11:54:20 +0300 Subject: [PATCH 086/133] add nested queries info --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0313fc41c96..1aac88fe153 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ## YAGP Hooks Collector -An extension for collecting greenplum query execution metrics and reporting then to an external agent +An extension for collecting greenplum query execution metrics and reporting them to an external agent ### Collected Statistics @@ -16,7 +16,13 @@ An extension for collecting greenplum query execution metrics and reporting then - **What:** Triggers generation of the `EXPLAIN (JSON, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. - **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats`(ANALYZE), `yagpcc.enable_analyze`(BUFFERS, TIMING, VERBOSE). -#### 4. Other Metrics +#### 4. Nested queries +- **What:** + - Disabled: Top-level queries are being reported from coordinator and segments. + - Enabled: Top-level and nested queries are being reported from coordinator. Any nested queries from segments are collected as aggregates. +- **GUC:** `yagpcc.report_nested_queries`. + +#### 5. Other Metrics - **What:** Captures Instrument, Greenplum, System, Network, Interconnect, Spill metrics. 
- **GUC:** `yagpcc.enable`. From 4a25ae20df076091cfe78ee3fefec05082bb28ca Mon Sep 17 00:00:00 2001 From: NJrslv Date: Thu, 10 Jul 2025 12:06:19 +0300 Subject: [PATCH 087/133] move nested queries to general info --- README.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1aac88fe153..5a7e2e562be 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ## YAGP Hooks Collector -An extension for collecting greenplum query execution metrics and reporting them to an external agent +An extension for collecting greenplum query execution metrics and reporting them to an external agent. ### Collected Statistics @@ -16,17 +16,12 @@ An extension for collecting greenplum query execution metrics and reporting them - **What:** Triggers generation of the `EXPLAIN (JSON, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. - **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats`(ANALYZE), `yagpcc.enable_analyze`(BUFFERS, TIMING, VERBOSE). -#### 4. Nested queries -- **What:** - - Disabled: Top-level queries are being reported from coordinator and segments. - - Enabled: Top-level and nested queries are being reported from coordinator. Any nested queries from segments are collected as aggregates. -- **GUC:** `yagpcc.report_nested_queries`. - -#### 5. Other Metrics +#### 4. Other Metrics - **What:** Captures Instrument, Greenplum, System, Network, Interconnect, Spill metrics. - **GUC:** `yagpcc.enable`. ### General Configuration +- **Nested Queries:** When `yagpcc.report_nested_queries` is `false`, only top-level queries are reported from the coordinator and segments, when `true`, both top-level and nested queries are reported from the coordinator, from segments collected as aggregates. - **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. 
- **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. From b647e5903922ff83e1c1a528db082e8032c79de9 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Wed, 9 Jul 2025 16:19:05 +0300 Subject: [PATCH 088/133] add minimal readme --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000000..0313fc41c96 --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +## YAGP Hooks Collector + +An extension for collecting greenplum query execution metrics and reporting then to an external agent + +### Collected Statistics + +#### 1. Query Lifecycle +- **What:** Captures query text, normalized query text, timestamps (submit, start, end, done), and user/database info. +- **GUC:** `yagpcc.enable`. + +#### 2. `EXPLAIN` data +- **What:** Triggers generation of the `EXPLAIN (TEXT, COSTS, VERBOSE)` and captures it. +- **GUC:** `yagpcc.enable`. + +#### 3. `EXPLAIN ANALYZE` data +- **What:** Triggers generation of the `EXPLAIN (JSON, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. +- **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats`(ANALYZE), `yagpcc.enable_analyze`(BUFFERS, TIMING, VERBOSE). + +#### 4. Other Metrics +- **What:** Captures Instrument, Greenplum, System, Network, Interconnect, Spill metrics. +- **GUC:** `yagpcc.enable`. + +### General Configuration +- **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. +- **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. 
+ From 33620b15065001c19a70ad19e9b171f7ee0c03d3 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Thu, 10 Jul 2025 11:54:20 +0300 Subject: [PATCH 089/133] add nested queries info --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0313fc41c96..1aac88fe153 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ## YAGP Hooks Collector -An extension for collecting greenplum query execution metrics and reporting then to an external agent +An extension for collecting greenplum query execution metrics and reporting them to an external agent ### Collected Statistics @@ -16,7 +16,13 @@ An extension for collecting greenplum query execution metrics and reporting then - **What:** Triggers generation of the `EXPLAIN (JSON, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. - **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats`(ANALYZE), `yagpcc.enable_analyze`(BUFFERS, TIMING, VERBOSE). -#### 4. Other Metrics +#### 4. Nested queries +- **What:** + - Disabled: Top-level queries are being reported from coordinator and segments. + - Enabled: Top-level and nested queries are being reported from coordinator. Any nested queries from segments are collected as aggregates. +- **GUC:** `yagpcc.report_nested_queries`. + +#### 5. Other Metrics - **What:** Captures Instrument, Greenplum, System, Network, Interconnect, Spill metrics. - **GUC:** `yagpcc.enable`. 
From 62b0da36a757609fb5feecf03bcd0760c4bc6143 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Thu, 10 Jul 2025 12:06:19 +0300 Subject: [PATCH 090/133] move nested queries to general info --- README.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1aac88fe153..5a7e2e562be 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ## YAGP Hooks Collector -An extension for collecting greenplum query execution metrics and reporting them to an external agent +An extension for collecting greenplum query execution metrics and reporting them to an external agent. ### Collected Statistics @@ -16,17 +16,12 @@ An extension for collecting greenplum query execution metrics and reporting them - **What:** Triggers generation of the `EXPLAIN (JSON, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. - **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats`(ANALYZE), `yagpcc.enable_analyze`(BUFFERS, TIMING, VERBOSE). -#### 4. Nested queries -- **What:** - - Disabled: Top-level queries are being reported from coordinator and segments. - - Enabled: Top-level and nested queries are being reported from coordinator. Any nested queries from segments are collected as aggregates. -- **GUC:** `yagpcc.report_nested_queries`. - -#### 5. Other Metrics +#### 4. Other Metrics - **What:** Captures Instrument, Greenplum, System, Network, Interconnect, Spill metrics. - **GUC:** `yagpcc.enable`. ### General Configuration +- **Nested Queries:** When `yagpcc.report_nested_queries` is `false`, only top-level queries are reported from the coordinator and segments, when `true`, both top-level and nested queries are reported from the coordinator, from segments collected as aggregates. - **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. - **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. 
From a4a72b888d649001d34057120b33446135e70ce1 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Fri, 11 Jul 2025 10:08:46 +0300 Subject: [PATCH 091/133] log actual err msg & templatize code & make free noexcept --- Makefile | 2 +- src/PgUtils.cpp | 6 +- src/ProtoUtils.cpp | 6 +- src/memory/gpdbwrappers.cpp | 350 +++++------------------------------- src/memory/gpdbwrappers.h | 107 ++++++++++- 5 files changed, 149 insertions(+), 322 deletions(-) diff --git a/Makefile b/Makefile index dedbec9a5ae..661780acb99 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD -override CXXFLAGS = -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++14 -Iinclude -Isrc/protos -Isrc -DGPBUILD +override CXXFLAGS = -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++17 -Iinclude -Isrc/protos -Isrc -DGPBUILD COMMON_CPP_FLAGS := -Isrc -Iinclude -Isrc/stat_statements_parser PG_CXXFLAGS += $(COMMON_CPP_FLAGS) SHLIB_LINK += -lprotobuf -lpthread -lstdc++ diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 69a520aef6a..f1d1e2623c0 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -3,8 +3,6 @@ #include "memory/gpdbwrappers.h" extern "C" { -#include "utils/guc.h" -#include "commands/dbcommands.h" #include "commands/resgroupcmds.h" #include "cdb/cdbvars.h" } @@ -27,10 +25,10 @@ std::string 
*get_db_name() { } std::string *get_rg_name() { - auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); + auto groupId = gpdb::get_rg_id_by_session_id(MySessionState->sessionId); if (!OidIsValid(groupId)) return nullptr; - char *rgname = GetResGroupNameForId(groupId); + char *rgname = gpdb::get_rg_name_for_id(groupId); if (rgname == nullptr) return nullptr; return new std::string(rgname); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 1c7ca1598f1..93fbb60cb28 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -59,7 +59,7 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); MemoryContext oldcxt = gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - auto es = gpdb::get_explain_state(query_desc, true); + ExplainState es = gpdb::get_explain_state(query_desc, true); if (es.str) { *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); @@ -69,8 +69,8 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { qi->set_plan_id( hash_any((unsigned char *)norm_plan->data, norm_plan->len)); qi->set_query_id(query_desc->plannedstmt->queryId); - pfree(es.str->data); - pfree(norm_plan->data); + gpdb::pfree(es.str->data); + gpdb::pfree(norm_plan->data); } gpdb::mem_ctx_switch_to(oldcxt); } diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index c7df182738a..1fba702a9f5 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -2,9 +2,9 @@ extern "C" { #include "postgres.h" -#include "utils/memutils.h" #include "utils/guc.h" #include "commands/dbcommands.h" +#include "commands/resgroupcmds.h" #include "utils/builtins.h" #include "nodes/pg_list.h" #include "commands/explain.h" @@ -16,112 +16,28 @@ extern "C" { #include "stat_statements_parser/pg_stat_statements_ya_parser.h" } -#include +void *gpdb::palloc(Size size) { return detail::wrap_throw(::palloc, size); } -void 
*gpdb::palloc(Size size) { - void *result = nullptr; - bool success; - - PG_TRY(); - { - result = ::palloc(size); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("Memory allocation failed"); - - return result; -} - -void *gpdb::palloc0(Size size) { - void *result = nullptr; - bool success; - - PG_TRY(); - { - result = ::palloc0(size); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("Zero init memory allocation failed"); - - return result; -} +void *gpdb::palloc0(Size size) { return detail::wrap_throw(::palloc0, size); } char *gpdb::pstrdup(const char *str) { - char *result = nullptr; - bool success; - - PG_TRY(); - { - result = ::pstrdup(str); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("String duplication failed"); - - return result; + return detail::wrap_throw(::pstrdup, str); } char *gpdb::get_database_name(Oid dbid) noexcept { - char *result = nullptr; - - PG_TRY(); - { - result = ::get_database_name(dbid); - } - PG_CATCH(); - { - FlushErrorState(); - } - PG_END_TRY(); - - return result; + return detail::wrap_noexcept(::get_database_name, dbid); } bool gpdb::split_identifier_string(char *rawstring, char separator, - List **namelist) noexcept { - bool result = false; - - PG_TRY(); - { - result = SplitIdentifierString(rawstring, separator, namelist); - } - PG_CATCH(); - { - FlushErrorState(); - } - PG_END_TRY(); - - return result; + List **namelist) noexcept { + return detail::wrap_noexcept(SplitIdentifierString, rawstring, separator, + namelist); } -ExplainState gpdb::get_explain_state(QueryDesc *query_desc, bool costs) noexcept { - ExplainState es = {0}; - - PG_TRY(); - { +ExplainState gpdb::get_explain_state(QueryDesc *query_desc, + bool costs) noexcept { + return 
detail::wrap_noexcept([&]() { + ExplainState es; ExplainInitState(&es); es.costs = costs; es.verbose = true; @@ -129,37 +45,14 @@ ExplainState gpdb::get_explain_state(QueryDesc *query_desc, bool costs) noexcept ExplainBeginOutput(&es); ExplainPrintPlan(&es, query_desc); ExplainEndOutput(&es); - } - PG_CATCH(); - { - // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE - // implementation. We don't want any queries to fail due to those bugs, so - // we report the bug here for future investigatin and continue collecting - // metrics w/o reporting any plans - if (es.str && es.str->data) { - resetStringInfo(es.str); - } - // appendStringInfo() can ereport(ERROR), do not call it in PG_CATCH(). - // appendStringInfo( - // es.str, - // "Unable to restore query plan due to PostgreSQL internal error. " - // "See logs for more information"); - ereport(INFO, - (errmsg("YAGPCC failed to reconstruct explain text for query: %s", - query_desc->sourceText))); - FlushErrorState(); - } - PG_END_TRY(); - - return es; + return es; + }); } ExplainState gpdb::get_analyze_state_json(QueryDesc *query_desc, - bool analyze) noexcept { - ExplainState es = {0}; - - PG_TRY(); - { + bool analyze) noexcept { + return detail::wrap_noexcept([&]() { + ExplainState es; ExplainInitState(&es); es.analyze = analyze; es.verbose = true; @@ -173,100 +66,29 @@ ExplainState gpdb::get_analyze_state_json(QueryDesc *query_desc, ExplainPrintExecStatsEnd(&es, query_desc); } ExplainEndOutput(&es); - } - PG_CATCH(); - { - // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE - // implementation. We don't want any queries to fail due to those bugs, so - // we report the bug here for future investigatin and continue collecting - // metrics w/o reporting any plans - if (es.str && es.str->data) { - resetStringInfo(es.str); - } - // appendStringInfo() can ereport(ERROR), do not call it in PG_CATCH(). 
- // appendStringInfo( - // es.str, - // "Unable to restore analyze plan due to PostgreSQL internal error. " - // "See logs for more information"); - ereport(INFO, - (errmsg("YAGPCC failed to reconstruct analyze text for query: %s", - query_desc->sourceText))); - FlushErrorState(); - } - PG_END_TRY(); - - return es; + return es; + }); } Instrumentation *gpdb::instr_alloc(size_t n, int instrument_options) { - Instrumentation *result = nullptr; - bool success; - - PG_TRY(); - { - result = InstrAlloc(n, instrument_options); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("Instrumentation allocation failed"); - - return result; + return detail::wrap_throw(InstrAlloc, n, instrument_options); } HeapTuple gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, - bool *isnull) { + bool *isnull) { if (!tupleDescriptor || !values || !isnull) throw std::runtime_error( "Invalid input parameters for heap tuple formation"); - HeapTuple result = nullptr; - bool success; - - PG_TRY(); - { - result = ::heap_form_tuple(tupleDescriptor, values, isnull); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("Heap tuple formation failed"); - - return result; + return detail::wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); } -void gpdb::pfree(void *pointer) { +void gpdb::pfree(void *pointer) noexcept { + // Note that ::pfree asserts that pointer != NULL. 
if (!pointer) return; - bool success; - - PG_TRY(); - { - ::pfree(pointer); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("Memory deallocation failed"); + detail::wrap_noexcept(::pfree, pointer); } MemoryContext gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { @@ -274,139 +96,53 @@ MemoryContext gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { } const char *gpdb::get_config_option(const char *name, bool missing_ok, - bool restrict_superuser) noexcept { + bool restrict_superuser) noexcept { if (!name) return nullptr; - const char *result = nullptr; - - PG_TRY(); - { - result = GetConfigOption(name, missing_ok, restrict_superuser); - } - PG_CATCH(); - { - FlushErrorState(); - } - PG_END_TRY(); - - return result; + return detail::wrap_noexcept(GetConfigOption, name, missing_ok, + restrict_superuser); } -void gpdb::list_free(List *list) { +void gpdb::list_free(List *list) noexcept { if (!list) return; - bool success; - - PG_TRY(); - { - ::list_free(list); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("List deallocation failed"); + detail::wrap_noexcept(::list_free, list); } CdbExplain_ShowStatCtx * gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, - instr_time starttime) { + instr_time starttime) { if (!query_desc) throw std::runtime_error("Invalid query descriptor"); - CdbExplain_ShowStatCtx *result = nullptr; - bool success; - - PG_TRY(); - { - result = ::cdbexplain_showExecStatsBegin(query_desc, starttime); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("CdbExplain ShowExecStatsBegin failed"); - - return result; + return detail::wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, + starttime); } void gpdb::instr_end_loop(Instrumentation *instr) 
{ if (!instr) throw std::runtime_error("Invalid instrumentation pointer"); - bool success; - - PG_TRY(); - { - ::InstrEndLoop(instr); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("InstrEndLoop failed"); + detail::wrap_throw(::InstrEndLoop, instr); } char *gpdb::gen_normquery(const char *query) { - char *result = nullptr; - bool success; - - PG_TRY(); - { - result = ::gen_normquery(query); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) - throw std::runtime_error("gen_normquery failed"); - - return result; + return detail::wrap_throw(::gen_normquery, query); } - + StringInfo gpdb::gen_normplan(const char *exec_plan) { if (!exec_plan) throw std::runtime_error("Invalid execution plan string"); - StringInfo result = nullptr; - bool success; - - PG_TRY(); - { - result = ::gen_normplan(exec_plan); - success = true; - } - PG_CATCH(); - { - FlushErrorState(); - success = false; - } - PG_END_TRY(); + return detail::wrap_throw(::gen_normplan, exec_plan); +} - if (!success) - throw std::runtime_error("gen_normplan failed"); +char *gpdb::get_rg_name_for_id(Oid group_id) { + return detail::wrap_throw(GetResGroupNameForId, group_id); +} - return result; +Oid gpdb::get_rg_id_by_session_id(int session_id) { + return detail::wrap_throw(ResGroupGetGroupIdBySessionId, session_id); } \ No newline at end of file diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index c37a98f96db..437a5dd5d29 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -6,9 +6,98 @@ extern "C" { #include "commands/explain.h" #include "executor/instrument.h" #include "access/htup.h" +#include "utils/elog.h" +#include "utils/memutils.h" } +#include +#include +#include +#include +#include + namespace gpdb { +namespace detail { + +template +auto wrap(Func &&func, Args &&...args) noexcept(!Throws) + -> 
decltype(func(std::forward(args)...)) { + + using RetType = decltype(func(std::forward(args)...)); + + // Empty struct for void return type. + struct VoidResult {}; + using ResultHolder = std::conditional_t, VoidResult, + std::optional>; + + bool success; + ErrorData *edata; + ResultHolder result_holder; + + PG_TRY(); + { + if constexpr (!std::is_void_v) { + result_holder.emplace(func(std::forward(args)...)); + } else { + func(std::forward(args)...); + } + edata = NULL; + success = true; + } + PG_CATCH(); + { + MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + MemoryContextSwitchTo(oldctx); + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) { + std::string err; + if (edata && edata->message) { + err = std::string(edata->message); + } else { + err = "Unknown error occurred"; + } + + if (edata) { + FreeErrorData(edata); + } + + if constexpr (Throws) { + throw std::runtime_error(err); + } + + if constexpr (!std::is_void_v) { + return RetType{}; + } else { + return; + } + } + + if constexpr (!std::is_void_v) { + return *std::move(result_holder); + } else { + return; + } +} + +template +auto wrap_throw(Func &&func, Args &&...args) + -> decltype(func(std::forward(args)...)) { + return detail::wrap(std::forward(func), + std::forward(args)...); +} + +template +auto wrap_noexcept(Func &&func, Args &&...args) noexcept + -> decltype(func(std::forward(args)...)) { + return detail::wrap(std::forward(func), + std::forward(args)...); +} +} // namespace detail // Functions that call palloc(). // Make sure correct memory context is set. 
@@ -17,22 +106,26 @@ void *palloc0(Size size); char *pstrdup(const char *str); char *get_database_name(Oid dbid) noexcept; bool split_identifier_string(char *rawstring, char separator, - List **namelist) noexcept; + List **namelist) noexcept; ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; -ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze) noexcept; +ExplainState get_analyze_state_json(QueryDesc *query_desc, + bool analyze) noexcept; Instrumentation *instr_alloc(size_t n, int instrument_options); -HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull); +HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, + bool *isnull); CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc, - instr_time starttime); + instr_time starttime); void instr_end_loop(Instrumentation *instr); char *gen_normquery(const char *query); StringInfo gen_normplan(const char *executionPlan); +char *get_rg_name_for_id(Oid group_id); // Palloc-free functions. 
-void pfree(void *pointer); +void pfree(void *pointer) noexcept; MemoryContext mem_ctx_switch_to(MemoryContext context) noexcept; const char *get_config_option(const char *name, bool missing_ok, - bool restrict_superuser) noexcept; -void list_free(List *list); + bool restrict_superuser) noexcept; +void list_free(List *list) noexcept; +Oid get_rg_id_by_session_id(int session_id); } // namespace gpdb From 22ea10715bf4efea7e67874892911d009d4b9aa7 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Fri, 11 Jul 2025 10:12:56 +0300 Subject: [PATCH 092/133] add tab --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 661780acb99..c02f63e5763 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ OBJS := $(PG_STAT_OBJS) \ $(SRC_DIR)/UDSConnector.o \ $(SRC_DIR)/EventSender.o \ $(SRC_DIR)/hook_wrappers.o \ - $(SRC_DIR)/memory/gpdbwrappers.o \ + $(SRC_DIR)/memory/gpdbwrappers.o \ $(SRC_DIR)/yagp_hooks_collector.o EXTRA_CLEAN := $(GEN_DIR) DATA := $(wildcard sql/*--*.sql) From 96f4628c5f91b712f2a19d51dd239c5ca0cb4464 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 14 Jul 2025 16:14:49 +0300 Subject: [PATCH 093/133] change namespace name to avoid conflicts with gpos --- src/Config.cpp | 12 ++++++------ src/EventSender.cpp | 14 +++++++------- src/PgUtils.cpp | 10 +++++----- src/ProtoUtils.cpp | 22 ++++++++++----------- src/UDSConnector.cpp | 4 ++-- src/hook_wrappers.cpp | 2 +- src/memory/gpdbwrappers.cpp | 38 ++++++++++++++++++------------------- src/memory/gpdbwrappers.h | 4 ++-- 8 files changed, 53 insertions(+), 53 deletions(-) diff --git a/src/Config.cpp b/src/Config.cpp index e9564ef8959..53143ff2d26 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -29,15 +29,15 @@ static void update_ignored_users(const char *new_guc_ignored_users) { std::make_unique>(); if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { /* Need a modifiable copy of string */ - char *rawstring = gpdb::pstrdup(new_guc_ignored_users); + 
char *rawstring = ya_gpdb::pstrdup(new_guc_ignored_users); List *elemlist; ListCell *l; /* Parse string into list of identifiers */ - if (!gpdb::split_identifier_string(rawstring, ',', &elemlist)) { + if (!ya_gpdb::split_identifier_string(rawstring, ',', &elemlist)) { /* syntax error in list */ - gpdb::pfree(rawstring); - gpdb::list_free(elemlist); + ya_gpdb::pfree(rawstring); + ya_gpdb::list_free(elemlist); ereport( LOG, (errcode(ERRCODE_SYNTAX_ERROR), @@ -48,8 +48,8 @@ static void update_ignored_users(const char *new_guc_ignored_users) { foreach (l, elemlist) { new_ignored_users_set->insert((char *)lfirst(l)); } - gpdb::pfree(rawstring); - gpdb::list_free(elemlist); + ya_gpdb::pfree(rawstring); + ya_gpdb::list_free(elemlist); } ignored_users_set = std::move(new_ignored_users_set); } diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 8711c4cbd4f..57c13456337 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -80,7 +80,7 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { instr_time starttime; INSTR_TIME_SET_CURRENT(starttime); query_desc->showstatctx = - gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); + ya_gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); } } } @@ -106,9 +106,9 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { // context so it will go away at executor_end. 
if (query_desc->totaltime == NULL) { MemoryContext oldcxt = - gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - query_desc->totaltime = gpdb::instr_alloc(1, INSTRUMENT_ALL); - gpdb::mem_ctx_switch_to(oldcxt); + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL); + ya_gpdb::mem_ctx_switch_to(oldcxt); } } yagpcc::GPMetrics stats; @@ -239,7 +239,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } query_msgs.erase({query_desc->gpmon_pkt->u.qexec.key.ccnt, query_desc->gpmon_pkt->u.qexec.key.tmid}); - gpdb::pfree(query_desc->gpmon_pkt); + ya_gpdb::pfree(query_desc->gpmon_pkt); } } @@ -296,7 +296,7 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { } // Make sure stats accumulation is done. // (Note: it's okay if several levels of hook all do this.) - gpdb::instr_end_loop(query_desc->totaltime); + ya_gpdb::instr_end_loop(query_desc->totaltime); double ms = query_desc->totaltime->total * 1000.0; if (ms >= Config::min_analyze_time()) { @@ -364,7 +364,7 @@ EventSender::QueryItem *EventSender::get_query_message(QueryDesc *query_desc) { query_desc->gpmon_pkt->u.qexec.key.tmid}) == query_msgs.end()) { query_desc->gpmon_pkt = - (gpmon_packet_t *)gpdb::palloc0(sizeof(gpmon_packet_t)); + (gpmon_packet_t *)ya_gpdb::palloc0(sizeof(gpmon_packet_t)); query_desc->gpmon_pkt->u.qexec.key.ccnt = gp_command_count; query_desc->gpmon_pkt->u.qexec.key.tmid = nesting_level; query_msgs.insert({{gp_command_count, nesting_level}, diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index f1d1e2623c0..81a9e2f6c4c 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -9,26 +9,26 @@ extern "C" { std::string *get_user_name() { const char *username = - gpdb::get_config_option("session_authorization", false, false); + ya_gpdb::get_config_option("session_authorization", false, false); // username is not to be freed return username ? 
new std::string(username) : nullptr; } std::string *get_db_name() { - char *dbname = gpdb::get_database_name(MyDatabaseId); + char *dbname = ya_gpdb::get_database_name(MyDatabaseId); std::string *result = nullptr; if (dbname) { result = new std::string(dbname); - gpdb::pfree(dbname); + ya_gpdb::pfree(dbname); } return result; } std::string *get_rg_name() { - auto groupId = gpdb::get_rg_id_by_session_id(MySessionState->sessionId); + auto groupId = ya_gpdb::get_rg_id_by_session_id(MySessionState->sessionId); if (!OidIsValid(groupId)) return nullptr; - char *rgname = gpdb::get_rg_name_for_id(groupId); + char *rgname = ya_gpdb::get_rg_name_for_id(groupId); if (rgname == nullptr) return nullptr; return new std::string(rgname); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 93fbb60cb28..6cbc7d20183 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -58,21 +58,21 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); MemoryContext oldcxt = - gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - ExplainState es = gpdb::get_explain_state(query_desc, true); + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = ya_gpdb::get_explain_state(query_desc, true); if (es.str) { *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); - StringInfo norm_plan = gpdb::gen_normplan(es.str->data); + StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); *qi->mutable_template_plan_text() = char_to_trimmed_str( norm_plan->data, norm_plan->len, Config::max_plan_size()); qi->set_plan_id( hash_any((unsigned char *)norm_plan->data, norm_plan->len)); qi->set_query_id(query_desc->plannedstmt->queryId); - gpdb::pfree(es.str->data); - gpdb::pfree(norm_plan->data); + ya_gpdb::pfree(es.str->data); + ya_gpdb::pfree(norm_plan->data); } - gpdb::mem_ctx_switch_to(oldcxt); + 
ya_gpdb::mem_ctx_switch_to(oldcxt); } } @@ -82,7 +82,7 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { *qi->mutable_query_text() = char_to_trimmed_str( query_desc->sourceText, strlen(query_desc->sourceText), Config::max_text_size()); - char *norm_query = gpdb::gen_normquery(query_desc->sourceText); + char *norm_query = ya_gpdb::gen_normquery(query_desc->sourceText); *qi->mutable_template_query_text() = char_to_trimmed_str( norm_query, strlen(norm_query), Config::max_text_size()); } @@ -234,10 +234,10 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, return; } MemoryContext oldcxt = - gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - ExplainState es = gpdb::get_analyze_state_json( + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = ya_gpdb::get_analyze_state_json( query_desc, query_desc->instrument_options && Config::enable_analyze()); - gpdb::mem_ctx_switch_to(oldcxt); + ya_gpdb::mem_ctx_switch_to(oldcxt); if (es.str) { // Remove last line break. 
if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { @@ -251,6 +251,6 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, auto trimmed_analyze = char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); req->mutable_query_info()->set_analyze_text(trimmed_analyze); - gpdb::pfree(es.str->data); + ya_gpdb::pfree(es.str->data); } } \ No newline at end of file diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index b5b70836db4..f8c4586126d 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -44,7 +44,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, if (connect(sockfd, (sockaddr *)&address, sizeof(address)) != -1) { auto data_size = req.ByteSize(); auto total_size = data_size + sizeof(uint32_t); - uint8_t *buf = (uint8_t *)gpdb::palloc(total_size); + uint8_t *buf = (uint8_t *)ya_gpdb::palloc(total_size); uint32_t *size_payload = (uint32_t *)buf; *size_payload = data_size; req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); @@ -67,7 +67,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, } else { YagpStat::report_send(total_size); } - gpdb::pfree(buf); + ya_gpdb::pfree(buf); } else { // log the error and go on log_tracing_failure(req, event); diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 25a85f086d1..d76b7c64e10 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -229,7 +229,7 @@ Datum yagp_functions_get(FunctionCallInfo fcinfo) { values[3] = Int64GetDatum(stats.failed_connects); values[4] = Int64GetDatum(stats.failed_other); values[5] = Int32GetDatum(stats.max_message_size); - HeapTuple tuple = gpdb::heap_form_tuple(tupdesc, values, nulls); + HeapTuple tuple = ya_gpdb::heap_form_tuple(tupdesc, values, nulls); Datum result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); } \ No newline at end of file diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index 1fba702a9f5..ac21f24e48f 100644 --- a/src/memory/gpdbwrappers.cpp 
+++ b/src/memory/gpdbwrappers.cpp @@ -16,25 +16,25 @@ extern "C" { #include "stat_statements_parser/pg_stat_statements_ya_parser.h" } -void *gpdb::palloc(Size size) { return detail::wrap_throw(::palloc, size); } +void *ya_gpdb::palloc(Size size) { return detail::wrap_throw(::palloc, size); } -void *gpdb::palloc0(Size size) { return detail::wrap_throw(::palloc0, size); } +void *ya_gpdb::palloc0(Size size) { return detail::wrap_throw(::palloc0, size); } -char *gpdb::pstrdup(const char *str) { +char *ya_gpdb::pstrdup(const char *str) { return detail::wrap_throw(::pstrdup, str); } -char *gpdb::get_database_name(Oid dbid) noexcept { +char *ya_gpdb::get_database_name(Oid dbid) noexcept { return detail::wrap_noexcept(::get_database_name, dbid); } -bool gpdb::split_identifier_string(char *rawstring, char separator, +bool ya_gpdb::split_identifier_string(char *rawstring, char separator, List **namelist) noexcept { return detail::wrap_noexcept(SplitIdentifierString, rawstring, separator, namelist); } -ExplainState gpdb::get_explain_state(QueryDesc *query_desc, +ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, bool costs) noexcept { return detail::wrap_noexcept([&]() { ExplainState es; @@ -49,7 +49,7 @@ ExplainState gpdb::get_explain_state(QueryDesc *query_desc, }); } -ExplainState gpdb::get_analyze_state_json(QueryDesc *query_desc, +ExplainState ya_gpdb::get_analyze_state_json(QueryDesc *query_desc, bool analyze) noexcept { return detail::wrap_noexcept([&]() { ExplainState es; @@ -70,11 +70,11 @@ ExplainState gpdb::get_analyze_state_json(QueryDesc *query_desc, }); } -Instrumentation *gpdb::instr_alloc(size_t n, int instrument_options) { +Instrumentation *ya_gpdb::instr_alloc(size_t n, int instrument_options) { return detail::wrap_throw(InstrAlloc, n, instrument_options); } -HeapTuple gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, +HeapTuple ya_gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull) { if (!tupleDescriptor 
|| !values || !isnull) throw std::runtime_error( @@ -83,7 +83,7 @@ HeapTuple gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, return detail::wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); } -void gpdb::pfree(void *pointer) noexcept { +void ya_gpdb::pfree(void *pointer) noexcept { // Note that ::pfree asserts that pointer != NULL. if (!pointer) return; @@ -91,11 +91,11 @@ void gpdb::pfree(void *pointer) noexcept { detail::wrap_noexcept(::pfree, pointer); } -MemoryContext gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { +MemoryContext ya_gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { return MemoryContextSwitchTo(context); } -const char *gpdb::get_config_option(const char *name, bool missing_ok, +const char *ya_gpdb::get_config_option(const char *name, bool missing_ok, bool restrict_superuser) noexcept { if (!name) return nullptr; @@ -104,7 +104,7 @@ const char *gpdb::get_config_option(const char *name, bool missing_ok, restrict_superuser); } -void gpdb::list_free(List *list) noexcept { +void ya_gpdb::list_free(List *list) noexcept { if (!list) return; @@ -112,7 +112,7 @@ void gpdb::list_free(List *list) noexcept { } CdbExplain_ShowStatCtx * -gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, +ya_gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, instr_time starttime) { if (!query_desc) throw std::runtime_error("Invalid query descriptor"); @@ -121,28 +121,28 @@ gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, starttime); } -void gpdb::instr_end_loop(Instrumentation *instr) { +void ya_gpdb::instr_end_loop(Instrumentation *instr) { if (!instr) throw std::runtime_error("Invalid instrumentation pointer"); detail::wrap_throw(::InstrEndLoop, instr); } -char *gpdb::gen_normquery(const char *query) { +char *ya_gpdb::gen_normquery(const char *query) { return detail::wrap_throw(::gen_normquery, query); } -StringInfo gpdb::gen_normplan(const char *exec_plan) { +StringInfo ya_gpdb::gen_normplan(const 
char *exec_plan) { if (!exec_plan) throw std::runtime_error("Invalid execution plan string"); return detail::wrap_throw(::gen_normplan, exec_plan); } -char *gpdb::get_rg_name_for_id(Oid group_id) { +char *ya_gpdb::get_rg_name_for_id(Oid group_id) { return detail::wrap_throw(GetResGroupNameForId, group_id); } -Oid gpdb::get_rg_id_by_session_id(int session_id) { +Oid ya_gpdb::get_rg_id_by_session_id(int session_id) { return detail::wrap_throw(ResGroupGetGroupIdBySessionId, session_id); } \ No newline at end of file diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index 437a5dd5d29..a29218d97d0 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -16,7 +16,7 @@ extern "C" { #include #include -namespace gpdb { +namespace ya_gpdb { namespace detail { template @@ -128,4 +128,4 @@ const char *get_config_option(const char *name, bool missing_ok, void list_free(List *list) noexcept; Oid get_rg_id_by_session_id(int session_id); -} // namespace gpdb +} // namespace ya_gpdb From 62a1d557b941bc0fbb90ed366e4843c8fb7dae8c Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 15 Jul 2025 16:01:37 +0300 Subject: [PATCH 094/133] move template to .cpp --- src/memory/gpdbwrappers.cpp | 131 ++++++++++++++++++++++++++++-------- src/memory/gpdbwrappers.h | 81 ---------------------- 2 files changed, 103 insertions(+), 109 deletions(-) diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index ac21f24e48f..9d579a91a30 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -16,27 +16,104 @@ extern "C" { #include "stat_statements_parser/pg_stat_statements_ya_parser.h" } -void *ya_gpdb::palloc(Size size) { return detail::wrap_throw(::palloc, size); } +namespace { -void *ya_gpdb::palloc0(Size size) { return detail::wrap_throw(::palloc0, size); } +template +auto wrap(Func &&func, Args &&...args) noexcept(!Throws) + -> decltype(func(std::forward(args)...)) { -char *ya_gpdb::pstrdup(const char *str) { - return 
detail::wrap_throw(::pstrdup, str); + using RetType = decltype(func(std::forward(args)...)); + + // Empty struct for void return type. + struct VoidResult {}; + using ResultHolder = std::conditional_t, VoidResult, + std::optional>; + + bool success; + ErrorData *edata; + ResultHolder result_holder; + + PG_TRY(); + { + if constexpr (!std::is_void_v) { + result_holder.emplace(func(std::forward(args)...)); + } else { + func(std::forward(args)...); + } + edata = NULL; + success = true; + } + PG_CATCH(); + { + MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + MemoryContextSwitchTo(oldctx); + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) { + std::string err; + if (edata && edata->message) { + err = std::string(edata->message); + } else { + err = "Unknown error occurred"; + } + + if (edata) { + FreeErrorData(edata); + } + + if constexpr (Throws) { + throw std::runtime_error(err); + } + + if constexpr (!std::is_void_v) { + return RetType{}; + } else { + return; + } + } + + if constexpr (!std::is_void_v) { + return *std::move(result_holder); + } else { + return; + } +} + +template +auto wrap_throw(Func &&func, Args &&...args) + -> decltype(func(std::forward(args)...)) { + return wrap(std::forward(func), std::forward(args)...); } +template +auto wrap_noexcept(Func &&func, Args &&...args) noexcept + -> decltype(func(std::forward(args)...)) { + return wrap(std::forward(func), std::forward(args)...); +} +} // namespace + +void *ya_gpdb::palloc(Size size) { return wrap_throw(::palloc, size); } + +void *ya_gpdb::palloc0(Size size) { return wrap_throw(::palloc0, size); } + +char *ya_gpdb::pstrdup(const char *str) { return wrap_throw(::pstrdup, str); } + char *ya_gpdb::get_database_name(Oid dbid) noexcept { - return detail::wrap_noexcept(::get_database_name, dbid); + return wrap_noexcept(::get_database_name, dbid); } bool ya_gpdb::split_identifier_string(char *rawstring, char separator, - List **namelist) 
noexcept { - return detail::wrap_noexcept(SplitIdentifierString, rawstring, separator, - namelist); + List **namelist) noexcept { + return wrap_noexcept(SplitIdentifierString, rawstring, separator, namelist); } ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, - bool costs) noexcept { - return detail::wrap_noexcept([&]() { + bool costs) noexcept { + return wrap_noexcept([&]() { ExplainState es; ExplainInitState(&es); es.costs = costs; @@ -50,8 +127,8 @@ ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, } ExplainState ya_gpdb::get_analyze_state_json(QueryDesc *query_desc, - bool analyze) noexcept { - return detail::wrap_noexcept([&]() { + bool analyze) noexcept { + return wrap_noexcept([&]() { ExplainState es; ExplainInitState(&es); es.analyze = analyze; @@ -71,16 +148,16 @@ ExplainState ya_gpdb::get_analyze_state_json(QueryDesc *query_desc, } Instrumentation *ya_gpdb::instr_alloc(size_t n, int instrument_options) { - return detail::wrap_throw(InstrAlloc, n, instrument_options); + return wrap_throw(InstrAlloc, n, instrument_options); } HeapTuple ya_gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, - bool *isnull) { + bool *isnull) { if (!tupleDescriptor || !values || !isnull) throw std::runtime_error( "Invalid input parameters for heap tuple formation"); - return detail::wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); + return wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); } void ya_gpdb::pfree(void *pointer) noexcept { @@ -88,7 +165,7 @@ void ya_gpdb::pfree(void *pointer) noexcept { if (!pointer) return; - detail::wrap_noexcept(::pfree, pointer); + wrap_noexcept(::pfree, pointer); } MemoryContext ya_gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { @@ -96,53 +173,51 @@ MemoryContext ya_gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { } const char *ya_gpdb::get_config_option(const char *name, bool missing_ok, - bool restrict_superuser) noexcept { + bool restrict_superuser) 
noexcept { if (!name) return nullptr; - return detail::wrap_noexcept(GetConfigOption, name, missing_ok, - restrict_superuser); + return wrap_noexcept(GetConfigOption, name, missing_ok, restrict_superuser); } void ya_gpdb::list_free(List *list) noexcept { if (!list) return; - detail::wrap_noexcept(::list_free, list); + wrap_noexcept(::list_free, list); } CdbExplain_ShowStatCtx * ya_gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, - instr_time starttime) { + instr_time starttime) { if (!query_desc) throw std::runtime_error("Invalid query descriptor"); - return detail::wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, - starttime); + return wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, starttime); } void ya_gpdb::instr_end_loop(Instrumentation *instr) { if (!instr) throw std::runtime_error("Invalid instrumentation pointer"); - detail::wrap_throw(::InstrEndLoop, instr); + wrap_throw(::InstrEndLoop, instr); } char *ya_gpdb::gen_normquery(const char *query) { - return detail::wrap_throw(::gen_normquery, query); + return wrap_throw(::gen_normquery, query); } StringInfo ya_gpdb::gen_normplan(const char *exec_plan) { if (!exec_plan) throw std::runtime_error("Invalid execution plan string"); - return detail::wrap_throw(::gen_normplan, exec_plan); + return wrap_throw(::gen_normplan, exec_plan); } char *ya_gpdb::get_rg_name_for_id(Oid group_id) { - return detail::wrap_throw(GetResGroupNameForId, group_id); + return wrap_throw(GetResGroupNameForId, group_id); } Oid ya_gpdb::get_rg_id_by_session_id(int session_id) { - return detail::wrap_throw(ResGroupGetGroupIdBySessionId, session_id); + return wrap_throw(ResGroupGetGroupIdBySessionId, session_id); } \ No newline at end of file diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index a29218d97d0..ad7ae96c362 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -17,87 +17,6 @@ extern "C" { #include namespace ya_gpdb { -namespace detail { - -template -auto wrap(Func 
&&func, Args &&...args) noexcept(!Throws) - -> decltype(func(std::forward(args)...)) { - - using RetType = decltype(func(std::forward(args)...)); - - // Empty struct for void return type. - struct VoidResult {}; - using ResultHolder = std::conditional_t, VoidResult, - std::optional>; - - bool success; - ErrorData *edata; - ResultHolder result_holder; - - PG_TRY(); - { - if constexpr (!std::is_void_v) { - result_holder.emplace(func(std::forward(args)...)); - } else { - func(std::forward(args)...); - } - edata = NULL; - success = true; - } - PG_CATCH(); - { - MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext); - edata = CopyErrorData(); - MemoryContextSwitchTo(oldctx); - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) { - std::string err; - if (edata && edata->message) { - err = std::string(edata->message); - } else { - err = "Unknown error occurred"; - } - - if (edata) { - FreeErrorData(edata); - } - - if constexpr (Throws) { - throw std::runtime_error(err); - } - - if constexpr (!std::is_void_v) { - return RetType{}; - } else { - return; - } - } - - if constexpr (!std::is_void_v) { - return *std::move(result_holder); - } else { - return; - } -} - -template -auto wrap_throw(Func &&func, Args &&...args) - -> decltype(func(std::forward(args)...)) { - return detail::wrap(std::forward(func), - std::forward(args)...); -} - -template -auto wrap_noexcept(Func &&func, Args &&...args) noexcept - -> decltype(func(std::forward(args)...)) { - return detail::wrap(std::forward(func), - std::forward(args)...); -} -} // namespace detail // Functions that call palloc(). // Make sure correct memory context is set. 
From 5bd13592a544b595cbf8aa0dc9cb32e24404cc38 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 15 Jul 2025 19:46:49 +0300 Subject: [PATCH 095/133] fix mem leak --- src/ProcStats.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index a557a20cbb0..5c09fa0bce4 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -75,16 +75,16 @@ void fill_status_stats(yagpcc::SystemStat *stats) { stats->set_vmpeakkb(value); proc_stat >> measure; if (measure != "kB") { - ereport(FATAL, (errmsg("Expected memory sizes in kB, but got in %s", - measure.c_str()))); + throw std::runtime_error("Expected memory sizes in kB, but got in " + + measure); } } else if (key == "VmSize:") { uint64_t value; proc_stat >> value; stats->set_vmsizekb(value); if (measure != "kB") { - ereport(FATAL, (errmsg("Expected memory sizes in kB, but got in %s", - measure.c_str()))); + throw std::runtime_error("Expected memory sizes in kB, but got in " + + measure); } } } From 20502d8d6048b6658449109bff87a8a3a8dd4697 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Thu, 17 Jul 2025 08:47:17 +0300 Subject: [PATCH 096/133] add trimming gucs to readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5a7e2e562be..764f0a8b15b 100644 --- a/README.md +++ b/README.md @@ -24,4 +24,5 @@ An extension for collecting greenplum query execution metrics and reporting them - **Nested Queries:** When `yagpcc.report_nested_queries` is `false`, only top-level queries are reported from the coordinator and segments, when `true`, both top-level and nested queries are reported from the coordinator, from segments collected as aggregates. - **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. - **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. 
+- **Trimming plans:** Query texts and execution plans are trimmed based on `yagpcc.max_text_size` and `yagpcc.max_plan_size` (default: 1024KB). For now, it is not recommended to set these GUCs higher than 1024KB. From d28f7f597ef67190470cb34effe585176d5358fe Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Thu, 17 Jul 2025 08:53:43 +0300 Subject: [PATCH 097/133] fix typo --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index f823ce1f1d2..da660d0d56a 100644 --- a/README.md +++ b/README.md @@ -25,4 +25,3 @@ An extension for collecting greenplum query execution metrics and reporting them - **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. - **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. - **Trimming plans:** Query texts and execution plans are trimmed based on `yagpcc.max_text_size` and `yagpcc.max_plan_size` (default: 1024KB). For now, it is not recommended to set these GUCs higher than 1024KB. -- \ No newline at end of file From dc309d7d9995e08aafba2148bbc1035bffe188a7 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Thu, 7 Aug 2025 14:01:40 +0300 Subject: [PATCH 098/133] add metrics desc --- metric.md | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 metric.md diff --git a/metric.md b/metric.md new file mode 100644 index 00000000000..a32d2fe88d6 --- /dev/null +++ b/metric.md @@ -0,0 +1,127 @@ +# YAGP Hooks Collector Metrics + +## Introduction + +### States +A Postgres process goes through 4 executor functions to execute a query: +1) `ExecutorStart()` - resource allocation for the query. +2) `ExecutorRun()` - query execution. +3) `ExecutorFinish()` - cleanup. +4) `ExecutorEnd()` - cleanup. 
+ +yagp-hooks-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: +``` +submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end -> ExecutorEnd() -> done +``` + +## Key Points +- Some queries may skip the _end_ state; in that case the _end_ statistics are sent during _done_. +- If a query finishes with an error (`METRICS_QUERY_ERROR`), or is cancelled (`METRICS_QUERY_CANCELLED`), statistics are sent at _done_. +- Some statistics are calculated as the difference between the current value of a global metric and its previous snapshot. The initial snapshot is taken at _submit_, and at _end_/_done_ the diff is calculated. +- Nested queries on _Dispatcher_ become top-level on _Execute_. +- Each process (_Dispatcher_/_Execute_) sends its own statistics. + +## Notations +- **S** = Submit event. +- **T** = Start event. +- **E** = End event. +- **D** = Done event. +- **DIFF** = current_value - submit_value (submit event). +- **ABS** = Absolute value, or where diff is not applicable, the value taken. +- **Local*** - Statistics that start counting from zero for each new query. A nested query is also considered new. 
+ +## Statistics Table + +| Proto Field | Type | When | DIFF/ABS | Local* | Scope | Dispatcher | Execute | Units | Notes | +| :--------------------------- | :----- | :------ | :------- | ------ | :------ | :--------: | :-----: | :------ | :-------------------------------------------------- | +| **SystemStat** | | | | | | | | | | +| `runningTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | Wall clock time | +| `userTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat utime | +| `kernelTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat stime | +| `vsize` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat vsize | +| `rss` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat rss | +| `VmSizeKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmSize | +| `VmPeakKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmPeak | +| `rchar` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io rchar | +| `wchar` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io wchar | +| `syscr` | uint64 | E, D | DIFF | - | Node | + | + | count | /proc/pid/io syscr | +| `syscw` | uint64 | E, D | DIFF | - | Node | + | + | count | /proc/pid/io syscw | +| `read_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io read_bytes | +| `write_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io write_bytes | +| `cancelled_write_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io cancelled_write_bytes | +| **MetricInstrumentation** | | | | | | | | | | +| `ntuples` | uint64 | E, D | ABS | + | Node | + | + | tuples | Accumulated total tuples | +| `nloops` | uint64 | E, D | ABS | + | Node | + | + | count | Number of cycles | +| `tuplecount` | uint64 | E, D | ABS | + | Node | + | + | tuples | Accumulated tuples per cycle | +| `firsttuple` | double | E, D | ABS | + | Node | + | + | seconds | 
Time for first tuple of this cycle | +| `startup` | double | E, D | ABS | + | Node | + | + | seconds | Start time of current iteration | +| `total` | double | E, D | ABS | + | Node | + | + | seconds | Total time taken | +| `shared_blks_hit` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared buffer blocks found in cache | +| `shared_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared buffer blocks read from disk | +| `shared_blks_dirtied` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared blocks dirtied | +| `shared_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Dirty shared buffer blocks written to disk | +| `local_blks_hit` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local buffer hits | +| `local_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Disk blocks read | +| `local_blks_dirtied` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local blocks dirtied | +| `local_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local blocks written to disk | +| `temp_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Temp file blocks read | +| `temp_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Temp file blocks written | +| `blk_read_time` | double | E, D | ABS | + | Node | + | + | seconds | Time reading data blocks | +| `blk_write_time` | double | E, D | ABS | + | Node | + | + | seconds | Time writing data blocks | +| `inherited_calls` | uint64 | E, D | ABS | - | Node | + | + | count | Nested query count (YAGPCC-specific) | +| `inherited_time` | double | E, D | ABS | - | Node | + | + | seconds | Nested query time (YAGPCC-specific) | +| **NetworkStat (sent)** | | | | | | | | | | +| `sent.total_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes sent, including headers | +| `sent.tuple_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes of pure tuple-data sent | +| `sent.chunks` | uint32 | D | ABS | - | Node | + | + | count | 
Tuple-chunks sent | +| **NetworkStat (received)** | | | | | | | | | | +| `received.total_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes received, including headers | +| `received.tuple_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes of pure tuple-data received | +| `received.chunks` | uint32 | D | ABS | - | Node | + | + | count | Tuple-chunks received | +| **InterconnectStat** | | | | | | | | | | +| `total_recv_queue_size` | uint64 | D | DIFF | - | Node | + | + | bytes | Receive queue size sum | +| `recv_queue_size_counting_t` | uint64 | D | DIFF | - | Node | + | + | count | Counting times when computing total_recv_queue_size | +| `total_capacity` | uint64 | D | DIFF | - | Node | + | + | bytes | Capacity sum for sent packets | +| `capacity_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times used to compute total_capacity | +| `total_buffers` | uint64 | D | DIFF | - | Node | + | + | count | Available buffers | +| `buffer_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times when computing total_buffers | +| `active_connections_num` | uint64 | D | DIFF | - | Node | + | + | count | Active connections | +| `retransmits` | int64 | D | DIFF | - | Node | + | + | count | Packet retransmits | +| `startup_cached_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Startup cached packets | +| `mismatch_num` | int64 | D | DIFF | - | Node | + | + | count | Mismatched packets received | +| `crc_errors` | int64 | D | DIFF | - | Node | + | + | count | CRC errors | +| `snd_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Packets sent | +| `recv_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Packets received | +| `disordered_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Out-of-order packets | +| `duplicated_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Duplicate packets | +| `recv_ack_num` | int64 | D | DIFF | - | Node | + | + | count | ACKs received | +| 
`status_query_msg_num` | int64 | D | DIFF | - | Node | + | + | count | Status query messages sent | +| **SpillInfo** | | | | | | | | | | +| `fileCount` | int32 | E, D | DIFF | - | Node | + | + | count | Spill (temp) files created | +| `totalBytes` | int64 | E, D | DIFF | - | Node | + | + | bytes | Spill bytes written | +| **QueryInfo** | | | | | | | | | | +| `generator` | enum | T, E, D | ABS | - | Cluster | + | - | enum | Planner/Optimizer | +| `query_id` | uint64 | T, E, D | ABS | - | Cluster | + | - | id | Query ID | +| `plan_id` | uint64 | T, E, D | ABS | - | Cluster | + | - | id | Hash of normalized plan | +| `query_text` | string | S | ABS | - | Cluster | + | - | text | Query text | +| `plan_text` | string | T | ABS | - | Cluster | + | - | text | EXPLAIN text | +| `template_query_text` | string | S | ABS | - | Cluster | + | - | text | Normalized query text | +| `template_plan_text` | string | T | ABS | - | Cluster | + | - | text | Normalized plan text | +| `userName` | string | All | ABS | - | Cluster | + | - | text | Session user | +| `databaseName` | string | All | ABS | - | Cluster | + | - | text | Database name | +| `rsgname` | string | All | ABS | - | Cluster | + | - | text | Resource group name | +| `analyze_text` | string | D | ABS | - | Cluster | + | - | text | EXPLAIN ANALYZE JSON | +| **AdditionalQueryInfo** | | | | | | | | | | +| `nested_level` | int64 | All | ABS | - | Node | + | + | count | Current nesting level | +| `error_message` | string | D | ABS | - | Node | + | + | text | Error message | +| `slice_id` | int64 | All | ABS | - | Node | + | + | id | Slice ID | +| **QueryKey** | | | | | | | | | | +| `tmid` | int32 | All | ABS | - | Node | + | + | id | Time ID | +| `ssid` | int32 | All | ABS | - | Node | + | + | id | Session ID | +| `ccnt` | int32 | All | ABS | - | Node | + | + | count | Command counter | +| **SegmentKey** | | | | | | | | | | +| `dbid` | int32 | All | ABS | - | Node | + | + | id | Database ID | +| `segment_index` | int32 | All 
| ABS | - | Node | + | + | id | Segment index (-1=coordinator) | + +--- + From 26bdc5311237e7eb5ed468d2da37d544bc966d79 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Thu, 7 Aug 2025 14:03:58 +0300 Subject: [PATCH 099/133] clear --- metric.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/metric.md b/metric.md index a32d2fe88d6..2d198391a67 100644 --- a/metric.md +++ b/metric.md @@ -1,6 +1,4 @@ -# YAGP Hooks Collector Metrics - -## Introduction +## YAGP Hooks Collector Metrics ### States A Postgres process goes through 4 executor functions to execute a query: @@ -14,14 +12,14 @@ yagp-hooks-collector sends messages with 4 states, from _Dispatcher_ and/or _Exe submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end -> ExecutorEnd() -> done ``` -## Key Points +### Key Points - Some queries may skip the _end_ state, then the _end_ statistics is sent during _done_. - If a query finishes with an error (`METRICS_QUERY_ERROR`), or is cancelled (`METRICS_QUERY_CANCELLED`), statistics is sent at _done_. - Some statistics is calculated as the difference between the current global metric and the previous. The initial snapshot is taken at submit, and at _end_/_done_ the diff is calculated. - Nested queries on _Dispatcher_ become top-level on _Execute_. - Each process (_Dispatcher_/_Execute_) sends its own statistics. -## Notations +### Notations - **S** = Submit event. - **T** = Start event. - **E** = End event. @@ -30,7 +28,7 @@ submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end - - **ABS** = Absolute value, or where diff is not applicable, the value taken. - **Local*** - Statistics that starts counting from zero for each new query. A nested query is also considered new. 
-## Statistics Table +### Statistics Table | Proto Field | Type | When | DIFF/ABS | Local* | Scope | Dispatcher | Execute | Units | Notes | | :--------------------------- | :----- | :------ | :------- | ------ | :------ | :--------: | :-----: | :------ | :-------------------------------------------------- | From 2990fe2872022be3d2d1eab5f14b4d84c6b327de Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:56:04 +0300 Subject: [PATCH 100/133] Fix missing states (utility statements included) (#16) --- src/EventSender.cpp | 401 ++++++++++++++++++++++++-------------------- src/EventSender.h | 94 +++++++++-- src/PgUtils.cpp | 10 +- 3 files changed, 299 insertions(+), 206 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 57c13456337..6deed8bb56b 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -8,6 +8,7 @@ extern "C" { #include "executor/executor.h" #include "utils/elog.h" +#include "utils/guc.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" @@ -27,6 +28,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; } + auto *query_desc = reinterpret_cast(arg); switch (status) { case METRICS_PLAN_NODE_INITIALIZE: case METRICS_PLAN_NODE_EXECUTING: @@ -34,8 +36,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // TODO break; case METRICS_QUERY_SUBMIT: - // don't collect anything here. We will fake this call in ExecutorStart as - // it really makes no difference. 
Just complicates things + collect_query_submit(query_desc); break; case METRICS_QUERY_START: // no-op: executor_after_start is enough @@ -49,7 +50,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { case METRICS_QUERY_ERROR: case METRICS_QUERY_CANCELED: case METRICS_INNER_QUERY_DONE: - collect_query_done(reinterpret_cast(arg), status); + collect_query_done(query_desc, status); break; default: ereport(FATAL, (errmsg("Unknown query status: %d", status))); @@ -60,15 +61,15 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { if (!connector) { return; } - if (is_top_level_query(query_desc, nesting_level)) { - nested_timing = 0; - nested_calls = 0; + if (filter_query(query_desc)) { + return; + } + if (!qdesc_submitted(query_desc)) { + collect_query_submit(query_desc); } - Config::sync(); if (!need_collect(query_desc, nesting_level)) { return; } - collect_query_submit(query_desc); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; @@ -87,160 +88,187 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (!connector) { + if (!connector || !need_collect(query_desc, nesting_level)) { return; } - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { - if (!filter_query(query_desc)) { - auto *query = get_query_message(query_desc); - auto query_msg = query->message; - *query_msg->mutable_start_time() = current_ts(); - if (!nesting_is_valid(query_desc, nesting_level)) { - return; - } - update_query_state(query_desc, query, QueryState::START); - set_query_plan(query_msg, query_desc); - if (need_collect_analyze()) { - // Set up to track total elapsed time during query run. - // Make sure the space is allocated in the per-query - // context so it will go away at executor_end. 
- if (query_desc->totaltime == NULL) { - MemoryContext oldcxt = - ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL); - ya_gpdb::mem_ctx_switch_to(oldcxt); - } - } - yagpcc::GPMetrics stats; - std::swap(stats, *query_msg->mutable_query_metrics()); - if (connector->report_query(*query_msg, "started")) { - clear_big_fields(query_msg); - } - std::swap(stats, *query_msg->mutable_query_metrics()); + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return; + } + auto &query = get_query(query_desc); + auto query_msg = query.message.get(); + *query_msg->mutable_start_time() = current_ts(); + update_query_state(query, QueryState::START); + set_query_plan(query_msg, query_desc); + if (need_collect_analyze()) { + // Set up to track total elapsed time during query run. + // Make sure the space is allocated in the per-query + // context so it will go away at executor_end. + if (query_desc->totaltime == NULL) { + MemoryContext oldcxt = + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL); + ya_gpdb::mem_ctx_switch_to(oldcxt); } } + yagpcc::GPMetrics stats; + std::swap(stats, *query_msg->mutable_query_metrics()); + if (connector->report_query(*query_msg, "started")) { + clear_big_fields(query_msg); + } + std::swap(stats, *query_msg->mutable_query_metrics()); } void EventSender::executor_end(QueryDesc *query_desc) { - if (!connector || - (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { + if (!connector || !need_collect(query_desc, nesting_level)) { return; } - if (!filter_query(query_desc)) { - auto *query = get_query_message(query_desc); - auto query_msg = query->message; - *query_msg->mutable_end_time() = current_ts(); - if (nesting_is_valid(query_desc, nesting_level)) { - if (query->state == UNKNOWN && - // Yet another greenplum weirdness: thats actually a nested query - // which is being 
committed/rollbacked. Treat it accordingly. - !need_report_nested_query()) { - return; - } - update_query_state(query_desc, query, QueryState::END); - if (is_top_level_query(query_desc, nesting_level)) { - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, - nested_calls, nested_timing); - } else { - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); - } - if (connector->report_query(*query_msg, "ended")) { - clear_big_fields(query_msg); - } - } + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return; + } + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg->mutable_end_time() = current_ts(); + update_query_state(query, QueryState::END); + if (is_top_level_query(query_desc, nesting_level)) { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, + nested_timing); + } else { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + } + if (connector->report_query(*query_msg, "ended")) { + clear_big_fields(query_msg); } } void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (connector && need_collect(query_desc, nesting_level)) { - auto *query = get_query_message(query_desc); - query->state = QueryState::SUBMIT; - auto query_msg = query->message; - *query_msg = create_query_req(yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); - *query_msg->mutable_submit_time() = current_ts(); - set_query_info(query_msg); - set_qi_nesting_level(query_msg, query_desc->gpmon_pkt->u.qexec.key.tmid); - set_qi_slice_id(query_msg); - set_query_text(query_msg, query_desc); - if (connector->report_query(*query_msg, "submit")) { - clear_big_fields(query_msg); - } - // take initial metrics snapshot so that we can safely take diff afterwards - // in END or DONE events. 
- set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + if (!connector) { + return; + } + Config::sync(); + // Register qkey for a nested query we won't report, + // so we can detect nesting_level > 0 and skip reporting at end/done. + if (!need_report_nested_query() && nesting_level > 0) { + QueryKey::register_qkey(query_desc, nesting_level); + return; + } + if (is_top_level_query(query_desc, nesting_level)) { + nested_timing = 0; + nested_calls = 0; + } + if (!need_collect(query_desc, nesting_level)) { + return; + } + submit_query(query_desc); + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg = create_query_req(yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + *query_msg->mutable_submit_time() = current_ts(); + set_query_info(query_msg); + set_qi_nesting_level(query_msg, nesting_level); + set_qi_slice_id(query_msg); + set_query_text(query_msg, query_desc); + if (connector->report_query(*query_msg, "submit")) { + clear_big_fields(query_msg); + } + // take initial metrics snapshot so that we can safely take diff afterwards + // in END or DONE events. 
+ set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); #ifdef IC_TEARDOWN_HOOK - // same for interconnect statistics - ic_metrics_collect(); - set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), - &ic_statistics); + // same for interconnect statistics + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); #endif +} + +void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, + QueryMetricsStatus status) { + yagpcc::QueryStatus query_status; + std::string msg; + switch (status) { + case METRICS_QUERY_DONE: + case METRICS_INNER_QUERY_DONE: + query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; + msg = "done"; + break; + case METRICS_QUERY_ERROR: + query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; + msg = "error"; + break; + case METRICS_QUERY_CANCELING: + // at the moment we don't track this event, but I`ll leave this code + // here just in case + Assert(false); + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; + msg = "cancelling"; + break; + case METRICS_QUERY_CANCELED: + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; + msg = "cancelled"; + break; + default: + ereport(FATAL, + (errmsg("Unexpected query status in query_done hook: %d", status))); } + auto prev_state = query.state; + update_query_state(query, QueryState::DONE, + query_status == yagpcc::QueryStatus::QUERY_STATUS_DONE); + auto query_msg = query.message.get(); + query_msg->set_query_status(query_status); + if (status == METRICS_QUERY_ERROR) { + set_qi_error_message(query_msg); + } + if (prev_state == START) { + // We've missed ExecutorEnd call due to query cancel or error. 
It's + // fine, but now we need to collect and report execution stats + *query_msg->mutable_end_time() = current_ts(); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, + nested_timing); + } +#ifdef IC_TEARDOWN_HOOK + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); +#endif + connector->report_query(*query_msg, msg); } void EventSender::collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status) { - if (connector && !filter_query(query_desc)) { - auto *query = get_query_message(query_desc); - if (query->state != UNKNOWN || need_report_nested_query()) { - if (nesting_is_valid(query_desc, nesting_level)) { - yagpcc::QueryStatus query_status; - std::string msg; - switch (status) { - case METRICS_QUERY_DONE: - case METRICS_INNER_QUERY_DONE: - query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; - msg = "done"; - break; - case METRICS_QUERY_ERROR: - query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; - msg = "error"; - break; - case METRICS_QUERY_CANCELING: - // at the moment we don't track this event, but I`ll leave this code - // here just in case - Assert(false); - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; - msg = "cancelling"; - break; - case METRICS_QUERY_CANCELED: - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; - msg = "cancelled"; - break; - default: - ereport(FATAL, - (errmsg("Unexpected query status in query_done hook: %d", - status))); - } - auto prev_state = query->state; - update_query_state(query_desc, query, QueryState::DONE, - query_status == - yagpcc::QueryStatus::QUERY_STATUS_DONE); - auto query_msg = query->message; - query_msg->set_query_status(query_status); - if (status == METRICS_QUERY_ERROR) { - set_qi_error_message(query_msg); - } - if (prev_state == START) { - // We've missed ExecutorEnd call due to query cancel or error. 
It's - // fine, but now we need to collect and report execution stats - *query_msg->mutable_end_time() = current_ts(); - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, - nested_calls, nested_timing); - } -#ifdef IC_TEARDOWN_HOOK - ic_metrics_collect(); - set_ic_stats( - query_msg->mutable_query_metrics()->mutable_instrumentation(), - &ic_statistics); -#endif - connector->report_query(*query_msg, msg); - } - update_nested_counters(query_desc); + if (!connector || !need_collect(query_desc, nesting_level)) { + return; + } + + // Skip sending done message if query errored before submit. + if (!qdesc_submitted(query_desc)) { + if (status != METRICS_QUERY_ERROR) { + ereport(WARNING, (errmsg("YAGPCC trying to process DONE hook for " + "unsubmitted and unerrored query"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); } - query_msgs.erase({query_desc->gpmon_pkt->u.qexec.key.ccnt, - query_desc->gpmon_pkt->u.qexec.key.tmid}); - ya_gpdb::pfree(query_desc->gpmon_pkt); + return; + } + + if (queries.empty()) { + ereport(WARNING, (errmsg("YAGPCC cannot find query to process DONE hook"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + return; } + auto &query = get_query(query_desc); + + bool report = need_report_nested_query() || + is_top_level_query(query_desc, nesting_level); + if (report) + report_query_done(query_desc, query, status); + + if (need_report_nested_query()) + update_nested_counters(query_desc); + + queries.erase(QueryKey::from_qdesc(query_desc)); + pfree(query_desc->yagp_query_key); + query_desc->yagp_query_key = NULL; } void EventSender::ic_metrics_collect() { @@ -283,14 +311,9 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { if (!need_collect(query_desc, nesting_level)) { return; } - auto query = get_query_message(query_desc); - auto query_msg = query->message; + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); 
*query_msg->mutable_end_time() = current_ts(); - // Yet another greenplum weirdness: thats actually a nested query - // which is being committed/rollbacked. Treat it accordingly. - if (query->state == UNKNOWN && !need_report_nested_query()) { - return; - } if (!query_desc->totaltime || !need_collect_analyze()) { return; } @@ -317,27 +340,19 @@ EventSender::EventSender() { #endif } -EventSender::~EventSender() { - delete connector; - for (auto iter = query_msgs.begin(); iter != query_msgs.end(); ++iter) { - delete iter->second.message; - } -} +EventSender::~EventSender() { delete connector; } // That's basically a very simplistic state machine to fix or highlight any bugs // coming from GP -void EventSender::update_query_state(QueryDesc *query_desc, QueryItem *query, - QueryState new_state, bool success) { - if (query->state == UNKNOWN) { - collect_query_submit(query_desc); - } +void EventSender::update_query_state(QueryItem &query, QueryState new_state, + bool success) { switch (new_state) { case QueryState::SUBMIT: Assert(false); break; case QueryState::START: - if (query->state == QueryState::SUBMIT) { - query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + if (query.state == QueryState::SUBMIT) { + query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); } else { Assert(false); } @@ -346,40 +361,52 @@ void EventSender::update_query_state(QueryDesc *query_desc, QueryItem *query, // Example of below assert triggering: CURSOR closes before ever being // executed Assert(query->state == QueryState::START || // IsAbortInProgress()); - query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); break; case QueryState::DONE: - Assert(query->state == QueryState::END || !success); - query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + Assert(query.state == QueryState::END || !success); + 
query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); break; default: Assert(false); } - query->state = new_state; + query.state = new_state; } -EventSender::QueryItem *EventSender::get_query_message(QueryDesc *query_desc) { - if (query_desc->gpmon_pkt == nullptr || - query_msgs.find({query_desc->gpmon_pkt->u.qexec.key.ccnt, - query_desc->gpmon_pkt->u.qexec.key.tmid}) == - query_msgs.end()) { - query_desc->gpmon_pkt = - (gpmon_packet_t *)ya_gpdb::palloc0(sizeof(gpmon_packet_t)); - query_desc->gpmon_pkt->u.qexec.key.ccnt = gp_command_count; - query_desc->gpmon_pkt->u.qexec.key.tmid = nesting_level; - query_msgs.insert({{gp_command_count, nesting_level}, - QueryItem(UNKNOWN, new yagpcc::SetQueryReq())}); - } - return &query_msgs.at({query_desc->gpmon_pkt->u.qexec.key.ccnt, - query_desc->gpmon_pkt->u.qexec.key.tmid}); +EventSender::QueryItem &EventSender::get_query(QueryDesc *query_desc) { + if (!qdesc_submitted(query_desc)) { + ereport(WARNING, + (errmsg("YAGPCC attempting to get query that was not submitted"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + throw std::runtime_error("Attempting to get query that was not submitted"); + } + return queries.find(QueryKey::from_qdesc(query_desc))->second; +} + +void EventSender::submit_query(QueryDesc *query_desc) { + if (query_desc->yagp_query_key) { + ereport(WARNING, + (errmsg("YAGPCC trying to submit already submitted query"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + } + QueryKey::register_qkey(query_desc, nesting_level); + auto key = QueryKey::from_qdesc(query_desc); + auto [_, inserted] = queries.emplace(key, QueryItem(QueryState::SUBMIT)); + if (!inserted) { + ereport(WARNING, (errmsg("YAGPCC duplicate query submit detected"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + } } void EventSender::update_nested_counters(QueryDesc *query_desc) { if 
(!is_top_level_query(query_desc, nesting_level)) { - auto query_msg = get_query_message(query_desc); + auto &query = get_query(query_desc); nested_calls++; - double end_time = protots_to_double(query_msg->message->end_time()); - double start_time = protots_to_double(query_msg->message->start_time()); + double end_time = protots_to_double(query.message->end_time()); + double start_time = protots_to_double(query.message->start_time()); if (end_time >= start_time) { nested_timing += end_time - start_time; } else { @@ -391,6 +418,12 @@ void EventSender::update_nested_counters(QueryDesc *query_desc) { } } -EventSender::QueryItem::QueryItem(EventSender::QueryState st, - yagpcc::SetQueryReq *msg) - : state(st), message(msg) {} +bool EventSender::qdesc_submitted(QueryDesc *query_desc) { + if (query_desc->yagp_query_key == NULL) { + return false; + } + return queries.find(QueryKey::from_qdesc(query_desc)) != queries.end(); +} + +EventSender::QueryItem::QueryItem(QueryState st) + : message(std::make_unique()), state(st) {} diff --git a/src/EventSender.h b/src/EventSender.h index f3dd1d2a528..4071d580ff9 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,6 +1,8 @@ #pragma once +#include #include +#include #define typeid __typeid extern "C" { @@ -11,12 +13,75 @@ extern "C" { } #undef typeid +#include "memory/gpdbwrappers.h" + class UDSConnector; struct QueryDesc; namespace yagpcc { class SetQueryReq; } +#include + +struct QueryKey { + int tmid; + int ssid; + int ccnt; + int nesting_level; + uintptr_t query_desc_addr; + + bool operator==(const QueryKey &other) const { + return std::tie(tmid, ssid, ccnt, nesting_level, query_desc_addr) == + std::tie(other.tmid, other.ssid, other.ccnt, other.nesting_level, + other.query_desc_addr); + } + + static void register_qkey(QueryDesc *query_desc, size_t nesting_level) { + query_desc->yagp_query_key = + (YagpQueryKey *)ya_gpdb::palloc0(sizeof(YagpQueryKey)); + int32 tmid; + gpmon_gettmid(&tmid); + query_desc->yagp_query_key->tmid 
= tmid; + query_desc->yagp_query_key->ssid = gp_session_id; + query_desc->yagp_query_key->ccnt = gp_command_count; + query_desc->yagp_query_key->nesting_level = nesting_level; + query_desc->yagp_query_key->query_desc_addr = (uintptr_t)query_desc; + } + + static QueryKey from_qdesc(QueryDesc *query_desc) { + return { + .tmid = query_desc->yagp_query_key->tmid, + .ssid = query_desc->yagp_query_key->ssid, + .ccnt = query_desc->yagp_query_key->ccnt, + .nesting_level = query_desc->yagp_query_key->nesting_level, + .query_desc_addr = query_desc->yagp_query_key->query_desc_addr, + }; + } +}; + +// https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html +template inline void hash_combine(std::size_t &seed, const T &v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +namespace std { +template <> struct hash { + size_t operator()(const QueryKey &k) const noexcept { + size_t seed = hash{}(k.tmid); + hash_combine(seed, k.ssid); + hash_combine(seed, k.ccnt); + hash_combine(seed, k.nesting_level); + uintptr_t addr = k.query_desc_addr; + if constexpr (SIZE_MAX < UINTPTR_MAX) { + addr %= SIZE_MAX; + } + hash_combine(seed, addr); + return seed; + } +}; +} // namespace std + class EventSender { public: void executor_before_start(QueryDesc *query_desc, int eflags); @@ -31,30 +96,25 @@ class EventSender { ~EventSender(); private: - enum QueryState { UNKNOWN, SUBMIT, START, END, DONE }; + enum QueryState { SUBMIT, START, END, DONE }; struct QueryItem { - QueryState state = QueryState::UNKNOWN; - yagpcc::SetQueryReq *message = nullptr; + std::unique_ptr message; + QueryState state; - QueryItem(QueryState st, yagpcc::SetQueryReq *msg); - }; - - struct pair_hash { - std::size_t operator()(const std::pair &p) const { - auto h1 = std::hash{}(p.first); - auto h2 = std::hash{}(p.second); - return h1 ^ h2; - } + explicit QueryItem(QueryState st); }; - void update_query_state(QueryDesc *query_desc, QueryItem *query, - QueryState 
new_state, bool success = true); - QueryItem *get_query_message(QueryDesc *query_desc); + void update_query_state(QueryItem &query, QueryState new_state, + bool success = true); + QueryItem &get_query(QueryDesc *query_desc); + void submit_query(QueryDesc *query_desc); void collect_query_submit(QueryDesc *query_desc); + void report_query_done(QueryDesc *query_desc, QueryItem &query, + QueryMetricsStatus status); void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); - void cleanup_messages(); void update_nested_counters(QueryDesc *query_desc); + bool qdesc_submitted(QueryDesc *query_desc); UDSConnector *connector = nullptr; int nesting_level = 0; @@ -63,5 +123,5 @@ class EventSender { #ifdef IC_TEARDOWN_HOOK ICStatistics ic_statistics; #endif - std::unordered_map, QueryItem, pair_hash> query_msgs; + std::unordered_map queries; }; \ No newline at end of file diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 81a9e2f6c4c..700a1a3d8c8 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -56,14 +56,14 @@ std::string *get_rg_name() { */ bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { - return (query_desc->gpmon_pkt && - query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || - nesting_level == 0; + if (query_desc->yagp_query_key == NULL) { + return nesting_level == 0; + } + return query_desc->yagp_query_key->nesting_level == 0; } bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { - return (Gp_session_role == GP_ROLE_DISPATCH && - Config::report_nested_queries()) || + return need_report_nested_query() || is_top_level_query(query_desc, nesting_level); } From 4c85135d187d5c94f08cd45d729db4000a91c3bd Mon Sep 17 00:00:00 2001 From: NJrslv Date: Wed, 13 Aug 2025 15:06:41 +0300 Subject: [PATCH 101/133] fix cpp & pg memleak --- src/Config.cpp | 6 +++--- src/Config.h | 2 +- src/PgUtils.cpp | 26 +++++++++++++++----------- src/PgUtils.h | 6 +++--- src/ProtoUtils.cpp | 6 +++--- 5 files changed, 25 insertions(+), 21 deletions(-) diff 
--git a/src/Config.cpp b/src/Config.cpp index 53143ff2d26..aef09fc7d73 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -119,11 +119,11 @@ size_t Config::max_text_size() { return guc_max_text_size * 1024; } size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } int Config::min_analyze_time() { return guc_min_analyze_time; }; -bool Config::filter_user(const std::string *username) { - if (!username || !ignored_users_set) { +bool Config::filter_user(std::string username) { + if (!ignored_users_set) { return true; } - return ignored_users_set->find(*username) != ignored_users_set->end(); + return ignored_users_set->find(username) != ignored_users_set->end(); } void Config::sync() { diff --git a/src/Config.h b/src/Config.h index dd081c41dd6..eff83f0960a 100644 --- a/src/Config.h +++ b/src/Config.h @@ -9,7 +9,7 @@ class Config { static bool enable_analyze(); static bool enable_cdbstats(); static bool enable_collector(); - static bool filter_user(const std::string *username); + static bool filter_user(std::string username); static bool report_nested_queries(); static size_t max_text_size(); static size_t max_plan_size(); diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 700a1a3d8c8..929f0cf2681 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -7,31 +7,35 @@ extern "C" { #include "cdb/cdbvars.h" } -std::string *get_user_name() { +std::string get_user_name() { + // username is allocated on stack, we don't need to pfree it. const char *username = ya_gpdb::get_config_option("session_authorization", false, false); - // username is not to be freed - return username ? new std::string(username) : nullptr; + return username ? 
std::string(username) : ""; } -std::string *get_db_name() { +std::string get_db_name() { char *dbname = ya_gpdb::get_database_name(MyDatabaseId); - std::string *result = nullptr; if (dbname) { - result = new std::string(dbname); + std::string result(dbname); ya_gpdb::pfree(dbname); + return result; } - return result; + return ""; } -std::string *get_rg_name() { +std::string get_rg_name() { auto groupId = ya_gpdb::get_rg_id_by_session_id(MySessionState->sessionId); if (!OidIsValid(groupId)) - return nullptr; + return ""; + char *rgname = ya_gpdb::get_rg_name_for_id(groupId); if (rgname == nullptr) - return nullptr; - return new std::string(rgname); + return ""; + + std::string result(rgname); + ya_gpdb::pfree(rgname); + return result; } /** diff --git a/src/PgUtils.h b/src/PgUtils.h index 81282a473a8..ceb07c2e8e5 100644 --- a/src/PgUtils.h +++ b/src/PgUtils.h @@ -5,9 +5,9 @@ extern "C" { #include -std::string *get_user_name(); -std::string *get_db_name(); -std::string *get_rg_name(); +std::string get_user_name(); +std::string get_db_name(); +std::string get_rg_name(); bool is_top_level_query(QueryDesc *query_desc, int nesting_level); bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); bool need_report_nested_query(); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 6cbc7d20183..4655433c806 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -102,9 +102,9 @@ void clear_big_fields(yagpcc::SetQueryReq *req) { void set_query_info(yagpcc::SetQueryReq *req) { if (Gp_session_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); - qi->set_allocated_username(get_user_name()); - qi->set_allocated_databasename(get_db_name()); - qi->set_allocated_rsgname(get_rg_name()); + qi->set_username(get_user_name()); + qi->set_databasename(get_db_name()); + qi->set_rsgname(get_rg_name()); } } From 654381c248dc26edc62a376fff6e76975ad615f3 Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Fri, 15 Aug 2025 
12:11:14 +0300 Subject: [PATCH 102/133] report bug queries at the end of extension (#19) * ereport(log) bug queries at the end of extension --- src/EventSender.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 6deed8bb56b..133d409b574 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -340,7 +340,15 @@ EventSender::EventSender() { #endif } -EventSender::~EventSender() { delete connector; } +EventSender::~EventSender() { + for (const auto &[qkey, _] : queries) { + ereport(LOG, + (errmsg("YAGPCC query with missing done event: " + "tmid=%d ssid=%d ccnt=%d nlvl=%d", + qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); + } + delete connector; +} // That's basically a very simplistic state machine to fix or highlight any bugs // coming from GP From 1799aa233ceac307ffcdafd15cffd80eb2cf1e89 Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Thu, 4 Sep 2025 13:26:16 +0300 Subject: [PATCH 103/133] fix defs & trim spaces (#21) --- metric.md | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/metric.md b/metric.md index 2d198391a67..78e62b72909 100644 --- a/metric.md +++ b/metric.md @@ -1,32 +1,33 @@ ## YAGP Hooks Collector Metrics -### States -A Postgres process goes through 4 executor functions to execute a query: -1) `ExecutorStart()` - resource allocation for the query. -2) `ExecutorRun()` - query execution. -3) `ExecutorFinish()` - cleanup. -4) `ExecutorEnd()` - cleanup. +### States +A Postgres process goes through 4 executor functions to execute a query: +1) `ExecutorStart()` - resource allocation for the query. +2) `ExecutorRun()` - query execution. +3) `ExecutorFinish()` - cleanup. +4) `ExecutorEnd()` - cleanup. 
-yagp-hooks-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: +yagp-hooks-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: ``` submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end -> ExecutorEnd() -> done ``` -### Key Points -- Some queries may skip the _end_ state, then the _end_ statistics is sent during _done_. -- If a query finishes with an error (`METRICS_QUERY_ERROR`), or is cancelled (`METRICS_QUERY_CANCELLED`), statistics is sent at _done_. -- Some statistics is calculated as the difference between the current global metric and the previous. The initial snapshot is taken at submit, and at _end_/_done_ the diff is calculated. -- Nested queries on _Dispatcher_ become top-level on _Execute_. -- Each process (_Dispatcher_/_Execute_) sends its own statistics. +### Key Points +- Some queries may skip the _end_ state, then the _end_ statistics is sent during _done_. +- If a query finishes with an error (`METRICS_QUERY_ERROR`), or is cancelled (`METRICS_QUERY_CANCELLED`), statistics is sent at _done_. +- Some statistics is calculated as the difference between the current global metric and the previous. The initial snapshot is taken at submit, and at _end_/_done_ the diff is calculated. +- Nested queries on _Dispatcher_ become top-level on _Execute_. +- Each process (_Dispatcher_/_Execute_) sends its own statistics. -### Notations -- **S** = Submit event. -- **T** = Start event. -- **E** = End event. -- **D** = Done event. -- **DIFF** = current_value - submit_value (submit event). -- **ABS** = Absolute value, or where diff is not applicable, the value taken. -- **Local*** - Statistics that starts counting from zero for each new query. A nested query is also considered new. +### Notations +- **S** = Submit event. +- **T** = Start event. +- **E** = End event. 
+- **D** = Done event. +- **DIFF** = current_value - submit_value (submit event). +- **ABS** = Absolute value, or where diff is not applicable, the value taken. +- **Local*** - Statistics that starts counting from zero for each new query. A nested query is also considered new. +- **Node** - PG process, either a `Query Dispatcher` (on master) or an `Execute` (on segment). ### Statistics Table @@ -36,7 +37,7 @@ submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end - | `runningTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | Wall clock time | | `userTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat utime | | `kernelTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat stime | -| `vsize` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat vsize | +| `vsize` | uint64 | E, D | ABS | - | Node | + | + | bytes | /proc/pid/stat vsize | | `rss` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat rss | | `VmSizeKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmSize | | `VmPeakKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmPeak | @@ -114,7 +115,7 @@ submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end - | `error_message` | string | D | ABS | - | Node | + | + | text | Error message | | `slice_id` | int64 | All | ABS | - | Node | + | + | id | Slice ID | | **QueryKey** | | | | | | | | | | -| `tmid` | int32 | All | ABS | - | Node | + | + | id | Time ID | +| `tmid` | int32 | All | ABS | - | Node | + | + | id | Transaction start time | | `ssid` | int32 | All | ABS | - | Node | + | + | id | Session ID | | `ccnt` | int32 | All | ABS | - | Node | + | + | count | Command counter | | **SegmentKey** | | | | | | | | | | From 15f9dc56ae8e425170e7825d038dec3481c89c2d Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Tue, 9 Sep 2025 11:51:58 +0300 Subject: 
[PATCH 104/133] Add PG alike tests (#20) * add pg alike tests --- Makefile | 9 +- expected/yagp_cursors.out | 99 ++++++++++ expected/yagp_dist.out | 175 ++++++++++++++++++ expected/yagp_select.out | 136 ++++++++++++++ sql/yagp_cursors.sql | 69 +++++++ sql/yagp_dist.sql | 84 +++++++++ sql/yagp_select.sql | 65 +++++++ src/Config.cpp | 13 ++ src/Config.h | 4 + src/EventSender.cpp | 51 +++-- src/EventSender.h | 4 +- src/UDSConnector.cpp | 3 +- src/UDSConnector.h | 4 +- src/hook_wrappers.h | 3 + src/log/LogOps.cpp | 129 +++++++++++++ src/log/LogOps.h | 19 ++ src/log/LogSchema.cpp | 136 ++++++++++++++ src/log/LogSchema.h | 164 ++++++++++++++++ src/memory/gpdbwrappers.cpp | 7 +- src/memory/gpdbwrappers.h | 5 + src/yagp_hooks_collector.c | 14 +- yagp_hooks_collector--1.0--1.1.sql | 113 +++++++++++ ...--1.0.sql => yagp_hooks_collector--1.0.sql | 2 +- yagp_hooks_collector--1.1.sql | 95 ++++++++++ yagp_hooks_collector.control | 2 +- 25 files changed, 1376 insertions(+), 29 deletions(-) create mode 100644 expected/yagp_cursors.out create mode 100644 expected/yagp_dist.out create mode 100644 expected/yagp_select.out create mode 100644 sql/yagp_cursors.sql create mode 100644 sql/yagp_dist.sql create mode 100644 sql/yagp_select.sql create mode 100644 src/log/LogOps.cpp create mode 100644 src/log/LogOps.h create mode 100644 src/log/LogSchema.cpp create mode 100644 src/log/LogSchema.h create mode 100644 yagp_hooks_collector--1.0--1.1.sql rename sql/yagp_hooks_collector--1.0.sql => yagp_hooks_collector--1.0.sql (99%) create mode 100644 yagp_hooks_collector--1.1.sql diff --git a/Makefile b/Makefile index c02f63e5763..e7132da9b06 100644 --- a/Makefile +++ b/Makefile @@ -31,9 +31,11 @@ OBJS := $(PG_STAT_OBJS) \ $(SRC_DIR)/EventSender.o \ $(SRC_DIR)/hook_wrappers.o \ $(SRC_DIR)/memory/gpdbwrappers.o \ - $(SRC_DIR)/yagp_hooks_collector.o + $(SRC_DIR)/yagp_hooks_collector.o \ + $(SRC_DIR)/log/LogOps.o \ + $(SRC_DIR)/log/LogSchema.o EXTRA_CLEAN := $(GEN_DIR) -DATA := $(wildcard 
sql/*--*.sql) +DATA := $(wildcard *--*.sql) EXTENSION := yagp_hooks_collector EXTVERSION := $(shell grep default_version $(EXTENSION).control | \ sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/") @@ -45,10 +47,11 @@ include $(PGXS) $(GEN_DIR)/yagpcc_set_service.pb.o: $(GEN_DIR)/yagpcc_metrics.pb.h PROTO_INCLUDES = $(GEN_DIR)/yagpcc_set_service.pb.h $(GEN_DIR)/yagpcc_metrics.pb.h $(GEN_DIR)/yagpcc_plan.pb.h -$(SRC_DIR)/UDSConnector.o: $(PROTO_INCLUDES) +$(SRC_DIR)/UDSConnector.o: $(PROTO_INCLUDES) src/log/LogOps.h $(SRC_DIR)/ProtoUtils.o: $(PROTO_INCLUDES) $(SRC_DIR)/EventSender.o: $(PROTO_INCLUDES) $(SRC_DIR)/ProcStats.o: $(GEN_DIR)/yagpcc_metrics.pb.h +$(SRC_DIR)/log/LogOps.o: $(PROTO_INCLUDES) gen: $(PROTO_GEN_OBJECTS) diff --git a/expected/yagp_cursors.out b/expected/yagp_cursors.out new file mode 100644 index 00000000000..c9bbbbf8a07 --- /dev/null +++ b/expected/yagp_cursors.out @@ -0,0 +1,99 @@ +-- FETCH is not tested here because truly utility statements (those +-- without sub-queries that go through the executor) are not logged. +-- Currently, only executor states are reported. Utility hooks are +-- not implemented. 
+CREATE EXTENSION yagp_hooks_collector; +CREATE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +-- DECLARE +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+---------------------------------------------+--------------------- + -1 | 10 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT + -1 | 10 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_START + -1 | 10 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_END + -1 | 10 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_DONE +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- DECLARE WITH HOLD +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+-------------------------------------------------------+--------------------- + -1 | 23 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | 23 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_START + -1 | 23 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_END + -1 | 23 | DECLARE 
cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_DONE + -1 | 26 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | 26 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_START + -1 | 26 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_END + -1 | 26 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- ROLLBACK +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR SELECT 1; +ROLLBACK; +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+---------------------------------------------+--------------------- + -1 | 39 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | 39 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_START + -1 | 39 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_END + -1 | 39 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | 42 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | 42 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_START +(6 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; diff --git a/expected/yagp_dist.out b/expected/yagp_dist.out new file mode 100644 index 00000000000..9112b936001 --- /dev/null +++ b/expected/yagp_dist.out @@ -0,0 +1,175 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + 
WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +-- Hash distributed table +CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); +INSERT INTO test_hash_dist SELECT 1; +SET yagpcc.logging_mode to 'TBL'; +SET optimizer_enable_direct_dispatch TO TRUE; +-- Direct dispatch is used here, only one segment is scanned. +select * from test_hash_dist where id = 1; + id +---- + 1 +(1 row) + +RESET optimizer_enable_direct_dispatch; +RESET yagpcc.logging_mode; +-- Should see 8 rows. +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+--------------------------------------------+--------------------- + -1 | 13 | select * from test_hash_dist where id = 1; | QUERY_STATUS_SUBMIT + -1 | 13 | select * from test_hash_dist where id = 1; | QUERY_STATUS_START + -1 | 13 | select * from test_hash_dist where id = 1; | QUERY_STATUS_END + -1 | 13 | select * from test_hash_dist where id = 1; | QUERY_STATUS_DONE + 1 | 13 | | QUERY_STATUS_SUBMIT + 1 | 13 | | QUERY_STATUS_START + 1 | 13 | | QUERY_STATUS_END + 1 | 13 | | QUERY_STATUS_DONE +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +SET yagpcc.logging_mode to 'TBL'; +-- Scan all segments. 
+select * from test_hash_dist; + id +---- + 1 +(1 row) + +DROP TABLE test_hash_dist; +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+-------------------------------+--------------------- + -1 | 24 | select * from test_hash_dist; | QUERY_STATUS_SUBMIT + -1 | 24 | select * from test_hash_dist; | QUERY_STATUS_START + -1 | 24 | select * from test_hash_dist; | QUERY_STATUS_END + -1 | 24 | select * from test_hash_dist; | QUERY_STATUS_DONE + 1 | 24 | | QUERY_STATUS_SUBMIT + 1 | 24 | | QUERY_STATUS_START + 1 | 24 | | QUERY_STATUS_END + 1 | 24 | | QUERY_STATUS_DONE + 2 | 24 | | QUERY_STATUS_SUBMIT + 2 | 24 | | QUERY_STATUS_START + 2 | 24 | | QUERY_STATUS_END + 2 | 24 | | QUERY_STATUS_DONE + | 24 | | QUERY_STATUS_SUBMIT + | 24 | | QUERY_STATUS_START + | 24 | | QUERY_STATUS_END + | 24 | | QUERY_STATUS_DONE +(16 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Replicated table +CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ +BEGIN + RETURN NEXT 'seg'; +END; +$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; +CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; +INSERT INTO test_replicated SELECT 1; +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_replicated, force_segments(); + count +------- + 3 +(1 row) + +DROP TABLE test_replicated; +DROP FUNCTION force_segments(); +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+---------------------------------------------------------+--------------------- + -1 | 39 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_SUBMIT + -1 | 39 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_START + -1 | 39 | SELECT 
COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_END + -1 | 39 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_DONE + 1 | 39 | | QUERY_STATUS_SUBMIT + 1 | 39 | | QUERY_STATUS_START + 1 | 39 | | QUERY_STATUS_END + 1 | 39 | | QUERY_STATUS_DONE + 2 | 39 | | QUERY_STATUS_SUBMIT + 2 | 39 | | QUERY_STATUS_START + 2 | 39 | | QUERY_STATUS_END + 2 | 39 | | QUERY_STATUS_DONE + | 39 | | QUERY_STATUS_SUBMIT + | 39 | | QUERY_STATUS_START + | 39 | | QUERY_STATUS_END + | 39 | | QUERY_STATUS_DONE +(16 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Partially distributed table (2 numsegments) +SET allow_system_table_mods = ON; +CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); +UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; +INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_partial_dist; + count +------- + 100 +(1 row) + +RESET yagpcc.logging_mode; +DROP TABLE test_partial_dist; +RESET allow_system_table_mods; +-- Should see 12 rows. 
+SELECT ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + ccnt | query_text | query_status +------+-----------------------------------------+--------------------- + 57 | SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_SUBMIT + 57 | SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_START + 57 | SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_END + 57 | SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_DONE + 57 | | QUERY_STATUS_SUBMIT + 57 | | QUERY_STATUS_START + 57 | | QUERY_STATUS_END + 57 | | QUERY_STATUS_DONE + 57 | | QUERY_STATUS_SUBMIT + 57 | | QUERY_STATUS_START + 57 | | QUERY_STATUS_END + 57 | | QUERY_STATUS_DONE +(12 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; diff --git a/expected/yagp_select.out b/expected/yagp_select.out new file mode 100644 index 00000000000..c2282d8c217 --- /dev/null +++ b/expected/yagp_select.out @@ -0,0 +1,136 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +-- Basic SELECT tests +SET yagpcc.logging_mode to 'TBL'; +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +SELECT COUNT(*) FROM generate_series(1,10); + count +------- + 10 +(1 row) + +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+---------------------------------------------+--------------------- + -1 | 9 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | 9 | SELECT 1; | QUERY_STATUS_START + -1 | 9 | SELECT 1; | QUERY_STATUS_END + -1 | 9 | SELECT 1; | QUERY_STATUS_DONE + -1 | 11 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_SUBMIT + -1 | 11 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_START + -1 | 11 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_END + -1 | 11 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_DONE +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Transaction test +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+------------+--------------------- + -1 | 22 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | 22 | SELECT 1; | QUERY_STATUS_START + -1 | 22 | SELECT 1; | QUERY_STATUS_END + -1 | 22 | SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- CTE test +SET yagpcc.logging_mode to 'TBL'; +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + column1 +--------- + 1 + 2 +(2 rows) + +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+-----------------------------+--------------------- + -1 | 33 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_SUBMIT + | | SELECT * FROM t; | + -1 | 33 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_START + | | SELECT * FROM t; | + -1 | 33 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_END + | | SELECT * FROM t; | + -1 | 33 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_DONE + | | SELECT * FROM t; | +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Prepared statement test +SET yagpcc.logging_mode to 'TBL'; +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; + ?column? 
+---------- + 1 +(1 row) + +DEALLOCATE test_stmt; +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | ccnt | query_text | query_status +-------+------+--------------------------------+--------------------- + -1 | 44 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_SUBMIT + -1 | 44 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_START + -1 | 44 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_END + -1 | 44 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; diff --git a/sql/yagp_cursors.sql b/sql/yagp_cursors.sql new file mode 100644 index 00000000000..fcd2df136f5 --- /dev/null +++ b/sql/yagp_cursors.sql @@ -0,0 +1,69 @@ +-- FETCH is not tested here because truly utility statements (those +-- without sub-queries that go through the executor) are not logged. +-- Currently, only executor states are reported. Utility hooks are +-- not implemented. 
+ +CREATE EXTENSION yagp_hooks_collector; + +CREATE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; + +-- DECLARE +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; + +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- DECLARE WITH HOLD +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; + +RESET yagpcc.logging_mode; + +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- ROLLBACK +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR SELECT 1; +ROLLBACK; + +RESET yagpcc.logging_mode; + +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; \ No newline at end of file diff --git a/sql/yagp_dist.sql b/sql/yagp_dist.sql new file mode 100644 index 00000000000..d92a7c2e7af --- /dev/null +++ b/sql/yagp_dist.sql @@ -0,0 +1,84 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION yagp_status_order(status text) 
+RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; + +-- Hash distributed table + +CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); +INSERT INTO test_hash_dist SELECT 1; + +SET yagpcc.logging_mode to 'TBL'; +SET optimizer_enable_direct_dispatch TO TRUE; +-- Direct dispatch is used here, only one segment is scanned. +select * from test_hash_dist where id = 1; +RESET optimizer_enable_direct_dispatch; + +RESET yagpcc.logging_mode; +-- Should see 8 rows. +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +SET yagpcc.logging_mode to 'TBL'; + +-- Scan all segments. +select * from test_hash_dist; + +DROP TABLE test_hash_dist; +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Replicated table +CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ +BEGIN + RETURN NEXT 'seg'; +END; +$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; +INSERT INTO test_replicated SELECT 1; + +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_replicated, force_segments(); +DROP TABLE test_replicated; +DROP FUNCTION force_segments(); + +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Partially distributed table (2 numsegments) +SET allow_system_table_mods = ON; +CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED 
BY (id); +UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; +INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); + +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_partial_dist; +RESET yagpcc.logging_mode; + +DROP TABLE test_partial_dist; +RESET allow_system_table_mods; +-- Should see 12 rows. +SELECT ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; diff --git a/sql/yagp_select.sql b/sql/yagp_select.sql new file mode 100644 index 00000000000..d99b6c40387 --- /dev/null +++ b/sql/yagp_select.sql @@ -0,0 +1,65 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; + +-- Basic SELECT tests +SET yagpcc.logging_mode to 'TBL'; + +SELECT 1; +SELECT COUNT(*) FROM generate_series(1,10); + +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Transaction test +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +SELECT 1; +COMMIT; + +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- CTE test +SET yagpcc.logging_mode to 'TBL'; + +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + +RESET yagpcc.logging_mode; +SELECT 
segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Prepared statement test +SET yagpcc.logging_mode to 'TBL'; + +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; +DEALLOCATE test_stmt; + +RESET yagpcc.logging_mode; +SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; \ No newline at end of file diff --git a/src/Config.cpp b/src/Config.cpp index aef09fc7d73..bca5cbf5707 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -19,6 +19,12 @@ static char *guc_ignored_users = nullptr; static int guc_max_text_size = 1024; // in KB static int guc_max_plan_size = 1024; // in KB static int guc_min_analyze_time = -1; // uninitialized state +static int guc_logging_mode = LOG_MODE_UDS; + +static const struct config_enum_entry logging_mode_options[] = { + {"uds", LOG_MODE_UDS, false /* hidden */}, + {"tbl", LOG_MODE_TBL, false}, + {NULL, 0, false}}; static std::unique_ptr> ignored_users_set = nullptr; @@ -108,6 +114,12 @@ void Config::init() { "Zero prints all plans. 
-1 turns this feature off.", &guc_min_analyze_time, -1, -1, INT_MAX, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL); + + DefineCustomEnumVariable( + "yagpcc.logging_mode", "Logging mode: UDS or PG Table", NULL, + &guc_logging_mode, LOG_MODE_UDS, logging_mode_options, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_SUPERUSER_ONLY, NULL, NULL, + NULL); } std::string Config::uds_path() { return guc_uds_path; } @@ -118,6 +130,7 @@ bool Config::report_nested_queries() { return guc_report_nested_queries; } size_t Config::max_text_size() { return guc_max_text_size * 1024; } size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } int Config::min_analyze_time() { return guc_min_analyze_time; }; +int Config::logging_mode() { return guc_logging_mode; } bool Config::filter_user(std::string username) { if (!ignored_users_set) { diff --git a/src/Config.h b/src/Config.h index eff83f0960a..debabd3b826 100644 --- a/src/Config.h +++ b/src/Config.h @@ -2,6 +2,9 @@ #include +#define LOG_MODE_UDS 0 +#define LOG_MODE_TBL 1 + class Config { public: static void init(); @@ -14,5 +17,6 @@ class Config { static size_t max_text_size(); static size_t max_plan_size(); static int min_analyze_time(); + static int logging_mode(); static void sync(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 133d409b574..6866db1f52f 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,6 +1,7 @@ #include "Config.h" #include "UDSConnector.h" #include "memory/gpdbwrappers.h" +#include "log/LogOps.h" #define typeid __typeid extern "C" { @@ -24,6 +25,23 @@ extern "C" { (Gp_role == GP_ROLE_DISPATCH && Config::min_analyze_time() >= 0 && \ Config::enable_analyze()) +bool EventSender::log_query_req(const yagpcc::SetQueryReq &req, + const std::string &event) { + bool clear_big_fields = false; + switch (Config::logging_mode()) { + case LOG_MODE_UDS: + clear_big_fields = UDSConnector::report_query(req, 
event); + break; + case LOG_MODE_TBL: + ya_gpdb::insert_log(req); + clear_big_fields = false; + break; + default: + Assert(false); + } + return clear_big_fields; +} + void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; @@ -58,7 +76,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { } void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { - if (!connector) { + if (!proto_verified) { return; } if (filter_query(query_desc)) { @@ -88,7 +106,7 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (!connector || !need_collect(query_desc, nesting_level)) { + if (!proto_verified || !need_collect(query_desc, nesting_level)) { return; } if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { @@ -112,14 +130,14 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { } yagpcc::GPMetrics stats; std::swap(stats, *query_msg->mutable_query_metrics()); - if (connector->report_query(*query_msg, "started")) { + if (log_query_req(*query_msg, "started")) { clear_big_fields(query_msg); } std::swap(stats, *query_msg->mutable_query_metrics()); } void EventSender::executor_end(QueryDesc *query_desc) { - if (!connector || !need_collect(query_desc, nesting_level)) { + if (!proto_verified || !need_collect(query_desc, nesting_level)) { return; } if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { @@ -135,13 +153,13 @@ void EventSender::executor_end(QueryDesc *query_desc) { } else { set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); } - if (connector->report_query(*query_msg, "ended")) { + if (log_query_req(*query_msg, "ended")) { clear_big_fields(query_msg); } } void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (!connector) { + if 
(!proto_verified) { return; } Config::sync(); @@ -167,7 +185,7 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { set_qi_nesting_level(query_msg, nesting_level); set_qi_slice_id(query_msg); set_query_text(query_msg, query_desc); - if (connector->report_query(*query_msg, "submit")) { + if (log_query_req(*query_msg, "submit")) { clear_big_fields(query_msg); } // take initial metrics snapshot so that we can safely take diff afterwards @@ -230,12 +248,12 @@ void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), &ic_statistics); #endif - connector->report_query(*query_msg, msg); + (void)log_query_req(*query_msg, msg); } void EventSender::collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status) { - if (!connector || !need_collect(query_desc, nesting_level)) { + if (!proto_verified || !need_collect(query_desc, nesting_level)) { return; } @@ -276,7 +294,7 @@ void EventSender::ic_metrics_collect() { if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) { return; } - if (!connector || gp_command_count == 0 || !Config::enable_collector() || + if (!proto_verified || gp_command_count == 0 || !Config::enable_collector() || Config::filter_user(get_user_name())) { return; } @@ -305,7 +323,7 @@ void EventSender::ic_metrics_collect() { } void EventSender::analyze_stats_collect(QueryDesc *query_desc) { - if (!connector || Gp_role != GP_ROLE_DISPATCH) { + if (!proto_verified || Gp_role != GP_ROLE_DISPATCH) { return; } if (!need_collect(query_desc, nesting_level)) { @@ -330,7 +348,8 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { EventSender::EventSender() { if (Config::enable_collector()) { try { - connector = new UDSConnector(); + GOOGLE_PROTOBUF_VERIFY_VERSION; + proto_verified = true; } catch (const std::exception &e) { ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); } @@ -342,12 +361,10 @@ 
EventSender::EventSender() { EventSender::~EventSender() { for (const auto &[qkey, _] : queries) { - ereport(LOG, - (errmsg("YAGPCC query with missing done event: " - "tmid=%d ssid=%d ccnt=%d nlvl=%d", - qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); + ereport(LOG, (errmsg("YAGPCC query with missing done event: " + "tmid=%d ssid=%d ccnt=%d nlvl=%d", + qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); } - delete connector; } // That's basically a very simplistic state machine to fix or highlight any bugs diff --git a/src/EventSender.h b/src/EventSender.h index 4071d580ff9..87cf453861b 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -105,6 +105,8 @@ class EventSender { explicit QueryItem(QueryState st); }; + static bool log_query_req(const yagpcc::SetQueryReq &req, + const std::string &event); void update_query_state(QueryItem &query, QueryState new_state, bool success = true); QueryItem &get_query(QueryDesc *query_desc); @@ -116,7 +118,7 @@ class EventSender { void update_nested_counters(QueryDesc *query_desc); bool qdesc_submitted(QueryDesc *query_desc); - UDSConnector *connector = nullptr; + bool proto_verified = false; int nesting_level = 0; int64_t nested_calls = 0; double nested_timing = 0; diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index f8c4586126d..b6af303218d 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -2,6 +2,7 @@ #include "Config.h" #include "YagpStat.h" #include "memory/gpdbwrappers.h" +#include "log/LogOps.h" #include #include @@ -16,8 +17,6 @@ extern "C" { #include "postgres.h" } -UDSConnector::UDSConnector() { GOOGLE_PROTOBUF_VERIFY_VERSION; } - static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, const std::string &event) { ereport(LOG, diff --git a/src/UDSConnector.h b/src/UDSConnector.h index 67504fc8529..f0dfcb77a3f 100644 --- a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -4,6 +4,6 @@ class UDSConnector { public: - UDSConnector(); - bool report_query(const 
yagpcc::SetQueryReq &req, const std::string &event); + bool static report_query(const yagpcc::SetQueryReq &req, + const std::string &event); }; \ No newline at end of file diff --git a/src/hook_wrappers.h b/src/hook_wrappers.h index c158f42cf1d..cfabf39485e 100644 --- a/src/hook_wrappers.h +++ b/src/hook_wrappers.h @@ -9,6 +9,9 @@ extern void hooks_deinit(); extern void yagp_functions_reset(); extern Datum yagp_functions_get(FunctionCallInfo fcinfo); +extern void init_log(); +extern void truncate_log(); + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/src/log/LogOps.cpp b/src/log/LogOps.cpp new file mode 100644 index 00000000000..6c80d9663f7 --- /dev/null +++ b/src/log/LogOps.cpp @@ -0,0 +1,129 @@ +#include "protos/yagpcc_set_service.pb.h" + +#include "LogOps.h" +#include "LogSchema.h" + +extern "C" { +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/dependency.h" +#include "catalog/heap.h" +#include "catalog/namespace.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_type.h" +#include "cdb/cdbvars.h" +#include "commands/tablecmds.h" +#include "funcapi.h" +#include "fmgr.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/timestamp.h" +} + +void init_log() { + Oid namespaceId; + Oid relationId; + ObjectAddress tableAddr; + ObjectAddress schemaAddr; + + namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */); + + /* Create table */ + relationId = heap_create_with_catalog( + log_relname.data() /* relname */, namespaceId /* namespace */, + 0 /* tablespace */, InvalidOid /* relid */, InvalidOid /* reltype oid */, + InvalidOid /* reloftypeid */, GetUserId() /* owner */, + DescribeTuple() /* rel tuple */, NIL, InvalidOid /* relam */, + RELKIND_RELATION, RELPERSISTENCE_PERMANENT, RELSTORAGE_HEAP, false, false, + true, 0, ONCOMMIT_NOOP, NULL /* GP Policy */, 
(Datum)0, + false /* use_user_acl */, true, true, false /* valid_opts */, + false /* is_part_child */, false /* is part parent */, NULL); + + /* Make the table visible */ + CommandCounterIncrement(); + + /* Record dependency of the table on the schema */ + if (OidIsValid(relationId) && OidIsValid(namespaceId)) { + ObjectAddressSet(tableAddr, RelationRelationId, relationId); + ObjectAddressSet(schemaAddr, NamespaceRelationId, namespaceId); + + /* Table can be dropped only via DROP EXTENSION */ + recordDependencyOn(&tableAddr, &schemaAddr, DEPENDENCY_EXTENSION); + } else { + ereport(NOTICE, (errmsg("YAGPCC failed to create log table or schema"))); + } + + /* Make changes visible */ + CommandCounterIncrement(); +} + +void insert_log(const yagpcc::SetQueryReq &req) { + Oid namespaceId; + Oid relationId; + Relation rel; + HeapTuple tuple; + + /* Return if xact is not valid (needed for catalog lookups). */ + if (!IsTransactionState()) { + return; + } + + /* Return if extension was not loaded */ + namespaceId = get_namespace_oid(schema_name.data(), true /* missing_ok */); + if (!OidIsValid(namespaceId)) { + return; + } + + /* Return if the table was not created yet */ + relationId = get_relname_relid(log_relname.data(), namespaceId); + if (!OidIsValid(relationId)) { + return; + } + + bool nulls[natts_yagp_log]; + Datum values[natts_yagp_log]; + + memset(nulls, true, sizeof(nulls)); + memset(values, 0, sizeof(values)); + + extract_query_req(req, "", values, nulls); + + rel = heap_open(relationId, RowExclusiveLock); + + /* Insert the tuple as a frozen one to ensure it is logged even if txn rolls + * back or aborts */ + tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); + frozen_heap_insert(rel, tuple); + + heap_freetuple(tuple); + /* Keep lock on rel until end of xact */ + heap_close(rel, NoLock); + + /* Make changes visible */ + CommandCounterIncrement(); +} + +void truncate_log() { + Oid namespaceId; + Oid relationId; + Relation relation; + + namespaceId = 
get_namespace_oid(schema_name.data(), false /* missing_ok */); + relationId = get_relname_relid(log_relname.data(), namespaceId); + + relation = heap_open(relationId, AccessExclusiveLock); + + /* Truncate the main table */ + heap_truncate_one_rel(relation); + + /* Keep lock on rel until end of xact */ + heap_close(relation, NoLock); + + /* Make changes visible */ + CommandCounterIncrement(); +} \ No newline at end of file diff --git a/src/log/LogOps.h b/src/log/LogOps.h new file mode 100644 index 00000000000..d4ee3a57fb2 --- /dev/null +++ b/src/log/LogOps.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +extern "C" { +#include "postgres.h" +#include "fmgr.h" +} + +extern "C" { +/* CREATE TABLE yagpcc.__log (...); */ +void init_log(); + +/* TRUNCATE yagpcc.__log */ +void truncate_log(); +} + +/* INSERT INTO yagpcc.__log VALUES (...) */ +void insert_log(const yagpcc::SetQueryReq &req); diff --git a/src/log/LogSchema.cpp b/src/log/LogSchema.cpp new file mode 100644 index 00000000000..61e11dd0e3b --- /dev/null +++ b/src/log/LogSchema.cpp @@ -0,0 +1,136 @@ +#include "google/protobuf/reflection.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/timestamp.pb.h" + +#include "LogSchema.h" + +const std::unordered_map & +proto_name_to_col_idx() { + static const auto name_col_idx = [] { + std::unordered_map map; + map.reserve(log_tbl_desc.size()); + + for (size_t idx = 0; idx < natts_yagp_log; ++idx) { + map.emplace(log_tbl_desc[idx].proto_field_name, idx); + } + + return map; + }(); + return name_col_idx; +} + +TupleDesc DescribeTuple() { + TupleDesc tupdesc = CreateTemplateTupleDesc(natts_yagp_log, false); + + for (size_t anum = 1; anum <= natts_yagp_log; ++anum) { + TupleDescInitEntry(tupdesc, anum, log_tbl_desc[anum - 1].pg_att_name.data(), + log_tbl_desc[anum - 1].type_oid, -1 /* typmod */, + 0 /* attdim */); + } + + return tupdesc; +} + +Datum protots_to_timestamptz(const google::protobuf::Timestamp &ts) { + TimestampTz pgtimestamp = + 
(TimestampTz)ts.seconds() * USECS_PER_SEC + (ts.nanos() / 1000); + pgtimestamp -= (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY; + return TimestampTzGetDatum(pgtimestamp); +} + +Datum field_to_datum(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg) { + using namespace google::protobuf; + + switch (field->cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + return Int32GetDatum(reflection->GetInt32(msg, field)); + case FieldDescriptor::CPPTYPE_INT64: + return Int64GetDatum(reflection->GetInt64(msg, field)); + case FieldDescriptor::CPPTYPE_UINT32: + return Int64GetDatum(reflection->GetUInt32(msg, field)); + case FieldDescriptor::CPPTYPE_UINT64: + return Int64GetDatum( + static_cast(reflection->GetUInt64(msg, field))); + case FieldDescriptor::CPPTYPE_DOUBLE: + return Float8GetDatum(reflection->GetDouble(msg, field)); + case FieldDescriptor::CPPTYPE_FLOAT: + return Float4GetDatum(reflection->GetFloat(msg, field)); + case FieldDescriptor::CPPTYPE_BOOL: + return BoolGetDatum(reflection->GetBool(msg, field)); + case FieldDescriptor::CPPTYPE_ENUM: + return CStringGetTextDatum(reflection->GetEnum(msg, field)->name().data()); + case FieldDescriptor::CPPTYPE_STRING: + return CStringGetTextDatum(reflection->GetString(msg, field).c_str()); + default: + return (Datum)0; + } +} + +void process_field(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg, + const std::string &field_name, Datum *values, bool *nulls) { + + auto proto_idx_map = proto_name_to_col_idx(); + auto it = proto_idx_map.find(field_name); + + if (it == proto_idx_map.end()) { + ereport(NOTICE, + (errmsg("YAGPCC protobuf field %s is not registered in log table", + field_name.c_str()))); + return; + } + + int idx = it->second; + + if (!reflection->HasField(msg, field)) { + nulls[idx] = true; + return; + } + + if (field->cpp_type() 
== google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE && + field->message_type()->full_name() == "google.protobuf.Timestamp") { + const auto &ts = static_cast( + reflection->GetMessage(msg, field)); + values[idx] = protots_to_timestamptz(ts); + } else { + values[idx] = field_to_datum(field, reflection, msg); + } + nulls[idx] = false; + + return; +} + +void extract_query_req(const google::protobuf::Message &msg, + const std::string &prefix, Datum *values, bool *nulls) { + using namespace google::protobuf; + + const Descriptor *descriptor = msg.GetDescriptor(); + const Reflection *reflection = msg.GetReflection(); + + for (int i = 0; i < descriptor->field_count(); ++i) { + const FieldDescriptor *field = descriptor->field(i); + + // For now, we do not log any repeated fields plus they need special + // treatment. + if (field->is_repeated()) { + continue; + } + + std::string curr_pref = prefix.empty() ? "" : prefix + "."; + std::string field_name = curr_pref + field->name().data(); + + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && + field->message_type()->full_name() != "google.protobuf.Timestamp") { + + if (reflection->HasField(msg, field)) { + const Message &nested = reflection->GetMessage(msg, field); + extract_query_req(nested, field_name, values, nulls); + } + } else { + process_field(field, reflection, msg, field_name, values, nulls); + } + } +} diff --git a/src/log/LogSchema.h b/src/log/LogSchema.h new file mode 100644 index 00000000000..e68de7b6bdb --- /dev/null +++ b/src/log/LogSchema.h @@ -0,0 +1,164 @@ +#pragma once + +#include +#include +#include +#include + +extern "C" { +#include "postgres.h" +#include "access/htup_details.h" +#include "access/tupdesc.h" +#include "catalog/pg_type.h" +#include "utils/timestamp.h" +#include "utils/builtins.h" +} + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Message; +class Reflection; +class Timestamp; +} // namespace protobuf +} // namespace google + +inline constexpr 
std::string_view schema_name = "yagpcc"; +inline constexpr std::string_view log_relname = "__log"; + +struct LogDesc { + std::string_view pg_att_name; + std::string_view proto_field_name; + Oid type_oid; +}; + +/* + * Definition of the log table structure. + * + * System stats collected as %lu (unsigned) may + * overflow INT8OID (signed), but this is acceptable. + */ +/* clang-format off */ +inline constexpr std::array log_tbl_desc = { + /* 8-byte aligned types first - Query Info */ + LogDesc{"query_id", "query_info.query_id", INT8OID}, + LogDesc{"plan_id", "query_info.plan_id", INT8OID}, + LogDesc{"nested_level", "add_info.nested_level", INT8OID}, + LogDesc{"slice_id", "add_info.slice_id", INT8OID}, + /* 8-byte aligned types - System Stats */ + LogDesc{"systemstat_vsize", "query_metrics.systemStat.vsize", INT8OID}, + LogDesc{"systemstat_rss", "query_metrics.systemStat.rss", INT8OID}, + LogDesc{"systemstat_vmsizekb", "query_metrics.systemStat.VmSizeKb", INT8OID}, + LogDesc{"systemstat_vmpeakkb", "query_metrics.systemStat.VmPeakKb", INT8OID}, + LogDesc{"systemstat_rchar", "query_metrics.systemStat.rchar", INT8OID}, + LogDesc{"systemstat_wchar", "query_metrics.systemStat.wchar", INT8OID}, + LogDesc{"systemstat_syscr", "query_metrics.systemStat.syscr", INT8OID}, + LogDesc{"systemstat_syscw", "query_metrics.systemStat.syscw", INT8OID}, + LogDesc{"systemstat_read_bytes", "query_metrics.systemStat.read_bytes", INT8OID}, + LogDesc{"systemstat_write_bytes", "query_metrics.systemStat.write_bytes", INT8OID}, + LogDesc{"systemstat_cancelled_write_bytes", "query_metrics.systemStat.cancelled_write_bytes", INT8OID}, + /* 8-byte aligned types - Metric Instrumentation */ + LogDesc{"instrumentation_ntuples", "query_metrics.instrumentation.ntuples", INT8OID}, + LogDesc{"instrumentation_nloops", "query_metrics.instrumentation.nloops", INT8OID}, + LogDesc{"instrumentation_tuplecount", "query_metrics.instrumentation.tuplecount", INT8OID}, + LogDesc{"instrumentation_shared_blks_hit", 
"query_metrics.instrumentation.shared_blks_hit", INT8OID}, + LogDesc{"instrumentation_shared_blks_read", "query_metrics.instrumentation.shared_blks_read", INT8OID}, + LogDesc{"instrumentation_shared_blks_dirtied", "query_metrics.instrumentation.shared_blks_dirtied", INT8OID}, + LogDesc{"instrumentation_shared_blks_written", "query_metrics.instrumentation.shared_blks_written", INT8OID}, + LogDesc{"instrumentation_local_blks_hit", "query_metrics.instrumentation.local_blks_hit", INT8OID}, + LogDesc{"instrumentation_local_blks_read", "query_metrics.instrumentation.local_blks_read", INT8OID}, + LogDesc{"instrumentation_local_blks_dirtied", "query_metrics.instrumentation.local_blks_dirtied", INT8OID}, + LogDesc{"instrumentation_local_blks_written", "query_metrics.instrumentation.local_blks_written", INT8OID}, + LogDesc{"instrumentation_temp_blks_read", "query_metrics.instrumentation.temp_blks_read", INT8OID}, + LogDesc{"instrumentation_temp_blks_written", "query_metrics.instrumentation.temp_blks_written", INT8OID}, + LogDesc{"instrumentation_inherited_calls", "query_metrics.instrumentation.inherited_calls", INT8OID}, + /* 8-byte aligned types - Network Stats */ + LogDesc{"instrumentation_sent_total_bytes", "query_metrics.instrumentation.sent.total_bytes", INT8OID}, + LogDesc{"instrumentation_sent_tuple_bytes", "query_metrics.instrumentation.sent.tuple_bytes", INT8OID}, + LogDesc{"instrumentation_sent_chunks", "query_metrics.instrumentation.sent.chunks", INT8OID}, + LogDesc{"instrumentation_received_total_bytes", "query_metrics.instrumentation.received.total_bytes", INT8OID}, + LogDesc{"instrumentation_received_tuple_bytes", "query_metrics.instrumentation.received.tuple_bytes", INT8OID}, + LogDesc{"instrumentation_received_chunks", "query_metrics.instrumentation.received.chunks", INT8OID}, + /* 8-byte aligned types - Interconnect Stats and spilled bytes */ + LogDesc{"interconnect_total_recv_queue_size", "query_metrics.instrumentation.interconnect.total_recv_queue_size", 
INT8OID}, + LogDesc{"interconnect_recv_queue_size_counting_time", "query_metrics.instrumentation.interconnect.recv_queue_size_counting_time", INT8OID}, + LogDesc{"interconnect_total_capacity", "query_metrics.instrumentation.interconnect.total_capacity", INT8OID}, + LogDesc{"interconnect_capacity_counting_time", "query_metrics.instrumentation.interconnect.capacity_counting_time", INT8OID}, + LogDesc{"interconnect_total_buffers", "query_metrics.instrumentation.interconnect.total_buffers", INT8OID}, + LogDesc{"interconnect_buffer_counting_time", "query_metrics.instrumentation.interconnect.buffer_counting_time", INT8OID}, + LogDesc{"interconnect_active_connections_num", "query_metrics.instrumentation.interconnect.active_connections_num", INT8OID}, + LogDesc{"interconnect_retransmits", "query_metrics.instrumentation.interconnect.retransmits", INT8OID}, + LogDesc{"interconnect_startup_cached_pkt_num", "query_metrics.instrumentation.interconnect.startup_cached_pkt_num", INT8OID}, + LogDesc{"interconnect_mismatch_num", "query_metrics.instrumentation.interconnect.mismatch_num", INT8OID}, + LogDesc{"interconnect_crc_errors", "query_metrics.instrumentation.interconnect.crc_errors", INT8OID}, + LogDesc{"interconnect_snd_pkt_num", "query_metrics.instrumentation.interconnect.snd_pkt_num", INT8OID}, + LogDesc{"interconnect_recv_pkt_num", "query_metrics.instrumentation.interconnect.recv_pkt_num", INT8OID}, + LogDesc{"interconnect_disordered_pkt_num", "query_metrics.instrumentation.interconnect.disordered_pkt_num", INT8OID}, + LogDesc{"interconnect_duplicated_pkt_num", "query_metrics.instrumentation.interconnect.duplicated_pkt_num", INT8OID}, + LogDesc{"interconnect_recv_ack_num", "query_metrics.instrumentation.interconnect.recv_ack_num", INT8OID}, + LogDesc{"interconnect_status_query_msg_num", "query_metrics.instrumentation.interconnect.status_query_msg_num", INT8OID}, + LogDesc{"spill_totalbytes", "query_metrics.spill.totalBytes", INT8OID}, + /* 8-byte aligned types - Float and 
Timestamp */ + LogDesc{"systemstat_runningtimeseconds", "query_metrics.systemStat.runningTimeSeconds", FLOAT8OID}, + LogDesc{"systemstat_usertimeseconds", "query_metrics.systemStat.userTimeSeconds", FLOAT8OID}, + LogDesc{"systemstat_kerneltimeseconds", "query_metrics.systemStat.kernelTimeSeconds", FLOAT8OID}, + LogDesc{"instrumentation_firsttuple", "query_metrics.instrumentation.firsttuple", FLOAT8OID}, + LogDesc{"instrumentation_startup", "query_metrics.instrumentation.startup", FLOAT8OID}, + LogDesc{"instrumentation_total", "query_metrics.instrumentation.total", FLOAT8OID}, + LogDesc{"instrumentation_blk_read_time", "query_metrics.instrumentation.blk_read_time", FLOAT8OID}, + LogDesc{"instrumentation_blk_write_time", "query_metrics.instrumentation.blk_write_time", FLOAT8OID}, + LogDesc{"instrumentation_startup_time", "query_metrics.instrumentation.startup_time", FLOAT8OID}, + LogDesc{"instrumentation_inherited_time", "query_metrics.instrumentation.inherited_time", FLOAT8OID}, + LogDesc{"datetime", "datetime", TIMESTAMPTZOID}, + LogDesc{"submit_time", "submit_time", TIMESTAMPTZOID}, + LogDesc{"start_time", "start_time", TIMESTAMPTZOID}, + LogDesc{"end_time", "end_time", TIMESTAMPTZOID}, + /* 4-byte aligned types - Query Key */ + LogDesc{"tmid", "query_key.tmid", INT4OID}, + LogDesc{"ssid", "query_key.ssid", INT4OID}, + LogDesc{"ccnt", "query_key.ccnt", INT4OID}, + /* 4-byte aligned types - Segment Key */ + LogDesc{"dbid", "segment_key.dbid", INT4OID}, + LogDesc{"segid", "segment_key.segindex", INT4OID}, + LogDesc{"spill_filecount", "query_metrics.spill.fileCount", INT4OID}, + /* Variable-length types - Query Info */ + LogDesc{"generator", "query_info.generator", TEXTOID}, + LogDesc{"query_text", "query_info.query_text", TEXTOID}, + LogDesc{"plan_text", "query_info.plan_text", TEXTOID}, + LogDesc{"template_query_text", "query_info.template_query_text", TEXTOID}, + LogDesc{"template_plan_text", "query_info.template_plan_text", TEXTOID}, + LogDesc{"user_name", 
"query_info.userName", TEXTOID}, + LogDesc{"database_name", "query_info.databaseName", TEXTOID}, + LogDesc{"rsgname", "query_info.rsgname", TEXTOID}, + LogDesc{"analyze_text", "query_info.analyze_text", TEXTOID}, + LogDesc{"error_message", "add_info.error_message", TEXTOID}, + LogDesc{"query_status", "query_status", TEXTOID}, +}; +/* clang-format on */ + +inline constexpr size_t natts_yagp_log = log_tbl_desc.size(); + +const std::unordered_map & +proto_name_to_col_idx(); + +TupleDesc DescribeTuple(); + +Datum protots_to_timestamptz(const google::protobuf::Timestamp &ts); + +Datum field_to_datum(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg); + +/* Process a single proto field and store in values/nulls arrays */ +void process_field(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg, + const std::string &field_name, Datum *values, bool *nulls); + +/* + * Extracts values from msg into values/nulls arrays. Caller must + * pre-init nulls[] to true (this function does not set nulls + * to true for nested messages if parent message is missing). 
+ */ +void extract_query_req(const google::protobuf::Message &msg, + const std::string &prefix, Datum *values, bool *nulls); diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index 9d579a91a30..c0078a88a9b 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -1,4 +1,5 @@ #include "gpdbwrappers.h" +#include "log/LogOps.h" extern "C" { #include "postgres.h" @@ -220,4 +221,8 @@ char *ya_gpdb::get_rg_name_for_id(Oid group_id) { Oid ya_gpdb::get_rg_id_by_session_id(int session_id) { return wrap_throw(ResGroupGetGroupIdBySessionId, session_id); -} \ No newline at end of file +} + +void ya_gpdb::insert_log(const yagpcc::SetQueryReq &req) { + return wrap_throw(::insert_log, req); +} diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index ad7ae96c362..385dd1a3abc 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -16,6 +16,10 @@ extern "C" { #include #include +namespace yagpcc { +class SetQueryReq; +} // yagpcc + namespace ya_gpdb { // Functions that call palloc(). @@ -38,6 +42,7 @@ void instr_end_loop(Instrumentation *instr); char *gen_normquery(const char *query); StringInfo gen_normplan(const char *executionPlan); char *get_rg_name_for_id(Oid group_id); +void insert_log(const yagpcc::SetQueryReq &req); // Palloc-free functions. 
void pfree(void *pointer) noexcept; diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c index 2a9e7328e6d..9db73638b24 100644 --- a/src/yagp_hooks_collector.c +++ b/src/yagp_hooks_collector.c @@ -10,6 +10,8 @@ void _PG_init(void); void _PG_fini(void); PG_FUNCTION_INFO_V1(yagp_stat_messages_reset); PG_FUNCTION_INFO_V1(yagp_stat_messages); +PG_FUNCTION_INFO_V1(yagp_init_log); +PG_FUNCTION_INFO_V1(yagp_truncate_log); void _PG_init(void) { if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { @@ -30,4 +32,14 @@ Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { Datum yagp_stat_messages(PG_FUNCTION_ARGS) { return yagp_functions_get(fcinfo); -} \ No newline at end of file +} + +Datum yagp_init_log(PG_FUNCTION_ARGS) { + init_log(); + PG_RETURN_VOID(); +} + +Datum yagp_truncate_log(PG_FUNCTION_ARGS) { + truncate_log(); + PG_RETURN_VOID(); +} diff --git a/yagp_hooks_collector--1.0--1.1.sql b/yagp_hooks_collector--1.0--1.1.sql new file mode 100644 index 00000000000..959d4f235d1 --- /dev/null +++ b/yagp_hooks_collector--1.0--1.1.sql @@ -0,0 +1,113 @@ +/* yagp_hooks_collector--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION yagp_hooks_collector UPDATE TO '1.1'" to load this file. \quit + +CREATE SCHEMA yagpcc; + +-- Unlink existing objects from extension. +ALTER EXTENSION yagp_hooks_collector DROP VIEW yagp_stat_messages; +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION yagp_stat_messages_reset(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_f_on_segments(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_f_on_master(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_reset_f_on_segments(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); + +-- Now drop the objects. 
+DROP VIEW yagp_stat_messages; +DROP FUNCTION yagp_stat_messages_reset(); +DROP FUNCTION __yagp_stat_messages_f_on_segments(); +DROP FUNCTION __yagp_stat_messages_f_on_master(); +DROP FUNCTION __yagp_stat_messages_reset_f_on_segments(); +DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); + +-- Recreate functions and view in new schema. +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.stat_messages_reset() +RETURNS void +AS +$$ + SELECT yagpcc.__stat_messages_reset_f_on_master(); + SELECT yagpcc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW yagpcc.stat_messages AS + SELECT C.* + FROM yagpcc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM yagpcc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +-- Create new objects. 
+CREATE FUNCTION yagpcc.__init_log_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__init_log_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside yagpcc schema. +SELECT yagpcc.__init_log_on_master(); +SELECT yagpcc.__init_log_on_segments(); + +CREATE VIEW yagpcc.log AS + SELECT * FROM yagpcc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('yagpcc.__log') -- segments + ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION yagpcc.__truncate_log_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__truncate_log_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.truncate_log() +RETURNS void AS $$ +BEGIN + PERFORM yagpcc.__truncate_log_on_master(); + PERFORM yagpcc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; diff --git a/sql/yagp_hooks_collector--1.0.sql b/yagp_hooks_collector--1.0.sql similarity index 99% rename from sql/yagp_hooks_collector--1.0.sql rename to yagp_hooks_collector--1.0.sql index 88bbe4e0dc7..7ab4e1b2fb7 100644 --- a/sql/yagp_hooks_collector--1.0.sql +++ b/yagp_hooks_collector--1.0.sql @@ -15,7 +15,7 @@ LANGUAGE C EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagp_stat_messages_reset() RETURNS void -AS +AS $$ SELECT __yagp_stat_messages_reset_f_on_master(); SELECT __yagp_stat_messages_reset_f_on_segments(); diff --git a/yagp_hooks_collector--1.1.sql b/yagp_hooks_collector--1.1.sql new file mode 100644 index 00000000000..657720a88f2 --- /dev/null +++ b/yagp_hooks_collector--1.1.sql @@ -0,0 +1,95 @@ +/* yagp_hooks_collector--1.1.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION yagp_hooks_collector" 
to load this file. \quit + +CREATE SCHEMA yagpcc; + +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.stat_messages_reset() +RETURNS void +AS +$$ + SELECT yagpcc.__stat_messages_reset_f_on_master(); + SELECT yagpcc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW yagpcc.stat_messages AS + SELECT C.* + FROM yagpcc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM yagpcc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +CREATE FUNCTION yagpcc.__init_log_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__init_log_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside yagpcc schema. 
+SELECT yagpcc.__init_log_on_master(); +SELECT yagpcc.__init_log_on_segments(); + +CREATE VIEW yagpcc.log AS + SELECT * FROM yagpcc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('yagpcc.__log') -- segments +ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION yagpcc.__truncate_log_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__truncate_log_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.truncate_log() +RETURNS void AS $$ +BEGIN + PERFORM yagpcc.__truncate_log_on_master(); + PERFORM yagpcc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; diff --git a/yagp_hooks_collector.control b/yagp_hooks_collector.control index b5539dd6462..cb5906a1302 100644 --- a/yagp_hooks_collector.control +++ b/yagp_hooks_collector.control @@ -1,5 +1,5 @@ # yagp_hooks_collector extension comment = 'Intercept query and plan execution hooks and report them to Yandex GPCC agents' -default_version = '1.0' +default_version = '1.1' module_pathname = '$libdir/yagp_hooks_collector' superuser = true From 7fb64752aa41fd1fb0957275a18fe506e935b0e0 Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Wed, 10 Sep 2025 14:58:22 +0300 Subject: [PATCH 105/133] send analyze in text & enable it (#22) * send analyze in text & enable it --- README.md | 3 ++- gmon.out | Bin 0 -> 12568 bytes metric.md | 2 +- src/Config.cpp | 4 ++-- src/EventSender.cpp | 2 +- src/PgUtils.h | 2 +- src/ProtoUtils.cpp | 11 +++-------- src/ProtoUtils.h | 2 +- src/memory/gpdbwrappers.cpp | 4 ++-- src/memory/gpdbwrappers.h | 2 +- 10 files changed, 14 insertions(+), 18 deletions(-) create mode 100644 gmon.out diff --git a/README.md b/README.md index da660d0d56a..9f465a190cb 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ An extension for collecting greenplum query execution 
metrics and reporting them - **GUC:** `yagpcc.enable`. #### 3. `EXPLAIN ANALYZE` data -- **What:** Triggers generation of the `EXPLAIN (JSON, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. +- **What:** Triggers generation of the `EXPLAIN (TEXT, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. - **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats`(ANALYZE), `yagpcc.enable_analyze`(BUFFERS, TIMING, VERBOSE). #### 4. Other Metrics @@ -25,3 +25,4 @@ An extension for collecting greenplum query execution metrics and reporting them - **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. - **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. - **Trimming plans:** Query texts and execution plans are trimmed based on `yagpcc.max_text_size` and `yagpcc.max_plan_size` (default: 1024KB). For now, it is not recommended to set these GUCs higher than 1024KB. +- **Analyze collection:** Analyze is sent if execution time exceeds `yagpcc.min_analyze_time`, which is 10 seconds by default. Analyze is collected if `yagpcc.enable_analyze` is true. diff --git a/gmon.out b/gmon.out new file mode 100644 index 0000000000000000000000000000000000000000..f1190ddefdcdd1a1a9fc1cb0332014953c649124 GIT binary patch literal 12568 zcmeI&t4;$!6vpu#5(I7?0#gS9f#O<71yx*9E(wGJX+jAhftI8S$0P6vtI#V@1hEp? 
zN5J5C1O&@`XUL4;3HaB{&bM>s^!!rUtJ~qgWq*8%Q@xm*&a!^}IJ&$T4*H{T`u8CO ztOy{000IagfB*srAbE39oESbG$;pCBC`q>K$L%9zhZM>v=$HtrKs(h+mTxH_n%lq=`Kqr#&v4x?L ppFM ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end - | `userName` | string | All | ABS | - | Cluster | + | - | text | Session user | | `databaseName` | string | All | ABS | - | Cluster | + | - | text | Database name | | `rsgname` | string | All | ABS | - | Cluster | + | - | text | Resource group name | -| `analyze_text` | string | D | ABS | - | Cluster | + | - | text | EXPLAIN ANALYZE JSON | +| `analyze_text` | string | D | ABS | - | Cluster | + | - | text | EXPLAIN ANALYZE | | **AdditionalQueryInfo** | | | | | | | | | | | `nested_level` | int64 | All | ABS | - | Node | + | + | count | Current nesting level | | `error_message` | string | D | ABS | - | Node | + | + | text | Error message | diff --git a/src/Config.cpp b/src/Config.cpp index bca5cbf5707..c95d989d638 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -18,7 +18,7 @@ static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; static int guc_max_text_size = 1024; // in KB static int guc_max_plan_size = 1024; // in KB -static int guc_min_analyze_time = -1; // uninitialized state +static int guc_min_analyze_time = 10000; // in seconds static int guc_logging_mode = LOG_MODE_UDS; static const struct config_enum_entry logging_mode_options[] = { @@ -112,7 +112,7 @@ void Config::init() { "yagpcc.min_analyze_time", "Sets the minimum execution time above which plans will be logged.", "Zero prints all plans. 
-1 turns this feature off.", - &guc_min_analyze_time, -1, -1, INT_MAX, PGC_USERSET, + &guc_min_analyze_time, 10000, -1, INT_MAX, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL); DefineCustomEnumVariable( diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 6866db1f52f..4c523784cd9 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -341,7 +341,7 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { double ms = query_desc->totaltime->total * 1000.0; if (ms >= Config::min_analyze_time()) { - set_analyze_plan_text_json(query_desc, query_msg); + set_analyze_plan_text(query_desc, query_msg); } } diff --git a/src/PgUtils.h b/src/PgUtils.h index ceb07c2e8e5..8f331837739 100644 --- a/src/PgUtils.h +++ b/src/PgUtils.h @@ -14,4 +14,4 @@ bool need_report_nested_query(); bool filter_query(QueryDesc *query_desc); bool need_collect(QueryDesc *query_desc, int nesting_level); ExplainState get_explain_state(QueryDesc *query_desc, bool costs); -ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze); +ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 4655433c806..b1da4e969a3 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -226,8 +226,8 @@ double protots_to_double(const google::protobuf::Timestamp &ts) { return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; } -void set_analyze_plan_text_json(QueryDesc *query_desc, - yagpcc::SetQueryReq *req) { +void set_analyze_plan_text(QueryDesc *query_desc, + yagpcc::SetQueryReq *req) { // Make sure it is a valid txn and it is not an utility // statement for ExplainPrintPlan() later. 
if (!IsTransactionState() || !query_desc->plannedstmt) { @@ -235,7 +235,7 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, } MemoryContext oldcxt = ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - ExplainState es = ya_gpdb::get_analyze_state_json( + ExplainState es = ya_gpdb::get_analyze_state( query_desc, query_desc->instrument_options && Config::enable_analyze()); ya_gpdb::mem_ctx_switch_to(oldcxt); if (es.str) { @@ -243,11 +243,6 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { es.str->data[--es.str->len] = '\0'; } - // Convert JSON array to JSON object. - if (es.str->len > 0) { - es.str->data[0] = '{'; - es.str->data[es.str->len - 1] = '}'; - } auto trimmed_analyze = char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); req->mutable_query_info()->set_analyze_text(trimmed_analyze); diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 8287b3de7ea..f62be1fd2bf 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -19,5 +19,5 @@ void set_ic_stats(yagpcc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics); yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); double protots_to_double(const google::protobuf::Timestamp &ts); -void set_analyze_plan_text_json(QueryDesc *query_desc, +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *message); \ No newline at end of file diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index c0078a88a9b..9424c6aa444 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -127,7 +127,7 @@ ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, }); } -ExplainState ya_gpdb::get_analyze_state_json(QueryDesc *query_desc, +ExplainState ya_gpdb::get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept { return wrap_noexcept([&]() { ExplainState es; @@ -137,7 +137,7 @@ ExplainState ya_gpdb::get_analyze_state_json(QueryDesc 
*query_desc, es.buffers = es.analyze; es.timing = es.analyze; es.summary = es.analyze; - es.format = EXPLAIN_FORMAT_JSON; + es.format = EXPLAIN_FORMAT_TEXT; ExplainBeginOutput(&es); if (analyze) { ExplainPrintPlan(&es, query_desc); diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index 385dd1a3abc..3033c25378c 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -31,7 +31,7 @@ char *get_database_name(Oid dbid) noexcept; bool split_identifier_string(char *rawstring, char separator, List **namelist) noexcept; ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; -ExplainState get_analyze_state_json(QueryDesc *query_desc, +ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept; Instrumentation *instr_alloc(size_t n, int instrument_options); HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, From 8c847d3bef308423c14bfa0f78232fd87bf3b77e Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Mon, 15 Sep 2025 10:37:20 +0300 Subject: [PATCH 106/133] clean (#23) rm -f gmon.out & fix typo --- gmon.out | Bin 12568 -> 0 bytes src/Config.cpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 gmon.out diff --git a/gmon.out b/gmon.out deleted file mode 100644 index f1190ddefdcdd1a1a9fc1cb0332014953c649124..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12568 zcmeI&t4;$!6vpu#5(I7?0#gS9f#O<71yx*9E(wGJX+jAhftI8S$0P6vtI#V@1hEp? 
zN5J5C1O&@`XUL4;3HaB{&bM>s^!!rUtJ~qgWq*8%Q@xm*&a!^}IJ&$T4*H{T`u8CO ztOy{000IagfB*srAbE39oESbG$;pCBC`q>K$L%9zhZM>v=$HtrKs(h+mTxH_n%lq=`Kqr#&v4x?L ppFM Date: Mon, 29 Sep 2025 14:40:17 +0300 Subject: [PATCH 107/133] report utility stmt (#24) * report utility stmt --- expected/yagp_cursors.out | 134 +++++++++++++----- expected/yagp_dist.out | 130 ++++++++--------- expected/yagp_select.out | 74 +++++----- expected/yagp_utility.out | 272 ++++++++++++++++++++++++++++++++++++ sql/yagp_cursors.sql | 32 +++-- sql/yagp_dist.sql | 10 +- sql/yagp_select.sql | 12 +- sql/yagp_utility.sql | 133 ++++++++++++++++++ src/Config.cpp | 11 +- src/Config.h | 1 + src/EventSender.cpp | 166 +++++++++++++--------- src/EventSender.h | 16 ++- src/PgUtils.cpp | 5 - src/PgUtils.h | 3 - src/ProtoUtils.cpp | 11 +- src/ProtoUtils.h | 5 +- src/hook_wrappers.cpp | 60 +++++++- src/log/LogOps.cpp | 4 +- src/log/LogOps.h | 2 +- src/log/LogSchema.cpp | 3 +- src/log/LogSchema.h | 6 +- src/memory/gpdbwrappers.cpp | 6 +- src/memory/gpdbwrappers.h | 7 +- 23 files changed, 847 insertions(+), 256 deletions(-) create mode 100644 expected/yagp_utility.out create mode 100644 sql/yagp_utility.sql diff --git a/expected/yagp_cursors.out b/expected/yagp_cursors.out index c9bbbbf8a07..9587c00b550 100644 --- a/expected/yagp_cursors.out +++ b/expected/yagp_cursors.out @@ -1,7 +1,3 @@ --- FETCH is not tested here because truly utility statements (those --- without sub-queries that go through the executor) are not logged. --- Currently, only executor states are reported. Utility hooks are --- not implemented. 
CREATE EXTENSION yagp_hooks_collector; CREATE FUNCTION yagp_status_order(status text) RETURNS integer @@ -17,6 +13,7 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; -- DECLARE SET yagpcc.logging_mode to 'TBL'; @@ -25,14 +22,20 @@ DECLARE cursor_stats_0 CURSOR FOR SELECT 0; CLOSE cursor_stats_0; COMMIT; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+---------------------------------------------+--------------------- - -1 | 10 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT - -1 | 10 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_START - -1 | 10 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_END - -1 | 10 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_DONE -(4 rows) +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_0; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_0; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(10 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; t @@ -49,18 +52,24 @@ DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; CLOSE cursor_stats_2; COMMIT; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, 
ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+-------------------------------------------------------+--------------------- - -1 | 23 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT - -1 | 23 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_START - -1 | 23 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_END - -1 | 23 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_DONE - -1 | 26 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT - -1 | 26 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_START - -1 | 26 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_END - -1 | 26 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE -(8 rows) +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(14 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; t @@ -76,16 +85,72 @@ CLOSE 
cursor_stats_3; DECLARE cursor_stats_4 CURSOR FOR SELECT 1; ROLLBACK; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+---------------------------------------------+--------------------- - -1 | 39 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT - -1 | 39 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_START - -1 | 39 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_END - -1 | 39 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE - -1 | 42 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT - -1 | 42 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_START -(6 rows) +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(12 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- FETCH +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; + ?column? 
+---------- + 2 +(1 row) + +FETCH 1 IN cursor_stats_6; + ?column? +---------- + 3 +(1 row) + +CLOSE cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_6; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(18 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; t @@ -97,3 +162,4 @@ DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/expected/yagp_dist.out b/expected/yagp_dist.out index 9112b936001..ebaf839601d 100644 --- a/expected/yagp_dist.out +++ b/expected/yagp_dist.out @@ -14,6 +14,7 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; -- Hash 
distributed table CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); INSERT INTO test_hash_dist SELECT 1; @@ -29,17 +30,17 @@ select * from test_hash_dist where id = 1; RESET optimizer_enable_direct_dispatch; RESET yagpcc.logging_mode; -- Should see 8 rows. -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+--------------------------------------------+--------------------- - -1 | 13 | select * from test_hash_dist where id = 1; | QUERY_STATUS_SUBMIT - -1 | 13 | select * from test_hash_dist where id = 1; | QUERY_STATUS_START - -1 | 13 | select * from test_hash_dist where id = 1; | QUERY_STATUS_END - -1 | 13 | select * from test_hash_dist where id = 1; | QUERY_STATUS_DONE - 1 | 13 | | QUERY_STATUS_SUBMIT - 1 | 13 | | QUERY_STATUS_START - 1 | 13 | | QUERY_STATUS_END - 1 | 13 | | QUERY_STATUS_DONE +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------------------+--------------------- + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_START + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_END + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE (8 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -58,25 +59,25 @@ select * from test_hash_dist; DROP TABLE test_hash_dist; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+-------------------------------+--------------------- - -1 | 24 | select * from test_hash_dist; | 
QUERY_STATUS_SUBMIT - -1 | 24 | select * from test_hash_dist; | QUERY_STATUS_START - -1 | 24 | select * from test_hash_dist; | QUERY_STATUS_END - -1 | 24 | select * from test_hash_dist; | QUERY_STATUS_DONE - 1 | 24 | | QUERY_STATUS_SUBMIT - 1 | 24 | | QUERY_STATUS_START - 1 | 24 | | QUERY_STATUS_END - 1 | 24 | | QUERY_STATUS_DONE - 2 | 24 | | QUERY_STATUS_SUBMIT - 2 | 24 | | QUERY_STATUS_START - 2 | 24 | | QUERY_STATUS_END - 2 | 24 | | QUERY_STATUS_DONE - | 24 | | QUERY_STATUS_SUBMIT - | 24 | | QUERY_STATUS_START - | 24 | | QUERY_STATUS_END - | 24 | | QUERY_STATUS_DONE +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------+--------------------- + -1 | select * from test_hash_dist; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist; | QUERY_STATUS_START + -1 | select * from test_hash_dist; | QUERY_STATUS_END + -1 | select * from test_hash_dist; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE (16 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -103,25 +104,25 @@ SELECT COUNT(*) FROM test_replicated, force_segments(); DROP TABLE test_replicated; DROP FUNCTION force_segments(); RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+---------------------------------------------------------+--------------------- - -1 | 39 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_SUBMIT - -1 | 39 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_START - -1 | 
39 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_END - -1 | 39 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_DONE - 1 | 39 | | QUERY_STATUS_SUBMIT - 1 | 39 | | QUERY_STATUS_START - 1 | 39 | | QUERY_STATUS_END - 1 | 39 | | QUERY_STATUS_DONE - 2 | 39 | | QUERY_STATUS_SUBMIT - 2 | 39 | | QUERY_STATUS_START - 2 | 39 | | QUERY_STATUS_END - 2 | 39 | | QUERY_STATUS_DONE - | 39 | | QUERY_STATUS_SUBMIT - | 39 | | QUERY_STATUS_START - | 39 | | QUERY_STATUS_END - | 39 | | QUERY_STATUS_DONE +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------------------+--------------------- + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE (16 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -146,21 +147,21 @@ RESET yagpcc.logging_mode; DROP TABLE test_partial_dist; RESET allow_system_table_mods; -- Should see 12 rows. 
-SELECT ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - ccnt | query_text | query_status -------+-----------------------------------------+--------------------- - 57 | SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_SUBMIT - 57 | SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_START - 57 | SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_END - 57 | SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_DONE - 57 | | QUERY_STATUS_SUBMIT - 57 | | QUERY_STATUS_START - 57 | | QUERY_STATUS_END - 57 | | QUERY_STATUS_DONE - 57 | | QUERY_STATUS_SUBMIT - 57 | | QUERY_STATUS_START - 57 | | QUERY_STATUS_END - 57 | | QUERY_STATUS_DONE +SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + query_text | query_status +-----------------------------------------+--------------------- + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_SUBMIT + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_START + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_END + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE (12 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -173,3 +174,4 @@ DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/expected/yagp_select.out b/expected/yagp_select.out index c2282d8c217..4c4a0218150 100644 --- a/expected/yagp_select.out +++ b/expected/yagp_select.out @@ -14,6 +14,7 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; -- Basic SELECT tests SET yagpcc.logging_mode to 'TBL'; SELECT 1; @@ -29,17 +30,17 @@ SELECT 
COUNT(*) FROM generate_series(1,10); (1 row) RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+---------------------------------------------+--------------------- - -1 | 9 | SELECT 1; | QUERY_STATUS_SUBMIT - -1 | 9 | SELECT 1; | QUERY_STATUS_START - -1 | 9 | SELECT 1; | QUERY_STATUS_END - -1 | 9 | SELECT 1; | QUERY_STATUS_DONE - -1 | 11 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_SUBMIT - -1 | 11 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_START - -1 | 11 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_END - -1 | 11 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_DONE +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_DONE (8 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -59,13 +60,13 @@ SELECT 1; COMMIT; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+------------+--------------------- - -1 | 22 | SELECT 1; | QUERY_STATUS_SUBMIT - -1 | 22 | SELECT 1; | QUERY_STATUS_START - -1 | 22 | SELECT 1; | QUERY_STATUS_END - -1 | 22 | SELECT 1; | QUERY_STATUS_DONE +SELECT segid, query_text, 
query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE (4 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -85,17 +86,17 @@ SELECT * FROM t; (2 rows) RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+-----------------------------+--------------------- - -1 | 33 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_SUBMIT - | | SELECT * FROM t; | - -1 | 33 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_START - | | SELECT * FROM t; | - -1 | 33 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_END - | | SELECT * FROM t; | - -1 | 33 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_DONE - | | SELECT * FROM t; | +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-----------------------------+--------------------- + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_SUBMIT + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_START + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_END + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_DONE + | SELECT * FROM t; | (4 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -115,13 +116,13 @@ EXECUTE test_stmt; DEALLOCATE test_stmt; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | ccnt | query_text | query_status --------+------+--------------------------------+--------------------- - -1 | 44 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_SUBMIT - -1 | 
44 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_START - -1 | 44 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_END - -1 | 44 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_DONE +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------+--------------------- + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_START + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_END + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_DONE (4 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -134,3 +135,4 @@ DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/expected/yagp_utility.out b/expected/yagp_utility.out new file mode 100644 index 00000000000..03c17713575 --- /dev/null +++ b/expected/yagp_utility.out @@ -0,0 +1,272 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE test_table (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_DONE + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_SUBMIT + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_DONE + -1 | DROP TABLE test_table; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE test_table; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Partitioning +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +NOTICE: CREATE TABLE will create partition "pt_test_1_prt_1" for table "pt_test" +NOTICE: CREATE TABLE will create partition "pt_test_1_prt_2" for table "pt_test" +DROP TABLE pt_test; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| 
QUERY_STATUS_SUBMIT + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE pt_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Views and Functions +SET yagpcc.logging_mode to 'TBL'; +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------------------------------------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_DONE + -1 | DROP VIEW test_view; | QUERY_STATUS_SUBMIT + -1 
| DROP VIEW test_view; | QUERY_STATUS_DONE + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_SUBMIT + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Transaction Operations +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; +BEGIN; +SAVEPOINT sp2; +ABORT; +BEGIN; +ROLLBACK; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(18 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- DML Operations +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE dml_test (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE + -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE dml_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(6 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- COPY Operations +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE copy_test (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+COPY (SELECT 1) TO STDOUT; +1 +DROP TABLE copy_test; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE + -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Prepared Statements and error during execute +SET yagpcc.logging_mode to 'TBL'; +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +ERROR: division by zero +DEALLOCATE test_prep; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_ERROR + -1 | DEALLOCATE test_prep; | QUERY_STATUS_SUBMIT + -1 | DEALLOCATE test_prep; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- GUC Settings +SET yagpcc.logging_mode to 'TBL'; +SET yagpcc.report_nested_queries TO FALSE; +RESET yagpcc.report_nested_queries; 
+RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT + -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE + -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(6 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/sql/yagp_cursors.sql b/sql/yagp_cursors.sql index fcd2df136f5..5d5bde58110 100644 --- a/sql/yagp_cursors.sql +++ b/sql/yagp_cursors.sql @@ -1,8 +1,3 @@ --- FETCH is not tested here because truly utility statements (those --- without sub-queries that go through the executor) are not logged. --- Currently, only executor states are reported. Utility hooks are --- not implemented. 
- CREATE EXTENSION yagp_hooks_collector; CREATE FUNCTION yagp_status_order(status text) @@ -20,6 +15,7 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; -- DECLARE @@ -31,7 +27,7 @@ CLOSE cursor_stats_0; COMMIT; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; -- DECLARE WITH HOLD @@ -46,7 +42,7 @@ COMMIT; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; -- ROLLBACK @@ -60,10 +56,28 @@ ROLLBACK; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- FETCH +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; +FETCH 1 IN cursor_stats_6; +CLOSE cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; DROP FUNCTION yagp_status_order(text); DROP EXTENSION 
yagp_hooks_collector; RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; \ No newline at end of file +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/sql/yagp_dist.sql b/sql/yagp_dist.sql index d92a7c2e7af..b837ef05335 100644 --- a/sql/yagp_dist.sql +++ b/sql/yagp_dist.sql @@ -16,6 +16,7 @@ $$ LANGUAGE plpgsql IMMUTABLE; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; -- Hash distributed table @@ -30,7 +31,7 @@ RESET optimizer_enable_direct_dispatch; RESET yagpcc.logging_mode; -- Should see 8 rows. -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; SET yagpcc.logging_mode to 'TBL'; @@ -40,7 +41,7 @@ select * from test_hash_dist; DROP TABLE test_hash_dist; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; -- Replicated table @@ -59,7 +60,7 @@ DROP TABLE test_replicated; DROP FUNCTION force_segments(); RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; -- Partially distributed table (2 numsegments) @@ -75,10 +76,11 @@ RESET yagpcc.logging_mode; DROP TABLE test_partial_dist; RESET allow_system_table_mods; -- Should see 12 rows. 
-SELECT ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/sql/yagp_select.sql b/sql/yagp_select.sql index d99b6c40387..4038c6b7b63 100644 --- a/sql/yagp_select.sql +++ b/sql/yagp_select.sql @@ -16,6 +16,7 @@ $$ LANGUAGE plpgsql IMMUTABLE; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; -- Basic SELECT tests SET yagpcc.logging_mode to 'TBL'; @@ -24,7 +25,7 @@ SELECT 1; SELECT COUNT(*) FROM generate_series(1,10); RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; -- Transaction test @@ -35,7 +36,7 @@ SELECT 1; COMMIT; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; -- CTE test @@ -45,7 +46,7 @@ WITH t AS (VALUES (1), (2)) SELECT * FROM t; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; -- Prepared statement test @@ -56,10 +57,11 @@ EXECUTE test_stmt; DEALLOCATE 
test_stmt; RESET yagpcc.logging_mode; -SELECT segid, ccnt, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; SELECT yagpcc.truncate_log() IS NOT NULL AS t; DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; \ No newline at end of file +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/sql/yagp_utility.sql b/sql/yagp_utility.sql new file mode 100644 index 00000000000..b4cca6f5421 --- /dev/null +++ b/sql/yagp_utility.sql @@ -0,0 +1,133 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; + +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE test_table (a int, b text); +CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Partitioning +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +DROP TABLE pt_test; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT 
yagpcc.truncate_log() IS NOT NULL AS t; + +-- Views and Functions +SET yagpcc.logging_mode to 'TBL'; + +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Transaction Operations +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; + +BEGIN; +SAVEPOINT sp2; +ABORT; + +BEGIN; +ROLLBACK; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- DML Operations +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE dml_test (a int, b text); +INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- COPY Operations +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE copy_test (a int); +COPY (SELECT 1) TO STDOUT; +DROP TABLE copy_test; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Prepared Statements and error during execute +SET yagpcc.logging_mode to 'TBL'; + +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +DEALLOCATE test_prep; + +RESET yagpcc.logging_mode; + +SELECT segid, 
query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- GUC Settings +SET yagpcc.logging_mode to 'TBL'; + +SET yagpcc.report_nested_queries TO FALSE; +RESET yagpcc.report_nested_queries; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/src/Config.cpp b/src/Config.cpp index e6639206b0b..9030411b903 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -16,10 +16,11 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; -static int guc_max_text_size = 1024; // in KB -static int guc_max_plan_size = 1024; // in KB +static int guc_max_text_size = 1024; // in KB +static int guc_max_plan_size = 1024; // in KB static int guc_min_analyze_time = 10000; // in ms static int guc_logging_mode = LOG_MODE_UDS; +static bool guc_enable_utility = false; static const struct config_enum_entry logging_mode_options[] = { {"uds", LOG_MODE_UDS, false /* hidden */}, @@ -120,12 +121,18 @@ void Config::init() { &guc_logging_mode, LOG_MODE_UDS, logging_mode_options, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_SUPERUSER_ONLY, NULL, NULL, NULL); + + DefineCustomBoolVariable( + "yagpcc.enable_utility", "Collect utility statement stats", NULL, + &guc_enable_utility, false, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); } std::string Config::uds_path() { return guc_uds_path; } bool Config::enable_analyze() { return guc_enable_analyze; } bool 
Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } +bool Config::enable_utility() { return guc_enable_utility; } bool Config::report_nested_queries() { return guc_report_nested_queries; } size_t Config::max_text_size() { return guc_max_text_size * 1024; } size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } diff --git a/src/Config.h b/src/Config.h index debabd3b826..7501c727a44 100644 --- a/src/Config.h +++ b/src/Config.h @@ -12,6 +12,7 @@ class Config { static bool enable_analyze(); static bool enable_cdbstats(); static bool enable_collector(); + static bool enable_utility(); static bool filter_user(std::string username); static bool report_nested_queries(); static size_t max_text_size(); diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 4c523784cd9..fee435a6dcc 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -25,15 +25,72 @@ extern "C" { (Gp_role == GP_ROLE_DISPATCH && Config::min_analyze_time() >= 0 && \ Config::enable_analyze()) +static bool enable_utility = Config::enable_utility(); + +bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, + bool utility) { + if (!proto_verified) { + return false; + } + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return false; + } + + switch (state) { + case QueryState::SUBMIT: + // Cache enable_utility at SUBMIT to ensure consistent behavior at DONE. + // Without caching, a query that sets enable_utility to false from true + // would be accepted at SUBMIT (guc is true) but rejected at DONE (guc + // is false), causing a leak. + enable_utility = Config::enable_utility(); + if (utility && enable_utility == false) { + return false; + } + // Sync config in case current query changes it. + Config::sync(); + // Register qkey for a nested query we won't report, + // so we can detect nesting_level > 0 and skip reporting at end/done. 
+ if (!need_report_nested_query() && nesting_level > 0) { + QueryKey::register_qkey(query_desc, nesting_level); + return false; + } + if (is_top_level_query(query_desc, nesting_level)) { + nested_timing = 0; + nested_calls = 0; + } + break; + case QueryState::START: + if (!qdesc_submitted(query_desc)) { + collect_query_submit(query_desc, false /* utility */); + } + break; + case QueryState::DONE: + if (utility && enable_utility == false) { + return false; + } + default: + break; + } + + if (filter_query(query_desc)) { + return false; + } + if (!nesting_is_valid(query_desc, nesting_level)) { + return false; + } + + return true; +} + bool EventSender::log_query_req(const yagpcc::SetQueryReq &req, - const std::string &event) { + const std::string &event, bool utility) { bool clear_big_fields = false; switch (Config::logging_mode()) { case LOG_MODE_UDS: clear_big_fields = UDSConnector::report_query(req, event); break; case LOG_MODE_TBL: - ya_gpdb::insert_log(req); + ya_gpdb::insert_log(req, utility); clear_big_fields = false; break; default: @@ -42,10 +99,8 @@ bool EventSender::log_query_req(const yagpcc::SetQueryReq &req, return clear_big_fields; } -void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { - if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { - return; - } +void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg, + bool utility, ErrorData *edata) { auto *query_desc = reinterpret_cast(arg); switch (status) { case METRICS_PLAN_NODE_INITIALIZE: @@ -54,7 +109,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // TODO break; case METRICS_QUERY_SUBMIT: - collect_query_submit(query_desc); + collect_query_submit(query_desc, utility); break; case METRICS_QUERY_START: // no-op: executor_after_start is enough @@ -68,7 +123,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { case METRICS_QUERY_ERROR: case METRICS_QUERY_CANCELED: case 
METRICS_INNER_QUERY_DONE: - collect_query_done(query_desc, status); + collect_query_done(query_desc, utility, status, edata); break; default: ereport(FATAL, (errmsg("Unknown query status: %d", status))); @@ -76,18 +131,10 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { } void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { - if (!proto_verified) { - return; - } - if (filter_query(query_desc)) { - return; - } - if (!qdesc_submitted(query_desc)) { - collect_query_submit(query_desc); - } - if (!need_collect(query_desc, nesting_level)) { + if (!verify_query(query_desc, QueryState::START, false /* utility*/)) { return; } + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; @@ -106,16 +153,14 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (!proto_verified || !need_collect(query_desc, nesting_level)) { - return; - } - if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + if (!verify_query(query_desc, QueryState::START, false /* utility */)) { return; } + auto &query = get_query(query_desc); auto query_msg = query.message.get(); *query_msg->mutable_start_time() = current_ts(); - update_query_state(query, QueryState::START); + update_query_state(query, QueryState::START, false /* utility */); set_query_plan(query_msg, query_desc); if (need_collect_analyze()) { // Set up to track total elapsed time during query run. 
@@ -130,52 +175,37 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { } yagpcc::GPMetrics stats; std::swap(stats, *query_msg->mutable_query_metrics()); - if (log_query_req(*query_msg, "started")) { + if (log_query_req(*query_msg, "started", false /* utility */)) { clear_big_fields(query_msg); } std::swap(stats, *query_msg->mutable_query_metrics()); } void EventSender::executor_end(QueryDesc *query_desc) { - if (!proto_verified || !need_collect(query_desc, nesting_level)) { - return; - } - if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + if (!verify_query(query_desc, QueryState::END, false /* utility */)) { return; } + auto &query = get_query(query_desc); auto *query_msg = query.message.get(); *query_msg->mutable_end_time() = current_ts(); - update_query_state(query, QueryState::END); + update_query_state(query, QueryState::END, false /* utility */); if (is_top_level_query(query_desc, nesting_level)) { set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, nested_timing); } else { set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); } - if (log_query_req(*query_msg, "ended")) { + if (log_query_req(*query_msg, "ended", false /* utility */)) { clear_big_fields(query_msg); } } -void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (!proto_verified) { - return; - } - Config::sync(); - // Register qkey for a nested query we won't report, - // so we can detect nesting_level > 0 and skip reporting at end/done. 
- if (!need_report_nested_query() && nesting_level > 0) { - QueryKey::register_qkey(query_desc, nesting_level); - return; - } - if (is_top_level_query(query_desc, nesting_level)) { - nested_timing = 0; - nested_calls = 0; - } - if (!need_collect(query_desc, nesting_level)) { +void EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) { + if (!verify_query(query_desc, QueryState::SUBMIT, utility)) { return; } + submit_query(query_desc); auto &query = get_query(query_desc); auto *query_msg = query.message.get(); @@ -185,7 +215,7 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { set_qi_nesting_level(query_msg, nesting_level); set_qi_slice_id(query_msg); set_query_text(query_msg, query_desc); - if (log_query_req(*query_msg, "submit")) { + if (log_query_req(*query_msg, "submit", utility)) { clear_big_fields(query_msg); } // take initial metrics snapshot so that we can safely take diff afterwards @@ -200,7 +230,8 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { } void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, - QueryMetricsStatus status) { + QueryMetricsStatus status, bool utility, + ErrorData *edata) { yagpcc::QueryStatus query_status; std::string msg; switch (status) { @@ -229,12 +260,20 @@ void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, (errmsg("Unexpected query status in query_done hook: %d", status))); } auto prev_state = query.state; - update_query_state(query, QueryState::DONE, + update_query_state(query, QueryState::DONE, utility, query_status == yagpcc::QueryStatus::QUERY_STATUS_DONE); auto query_msg = query.message.get(); query_msg->set_query_status(query_status); if (status == METRICS_QUERY_ERROR) { - set_qi_error_message(query_msg); + bool error_flushed = elog_message() == NULL; + if (error_flushed && edata->message == NULL) { + ereport(WARNING, (errmsg("YAGPCC missing error message"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", 
query_desc->sourceText))); + } else { + set_qi_error_message(query_msg, + error_flushed ? edata->message : elog_message()); + } } if (prev_state == START) { // We've missed ExecutorEnd call due to query cancel or error. It's @@ -248,12 +287,13 @@ void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), &ic_statistics); #endif - (void)log_query_req(*query_msg, msg); + (void)log_query_req(*query_msg, msg, utility); } -void EventSender::collect_query_done(QueryDesc *query_desc, - QueryMetricsStatus status) { - if (!proto_verified || !need_collect(query_desc, nesting_level)) { +void EventSender::collect_query_done(QueryDesc *query_desc, bool utility, + QueryMetricsStatus status, + ErrorData *edata) { + if (!verify_query(query_desc, QueryState::DONE, utility)) { return; } @@ -276,10 +316,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } auto &query = get_query(query_desc); - bool report = need_report_nested_query() || - is_top_level_query(query_desc, nesting_level); - if (report) - report_query_done(query_desc, query, status); + report_query_done(query_desc, query, status, utility, edata); if (need_report_nested_query()) update_nested_counters(query_desc); @@ -323,15 +360,12 @@ void EventSender::ic_metrics_collect() { } void EventSender::analyze_stats_collect(QueryDesc *query_desc) { - if (!proto_verified || Gp_role != GP_ROLE_DISPATCH) { + if (!verify_query(query_desc, QueryState::END, false /* utility */)) { return; } - if (!need_collect(query_desc, nesting_level)) { + if (Gp_role != GP_ROLE_DISPATCH) { return; } - auto &query = get_query(query_desc); - auto *query_msg = query.message.get(); - *query_msg->mutable_end_time() = current_ts(); if (!query_desc->totaltime || !need_collect_analyze()) { return; } @@ -341,6 +375,8 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { double ms = query_desc->totaltime->total * 1000.0; if (ms >= 
Config::min_analyze_time()) { + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); set_analyze_plan_text(query_desc, query_msg); } } @@ -370,7 +406,7 @@ EventSender::~EventSender() { // That's basically a very simplistic state machine to fix or highlight any bugs // coming from GP void EventSender::update_query_state(QueryItem &query, QueryState new_state, - bool success) { + bool utility, bool success) { switch (new_state) { case QueryState::SUBMIT: Assert(false); @@ -389,7 +425,7 @@ void EventSender::update_query_state(QueryItem &query, QueryState new_state, query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); break; case QueryState::DONE: - Assert(query.state == QueryState::END || !success); + Assert(query.state == QueryState::END || !success || utility); query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); break; default: diff --git a/src/EventSender.h b/src/EventSender.h index 87cf453861b..4afdf1e14a4 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -87,7 +87,8 @@ class EventSender { void executor_before_start(QueryDesc *query_desc, int eflags); void executor_after_start(QueryDesc *query_desc, int eflags); void executor_end(QueryDesc *query_desc); - void query_metrics_collect(QueryMetricsStatus status, void *arg); + void query_metrics_collect(QueryMetricsStatus status, void *arg, bool utility, + ErrorData *edata = NULL); void ic_metrics_collect(); void analyze_stats_collect(QueryDesc *query_desc); void incr_depth() { nesting_level++; } @@ -106,15 +107,18 @@ class EventSender { }; static bool log_query_req(const yagpcc::SetQueryReq &req, - const std::string &event); - void update_query_state(QueryItem &query, QueryState new_state, + const std::string &event, bool utility); + bool verify_query(QueryDesc *query_desc, QueryState state, bool utility); + void update_query_state(QueryItem &query, QueryState new_state, bool utility, bool success = true); QueryItem &get_query(QueryDesc 
*query_desc); void submit_query(QueryDesc *query_desc); - void collect_query_submit(QueryDesc *query_desc); + void collect_query_submit(QueryDesc *query_desc, bool utility); void report_query_done(QueryDesc *query_desc, QueryItem &query, - QueryMetricsStatus status); - void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); + QueryMetricsStatus status, bool utility, + ErrorData *edata = NULL); + void collect_query_done(QueryDesc *query_desc, bool utility, + QueryMetricsStatus status, ErrorData *edata = NULL); void update_nested_counters(QueryDesc *query_desc); bool qdesc_submitted(QueryDesc *query_desc); diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 929f0cf2681..fc58112bfaa 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -79,8 +79,3 @@ bool filter_query(QueryDesc *query_desc) { return gp_command_count == 0 || query_desc->sourceText == nullptr || !Config::enable_collector() || Config::filter_user(get_user_name()); } - -bool need_collect(QueryDesc *query_desc, int nesting_level) { - return !filter_query(query_desc) && - nesting_is_valid(query_desc, nesting_level); -} diff --git a/src/PgUtils.h b/src/PgUtils.h index 8f331837739..02f084c597a 100644 --- a/src/PgUtils.h +++ b/src/PgUtils.h @@ -12,6 +12,3 @@ bool is_top_level_query(QueryDesc *query_desc, int nesting_level); bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); bool need_report_nested_query(); bool filter_query(QueryDesc *query_desc); -bool need_collect(QueryDesc *query_desc, int nesting_level); -ExplainState get_explain_state(QueryDesc *query_desc, bool costs); -ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index b1da4e969a3..a26f348c344 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -103,7 +103,8 @@ void set_query_info(yagpcc::SetQueryReq *req) { if (Gp_session_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->set_username(get_user_name()); - 
qi->set_databasename(get_db_name()); + if (IsTransactionState()) + qi->set_databasename(get_db_name()); qi->set_rsgname(get_rg_name()); } } @@ -118,11 +119,10 @@ void set_qi_slice_id(yagpcc::SetQueryReq *req) { aqi->set_slice_id(currentSliceId); } -void set_qi_error_message(yagpcc::SetQueryReq *req) { +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg) { auto aqi = req->mutable_add_info(); - auto error = elog_message(); *aqi->mutable_error_message() = - char_to_trimmed_str(error, strlen(error), Config::max_text_size()); + char_to_trimmed_str(err_msg, strlen(err_msg), Config::max_text_size()); } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, @@ -226,8 +226,7 @@ double protots_to_double(const google::protobuf::Timestamp &ts) { return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; } -void set_analyze_plan_text(QueryDesc *query_desc, - yagpcc::SetQueryReq *req) { +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req) { // Make sure it is a valid txn and it is not an utility // statement for ExplainPrintPlan() later. 
if (!IsTransactionState() || !query_desc->plannedstmt) { diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index f62be1fd2bf..725a634f765 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -12,12 +12,11 @@ void clear_big_fields(yagpcc::SetQueryReq *req); void set_query_info(yagpcc::SetQueryReq *req); void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level); void set_qi_slice_id(yagpcc::SetQueryReq *req); -void set_qi_error_message(yagpcc::SetQueryReq *req); +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg); void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, int nested_calls, double nested_time); void set_ic_stats(yagpcc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics); yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); double protots_to_double(const google::protobuf::Timestamp &ts); -void set_analyze_plan_text(QueryDesc *query_desc, - yagpcc::SetQueryReq *message); \ No newline at end of file +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *message); \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index d76b7c64e10..07ac511d546 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -32,6 +32,7 @@ static analyze_stats_collect_hook_type previous_analyze_stats_collect_hook = #ifdef IC_TEARDOWN_HOOK static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; #endif +static ProcessUtility_hook_type previous_ProcessUtility_hook = nullptr; static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, @@ -44,6 +45,10 @@ static void ya_ic_teardown_hook(ChunkTransportState *transportStates, #ifdef ANALYZE_STATS_COLLECT_HOOK static void ya_analyze_stats_collect_hook(QueryDesc *query_desc); #endif +static void ya_process_utility_hook(Node *parsetree, const char *queryString, + ProcessUtilityContext context, + 
ParamListInfo params, DestReceiver *dest, + char *completionTag); static EventSender *sender = nullptr; @@ -85,6 +90,8 @@ void hooks_init() { analyze_stats_collect_hook = ya_analyze_stats_collect_hook; #endif stat_statements_parser_init(); + previous_ProcessUtility_hook = ProcessUtility_hook; + ProcessUtility_hook = ya_process_utility_hook; } void hooks_deinit() { @@ -104,6 +111,7 @@ void hooks_deinit() { delete sender; } YagpStat::deinit(); + ProcessUtility_hook = previous_ProcessUtility_hook; } void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { @@ -165,7 +173,8 @@ void ya_ExecutorEnd_hook(QueryDesc *query_desc) { } void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { - cpp_call(get_sender(), &EventSender::query_metrics_collect, status, arg); + cpp_call(get_sender(), &EventSender::query_metrics_collect, status, + arg /* queryDesc */, false /* utility */, (ErrorData *)NULL); if (previous_query_info_collect_hook) { (*previous_query_info_collect_hook)(status, arg); } @@ -189,6 +198,55 @@ void ya_analyze_stats_collect_hook(QueryDesc *query_desc) { } #endif +static void ya_process_utility_hook(Node *parsetree, const char *queryString, + ProcessUtilityContext context, + ParamListInfo params, DestReceiver *dest, + char *completionTag) { + /* Project utility data on QueryDesc to use existing logic */ + QueryDesc *query_desc = (QueryDesc *)palloc0(sizeof(QueryDesc)); + query_desc->sourceText = queryString; + + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_SUBMIT, (void *)query_desc, true /* utility */, + (ErrorData *)NULL); + + get_sender()->incr_depth(); + PG_TRY(); + { + if (previous_ProcessUtility_hook) { + (*previous_ProcessUtility_hook)(parsetree, queryString, context, params, + dest, completionTag); + } else { + standard_ProcessUtility(parsetree, queryString, context, params, dest, + completionTag); + } + + get_sender()->decr_depth(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, 
METRICS_QUERY_DONE, + (void *)query_desc, true /* utility */, (ErrorData *)NULL); + + pfree(query_desc); + } + PG_CATCH(); + { + ErrorData *edata; + MemoryContext oldctx; + + oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + FlushErrorState(); + MemoryContextSwitchTo(oldctx); + + get_sender()->decr_depth(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, METRICS_QUERY_ERROR, + (void *)query_desc, true /* utility */, edata); + + pfree(query_desc); + ReThrowError(edata); + } + PG_END_TRY(); +} + static void check_stats_loaded() { if (!YagpStat::loaded()) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), diff --git a/src/log/LogOps.cpp b/src/log/LogOps.cpp index 6c80d9663f7..0868dd9fc1c 100644 --- a/src/log/LogOps.cpp +++ b/src/log/LogOps.cpp @@ -62,7 +62,7 @@ void init_log() { CommandCounterIncrement(); } -void insert_log(const yagpcc::SetQueryReq &req) { +void insert_log(const yagpcc::SetQueryReq &req, bool utility) { Oid namespaceId; Oid relationId; Relation rel; @@ -92,6 +92,8 @@ void insert_log(const yagpcc::SetQueryReq &req) { memset(values, 0, sizeof(values)); extract_query_req(req, "", values, nulls); + nulls[attnum_yagp_log_utility] = false; + values[attnum_yagp_log_utility] = BoolGetDatum(utility); rel = heap_open(relationId, RowExclusiveLock); diff --git a/src/log/LogOps.h b/src/log/LogOps.h index d4ee3a57fb2..bad03d09a8f 100644 --- a/src/log/LogOps.h +++ b/src/log/LogOps.h @@ -16,4 +16,4 @@ void truncate_log(); } /* INSERT INTO yagpcc.__log VALUES (...) 
*/ -void insert_log(const yagpcc::SetQueryReq &req); +void insert_log(const yagpcc::SetQueryReq &req, bool utility); diff --git a/src/log/LogSchema.cpp b/src/log/LogSchema.cpp index 61e11dd0e3b..335a3103cfd 100644 --- a/src/log/LogSchema.cpp +++ b/src/log/LogSchema.cpp @@ -4,8 +4,7 @@ #include "LogSchema.h" -const std::unordered_map & -proto_name_to_col_idx() { +const std::unordered_map &proto_name_to_col_idx() { static const auto name_col_idx = [] { std::unordered_map map; map.reserve(log_tbl_desc.size()); diff --git a/src/log/LogSchema.h b/src/log/LogSchema.h index e68de7b6bdb..f713c1e9b0e 100644 --- a/src/log/LogSchema.h +++ b/src/log/LogSchema.h @@ -133,13 +133,15 @@ inline constexpr std::array log_tbl_desc = { LogDesc{"analyze_text", "query_info.analyze_text", TEXTOID}, LogDesc{"error_message", "add_info.error_message", TEXTOID}, LogDesc{"query_status", "query_status", TEXTOID}, + /* Extra field */ + LogDesc{"utility", "", BOOLOID}, }; /* clang-format on */ inline constexpr size_t natts_yagp_log = log_tbl_desc.size(); +inline constexpr size_t attnum_yagp_log_utility = natts_yagp_log - 1; -const std::unordered_map & -proto_name_to_col_idx(); +const std::unordered_map &proto_name_to_col_idx(); TupleDesc DescribeTuple(); diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index 9424c6aa444..0824a3a6808 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -128,7 +128,7 @@ ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, } ExplainState ya_gpdb::get_analyze_state(QueryDesc *query_desc, - bool analyze) noexcept { + bool analyze) noexcept { return wrap_noexcept([&]() { ExplainState es; ExplainInitState(&es); @@ -223,6 +223,6 @@ Oid ya_gpdb::get_rg_id_by_session_id(int session_id) { return wrap_throw(ResGroupGetGroupIdBySessionId, session_id); } -void ya_gpdb::insert_log(const yagpcc::SetQueryReq &req) { - return wrap_throw(::insert_log, req); +void ya_gpdb::insert_log(const yagpcc::SetQueryReq &req, bool 
utility) { + return wrap_throw(::insert_log, req, utility); } diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index 3033c25378c..8f5f146cc67 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -18,7 +18,7 @@ extern "C" { namespace yagpcc { class SetQueryReq; -} // yagpcc +} // namespace yagpcc namespace ya_gpdb { @@ -31,8 +31,7 @@ char *get_database_name(Oid dbid) noexcept; bool split_identifier_string(char *rawstring, char separator, List **namelist) noexcept; ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; -ExplainState get_analyze_state(QueryDesc *query_desc, - bool analyze) noexcept; +ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept; Instrumentation *instr_alloc(size_t n, int instrument_options); HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull); @@ -42,7 +41,7 @@ void instr_end_loop(Instrumentation *instr); char *gen_normquery(const char *query); StringInfo gen_normplan(const char *executionPlan); char *get_rg_name_for_id(Oid group_id); -void insert_log(const yagpcc::SetQueryReq &req); +void insert_log(const yagpcc::SetQueryReq &req, bool utility); // Palloc-free functions. void pfree(void *pointer) noexcept; From ca620e9f75f5bc717620d529ef6753c7666c2d99 Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Wed, 3 Dec 2025 18:19:42 +0300 Subject: [PATCH 108/133] Trim utf8 (#25) Trim strings larger than 1MB by default and if we cut multi-byte utf8 then discard the character and shift the cut position. 
--- expected/yagp_utf8_trim.out | 66 +++++++++++++++++++++++++++++++++++++ sql/yagp_utf8_trim.sql | 43 ++++++++++++++++++++++++ src/Config.cpp | 14 ++++---- src/ProtoUtils.cpp | 53 ++++++++++++++++++++++------- 4 files changed, 157 insertions(+), 19 deletions(-) create mode 100644 expected/yagp_utf8_trim.out create mode 100644 sql/yagp_utf8_trim.sql diff --git a/expected/yagp_utf8_trim.out b/expected/yagp_utf8_trim.out new file mode 100644 index 00000000000..194ee6b3609 --- /dev/null +++ b/expected/yagp_utf8_trim.out @@ -0,0 +1,66 @@ +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) +RETURNS TEXT AS $$ + SELECT query_text + FROM yagpcc.log + WHERE query_text LIKE '%' || marker || '%' + ORDER BY datetime DESC + LIMIT 1 +$$ LANGUAGE sql VOLATILE; +SET yagpcc.enable TO TRUE; +-- Test 1: 1 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test1*/ 'HelloWorld'; + ?column? +------------ + HelloWorld +(1 row) + +RESET yagpcc.logging_mode; +SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 2: 2 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test2*/ 'РУССКИЙЯЗЫК'; + ?column? +------------- + РУССКИЙЯЗЫК +(1 row) + +RESET yagpcc.logging_mode; +-- Character 'Р' has two bytes and cut in the middle => not included. +SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 3: 4 byte chars +SET yagpcc.max_text_size to 21; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test3*/ '😀'; + ?column? +---------- + 😀 +(1 row) + +RESET yagpcc.logging_mode; +-- Emoji has 4 bytes and cut before the last byte => not included. 
+SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Cleanup +DROP FUNCTION get_marked_query(TEXT); +RESET yagpcc.max_text_size; +RESET yagpcc.logging_mode; +RESET yagpcc.enable; +DROP EXTENSION yagp_hooks_collector; diff --git a/sql/yagp_utf8_trim.sql b/sql/yagp_utf8_trim.sql new file mode 100644 index 00000000000..c0fdcce24a5 --- /dev/null +++ b/sql/yagp_utf8_trim.sql @@ -0,0 +1,43 @@ +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) +RETURNS TEXT AS $$ + SELECT query_text + FROM yagpcc.log + WHERE query_text LIKE '%' || marker || '%' + ORDER BY datetime DESC + LIMIT 1 +$$ LANGUAGE sql VOLATILE; + +SET yagpcc.enable TO TRUE; + +-- Test 1: 1 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test1*/ 'HelloWorld'; +RESET yagpcc.logging_mode; +SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; + +-- Test 2: 2 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test2*/ 'РУССКИЙЯЗЫК'; +RESET yagpcc.logging_mode; +-- Character 'Р' has two bytes and cut in the middle => not included. +SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; + +-- Test 3: 4 byte chars +SET yagpcc.max_text_size to 21; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test3*/ '😀'; +RESET yagpcc.logging_mode; +-- Emoji has 4 bytes and cut before the last byte => not included. 
+SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; + +-- Cleanup +DROP FUNCTION get_marked_query(TEXT); +RESET yagpcc.max_text_size; +RESET yagpcc.logging_mode; +RESET yagpcc.enable; + +DROP EXTENSION yagp_hooks_collector; diff --git a/src/Config.cpp b/src/Config.cpp index 9030411b903..dbd7e25b483 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -16,9 +16,9 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; -static int guc_max_text_size = 1024; // in KB -static int guc_max_plan_size = 1024; // in KB -static int guc_min_analyze_time = 10000; // in ms +static int guc_max_text_size = 1 << 20; // in bytes (1MB) +static int guc_max_plan_size = 1024; // in KB +static int guc_min_analyze_time = 10000; // in ms static int guc_logging_mode = LOG_MODE_UDS; static bool guc_enable_utility = false; @@ -99,9 +99,9 @@ void Config::init() { DefineCustomIntVariable( "yagpcc.max_text_size", - "Make yagpcc trim query texts longer than configured size", NULL, - &guc_max_text_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); + "Make yagpcc trim query texts longer than configured size in bytes", NULL, + &guc_max_text_size, 1 << 20 /* 1MB */, 0, INT_MAX, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); DefineCustomIntVariable( "yagpcc.max_plan_size", @@ -134,7 +134,7 @@ bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } bool Config::enable_utility() { return guc_enable_utility; } bool Config::report_nested_queries() { return guc_report_nested_queries; } -size_t Config::max_text_size() { return guc_max_text_size * 1024; } +size_t Config::max_text_size() { return guc_max_text_size; } size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } int Config::min_analyze_time() { 
return guc_min_analyze_time; }; int Config::logging_mode() { return guc_logging_mode; } diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index a26f348c344..f28714da6ec 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -24,6 +24,18 @@ extern "C" { #include #include +namespace { +constexpr uint8_t UTF8_CONTINUATION_BYTE_MASK = (1 << 7) | (1 << 6); +constexpr uint8_t UTF8_CONTINUATION_BYTE = (1 << 7); +constexpr uint8_t UTF8_MAX_SYMBOL_BYTES = 4; + +// Returns true if byte is the starting byte of utf8 +// character, false if byte is the continuation (10xxxxxx). +inline bool utf8_start_byte(uint8_t byte) { + return (byte & UTF8_CONTINUATION_BYTE_MASK) != UTF8_CONTINUATION_BYTE; +} +} // namespace + google::protobuf::Timestamp current_ts() { google::protobuf::Timestamp current_ts; struct timeval tv; @@ -46,9 +58,26 @@ void set_segment_key(yagpcc::SegmentKey *key) { key->set_segindex(GpIdentity.segindex); } -inline std::string char_to_trimmed_str(const char *str, size_t len, - size_t lim) { - return std::string(str, std::min(len, lim)); +std::string trim_str_shrink_utf8(const char *str, size_t len, size_t lim) { + if (unlikely(str == nullptr)) { + return std::string(); + } + if (likely(len <= lim || GetDatabaseEncoding() != PG_UTF8)) { + return std::string(str, std::min(len, lim)); + } + + // Handle trimming of utf8 correctly, do not cut multi-byte characters. 
+ size_t cut_pos = lim; + size_t visited_bytes = 1; + while (visited_bytes < UTF8_MAX_SYMBOL_BYTES && cut_pos > 0) { + if (utf8_start_byte(static_cast(str[cut_pos]))) { + break; + } + ++visited_bytes; + --cut_pos; + } + + return std::string(str, cut_pos); } void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { @@ -61,10 +90,10 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); ExplainState es = ya_gpdb::get_explain_state(query_desc, true); if (es.str) { - *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len, - Config::max_plan_size()); + *qi->mutable_plan_text() = trim_str_shrink_utf8(es.str->data, es.str->len, + Config::max_plan_size()); StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = char_to_trimmed_str( + *qi->mutable_template_plan_text() = trim_str_shrink_utf8( norm_plan->data, norm_plan->len, Config::max_plan_size()); qi->set_plan_id( hash_any((unsigned char *)norm_plan->data, norm_plan->len)); @@ -79,11 +108,11 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); - *qi->mutable_query_text() = char_to_trimmed_str( + *qi->mutable_query_text() = trim_str_shrink_utf8( query_desc->sourceText, strlen(query_desc->sourceText), Config::max_text_size()); char *norm_query = ya_gpdb::gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = char_to_trimmed_str( + *qi->mutable_template_query_text() = trim_str_shrink_utf8( norm_query, strlen(norm_query), Config::max_text_size()); } } @@ -122,7 +151,7 @@ void set_qi_slice_id(yagpcc::SetQueryReq *req) { void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg) { auto aqi = req->mutable_add_info(); *aqi->mutable_error_message() 
= - char_to_trimmed_str(err_msg, strlen(err_msg), Config::max_text_size()); + trim_str_shrink_utf8(err_msg, strlen(err_msg), Config::max_text_size()); } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, @@ -242,9 +271,9 @@ void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req) { if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { es.str->data[--es.str->len] = '\0'; } - auto trimmed_analyze = - char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); + auto trimmed_analyze = trim_str_shrink_utf8(es.str->data, es.str->len, + Config::max_plan_size()); req->mutable_query_info()->set_analyze_text(trimmed_analyze); ya_gpdb::pfree(es.str->data); } -} \ No newline at end of file +} From 08c6941423991c32bc43d66754ed51832ce6bc19 Mon Sep 17 00:00:00 2001 From: reshke Date: Tue, 23 Dec 2025 18:58:46 +0500 Subject: [PATCH 109/133] Backport: Properly NULL-terminate GSS receive buffer on error packet reception (#1498) This pr fixes https://www.postgresql.org/support/security/CVE-2022-41862/ in cloudberry https://git.postgresql.org/cgit/postgresql.git/commit/?id=71c37797d7bd78266146a5829ab62b3687c47295 Original commit message: === pqsecure_open_gss() includes a code path handling error messages with v2-style protocol messages coming from the server. The client-side buffer holding the error message does not force a NULL-termination, with the data of the server getting copied to the errorMessage of the connection. Hence, it would be possible for a server to send an unterminated string and copy arbitrary bytes in the buffer receiving the error message in the client, opening the door to a crash or even data exposure. As at this stage of the authentication process the exchange has not been completed yet, this could be abused by an attacker without Kerberos credentials. Clients that have a valid kerberos cache are vulnerable as libpq opportunistically requests for it except if gssencmode is disabled. 
Author: Jacob Champion Backpatch-through: 12 Security: CVE-2022-41862 --- src/interfaces/libpq/fe-secure-gssapi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/interfaces/libpq/fe-secure-gssapi.c b/src/interfaces/libpq/fe-secure-gssapi.c index 7006ed58a12..aeb6e35dbdd 100644 --- a/src/interfaces/libpq/fe-secure-gssapi.c +++ b/src/interfaces/libpq/fe-secure-gssapi.c @@ -585,6 +585,8 @@ pqsecure_open_gss(PGconn *conn) PqGSSRecvLength += ret; + Assert(PqGSSRecvLength < PQ_GSS_RECV_BUFFER_SIZE); + PqGSSRecvBuffer[PqGSSRecvLength] = '\0'; appendPQExpBuffer(&conn->errorMessage, "%s\n", PqGSSRecvBuffer + 1); return PGRES_POLLING_FAILED; From 25e06637da8a9ec04516a23f5a890f288809ef59 Mon Sep 17 00:00:00 2001 From: usernamedt Date: Mon, 13 Feb 2023 15:00:31 +0800 Subject: [PATCH 110/133] Movable DataBase Locales for Cloudberry We inherited this issue from PostgreSQL. PostgreSQL uses glibc to sort strings. In version glibc=2.28, collations broke down badly (in general, there are no guarantees when updating glibc). Changing collations breaks indexes. Similarly, a cluster with different collations also behaves unpredictably. What and when something has changed in glibc can be found on https://github.com/ardentperf/glibc-unicode-sorting Also there is a special PostgreSQL wiki page https://wiki.postgresql.org/wiki/Locale_data_changes And a YouTube video https://www.youtube.com/watch?v=0E6O-V8Jato In short, the issue can be seen through the use of bash: ( echo "1-1"; echo "11" ) | LC_COLLATE=en_US.UTF-8 sort gives different results in Ubuntu 18.04 and 22.04. There is no way to solve the problem other than by not changing the symbol order. We freeze symbol order and use it instead of glibc. Here is the solution https://github.com/postgredients/mdb-locales. In this PR I have added PostgreSQL patch that replaces all glibc locale-related calls with calls to an external library. It activates using new configure parameter --with-mdblocales, which is off by default.
Using custom locales needs libmdblocales1 package and mdb-locales package with symbol table. Build needs libmdblocales-dev package with headers. --- configure | 97 ++++++- configure.ac | 17 ++ .../src/cpp/storage/oper/pax_oper.cc | 5 +- gpcontrib/orafce/others.c | 9 +- .../src/unittest/gpos/string/CWStringTest.cpp | 7 +- src/backend/utils/adt/Makefile | 3 +- src/backend/utils/adt/mdb.c | 37 +++ src/backend/utils/adt/pg_locale.c | 63 ++-- src/backend/utils/mb/mbutils.c | 3 +- src/bin/initdb/initdb.c | 14 +- src/bin/pg_upgrade/check.c | 9 +- src/common/exec.c | 4 +- src/include/catalog/pg_proc.dat | 4 +- src/include/common/mdb_locale.h | 41 +++ src/include/pg_config.h.in | 6 + src/interfaces/ecpg/ecpglib/connect.c | 3 +- src/interfaces/ecpg/ecpglib/descriptor.c | 8 +- src/interfaces/ecpg/ecpglib/execute.c | 7 +- src/interfaces/libpq/Makefile | 2 +- src/pl/plperl/plperl.c | 19 +- src/port/chklocale.c | 10 +- src/test/locale/test-ctype.c | 4 +- src/test/regress/input/misc.source | 5 + src/test/regress/output/misc.source | 7 + src/test/regress/sql/misc.sql | 271 ++++++++++++++++++ 25 files changed, 582 insertions(+), 73 deletions(-) create mode 100644 src/backend/utils/adt/mdb.c create mode 100644 src/include/common/mdb_locale.h create mode 100644 src/test/regress/sql/misc.sql diff --git a/configure b/configure index 49362c1f015..b9371321677 100755 --- a/configure +++ b/configure @@ -698,6 +698,7 @@ BISON MKDIR_P LN_S TAR +USE_MDBLOCALES install_bin INSTALL_DATA INSTALL_SCRIPT @@ -945,6 +946,7 @@ with_rt with_libcurl with_apr_config with_gnu_ld +with_mdblocales with_ssl with_openssl enable_openssl_redirect @@ -1693,6 +1695,7 @@ Optional Packages: --without-libcurl do not use libcurl --with-apr-config=PATH path to apr-1-config utility --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --without-mdblocales build without MDB locales --with-ssl=LIB use LIB for SSL/TLS support (openssl) --with-openssl obsolete spelling of --with-ssl=openssl @@ -2909,7 +2912,6 @@ 
PG_PACKAGE_VERSION=14.4 - ac_aux_dir= for ac_dir in config "$srcdir"/config; do if test -f "$ac_dir/install-sh"; then @@ -12208,6 +12210,38 @@ case $INSTALL in esac +# +# MDB locales +# + + + + +# Check whether --with-mdblocales was given. +if test "${with_mdblocales+set}" = set; then : + withval=$with_mdblocales; + case $withval in + yes) + +$as_echo "#define USE_MDBLOCALES 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-mdblocales option" "$LINENO" 5 + ;; + esac + +else + with_mdblocales=no + +fi + + + + if test -z "$TAR"; then for ac_prog in tar do @@ -12844,6 +12878,56 @@ $as_echo "${python_libspec} ${python_additional_libs}" >&6; } +fi + +if test "$with_mdblocales" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mdb_setlocale in -lmdblocales" >&5 +$as_echo_n "checking for mdb_setlocale in -lmdblocales... " >&6; } +if ${ac_cv_lib_mdblocales_mdb_setlocale+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmdblocales $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char mdb_setlocale (); +int +main () +{ +return mdb_setlocale (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_mdblocales_mdb_setlocale=yes +else + ac_cv_lib_mdblocales_mdb_setlocale=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mdblocales_mdb_setlocale" >&5 +$as_echo "$ac_cv_lib_mdblocales_mdb_setlocale" >&6; } +if test "x$ac_cv_lib_mdblocales_mdb_setlocale" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBMDBLOCALES 1 +_ACEOF + + LIBS="-lmdblocales $LIBS" + +else + as_fn_error $? "mdblocales library not found" "$LINENO" 5 +fi + fi if test x"$cross_compiling" = x"yes" && test -z "$with_system_tzdata"; then @@ -17065,6 +17149,17 @@ fi done +fi + +if test "$with_mdblocales" = yes; then + ac_fn_c_check_header_mongrel "$LINENO" "mdblocales.h" "ac_cv_header_mdblocales_h" "$ac_includes_default" +if test "x$ac_cv_header_mdblocales_h" = xyes; then : + +else + as_fn_error $? "mdblocales header not found." "$LINENO" 5 +fi + + fi if test "$with_gssapi" = yes ; then diff --git a/configure.ac b/configure.ac index 8bfdcedf7f1..246edc4846e 100644 --- a/configure.ac +++ b/configure.ac @@ -1462,6 +1462,14 @@ case $INSTALL in esac AC_SUBST(install_bin) +# +# MDB locales +# + +PGAC_ARG_BOOL(with, mdblocales, yes, [build without MDB locales], + [AC_DEFINE([USE_MDBLOCALES], 1, [Define to 1 to build with MDB locales. (--with-mdblocales)])]) +AC_SUBST(USE_MDBLOCALES) + PGAC_PATH_PROGS(TAR, tar) AC_PROG_LN_S AC_PROG_MKDIR_P @@ -1620,6 +1628,11 @@ failure. It is possible the compiler isn't looking in the proper directory. 
Use --without-zlib to disable zlib support.])]) fi +if test "$with_mdblocales" = yes; then + AC_CHECK_LIB(mdblocales, mdb_setlocale, [], + [AC_MSG_ERROR([mdblocales library not found])]) +fi + if test "$enable_external_fts" = yes; then AC_CHECK_LIB(jansson, jansson_version_str, [], [AC_MSG_ERROR([jansson library not found or version is too old, version must >= 2.13])]) @@ -1999,6 +2012,10 @@ if test "$with_lz4" = yes; then AC_CHECK_HEADERS(lz4.h, [], [AC_MSG_ERROR([lz4.h header file is required for LZ4])]) fi +if test "$with_mdblocales" = yes; then + AC_CHECK_HEADER(mdblocales.h, [], [AC_MSG_ERROR([mdblocales header not found.])]) +fi + if test "$with_gssapi" = yes ; then AC_CHECK_HEADERS(gssapi/gssapi.h, [], [AC_CHECK_HEADERS(gssapi.h, [], [AC_MSG_ERROR([gssapi.h header file is required for GSSAPI])])]) diff --git a/contrib/pax_storage/src/cpp/storage/oper/pax_oper.cc b/contrib/pax_storage/src/cpp/storage/oper/pax_oper.cc index 44d4e49d7f8..d08c7a445b9 100644 --- a/contrib/pax_storage/src/cpp/storage/oper/pax_oper.cc +++ b/contrib/pax_storage/src/cpp/storage/oper/pax_oper.cc @@ -25,6 +25,7 @@ *------------------------------------------------------------------------- */ +#include "common/mdb_locale.h" #include "storage/oper/pax_oper.h" #include "comm/cbdb_wrappers.h" @@ -588,9 +589,9 @@ static inline bool LocaleIsC(Oid collation) { return (bool)result; } - localeptr = setlocale(LC_COLLATE, NULL); + localeptr = SETLOCALE(LC_COLLATE, NULL); CBDB_CHECK(localeptr, cbdb::CException::ExType::kExTypeCError, - fmt("Invalid locale, fail to `setlocale`, errno: %d", errno)); + fmt("Invalid locale, fail to `SETLOCALE`, errno: %d", errno)); if (strcmp(localeptr, "C") == 0 || // cut line strcmp(localeptr, "POSIX") == 0) { diff --git a/gpcontrib/orafce/others.c b/gpcontrib/orafce/others.c index 2fb612efe19..5bf8b650e4c 100644 --- a/gpcontrib/orafce/others.c +++ b/gpcontrib/orafce/others.c @@ -45,6 +45,7 @@ #include "utils/uuid.h" #include "orafce.h" #include "builtins.h" 
+#include "common/mdb_locale.h" /* * Source code for nlssort is taken from postgresql-nls-string @@ -322,7 +323,7 @@ _nls_run_strxfrm(text *string, text *locale) */ if (!lc_collate_cache) { - if ((lc_collate_cache = setlocale(LC_COLLATE, NULL))) + if ((lc_collate_cache = SETLOCALE(LC_COLLATE, NULL))) /* Make a copy of the locale name string. */ #ifdef _MSC_VER lc_collate_cache = _strdup(lc_collate_cache); @@ -364,7 +365,7 @@ _nls_run_strxfrm(text *string, text *locale) * If setlocale failed, we know the default stayed the same, * co we can safely elog. */ - if (!setlocale(LC_COLLATE, locale_str)) + if (!SETLOCALE(LC_COLLATE, locale_str)) elog(ERROR, "failed to set the requested LC_COLLATE value [%s]", locale_str); changed_locale = true; @@ -409,7 +410,7 @@ _nls_run_strxfrm(text *string, text *locale) /* * Set original locale */ - if (!setlocale(LC_COLLATE, lc_collate_cache)) + if (!SETLOCALE(LC_COLLATE, lc_collate_cache)) elog(FATAL, "failed to set back the default LC_COLLATE value [%s]", lc_collate_cache); } @@ -422,7 +423,7 @@ _nls_run_strxfrm(text *string, text *locale) /* * Set original locale */ - if (!setlocale(LC_COLLATE, lc_collate_cache)) + if (!SETLOCALE(LC_COLLATE, lc_collate_cache)) elog(FATAL, "failed to set back the default LC_COLLATE value [%s]", lc_collate_cache); pfree(locale_str); } diff --git a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp index 60bccf59341..bb086954403 100644 --- a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp +++ b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp @@ -12,6 +12,7 @@ #include "unittest/gpos/string/CWStringTest.h" #include +#include "common/mdb_locale.h" #include "gpos/base.h" #include "gpos/error/CAutoTrace.h" @@ -177,18 +178,18 @@ CWStringTest::EresUnittest_AppendFormatInvalidLocale() CWStringDynamic *expected = GPOS_NEW(mp) CWStringDynamic(mp, 
GPOS_WSZ_LIT("UNKNOWN")); - CHAR *oldLocale = setlocale(LC_CTYPE, nullptr); + CHAR *oldLocale = SETLOCALE(LC_CTYPE, nullptr); CWStringDynamic *pstr1 = GPOS_NEW(mp) CWStringDynamic(mp); GPOS_RESULT eres = GPOS_OK; - setlocale(LC_CTYPE, "C"); + SETLOCALE(LC_CTYPE, "C"); pstr1->AppendFormat(GPOS_WSZ_LIT("%s"), (CHAR *) "ÃË", 123); pstr1->Equals(expected); // cleanup - setlocale(LC_CTYPE, oldLocale); + SETLOCALE(LC_CTYPE, oldLocale); GPOS_DELETE(pstr1); GPOS_DELETE(expected); diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index bd5479c546b..58dd15a6f8b 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -117,7 +117,8 @@ OBJS = \ windowfuncs.o \ xid.o \ xid8funcs.o \ - xml.o + xml.o \ + mdb.o jsonpath_scan.c: FLEXFLAGS = -CF -p -p jsonpath_scan.c: FLEX_NO_BACKUP=yes diff --git a/src/backend/utils/adt/mdb.c b/src/backend/utils/adt/mdb.c new file mode 100644 index 00000000000..e5c695de1b6 --- /dev/null +++ b/src/backend/utils/adt/mdb.c @@ -0,0 +1,37 @@ +/*------------------------------------------------------------------------- + * + * mdb.c + * mdb routines + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/mdb.c + * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "fmgr.h" +#include "utils/fmgrprotos.h" + +/* + * mdb_locale_enabled + * Check that mdb locale patch is enabled + */ +Datum +mdb_locale_enabled(PG_FUNCTION_ARGS) +{ + bool res; + +#if USE_MDBLOCALES + res = true; +#else + res = false; +#endif + + PG_RETURN_BOOL(res); +} diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 11392891538..a9acb875eee 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -66,6 +66,7 @@ #include "utils/memutils.h" #include 
"utils/pg_locale.h" #include "utils/syscache.h" +#include "common/mdb_locale.h" #ifdef USE_ICU #include @@ -147,7 +148,7 @@ pg_perm_setlocale(int category, const char *locale) const char *envvar; #ifndef WIN32 - result = setlocale(category, locale); + result = SETLOCALE(category, locale); #else /* @@ -165,7 +166,7 @@ pg_perm_setlocale(int category, const char *locale) } else #endif - result = setlocale(category, locale); + result = SETLOCALE(category, locale); #endif /* WIN32 */ if (result == NULL) @@ -252,7 +253,7 @@ check_locale(int category, const char *locale, char **canonname) if (canonname) *canonname = NULL; /* in case of failure */ - save = setlocale(category, NULL); + save = SETLOCALE(category, NULL); if (!save) return false; /* won't happen, we hope */ @@ -260,14 +261,14 @@ check_locale(int category, const char *locale, char **canonname) save = pstrdup(save); /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); + res = SETLOCALE(category, locale); /* save canonical name if requested. */ if (res && canonname) *canonname = pstrdup(res); /* restore old value. 
*/ - if (!setlocale(category, save)) + if (!SETLOCALE(category, save)) elog(WARNING, "failed to restore old locale \"%s\"", save); pfree(save); @@ -501,12 +502,12 @@ PGLC_localeconv(void) memset(&worklconv, 0, sizeof(worklconv)); /* Save prevailing values of monetary and numeric locales */ - save_lc_monetary = setlocale(LC_MONETARY, NULL); + save_lc_monetary = SETLOCALE(LC_MONETARY, NULL); if (!save_lc_monetary) elog(ERROR, "setlocale(NULL) failed"); save_lc_monetary = pstrdup(save_lc_monetary); - save_lc_numeric = setlocale(LC_NUMERIC, NULL); + save_lc_numeric = SETLOCALE(LC_NUMERIC, NULL); if (!save_lc_numeric) elog(ERROR, "setlocale(NULL) failed"); save_lc_numeric = pstrdup(save_lc_numeric); @@ -528,7 +529,7 @@ PGLC_localeconv(void) */ /* Save prevailing value of ctype locale */ - save_lc_ctype = setlocale(LC_CTYPE, NULL); + save_lc_ctype = SETLOCALE(LC_CTYPE, NULL); if (!save_lc_ctype) elog(ERROR, "setlocale(NULL) failed"); save_lc_ctype = pstrdup(save_lc_ctype); @@ -536,11 +537,11 @@ PGLC_localeconv(void) /* Here begins the critical section where we must not throw error */ /* use numeric to set the ctype */ - setlocale(LC_CTYPE, locale_numeric); + SETLOCALE(LC_CTYPE, locale_numeric); #endif /* Get formatting information for numeric */ - setlocale(LC_NUMERIC, locale_numeric); + SETLOCALE(LC_NUMERIC, locale_numeric); extlconv = localeconv(); /* Must copy data now in case setlocale() overwrites it */ @@ -550,11 +551,11 @@ PGLC_localeconv(void) #ifdef WIN32 /* use monetary to set the ctype */ - setlocale(LC_CTYPE, locale_monetary); + SETLOCALE(LC_CTYPE, locale_monetary); #endif /* Get formatting information for monetary */ - setlocale(LC_MONETARY, locale_monetary); + SETLOCALE(LC_MONETARY, locale_monetary); extlconv = localeconv(); /* Must copy data now in case setlocale() overwrites it */ @@ -584,12 +585,12 @@ PGLC_localeconv(void) * should fail. 
*/ #ifdef WIN32 - if (!setlocale(LC_CTYPE, save_lc_ctype)) + if (!SETLOCALE(LC_CTYPE, save_lc_ctype)) elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype); #endif - if (!setlocale(LC_MONETARY, save_lc_monetary)) + if (!SETLOCALE(LC_MONETARY, save_lc_monetary)) elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary); - if (!setlocale(LC_NUMERIC, save_lc_numeric)) + if (!SETLOCALE(LC_NUMERIC, save_lc_numeric)) elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric); /* @@ -773,7 +774,7 @@ cache_locale_time(void) */ /* Save prevailing value of time locale */ - save_lc_time = setlocale(LC_TIME, NULL); + save_lc_time = SETLOCALE(LC_TIME, NULL); if (!save_lc_time) elog(ERROR, "setlocale(NULL) failed"); save_lc_time = pstrdup(save_lc_time); @@ -788,16 +789,16 @@ cache_locale_time(void) */ /* Save prevailing value of ctype locale */ - save_lc_ctype = setlocale(LC_CTYPE, NULL); + save_lc_ctype = SETLOCALE(LC_CTYPE, NULL); if (!save_lc_ctype) elog(ERROR, "setlocale(NULL) failed"); save_lc_ctype = pstrdup(save_lc_ctype); /* use lc_time to set the ctype */ - setlocale(LC_CTYPE, locale_time); + SETLOCALE(LC_CTYPE, locale_time); #endif - setlocale(LC_TIME, locale_time); + SETLOCALE(LC_TIME, locale_time); /* We use times close to current time as data for strftime(). */ timenow = time(NULL); @@ -846,10 +847,10 @@ cache_locale_time(void) * failure to do so is fatal. 
*/ #ifdef WIN32 - if (!setlocale(LC_CTYPE, save_lc_ctype)) + if (!SETLOCALE(LC_CTYPE, save_lc_ctype)) elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype); #endif - if (!setlocale(LC_TIME, save_lc_time)) + if (!SETLOCALE(LC_TIME, save_lc_time)) elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time); /* @@ -1225,7 +1226,7 @@ check_strxfrm_bug(void) ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length", - setlocale(LC_COLLATE, NULL)), + SETLOCALE(LC_COLLATE, NULL)), errhint("Apply system library package updates."))); } @@ -1339,7 +1340,7 @@ lc_collate_is_c(Oid collation) if (result >= 0) return (bool) result; - localeptr = setlocale(LC_COLLATE, NULL); + localeptr = SETLOCALE(LC_COLLATE, NULL); if (!localeptr) elog(ERROR, "invalid LC_COLLATE setting"); @@ -1389,7 +1390,7 @@ lc_ctype_is_c(Oid collation) if (result >= 0) return (bool) result; - localeptr = setlocale(LC_CTYPE, NULL); + localeptr = SETLOCALE(LC_CTYPE, NULL); if (!localeptr) elog(ERROR, "invalid LC_CTYPE setting"); @@ -1518,8 +1519,10 @@ pg_newlocale_from_collation(Oid collid) /* Normal case where they're the same */ errno = 0; #ifndef WIN32 - loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, + + loc = NEWLOCALE(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, NULL); + #else loc = _create_locale(LC_ALL, collcollate); #endif @@ -1533,11 +1536,11 @@ pg_newlocale_from_collation(Oid collid) locale_t loc1; errno = 0; - loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); + loc1 = NEWLOCALE(LC_COLLATE_MASK, collcollate, NULL); if (!loc1) report_newlocale_failure(collcollate); errno = 0; - loc = newlocale(LC_CTYPE_MASK, collctype, loc1); + loc = NEWLOCALE(LC_CTYPE_MASK, collctype, loc1); if (!loc) report_newlocale_failure(collctype); #else @@ -1680,12 +1683,16 @@ get_collation_actual_version(char collprovider, const char *collcollate) { #if defined(__GLIBC__) /* Use the glibc version because we don't 
have anything better. */ +#ifdef USE_MDBLOCALES + collversion = pstrdup(mdb_localesversion()); +#else collversion = pstrdup(gnu_get_libc_version()); +#endif #elif defined(LC_VERSION_MASK) locale_t loc; /* Look up FreeBSD collation version. */ - loc = newlocale(LC_COLLATE, collcollate, NULL); + loc = NEWLOCALE(LC_COLLATE, collcollate, NULL); if (loc) { collversion = diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 29287088ecf..952d1474870 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -40,6 +40,7 @@ #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/syscache.h" +#include "common/mdb_locale.h" /* * We maintain a simple linked list caching the fmgr lookup info for the @@ -1308,7 +1309,7 @@ pg_bind_textdomain_codeset(const char *domainname) int new_msgenc; #ifndef WIN32 - const char *ctype = setlocale(LC_CTYPE, NULL); + const char *ctype = SETLOCALE(LC_CTYPE, NULL); if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0) #endif diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 4ed9869a2c9..708cf77ffdf 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -75,6 +75,7 @@ #include "getopt_long.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "common/mdb_locale.h" #include "catalog/catalog.h" @@ -2274,12 +2275,13 @@ locale_date_order(const char *locale) result = DATEORDER_MDY; /* default */ - save = setlocale(LC_TIME, NULL); + save = SETLOCALE(LC_TIME, NULL); + if (!save) return result; save = pg_strdup(save); - setlocale(LC_TIME, locale); + SETLOCALE(LC_TIME, locale); memset(&testtime, 0, sizeof(testtime)); testtime.tm_mday = 22; @@ -2288,7 +2290,7 @@ locale_date_order(const char *locale) res = my_strftime(buf, sizeof(buf), "%x", &testtime); - setlocale(LC_TIME, save); + SETLOCALE(LC_TIME, save); free(save); if (res == 0) @@ -2332,7 +2334,7 @@ check_locale_name(int category, const char *locale, char **canonname) if 
(canonname) *canonname = NULL; /* in case of failure */ - save = setlocale(category, NULL); + save = SETLOCALE(category, NULL); if (!save) { pg_log_error("setlocale() failed"); @@ -2347,14 +2349,14 @@ check_locale_name(int category, const char *locale, char **canonname) locale = ""; /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); + res = SETLOCALE(category, locale); /* save canonical name if requested. */ if (res && canonname) *canonname = pg_strdup(res); /* restore old value. */ - if (!setlocale(category, save)) + if (!SETLOCALE(category, save)) { pg_log_error("failed to restore old locale \"%s\"", save); exit(1); diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index d0905f3d588..1859443ed87 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -16,6 +16,8 @@ #include "mb/pg_wchar.h" #include "pg_upgrade.h" #include "greenplum/pg_upgrade_greenplum.h" +#include "common/mdb_locale.h" + static void check_new_cluster_is_empty(void); static void check_databases_are_compatible(void); @@ -1629,7 +1631,8 @@ get_canonical_locale_name(int category, const char *locale) char *res; /* get the current setting, so we can restore it. */ - save = setlocale(category, NULL); + + save = SETLOCALE(category, NULL); if (!save) pg_fatal("failed to get the current locale\n"); @@ -1637,7 +1640,7 @@ get_canonical_locale_name(int category, const char *locale) save = (char *) pg_strdup(save); /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); + res = SETLOCALE(category, locale); if (!res) pg_fatal("failed to get system locale name for \"%s\"\n", locale); @@ -1645,7 +1648,7 @@ get_canonical_locale_name(int category, const char *locale) res = pg_strdup(res); /* restore old value. 
*/ - if (!setlocale(category, save)) + if (!SETLOCALE(category, save)) pg_fatal("failed to restore old locale \"%s\"\n", save); pg_free(save); diff --git a/src/common/exec.c b/src/common/exec.c index 7dd2f8c4942..5159b616a39 100644 --- a/src/common/exec.c +++ b/src/common/exec.c @@ -24,6 +24,8 @@ #include #include #include +#include "common/mdb_locale.h" + /* Inhibit mingw CRT's auto-globbing of command line arguments */ #if defined(WIN32) && !defined(_MSC_VER) @@ -443,7 +445,7 @@ set_pglocale_pgservice(const char *argv0, const char *app) /* don't set LC_ALL in the backend */ if (strcmp(app, PG_TEXTDOMAIN("postgres")) != 0) { - setlocale(LC_ALL, ""); + SETLOCALE(LC_ALL, ""); /* * One could make a case for reproducing here PostmasterMain()'s test diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index a47b1ef1615..1093fa948b8 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11758,7 +11758,9 @@ # # GPDB ADDITIONS START HERE # - +{ oid => '16383', descr => 'contains', + proname => 'mdb_locale_enabled', prorettype => 'bool', + proargtypes => '', prosrc => 'mdb_locale_enabled' }, { oid => '7178', descr => 'for use by pg_upgrade', proname => 'binary_upgrade_set_preassigned_oids', provolatile => 'v', proparallel => 'u', prorettype => 'void', proargtypes => '_oid', diff --git a/src/include/common/mdb_locale.h b/src/include/common/mdb_locale.h new file mode 100644 index 00000000000..91d8656c2c2 --- /dev/null +++ b/src/include/common/mdb_locale.h @@ -0,0 +1,41 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * mdb_locale.h + * Generic headers for custom MDB-locales patch. + * + * IDENTIFICATION + * src/include/common/mdb_locale.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PG_MDB_LOCALE_H +#define PG_MDB_LOCALE_H + +#ifdef USE_MDBLOCALES +#include <mdblocales.h> +#define SETLOCALE(category, locale) mdb_setlocale(category, locale) +#define NEWLOCALE(category, locale, base) mdb_newlocale(category, locale, base) +#else +#define SETLOCALE(category, locale) setlocale(category, locale) +#define NEWLOCALE(category, locale, base) newlocale(category, locale, base) +#endif + +#endif /* PG_MDB_LOCALE_H */ diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index aaa3ea32e8a..54de6844f58 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -392,6 +392,9 @@ /* Define to 1 if you have the `m' library (-lm). */ #undef HAVE_LIBM +/* Define to 1 if you have the `mdblocales' library (-lmdblocales). */ +#undef HAVE_LIBMDBLOCALES + /* Define to 1 if you have the `numa' library (-lnuma). */ #undef HAVE_LIBNUMA @@ -1041,6 +1044,9 @@ /* Define to 1 to build with LZ4 support. (--with-lz4) */ #undef USE_LZ4 +/* Define to 1 to build with MDB locales. 
(--with-mdblocales) */ +#undef USE_MDBLOCALES + /* Define to 1 to build with Mapreduce capabilities (--enable-mapreduce) */ #undef USE_MAPREDUCE diff --git a/src/interfaces/ecpg/ecpglib/connect.c b/src/interfaces/ecpg/ecpglib/connect.c index 056940cb252..f4d2da9173a 100644 --- a/src/interfaces/ecpg/ecpglib/connect.c +++ b/src/interfaces/ecpg/ecpglib/connect.c @@ -9,6 +9,7 @@ #include "ecpglib_extern.h" #include "ecpgtype.h" #include "sqlca.h" +#include "common/mdb_locale.h" #ifdef HAVE_USELOCALE locale_t ecpg_clocale = (locale_t) 0; @@ -517,7 +518,7 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p #ifdef HAVE_USELOCALE if (!ecpg_clocale) { - ecpg_clocale = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0); + ecpg_clocale = NEWLOCALE(LC_NUMERIC_MASK, "C", (locale_t) 0); if (!ecpg_clocale) { #ifdef ENABLE_THREAD_SAFETY diff --git a/src/interfaces/ecpg/ecpglib/descriptor.c b/src/interfaces/ecpg/ecpglib/descriptor.c index f1898dec6a6..2238febbbdd 100644 --- a/src/interfaces/ecpg/ecpglib/descriptor.c +++ b/src/interfaces/ecpg/ecpglib/descriptor.c @@ -15,6 +15,8 @@ #include "sql3types.h" #include "sqlca.h" #include "sqlda.h" +#include "common/mdb_locale.h" + static void descriptor_free(struct descriptor *desc); @@ -500,8 +502,8 @@ ECPGget_desc(int lineno, const char *desc_name, int index,...) #ifdef HAVE__CONFIGTHREADLOCALE stmt.oldthreadlocale = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); #endif - stmt.oldlocale = ecpg_strdup(setlocale(LC_NUMERIC, NULL), lineno); - setlocale(LC_NUMERIC, "C"); + stmt.oldlocale = ecpg_strdup(SETLOCALE(LC_NUMERIC, NULL), lineno); + SETLOCALE(LC_NUMERIC, "C"); #endif /* desperate try to guess something sensible */ @@ -514,7 +516,7 @@ ECPGget_desc(int lineno, const char *desc_name, int index,...) 
#else if (stmt.oldlocale) { - setlocale(LC_NUMERIC, stmt.oldlocale); + SETLOCALE(LC_NUMERIC, stmt.oldlocale); ecpg_free(stmt.oldlocale); } #ifdef HAVE__CONFIGTHREADLOCALE diff --git a/src/interfaces/ecpg/ecpglib/execute.c b/src/interfaces/ecpg/ecpglib/execute.c index e8e8fb2b2c3..eafdd8e421a 100644 --- a/src/interfaces/ecpg/ecpglib/execute.c +++ b/src/interfaces/ecpg/ecpglib/execute.c @@ -31,6 +31,7 @@ #include "sqlca.h" #include "sqlda-compat.h" #include "sqlda-native.h" +#include "common/mdb_locale.h" /* * This function returns a newly malloced string that has ' and \ @@ -2002,13 +2003,13 @@ ecpg_do_prologue(int lineno, const int compat, const int force_indicator, #ifdef HAVE__CONFIGTHREADLOCALE stmt->oldthreadlocale = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); #endif - stmt->oldlocale = ecpg_strdup(setlocale(LC_NUMERIC, NULL), lineno); + stmt->oldlocale = ecpg_strdup(SETLOCALE(LC_NUMERIC, NULL), lineno); if (stmt->oldlocale == NULL) { ecpg_do_epilogue(stmt); return false; } - setlocale(LC_NUMERIC, "C"); + SETLOCALE(LC_NUMERIC, "C"); #endif /* @@ -2222,7 +2223,7 @@ ecpg_do_epilogue(struct statement *stmt) uselocale(stmt->oldlocale); #else if (stmt->oldlocale) - setlocale(LC_NUMERIC, stmt->oldlocale); + SETLOCALE(LC_NUMERIC, stmt->oldlocale); #ifdef HAVE__CONFIGTHREADLOCALE /* diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile index 43682574b23..ed3df424ae4 100644 --- a/src/interfaces/libpq/Makefile +++ b/src/interfaces/libpq/Makefile @@ -83,7 +83,7 @@ endif # that are built correctly for use in a shlib. 
SHLIB_LINK_INTERNAL = -lpgcommon_shlib -lpgport_shlib ifneq ($(PORTNAME), win32) -SHLIB_LINK += $(filter -lcrypt -ldes -lcom_err -lcrypto -lk5crypto -lkrb5 -lgssapi_krb5 -lgss -lgssapi -lssl -lsocket -lnsl -lresolv -lintl -lm, $(LIBS)) $(LDAP_LIBS_FE) $(PTHREAD_LIBS) +SHLIB_LINK += $(filter -lcrypt -ldes -lcom_err -lcrypto -lk5crypto -lkrb5 -lgssapi_krb5 -lgss -lgssapi -lssl -lsocket -lnsl -lresolv -lintl -lm -lmdblocales, $(LIBS)) $(LDAP_LIBS_FE) $(PTHREAD_LIBS) else SHLIB_LINK += $(filter -lcrypt -ldes -lcom_err -lcrypto -lk5crypto -lkrb5 -lgssapi32 -lssl -lsocket -lnsl -lresolv -lintl -lm $(PTHREAD_LIBS), $(LIBS)) $(LDAP_LIBS_FE) endif diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index 48591e48429..3aff8e95450 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -38,6 +38,7 @@ #include "utils/rel.h" #include "utils/syscache.h" #include "utils/typcache.h" +#include "common/mdb_locale.h" /* define our text domain for translations */ #undef TEXTDOMAIN @@ -743,15 +744,15 @@ plperl_init_interp(void) *save_numeric, *save_time; - loc = setlocale(LC_COLLATE, NULL); + loc = SETLOCALE(LC_COLLATE, NULL); save_collate = loc ? pstrdup(loc) : NULL; - loc = setlocale(LC_CTYPE, NULL); + loc = SETLOCALE(LC_CTYPE, NULL); save_ctype = loc ? pstrdup(loc) : NULL; - loc = setlocale(LC_MONETARY, NULL); + loc = SETLOCALE(LC_MONETARY, NULL); save_monetary = loc ? pstrdup(loc) : NULL; - loc = setlocale(LC_NUMERIC, NULL); + loc = SETLOCALE(LC_NUMERIC, NULL); save_numeric = loc ? pstrdup(loc) : NULL; - loc = setlocale(LC_TIME, NULL); + loc = SETLOCALE(LC_TIME, NULL); save_time = loc ? 
pstrdup(loc) : NULL; #define PLPERL_RESTORE_LOCALE(name, saved) \ @@ -4167,7 +4168,7 @@ static char * setlocale_perl(int category, char *locale) { dTHX; - char *RETVAL = setlocale(category, locale); + char *RETVAL = SETLOCALE(category, locale); if (RETVAL) { @@ -4182,7 +4183,7 @@ setlocale_perl(int category, char *locale) #ifdef LC_ALL if (category == LC_ALL) - newctype = setlocale(LC_CTYPE, NULL); + newctype = SETLOCALE(LC_CTYPE, NULL); else #endif newctype = RETVAL; @@ -4200,7 +4201,7 @@ setlocale_perl(int category, char *locale) #ifdef LC_ALL if (category == LC_ALL) - newcoll = setlocale(LC_COLLATE, NULL); + newcoll = SETLOCALE(LC_COLLATE, NULL); else #endif newcoll = RETVAL; @@ -4219,7 +4220,7 @@ setlocale_perl(int category, char *locale) #ifdef LC_ALL if (category == LC_ALL) - newnum = setlocale(LC_NUMERIC, NULL); + newnum = SETLOCALE(LC_NUMERIC, NULL); else #endif newnum = RETVAL; diff --git a/src/port/chklocale.c b/src/port/chklocale.c index 3d47d37eae4..2dae78e74e9 100644 --- a/src/port/chklocale.c +++ b/src/port/chklocale.c @@ -18,6 +18,8 @@ #else #include "postgres_fe.h" #endif +#include "common/mdb_locale.h" + #ifdef HAVE_LANGINFO_H #include <langinfo.h> @@ -343,7 +345,7 @@ pg_get_encoding_from_locale(const char *ctype, bool write_message) pg_strcasecmp(ctype, "POSIX") == 0) return PG_SQL_ASCII; - save = setlocale(LC_CTYPE, NULL); + save = SETLOCALE(LC_CTYPE, NULL); if (!save) return -1; /* setlocale() broken? */ /* must copy result, or it might change after setlocale */ @@ -351,7 +353,7 @@ pg_get_encoding_from_locale(const char *ctype, bool write_message) if (!save) return -1; /* out of memory; unlikely */ - name = setlocale(LC_CTYPE, ctype); + name = SETLOCALE(LC_CTYPE, ctype); if (!name) { free(save); @@ -366,13 +368,13 @@ pg_get_encoding_from_locale(const char *ctype, bool write_message) sys = win32_langinfo(name); #endif - setlocale(LC_CTYPE, save); + SETLOCALE(LC_CTYPE, save); free(save); } else { /* much easier... 
*/ - ctype = setlocale(LC_CTYPE, NULL); + ctype = SETLOCALE(LC_CTYPE, NULL); if (!ctype) return -1; /* setlocale() broken? */ diff --git a/src/test/locale/test-ctype.c b/src/test/locale/test-ctype.c index a3f896c5ecb..10c2b49cb92 100644 --- a/src/test/locale/test-ctype.c +++ b/src/test/locale/test-ctype.c @@ -23,6 +23,8 @@ the author shall be liable for any damage, etc. #include #include #include +#include "common/mdb_locale.h" + char *flag(int b); void describe_char(int c); @@ -62,7 +64,7 @@ main() short c; char *cur_locale; - cur_locale = setlocale(LC_ALL, ""); + cur_locale = SETLOCALE(LC_ALL, ""); if (cur_locale) fprintf(stderr, "Successfully set locale to \"%s\"\n", cur_locale); else diff --git a/src/test/regress/input/misc.source b/src/test/regress/input/misc.source index 331499a2aba..2abe2c82eb8 100644 --- a/src/test/regress/input/misc.source +++ b/src/test/regress/input/misc.source @@ -264,3 +264,8 @@ SELECT *, (equipment(CAST((h.*) AS hobbies_r))).name FROM hobbies_r h; -- -- rewrite rules -- + + +--- mdb-related + +SELECT mdb_locale_enabled(); diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source index 18bcc227f0a..a0c63418446 100644 --- a/src/test/regress/output/misc.source +++ b/src/test/regress/output/misc.source @@ -609,3 +609,10 @@ CONTEXT: SQL function "equipment" during startup -- -- rewrite rules -- +--- mdb-related +SELECT mdb_locale_enabled(); + mdb_locale_enabled +-------------------- + t +(1 row) + diff --git a/src/test/regress/sql/misc.sql b/src/test/regress/sql/misc.sql new file mode 100644 index 00000000000..5c42672c4f7 --- /dev/null +++ b/src/test/regress/sql/misc.sql @@ -0,0 +1,271 @@ +-- +-- MISC +-- + +-- +-- BTREE +-- +--UPDATE onek +-- SET unique1 = onek.unique1 + 1; + +--UPDATE onek +-- SET unique1 = onek.unique1 - 1; + +-- +-- BTREE partial +-- +-- UPDATE onek2 +-- SET unique1 = onek2.unique1 + 1; + +--UPDATE onek2 +-- SET unique1 = onek2.unique1 - 1; + +-- +-- BTREE shutting out non-functional 
updates +-- +-- the following two tests seem to take a long time on some +-- systems. This non-func update stuff needs to be examined +-- more closely. - jolly (2/22/96) +-- +/* GPDB TODO: This test is disabled for now, because when running with ORCA, + you get an error: + ERROR: multiple updates to a row by the same query is not allowed +UPDATE tmp + SET stringu1 = reverse_name(onek.stringu1) + FROM onek + WHERE onek.stringu1 = 'JBAAAA' and + onek.stringu1 = tmp.stringu1; + +UPDATE tmp + SET stringu1 = reverse_name(onek2.stringu1) + FROM onek2 + WHERE onek2.stringu1 = 'JCAAAA' and + onek2.stringu1 = tmp.stringu1; +*/ + +DROP TABLE tmp; + +--UPDATE person* +-- SET age = age + 1; + +--UPDATE person* +-- SET age = age + 3 +-- WHERE name = 'linda'; + +-- +-- copy +-- +COPY onek TO '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/onek.data'; + +DELETE FROM onek; + +COPY onek FROM '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/onek.data'; + +SELECT unique1 FROM onek WHERE unique1 < 2 ORDER BY unique1; + +DELETE FROM onek2; + +COPY onek2 FROM '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/onek.data'; + +SELECT unique1 FROM onek2 WHERE unique1 < 2 ORDER BY unique1; + +COPY BINARY stud_emp TO '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/stud_emp.data'; + +DELETE FROM stud_emp; + +COPY BINARY stud_emp FROM '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/stud_emp.data'; + +SELECT * FROM stud_emp; + +-- COPY aggtest FROM stdin; +-- 56 7.8 +-- 100 99.097 +-- 0 0.09561 +-- 42 324.78 +-- . 
+-- COPY aggtest TO stdout; + + +-- +-- inheritance stress test +-- +SELECT * FROM a_star*; + +SELECT * + FROM b_star* x + WHERE x.b = text 'bumble' or x.a < 3; + +SELECT class, a + FROM c_star* x + WHERE x.c ~ text 'hi'; + +SELECT class, b, c + FROM d_star* x + WHERE x.a < 100; + +SELECT class, c FROM e_star* x WHERE x.c NOTNULL; + +SELECT * FROM f_star* x WHERE x.c ISNULL; + +-- grouping and aggregation on inherited sets have been busted in the past... + +SELECT sum(a) FROM a_star*; + +SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class; + + +ALTER TABLE f_star RENAME COLUMN f TO ff; + +ALTER TABLE e_star* RENAME COLUMN e TO ee; + +ALTER TABLE d_star* RENAME COLUMN d TO dd; + +ALTER TABLE c_star* RENAME COLUMN c TO cc; + +ALTER TABLE b_star* RENAME COLUMN b TO bb; + +ALTER TABLE a_star* RENAME COLUMN a TO aa; + +SELECT class, aa + FROM a_star* x + WHERE aa ISNULL; + +-- As of Postgres 7.1, ALTER implicitly recurses, +-- so this should be same as ALTER a_star* + +ALTER TABLE a_star RENAME COLUMN aa TO foo; + +SELECT class, foo + FROM a_star* x + WHERE x.foo >= 2; + +ALTER TABLE a_star RENAME COLUMN foo TO aa; + +SELECT * + from a_star* + WHERE aa < 1000; + +ALTER TABLE f_star ADD COLUMN f int4; + +UPDATE f_star SET f = 10; + +ALTER TABLE e_star* ADD COLUMN e int4; + +--UPDATE e_star* SET e = 42; + +SELECT * FROM e_star*; + +ALTER TABLE a_star* ADD COLUMN a text; + +-- That ALTER TABLE should have added TOAST tables. +SELECT relname, reltoastrelid <> 0 AS has_toast_table + FROM pg_class + WHERE oid::regclass IN ('a_star', 'c_star') + ORDER BY 1; + +--UPDATE b_star* +-- SET a = text 'gazpacho' +-- WHERE aa > 4; + +SELECT class, aa, a FROM a_star*; + + +-- +-- versions +-- + +-- +-- postquel functions +-- +-- +-- mike does post_hacking, +-- joe and sally play basketball, and +-- everyone else does nothing. +-- +SELECT p.name, name(p.hobbies) FROM ONLY person p; + +-- +-- as above, but jeff also does post_hacking. 
+-- +SELECT p.name, name(p.hobbies) FROM person* p; + +-- +-- the next two queries demonstrate how functions generate bogus duplicates. +-- this is a "feature" .. +-- +SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r + ORDER BY 1,2; + +SELECT hobbies_r.name, (hobbies_r.equipment).name FROM hobbies_r; + +-- +-- mike needs advil and peet's coffee, +-- joe and sally need hightops, and +-- everyone else is fine. +-- +SELECT p.name, name(p.hobbies), name(equipment(p.hobbies)) FROM ONLY person p; + +-- +-- as above, but jeff needs advil and peet's coffee as well. +-- +SELECT p.name, name(p.hobbies), name(equipment(p.hobbies)) FROM person* p; + +-- +-- just like the last two, but make sure that the target list fixup and +-- unflattening is being done correctly. +-- +SELECT name(equipment(p.hobbies)), p.name, name(p.hobbies) FROM ONLY person p; + +SELECT (p.hobbies).equipment.name, p.name, name(p.hobbies) FROM person* p; + +SELECT (p.hobbies).equipment.name, name(p.hobbies), p.name FROM ONLY person p; + +SELECT name(equipment(p.hobbies)), name(p.hobbies), p.name FROM person* p; + +SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer'))); + +SELECT name(equipment(hobby_construct_named(text 'skywalking', text 'mer'))); + +SELECT name(equipment_named(hobby_construct_named(text 'skywalking', text 'mer'))); + +SELECT name(equipment_named_ambiguous_1a(hobby_construct_named(text 'skywalking', text 'mer'))); + +SELECT name(equipment_named_ambiguous_1b(hobby_construct_named(text 'skywalking', text 'mer'))); + +SELECT name(equipment_named_ambiguous_1c(hobby_construct_named(text 'skywalking', text 'mer'))); + +SELECT name(equipment_named_ambiguous_2a(text 'skywalking')); + +SELECT name(equipment_named_ambiguous_2b(text 'skywalking')); + +SELECT hobbies_by_name('basketball'); + +SELECT name, overpaid(emp.*) FROM emp; + +-- +-- Try a few cases with SQL-spec row constructor expressions +-- +SELECT * FROM equipment(ROW('skywalking', 'mer')); + 
+SELECT name(equipment(ROW('skywalking', 'mer'))); + +SELECT *, name(equipment(h.*)) FROM hobbies_r h; + +SELECT *, (equipment(CAST((h.*) AS hobbies_r))).name FROM hobbies_r h; + +-- +-- functional joins +-- + +-- +-- instance rules +-- + +-- +-- rewrite rules +-- + + +--- mdb-related + +SELECT mdb_locale_enabled(); From d4ca192f16fd1ace4618d2ffaaf4d44de5016330 Mon Sep 17 00:00:00 2001 From: reshke Date: Fri, 19 Sep 2025 18:13:31 +0500 Subject: [PATCH 111/133] Extend multixact SLRU (#3) --- src/include/access/multixact.h | 4 ++-- src/include/access/subtrans.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 4bbb035eaea..f053a30b009 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -30,8 +30,8 @@ #define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF) /* Number of SLRU buffers to use for multixact */ -#define NUM_MULTIXACTOFFSET_BUFFERS 8 -#define NUM_MULTIXACTMEMBER_BUFFERS 16 +#define NUM_MULTIXACTOFFSET_BUFFERS 32 +#define NUM_MULTIXACTMEMBER_BUFFERS 64 /* * Possible multixact lock modes ("status"). 
The first four modes are for diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h index 9a54dc0fb3b..73503a26dcc 100644 --- a/src/include/access/subtrans.h +++ b/src/include/access/subtrans.h @@ -12,7 +12,7 @@ #define SUBTRANS_H /* Number of SLRU buffers to use for subtrans */ -#define NUM_SUBTRANS_BUFFERS 32 +#define NUM_SUBTRANS_BUFFERS 64 typedef struct SubTransData { From ca117bbf3a6600a5b78605a332b12af8821e1985 Mon Sep 17 00:00:00 2001 From: reshke Date: Fri, 19 Sep 2025 19:04:10 +0500 Subject: [PATCH 112/133] Delete src/test/regress/sql/misc.sql --- src/test/regress/sql/misc.sql | 271 ---------------------------------- 1 file changed, 271 deletions(-) delete mode 100644 src/test/regress/sql/misc.sql diff --git a/src/test/regress/sql/misc.sql b/src/test/regress/sql/misc.sql deleted file mode 100644 index 5c42672c4f7..00000000000 --- a/src/test/regress/sql/misc.sql +++ /dev/null @@ -1,271 +0,0 @@ --- --- MISC --- - --- --- BTREE --- ---UPDATE onek --- SET unique1 = onek.unique1 + 1; - ---UPDATE onek --- SET unique1 = onek.unique1 - 1; - --- --- BTREE partial --- --- UPDATE onek2 --- SET unique1 = onek2.unique1 + 1; - ---UPDATE onek2 --- SET unique1 = onek2.unique1 - 1; - --- --- BTREE shutting out non-functional updates --- --- the following two tests seem to take a long time on some --- systems. This non-func update stuff needs to be examined --- more closely. 
- jolly (2/22/96) --- -/* GPDB TODO: This test is disabled for now, because when running with ORCA, - you get an error: - ERROR: multiple updates to a row by the same query is not allowed -UPDATE tmp - SET stringu1 = reverse_name(onek.stringu1) - FROM onek - WHERE onek.stringu1 = 'JBAAAA' and - onek.stringu1 = tmp.stringu1; - -UPDATE tmp - SET stringu1 = reverse_name(onek2.stringu1) - FROM onek2 - WHERE onek2.stringu1 = 'JCAAAA' and - onek2.stringu1 = tmp.stringu1; -*/ - -DROP TABLE tmp; - ---UPDATE person* --- SET age = age + 1; - ---UPDATE person* --- SET age = age + 3 --- WHERE name = 'linda'; - --- --- copy --- -COPY onek TO '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/onek.data'; - -DELETE FROM onek; - -COPY onek FROM '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/onek.data'; - -SELECT unique1 FROM onek WHERE unique1 < 2 ORDER BY unique1; - -DELETE FROM onek2; - -COPY onek2 FROM '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/onek.data'; - -SELECT unique1 FROM onek2 WHERE unique1 < 2 ORDER BY unique1; - -COPY BINARY stud_emp TO '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/stud_emp.data'; - -DELETE FROM stud_emp; - -COPY BINARY stud_emp FROM '/home/xifos/git/cloudberry-gpdb/src/test/regress/results/stud_emp.data'; - -SELECT * FROM stud_emp; - --- COPY aggtest FROM stdin; --- 56 7.8 --- 100 99.097 --- 0 0.09561 --- 42 324.78 --- . --- COPY aggtest TO stdout; - - --- --- inheritance stress test --- -SELECT * FROM a_star*; - -SELECT * - FROM b_star* x - WHERE x.b = text 'bumble' or x.a < 3; - -SELECT class, a - FROM c_star* x - WHERE x.c ~ text 'hi'; - -SELECT class, b, c - FROM d_star* x - WHERE x.a < 100; - -SELECT class, c FROM e_star* x WHERE x.c NOTNULL; - -SELECT * FROM f_star* x WHERE x.c ISNULL; - --- grouping and aggregation on inherited sets have been busted in the past... 
- -SELECT sum(a) FROM a_star*; - -SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class; - - -ALTER TABLE f_star RENAME COLUMN f TO ff; - -ALTER TABLE e_star* RENAME COLUMN e TO ee; - -ALTER TABLE d_star* RENAME COLUMN d TO dd; - -ALTER TABLE c_star* RENAME COLUMN c TO cc; - -ALTER TABLE b_star* RENAME COLUMN b TO bb; - -ALTER TABLE a_star* RENAME COLUMN a TO aa; - -SELECT class, aa - FROM a_star* x - WHERE aa ISNULL; - --- As of Postgres 7.1, ALTER implicitly recurses, --- so this should be same as ALTER a_star* - -ALTER TABLE a_star RENAME COLUMN aa TO foo; - -SELECT class, foo - FROM a_star* x - WHERE x.foo >= 2; - -ALTER TABLE a_star RENAME COLUMN foo TO aa; - -SELECT * - from a_star* - WHERE aa < 1000; - -ALTER TABLE f_star ADD COLUMN f int4; - -UPDATE f_star SET f = 10; - -ALTER TABLE e_star* ADD COLUMN e int4; - ---UPDATE e_star* SET e = 42; - -SELECT * FROM e_star*; - -ALTER TABLE a_star* ADD COLUMN a text; - --- That ALTER TABLE should have added TOAST tables. -SELECT relname, reltoastrelid <> 0 AS has_toast_table - FROM pg_class - WHERE oid::regclass IN ('a_star', 'c_star') - ORDER BY 1; - ---UPDATE b_star* --- SET a = text 'gazpacho' --- WHERE aa > 4; - -SELECT class, aa, a FROM a_star*; - - --- --- versions --- - --- --- postquel functions --- --- --- mike does post_hacking, --- joe and sally play basketball, and --- everyone else does nothing. --- -SELECT p.name, name(p.hobbies) FROM ONLY person p; - --- --- as above, but jeff also does post_hacking. --- -SELECT p.name, name(p.hobbies) FROM person* p; - --- --- the next two queries demonstrate how functions generate bogus duplicates. --- this is a "feature" .. --- -SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r - ORDER BY 1,2; - -SELECT hobbies_r.name, (hobbies_r.equipment).name FROM hobbies_r; - --- --- mike needs advil and peet's coffee, --- joe and sally need hightops, and --- everyone else is fine. 
--- -SELECT p.name, name(p.hobbies), name(equipment(p.hobbies)) FROM ONLY person p; - --- --- as above, but jeff needs advil and peet's coffee as well. --- -SELECT p.name, name(p.hobbies), name(equipment(p.hobbies)) FROM person* p; - --- --- just like the last two, but make sure that the target list fixup and --- unflattening is being done correctly. --- -SELECT name(equipment(p.hobbies)), p.name, name(p.hobbies) FROM ONLY person p; - -SELECT (p.hobbies).equipment.name, p.name, name(p.hobbies) FROM person* p; - -SELECT (p.hobbies).equipment.name, name(p.hobbies), p.name FROM ONLY person p; - -SELECT name(equipment(p.hobbies)), name(p.hobbies), p.name FROM person* p; - -SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer'))); - -SELECT name(equipment(hobby_construct_named(text 'skywalking', text 'mer'))); - -SELECT name(equipment_named(hobby_construct_named(text 'skywalking', text 'mer'))); - -SELECT name(equipment_named_ambiguous_1a(hobby_construct_named(text 'skywalking', text 'mer'))); - -SELECT name(equipment_named_ambiguous_1b(hobby_construct_named(text 'skywalking', text 'mer'))); - -SELECT name(equipment_named_ambiguous_1c(hobby_construct_named(text 'skywalking', text 'mer'))); - -SELECT name(equipment_named_ambiguous_2a(text 'skywalking')); - -SELECT name(equipment_named_ambiguous_2b(text 'skywalking')); - -SELECT hobbies_by_name('basketball'); - -SELECT name, overpaid(emp.*) FROM emp; - --- --- Try a few cases with SQL-spec row constructor expressions --- -SELECT * FROM equipment(ROW('skywalking', 'mer')); - -SELECT name(equipment(ROW('skywalking', 'mer'))); - -SELECT *, name(equipment(h.*)) FROM hobbies_r h; - -SELECT *, (equipment(CAST((h.*) AS hobbies_r))).name FROM hobbies_r h; - --- --- functional joins --- - --- --- instance rules --- - --- --- rewrite rules --- - - ---- mdb-related - -SELECT mdb_locale_enabled(); From 29495b41e323619698599a4f4980f29c446d190c Mon Sep 17 00:00:00 2001 From: reshke Date: Fri, 19 Sep 2025 21:47:24 +0500 
Subject: [PATCH 113/133] MDB admin patch & tests (#4) * MDB admin patch & tests This patch introduces a new pseudo-pre-defined role "mdb_admin". It introduces 2 new functions: extern bool mdb_admin_allow_bypass_owner_checks(Oid userId, Oid ownerId); extern void check_mdb_admin_is_member_of_role(Oid member, Oid role); to check mdb_admin membership and role-to-role ownership transfer correctness. Our mdb_admin ACL model is the following: * Any user and/or role can be granted mdb_admin * An mdb_admin member can transfer ownership of relations, namespaces and functions to other roles, if the target role is neither: superuser, pg_read_server_files, pg_write_server_files nor pg_execute_server_program. This patch allows mdb_admin to transfer ownership of non-superuser objects * f --- .../regress/expected/create_function_3.out | 4 +- .../expected/create_function_3_optimizer.out | 4 +- src/backend/catalog/namespace.c | 20 +++- src/backend/commands/alter.c | 8 +- src/backend/commands/functioncmds.c | 20 +++- src/backend/commands/schemacmds.c | 13 +- src/backend/commands/tablecmds.c | 12 +- src/backend/storage/ipc/signalfuncs.c | 28 ++++- src/backend/utils/activity/backend_status.c | 16 +++ src/backend/utils/adt/acl.c | 112 ++++++++++++++++++ src/backend/utils/misc/guc.c | 2 +- src/include/utils/acl.h | 7 ++ src/include/utils/backend_status.h | 3 + src/include/utils/guc_tables.h | 2 + src/test/Makefile | 3 + src/test/mdb_admin/.gitignore | 2 + src/test/mdb_admin/Makefile | 23 ++++ src/test/mdb_admin/t/signals.pl | 74 ++++++++++++ .../regress/expected/create_function_3.out | 4 +- .../expected/create_function_3_optimizer.out | 4 +- src/test/regress/expected/mdb_admin.out | 81 +++++++++++++ src/test/regress/parallel_schedule | 4 + src/test/regress/sql/mdb_admin.sql | 77 ++++++++++++ .../expected/create_function_3.out | 4 +- 24 files changed, 494 insertions(+), 33 deletions(-) create mode 100644 src/test/mdb_admin/.gitignore create mode 100644 src/test/mdb_admin/Makefile create
mode 100644 src/test/mdb_admin/t/signals.pl create mode 100644 src/test/regress/expected/mdb_admin.out create mode 100644 src/test/regress/sql/mdb_admin.sql diff --git a/contrib/pax_storage/src/test/regress/expected/create_function_3.out b/contrib/pax_storage/src/test/regress/expected/create_function_3.out index 8380df1591f..7842a3c1c82 100644 --- a/contrib/pax_storage/src/test/regress/expected/create_function_3.out +++ b/contrib/pax_storage/src/test/regress/expected/create_function_3.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT diff --git a/contrib/pax_storage/src/test/regress/expected/create_function_3_optimizer.out b/contrib/pax_storage/src/test/regress/expected/create_function_3_optimizer.out index 3ae669d518a..3256709e1aa 100644 --- a/contrib/pax_storage/src/test/regress/expected/create_function_3_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/create_function_3_optimizer.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof 
function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index f367b00a675..be09847022b 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -2971,7 +2971,6 @@ LookupExplicitNamespace(const char *nspname, bool missing_ok) { Oid namespaceId; AclResult aclresult; - /* check for pg_temp alias */ if (strcmp(nspname, "pg_temp") == 0) { @@ -2989,7 +2988,24 @@ LookupExplicitNamespace(const char *nspname, bool missing_ok) if (missing_ok && !OidIsValid(namespaceId)) return InvalidOid; - aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_USAGE); + HeapTuple tuple; + Oid ownerId; + + tuple = SearchSysCache1(NAMESPACEOID, ObjectIdGetDatum(namespaceId)); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema with OID %u does not exist", namespaceId))); + + ownerId = ((Form_pg_namespace) GETSTRUCT(tuple))->nspowner; + + ReleaseSysCache(tuple); + + if (!mdb_admin_allow_bypass_owner_checks(GetUserId(), ownerId)) { + aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_USAGE); + } else { + aclresult = ACLCHECK_OK; + } if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, OBJECT_SCHEMA, nspname); diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index f5dfd6ff126..6f370a2c9aa 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -1085,7 +1085,8 @@ AlterObjectOwner_internal(Relation rel, Oid objectId, Oid new_ownerId) if (!superuser()) { /* must be owner */ - if (!has_privs_of_role(GetUserId(), old_ownerId)) + if (!has_privs_of_role(GetUserId(), old_ownerId) + && !mdb_admin_allow_bypass_owner_checks(GetUserId(), old_ownerId)) { char *objname; char namebuf[NAMEDATALEN]; @@ -1105,14 +1106,13 @@ AlterObjectOwner_internal(Relation rel, Oid 
objectId, Oid new_ownerId) aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objectId), objname); } - /* Must be able to become new owner */ - check_is_member_of_role(GetUserId(), new_ownerId); + + check_mdb_admin_is_member_of_role(GetUserId(), new_ownerId); /* New owner must have CREATE privilege on namespace */ if (OidIsValid(namespaceId)) { AclResult aclresult; - aclresult = pg_namespace_aclcheck(namespaceId, new_ownerId, ACL_CREATE); if (aclresult != ACLCHECK_OK) diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c index b99b2419fcc..8a570fa6965 100644 --- a/src/backend/commands/functioncmds.c +++ b/src/backend/commands/functioncmds.c @@ -1525,9 +1525,13 @@ CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt) * by security barrier views or row-level security policies. */ if (isLeakProof && !superuser()) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("only superuser can define a leakproof function"))); + { + Oid role = get_role_oid("mdb_admin", true); + if (!is_member_of_role(GetUserId(), role)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("only superuser or mdb_admin can define a leakproof function"))); + } if (transformDefElem) { @@ -1852,9 +1856,13 @@ AlterFunction(ParseState *pstate, AlterFunctionStmt *stmt) { procForm->proleakproof = intVal(leakproof_item->arg); if (procForm->proleakproof && !superuser()) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("only superuser can define a leakproof function"))); + { + Oid role = get_role_oid("mdb_admin", true); + if (!is_member_of_role(GetUserId(), role)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("only superuser or mdb_admin can define a leakproof function"))); + } } if (cost_item) { diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c index 96757eaa814..03f96bb6499 100644 --- a/src/backend/commands/schemacmds.c +++ 
b/src/backend/commands/schemacmds.c @@ -598,12 +598,12 @@ AlterSchemaOwner_internal(HeapTuple tup, Relation rel, Oid newOwnerId) AclResult aclresult; /* Otherwise, must be owner of the existing object */ - if (!pg_namespace_ownercheck(nspForm->oid, GetUserId())) + if (!mdb_admin_allow_bypass_owner_checks(GetUserId(), nspForm->nspowner) + && !pg_namespace_ownercheck(nspForm->oid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA, NameStr(nspForm->nspname)); - /* Must be able to become new owner */ - check_is_member_of_role(GetUserId(), newOwnerId); + check_mdb_admin_is_member_of_role(GetUserId(), newOwnerId); /* * must have create-schema rights @@ -614,8 +614,13 @@ AlterSchemaOwner_internal(HeapTuple tup, Relation rel, Oid newOwnerId) * schemas. Because superusers will always have this right, we need * no special case for them. */ - aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), + if (mdb_admin_allow_bypass_owner_checks(GetUserId(), nspForm->nspowner)) { + aclresult = ACLCHECK_OK; + } else { + aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE); + } + if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, OBJECT_DATABASE, get_database_name(MyDatabaseId)); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 42e00efe81d..07f00a212b0 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -15704,13 +15704,14 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock AclResult aclresult; /* Otherwise, must be owner of the existing object */ - if (!pg_class_ownercheck(relationOid, GetUserId())) + if (!mdb_admin_allow_bypass_owner_checks(GetUserId(), tuple_class->relowner) + && !pg_class_ownercheck(relationOid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relationOid)), RelationGetRelationName(target_rel)); - /* Must be able to become new owner */ - check_is_member_of_role(GetUserId(), 
newOwnerId); + check_mdb_admin_is_member_of_role(GetUserId(), newOwnerId); + /* New owner must have CREATE privilege on namespace */ aclresult = pg_namespace_aclcheck(namespaceOid, newOwnerId, ACL_CREATE); @@ -20791,7 +20792,7 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, Form_pg_class classform; AclResult aclresult; char relkind; - + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); if (!HeapTupleIsValid(tuple)) return; /* concurrently dropped */ @@ -20799,7 +20800,8 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, relkind = classform->relkind; /* Must own relation. */ - if (!pg_class_ownercheck(relid, GetUserId())) + if (!mdb_admin_allow_bypass_owner_checks(GetUserId(), classform->relowner) + && !pg_class_ownercheck(relid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname); /* No system table modifications unless explicitly allowed. */ diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index 0d5ccaa201d..753b94752d3 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -52,6 +52,7 @@ static int pg_signal_backend(int pid, int sig, char *msg) { PGPROC *proc = BackendPidGetProc(pid); + LocalPgBackendStatus *local_beentry; /* * BackendPidGetProc returns NULL if the pid isn't valid; but by the time @@ -72,9 +73,34 @@ pg_signal_backend(int pid, int sig, char *msg) return SIGNAL_BACKEND_ERROR; } + local_beentry = pgstat_fetch_stat_local_beentry_by_pid(pid); + /* Only allow superusers to signal superuser-owned backends. 
*/ if (superuser_arg(proc->roleId) && !superuser()) - return SIGNAL_BACKEND_NOSUPERUSER; + { + Oid role; + char * appname; + + if (local_beentry == NULL) { + return SIGNAL_BACKEND_NOSUPERUSER; + } + + role = get_role_oid("mdb_admin", true /*if nodoby created mdb_admin role in this database*/); + appname = local_beentry->backendStatus.st_appname; + + // only allow mdb_admin to kill su queries + if (!is_member_of_role(GetUserId(), role)) { + return SIGNAL_BACKEND_NOSUPERUSER; + } + + if (local_beentry->backendStatus.st_backendType == B_AUTOVAC_WORKER) { + // ok + } else if (appname != NULL && strcmp(appname, "MDB") == 0) { + // ok + } else { + return SIGNAL_BACKEND_NOSUPERUSER; + } + } /* Users can signal backends they have role membership in. */ if (!has_privs_of_role(GetUserId(), proc->roleId) && diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c index 9a0918bceff..217483c1c61 100644 --- a/src/backend/utils/activity/backend_status.c +++ b/src/backend/utils/activity/backend_status.c @@ -1102,6 +1102,22 @@ pgstat_fetch_stat_local_beentry(int beid) return &localBackendStatusTable[beid - 1]; } +/* -- mdb admin patch -- */ +LocalPgBackendStatus * +pgstat_fetch_stat_local_beentry_by_pid(int pid) +{ + pgstat_read_current_status(); + + for (int i = 1; i <= localNumBackends; ++i) { + if (localBackendStatusTable[i - 1].backendStatus.st_procpid == pid) { + return &localBackendStatusTable[i - 1]; + } + } + + return NULL; +} + +/* -- mdb admin patch end -- */ /* ---------- * pgstat_fetch_stat_numbackends() - diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c index 714a536e93d..fc566a575f4 100644 --- a/src/backend/utils/adt/acl.c +++ b/src/backend/utils/adt/acl.c @@ -5012,6 +5012,60 @@ has_privs_of_role(Oid member, Oid role) } +// -- non-upstream patch begin +/* + * Is userId allowed to bypass ownership check + * and tranfer onwership to ownerId role? 
+ */ +bool +mdb_admin_allow_bypass_owner_checks(Oid userId, Oid ownerId) +{ + Oid mdb_admin_roleoid; + /* + * Never allow nobody to grant objects to + * superusers. + * This can result in various CVE. + * For paranoic reasons, check this even before + * membership of mdb_admin role. + */ + if (superuser_arg(ownerId)) { + return false; + } + + mdb_admin_roleoid = get_role_oid("mdb_admin", true /* superuser suggested to be mdb_admin*/); + /* Is userId actually member of mdb admin? */ + if (!is_member_of_role(userId, mdb_admin_roleoid)) { + /* if no, disallow. */ + return false; + } + + /* + * Now, we need to check if ownerId + * is some dangerous role to trasfer membership to. + * + * For now, we check that ownerId does not have + * priviledge to execute server program or/and + * read/write server files. + */ + + if (has_privs_of_role(ownerId, ROLE_PG_READ_SERVER_FILES)) { + return false; + } + + if (has_privs_of_role(ownerId, ROLE_PG_WRITE_SERVER_FILES)) { + return false; + } + + if (has_privs_of_role(ownerId, ROLE_PG_EXECUTE_SERVER_PROGRAM)) { + return false; + } + + /* All checks passed, hope will not be hacked here (again) */ + return true; +} + +// -- non-upstream patch end + /* * Is member a member of role (directly or indirectly)? * @@ -5051,6 +5105,64 @@ check_is_member_of_role(Oid member, Oid role) GetUserNameFromId(role, false)))); } +// -- mdb admin patch +/* + * check_mdb_admin_is_member_of_role + * is_member_of_role with a standard permission-violation error if not in usual case + * Is case `member` in mdb_admin we check that role is neither of superuser, pg_read/write + * server files nor pg_execute_server_program + */ +void +check_mdb_admin_is_member_of_role(Oid member, Oid role) +{ + Oid mdb_admin_roleoid; + /* fast path - if we are superuser, its ok */ + if (superuser_arg(member)) { + return; + } + + mdb_admin_roleoid = get_role_oid("mdb_admin", true /* superuser suggested to be mdb_admin*/); + /* Is userId actually member of mdb admin? 
*/ + if (is_member_of_role(member, mdb_admin_roleoid)) { + /* role is mdb admin */ + if (superuser_arg(role)) { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot transfer ownership to superuser \"%s\"", + GetUserNameFromId(role, false)))); + } + + if (has_privs_of_role(role, ROLE_PG_READ_SERVER_FILES)) { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot transfer ownership to pg_read_server_files role in Cloud"))); + } + + if (has_privs_of_role(role, ROLE_PG_WRITE_SERVER_FILES)) { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot transfer ownership to pg_write_server_files role in Cloud"))); + } + + if (has_privs_of_role(role, ROLE_PG_EXECUTE_SERVER_PROGRAM)) { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot transfer ownership to pg_execute_server_program role in Cloud"))); + } + } else { + /* if no, check membership transfer in usual way. */ + + if (!is_member_of_role(member, role)) { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be member of role \"%s\"", + GetUserNameFromId(role, false)))); + } + } +} + +// -- mdb admin patch + /* * Is member a member of role, not considering superuserness? 
* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index fb09180ebe9..30e536b84d4 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -4928,7 +4928,7 @@ static struct config_enum ConfigureNamesEnum[] = { {"session_replication_role", PGC_SUSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the session's behavior for triggers and rewrite rules."), - NULL + NULL, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, 0, true, }, &SessionReplicationRole, SESSION_REPLICATION_ROLE_ORIGIN, session_replication_role_options, diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h index 223175099bd..271ac942f6f 100644 --- a/src/include/utils/acl.h +++ b/src/include/utils/acl.h @@ -210,6 +210,13 @@ extern bool has_privs_of_role(Oid member, Oid role); extern bool is_member_of_role(Oid member, Oid role); extern bool is_member_of_role_nosuper(Oid member, Oid role); extern bool is_admin_of_role(Oid member, Oid role); + +// -- non-upstream patch begin +extern bool mdb_admin_allow_bypass_owner_checks(Oid userId, Oid ownerId); + +extern void check_mdb_admin_is_member_of_role(Oid member, Oid role); +// -- non-upstream patch end + extern void check_is_member_of_role(Oid member, Oid role); extern Oid get_role_oid(const char *rolename, bool missing_ok); extern Oid get_role_oid_or_public(const char *rolename); diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h index 139b7355d13..139646d4a40 100644 --- a/src/include/utils/backend_status.h +++ b/src/include/utils/backend_status.h @@ -319,6 +319,9 @@ extern uint64 pgstat_get_my_query_id(void); extern int pgstat_fetch_stat_numbackends(void); extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid); extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid); +/* -- mdb admin patch -- */ +extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry_by_pid(int pid); +/* -- mdb admin patch end -- */ extern char *pgstat_clip_activity(const char 
*raw_activity); diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h index 17d2a166b09..08584e4db54 100644 --- a/src/include/utils/guc_tables.h +++ b/src/include/utils/guc_tables.h @@ -204,6 +204,8 @@ struct config_generic char *sourcefile; /* file current setting is from (NULL if not * set in config file) */ int sourceline; /* line in source file */ + + bool mdb_admin_allowed; /* is mdb admin allowed to change this, makes sence only for superuser/not superuser ctx */ }; /* bit values in status field */ diff --git a/src/test/Makefile b/src/test/Makefile index d84edb282df..150c4e97b73 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -18,6 +18,9 @@ SUBDIRS = perl regress isolation modules authentication recovery SUBDIRS += fsync walrep heap_checksum isolation2 fdw singlenode_regress singlenode_isolation2 +# MDB addon +SUBDIRS += mdb_admin + # Test suites that are not safe by default but can be run if selected # by the user via the whitespace-separated list in variable # PG_TEST_EXTRA: diff --git a/src/test/mdb_admin/.gitignore b/src/test/mdb_admin/.gitignore new file mode 100644 index 00000000000..871e943d50e --- /dev/null +++ b/src/test/mdb_admin/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/mdb_admin/Makefile b/src/test/mdb_admin/Makefile new file mode 100644 index 00000000000..e4e82367da9 --- /dev/null +++ b/src/test/mdb_admin/Makefile @@ -0,0 +1,23 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/mdb_admin +# +# Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/test/mdb_admin/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/test/mdb_admin +top_builddir = ../../.. 
+include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/mdb_admin/t/signals.pl b/src/test/mdb_admin/t/signals.pl new file mode 100644 index 00000000000..a11db27a527 --- /dev/null +++ b/src/test/mdb_admin/t/signals.pl @@ -0,0 +1,74 @@ + +# Copyright (c) 2024-2024, MDB, Mother Russia + +# Minimal test testing streaming replication +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(); +$node_primary->start; + +# Create some content on primary and check its presence in standby nodes +$node_primary->safe_psql('postgres', + " + CREATE DATABASE regress; + CREATE ROLE mdb_admin; + CREATE ROLE mdb_reg_lh_1; + CREATE ROLE mdb_reg_lh_2; + GRANT pg_signal_backend TO mdb_admin; + GRANT pg_signal_backend TO mdb_reg_lh_1; + GRANT mdb_admin TO mdb_reg_lh_2; +"); + +# Create some content on primary and check its presence in standby nodes +$node_primary->safe_psql('regress', + " + CREATE TABLE tab_int(i int); + INSERT INTO tab_int SELECT * FROm generate_series(1, 1000000); + ALTER SYSTEM SET autovacuum_vacuum_cost_limit TO 1; + ALTER SYSTEM SET autovacuum_vacuum_cost_delay TO 100; + ALTER SYSTEM SET autovacuum_naptime TO 1; +"); + +$node_primary->restart; + +sleep 1; + +my $res_pid = $node_primary->safe_psql('regress', + " + SELECT pid FROM pg_stat_activity WHERE backend_type = 'autovacuum worker' and datname = 'regress';; +"); + + +print "pid is $res_pid\n"; + +ok(1); + + +my ($res_reg_lh_1, $stdout_reg_lh_1, $stderr_reg_lh_1) = $node_primary->psql('regress', + " + SET ROLE mdb_reg_lh_1; + SELECT pg_terminate_backend($res_pid); +"); + +# print ($res_reg_lh_1, $stdout_reg_lh_1, $stderr_reg_lh_1, "\n"); + +ok($res_reg_lh_1 != 0, "should fail for non-mdb_admin"); +like($stderr_reg_lh_1, 
qr/ERROR: must be a superuser to terminate superuser process/, "matches"); + +my ($res_reg_lh_2, $stdout_reg_lh_2, $stderr_reg_lh_2) = $node_primary->psql('regress', + " + SET ROLE mdb_reg_lh_2; + SELECT pg_terminate_backend($res_pid); +"); + +ok($res_reg_lh_2 == 0, "should success for mdb_admin"); + +# print ($res_reg_lh_2, $stdout_reg_lh_2, $stderr_reg_lh_2, "\n"); + +done_testing(); \ No newline at end of file diff --git a/src/test/regress/expected/create_function_3.out b/src/test/regress/expected/create_function_3.out index 8380df1591f..7842a3c1c82 100644 --- a/src/test/regress/expected/create_function_3.out +++ b/src/test/regress/expected/create_function_3.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT diff --git a/src/test/regress/expected/create_function_3_optimizer.out b/src/test/regress/expected/create_function_3_optimizer.out index 3ae669d518a..3256709e1aa 100644 --- a/src/test/regress/expected/create_function_3_optimizer.out +++ b/src/test/regress/expected/create_function_3_optimizer.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 
'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT diff --git a/src/test/regress/expected/mdb_admin.out b/src/test/regress/expected/mdb_admin.out new file mode 100644 index 00000000000..5fc2dab10cb --- /dev/null +++ b/src/test/regress/expected/mdb_admin.out @@ -0,0 +1,81 @@ +CREATE ROLE regress_mdb_admin_user1; +CREATE ROLE regress_mdb_admin_user2; +CREATE ROLE regress_mdb_admin_user3; +CREATE ROLE mdb_admin; +CREATE ROLE regress_superuser WITH SUPERUSER; +GRANT mdb_admin TO regress_mdb_admin_user1; +GRANT CREATE ON DATABASE regression TO regress_mdb_admin_user2; +GRANT CREATE ON DATABASE regression TO regress_mdb_admin_user3; +-- mdb admin trasfers ownership to another role +SET ROLE regress_mdb_admin_user2; +CREATE FUNCTION regress_mdb_admin_add(integer, integer) RETURNS integer + AS 'SELECT $1 + $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; +CREATE SCHEMA regress_mdb_admin_schema; +GRANT CREATE ON SCHEMA regress_mdb_admin_schema TO regress_mdb_admin_user3; +CREATE TABLE regress_mdb_admin_schema.regress_mdb_admin_table(); +CREATE TABLE regress_mdb_admin_table(); +CREATE VIEW regress_mdb_admin_view as SELECT 1; +SET ROLE regress_mdb_admin_user1; +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO regress_mdb_admin_user3; +ALTER VIEW regress_mdb_admin_view OWNER TO regress_mdb_admin_user3; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; +ALTER TABLE regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_mdb_admin_user3; +-- mdb admin fails to transfer ownership to superusers and system roles +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO regress_superuser; +ERROR: cannot transfer ownership to superuser 
"regress_superuser" +ALTER VIEW regress_mdb_admin_view OWNER TO regress_superuser; +ERROR: cannot transfer ownership to superuser "regress_superuser" +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO regress_superuser; +ERROR: cannot transfer ownership to superuser "regress_superuser" +ALTER TABLE regress_mdb_admin_table OWNER TO regress_superuser; +ERROR: cannot transfer ownership to superuser "regress_superuser" +ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_superuser; +ERROR: cannot transfer ownership to superuser "regress_superuser" +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_execute_server_program; +ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_execute_server_program; +ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_execute_server_program; +ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_execute_server_program; +ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_execute_server_program; +ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_write_server_files; +ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_write_server_files; +ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_write_server_files; +ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_write_server_files; +ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ALTER SCHEMA 
regress_mdb_admin_schema OWNER TO pg_write_server_files; +ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_read_server_files; +ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_read_server_files; +ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_server_files; +ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_server_files; +ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_read_server_files; +ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +-- end tests +RESET SESSION AUTHORIZATION; +-- +REVOKE CREATE ON DATABASE regression FROM regress_mdb_admin_user2; +REVOKE CREATE ON DATABASE regression FROM regress_mdb_admin_user3; +DROP VIEW regress_mdb_admin_view; +DROP FUNCTION regress_mdb_admin_add; +DROP TABLE regress_mdb_admin_schema.regress_mdb_admin_table; +DROP TABLE regress_mdb_admin_table; +DROP SCHEMA regress_mdb_admin_schema; +DROP ROLE regress_mdb_admin_user1; +DROP ROLE regress_mdb_admin_user2; +DROP ROLE regress_mdb_admin_user3; +DROP ROLE mdb_admin; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index e2df0208627..6ebdd67731e 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -5,6 +5,10 @@ # this limits the number of connections needed to run the tests. # ---------- +# mdb admin simple checks + +test: mdb_admin + # run tablespace by itself, and first, because it forces a checkpoint; # we'd prefer not to have checkpoints later in the tests because that # interferes with crash-recovery testing. 
diff --git a/src/test/regress/sql/mdb_admin.sql b/src/test/regress/sql/mdb_admin.sql new file mode 100644 index 00000000000..8552bbdd48a --- /dev/null +++ b/src/test/regress/sql/mdb_admin.sql @@ -0,0 +1,77 @@ +CREATE ROLE regress_mdb_admin_user1; +CREATE ROLE regress_mdb_admin_user2; +CREATE ROLE regress_mdb_admin_user3; +CREATE ROLE mdb_admin; + +CREATE ROLE regress_superuser WITH SUPERUSER; + +GRANT mdb_admin TO regress_mdb_admin_user1; +GRANT CREATE ON DATABASE regression TO regress_mdb_admin_user2; +GRANT CREATE ON DATABASE regression TO regress_mdb_admin_user3; + +-- mdb admin trasfers ownership to another role + +SET ROLE regress_mdb_admin_user2; +CREATE FUNCTION regress_mdb_admin_add(integer, integer) RETURNS integer + AS 'SELECT $1 + $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; + +CREATE SCHEMA regress_mdb_admin_schema; +GRANT CREATE ON SCHEMA regress_mdb_admin_schema TO regress_mdb_admin_user3; +CREATE TABLE regress_mdb_admin_schema.regress_mdb_admin_table(); +CREATE TABLE regress_mdb_admin_table(); +CREATE VIEW regress_mdb_admin_view as SELECT 1; +SET ROLE regress_mdb_admin_user1; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO regress_mdb_admin_user3; +ALTER VIEW regress_mdb_admin_view OWNER TO regress_mdb_admin_user3; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; +ALTER TABLE regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_mdb_admin_user3; + + +-- mdb admin fails to transfer ownership to superusers and system roles + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO regress_superuser; +ALTER VIEW regress_mdb_admin_view OWNER TO regress_superuser; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO regress_superuser; +ALTER TABLE regress_mdb_admin_table OWNER TO regress_superuser; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_superuser; + +ALTER FUNCTION 
regress_mdb_admin_add (integer, integer) OWNER TO pg_execute_server_program; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_execute_server_program; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_execute_server_program; +ALTER TABLE regress_mdb_admin_table OWNER TO pg_execute_server_program; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_execute_server_program; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_write_server_files; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_write_server_files; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_write_server_files; +ALTER TABLE regress_mdb_admin_table OWNER TO pg_write_server_files; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_write_server_files; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_read_server_files; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_read_server_files; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_server_files; +ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_server_files; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_read_server_files; + + +-- end tests + +RESET SESSION AUTHORIZATION; +-- +REVOKE CREATE ON DATABASE regression FROM regress_mdb_admin_user2; +REVOKE CREATE ON DATABASE regression FROM regress_mdb_admin_user3; + +DROP VIEW regress_mdb_admin_view; +DROP FUNCTION regress_mdb_admin_add; +DROP TABLE regress_mdb_admin_schema.regress_mdb_admin_table; +DROP TABLE regress_mdb_admin_table; +DROP SCHEMA regress_mdb_admin_schema; +DROP ROLE regress_mdb_admin_user1; +DROP ROLE regress_mdb_admin_user2; +DROP ROLE regress_mdb_admin_user3; +DROP ROLE mdb_admin; diff --git a/src/test/singlenode_regress/expected/create_function_3.out b/src/test/singlenode_regress/expected/create_function_3.out index 3a4fd451471..6423fdb7965 100644 --- a/src/test/singlenode_regress/expected/create_function_3.out +++ 
b/src/test/singlenode_regress/expected/create_function_3.out @@ -166,10 +166,10 @@ SET SESSION AUTHORIZATION regress_unpriv_user; SET search_path TO temp_func_test, public; ALTER FUNCTION functest_E_1(int) NOT LEAKPROOF; ALTER FUNCTION functest_E_2(int) LEAKPROOF; -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function CREATE FUNCTION functest_E_3(int) RETURNS bool LANGUAGE 'sql' LEAKPROOF AS 'SELECT $1 < 200'; -- fail -ERROR: only superuser can define a leakproof function +ERROR: only superuser or mdb_admin can define a leakproof function RESET SESSION AUTHORIZATION; -- -- CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT From 5d67501449fd3c7433712c578e8f813928a742f0 Mon Sep 17 00:00:00 2001 From: reshke Date: Tue, 30 Sep 2025 14:43:07 +0500 Subject: [PATCH 114/133] Role mdb_superuser: feature and regress testing (#5) This commit introduces a new mdb internal role, mdb_superuser. The role is capable of: GRANT/REVOKE any set of privileges to/from any object in the database. Has the power of pg_database_owner in any database, including: DROP any object in the database (except system catalog and stuff) The role is NOT capable of: Create database, role, extension or alter other roles with such privileges. Transfer ownership to /pass has_priv of roles: PG_READ_ALL_DATA PG_WRITE_ALL_DATA PG_EXECUTE_SERVER_PROGRAM PG_READ_SERVER_FILES PG_WRITE_SERVER_FILES PG_DATABASE_OWNER Fix configure.ac USE_MDBLOCALES option handling Apply autoreconf stuff Set missing ok parameter to true while acquiring mdb_superuser oid In regress tests, nobody creates mdb_superuser role, so missing ok is fine Allow mdb_superuser to have power of pg_database_owner Allow mdb_superuser to alter objects and grant ACL to objects owned by pg_database_owner.
Also, during ACL checks, allow mdb_superuser to use the power of the pg_database_owner role to pass the check --- src/backend/commands/functioncmds.c | 4 +- src/backend/utils/adt/acl.c | 126 ++++++++++++----- src/backend/utils/misc/guc.c | 12 +- src/include/utils/acl.h | 1 + src/test/regress/expected/mdb_admin.out | 55 +++++--- src/test/regress/expected/mdb_superuser.out | 115 ++++++++++++++++ src/test/regress/expected/test_setup.out | 5 + src/test/regress/parallel_schedule | 8 +- src/test/regress/sql/mdb_admin.sql | 16 ++- src/test/regress/sql/mdb_superuser.sql | 144 ++++++++++++++++++++ src/test/regress/sql/test_setup.sql | 6 + 11 files changed, 431 insertions(+), 61 deletions(-) create mode 100644 src/test/regress/expected/mdb_superuser.out create mode 100644 src/test/regress/expected/test_setup.out create mode 100644 src/test/regress/sql/mdb_superuser.sql create mode 100644 src/test/regress/sql/test_setup.sql diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c index 8a570fa6965..1ab3b36dd59 100644 --- a/src/backend/commands/functioncmds.c +++ b/src/backend/commands/functioncmds.c @@ -1526,7 +1526,7 @@ CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt) */ if (isLeakProof && !superuser()) { - Oid role = get_role_oid("mdb_admin", true); + Oid role = get_role_oid("mdb_admin", true /*if nodoby created mdb_admin role in this database*/); if (!is_member_of_role(GetUserId(), role)) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), @@ -1857,7 +1857,7 @@ AlterFunction(ParseState *pstate, AlterFunctionStmt *stmt) procForm->proleakproof = intVal(leakproof_item->arg); if (procForm->proleakproof && !superuser()) { - Oid role = get_role_oid("mdb_admin", true); + Oid role = get_role_oid("mdb_admin", true /*if nodoby created mdb_admin role in this database*/); if (!is_member_of_role(GetUserId(), role)) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c index
fc566a575f4..e3463f636ae 100644 --- a/src/backend/utils/adt/acl.c +++ b/src/backend/utils/adt/acl.c @@ -116,6 +116,7 @@ static AclResult pg_role_aclcheck(Oid role_oid, Oid roleid, AclMode mode); static void RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue); +static bool has_privs_of_unwanted_system_role(Oid role); /* * getid @@ -4991,9 +4992,65 @@ roles_is_member_of(Oid roleid, enum RoleRecurseType type, * set; for such roles, membership implies the ability to do SET ROLE, but * the privileges are not available until you've done so. */ + +/* +* This is basically original postgresql privs-check function +*/ + +// -- mdb_superuser patch + +bool +has_privs_of_role_strict(Oid member, Oid role) +{ + /* Fast path for simple case */ + if (member == role) + return true; + + /* Superusers have every privilege, so are part of every role */ + if (superuser_arg(member)) + return true; + + /* + * Find all the roles that member has the privileges of, including + * multi-level recursion, then see if target role is any one of them. 
+ */ + return list_member_oid(roles_is_member_of(member, ROLERECURSE_PRIVS, + InvalidOid, NULL), + role); +} + +/* +* Check that role is either one of "dangerous" system role +* or has "strict" (not through mdb_admin or mdb_superuser) +* privs of this role +*/ + +static bool +has_privs_of_unwanted_system_role(Oid role) { + if (has_privs_of_role_strict(role, ROLE_PG_READ_SERVER_FILES)) { + return true; + } + if (has_privs_of_role_strict(role, ROLE_PG_WRITE_SERVER_FILES)) { + return true; + } + if (has_privs_of_role_strict(role, ROLE_PG_EXECUTE_SERVER_PROGRAM)) { + return true; + } + if (has_privs_of_role_strict(role, ROLE_PG_READ_ALL_DATA)) { + return true; + } + if (has_privs_of_role_strict(role, ROLE_PG_WRITE_ALL_DATA)) { + return true; + } + + return false; +} + bool has_privs_of_role(Oid member, Oid role) { + Oid mdb_superuser_roleoid; + /* Fast path for simple case */ if (member == role) return true; @@ -5002,6 +5059,23 @@ has_privs_of_role(Oid member, Oid role) if (superuser_arg(member)) return true; + mdb_superuser_roleoid = get_role_oid("mdb_superuser", true /*if nodoby created mdb_superuser role in this database*/); + + if (is_member_of_role(member, mdb_superuser_roleoid)) { + /* if target role is superuser, disallow */ + if (!superuser_arg(role)) { + /* we want mdb_roles_admin to bypass + * has_priv_of_roles test + * if target role is neither superuser nor + * some dangerous system role + */ + if (!has_privs_of_unwanted_system_role(role)) { + return true; + } + } + } + + /* * Find all the roles that member has the privileges of, including * multi-level recursion, then see if target role is any one of them. 
@@ -5011,6 +5085,7 @@ has_privs_of_role(Oid member, Oid role) role); } +// -- mdb_superuser patch // -- non-upstream patch begin /* @@ -5032,7 +5107,7 @@ mdb_admin_allow_bypass_owner_checks(Oid userId, Oid ownerId) return false; } - mdb_admin_roleoid = get_role_oid("mdb_admin", true /* superuser suggested to be mdb_admin*/); + mdb_admin_roleoid = get_role_oid("mdb_admin", true /*if nodoby created mdb_admin role in this database*/); /* Is userId actually member of mdb admin? */ if (!is_member_of_role(userId, mdb_admin_roleoid)) { /* if no, disallow. */ @@ -5045,23 +5120,11 @@ mdb_admin_allow_bypass_owner_checks(Oid userId, Oid ownerId) * * For now, we check that ownerId does not have * priviledge to execute server program or/and - * read/write server files. + * read/write server files, or/and pg read/write all data */ - if (has_privs_of_role(ownerId, ROLE_PG_READ_SERVER_FILES)) { - return false; - } - - if (has_privs_of_role(ownerId, ROLE_PG_WRITE_SERVER_FILES)) { - return false; - } - - if (has_privs_of_role(ownerId, ROLE_PG_EXECUTE_SERVER_PROGRAM)) { - return false; - } - /* All checks passed, hope will not be hacked here (again) */ - return true; + return !has_privs_of_unwanted_system_role(ownerId); } // -- non-upstream patch end @@ -5110,7 +5173,7 @@ check_is_member_of_role(Oid member, Oid role) * check_mdb_admin_is_member_of_role * is_member_of_role with a standard permission-violation error if not in usual case * Is case `member` in mdb_admin we check that role is neither of superuser, pg_read/write - * server files nor pg_execute_server_program + * server files nor pg_execute_server_program or pg_read/write all data */ void check_mdb_admin_is_member_of_role(Oid member, Oid role) @@ -5121,9 +5184,10 @@ check_mdb_admin_is_member_of_role(Oid member, Oid role) return; } - mdb_admin_roleoid = get_role_oid("mdb_admin", true /* superuser suggested to be mdb_admin*/); + mdb_admin_roleoid = get_role_oid("mdb_admin", true /*if nodoby created mdb_admin role in this 
database*/); /* Is userId actually member of mdb admin? */ if (is_member_of_role(member, mdb_admin_roleoid)) { + /* role is mdb admin */ if (superuser_arg(role)) { ereport(ERROR, @@ -5132,22 +5196,10 @@ check_mdb_admin_is_member_of_role(Oid member, Oid role) GetUserNameFromId(role, false)))); } - if (has_privs_of_role(role, ROLE_PG_READ_SERVER_FILES)) { + if (has_privs_of_unwanted_system_role(role)) { ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("cannot transfer ownership to pg_read_server_files role in Cloud"))); - } - - if (has_privs_of_role(role, ROLE_PG_WRITE_SERVER_FILES)) { - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("cannot transfer ownership to pg_write_server_files role in Cloud"))); - } - - if (has_privs_of_role(role, ROLE_PG_EXECUTE_SERVER_PROGRAM)) { - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("cannot transfer ownership to pg_execute_server_program role in Cloud"))); + errmsg("forbidden to transfer ownership to this system role in Cloud"))); } } else { /* if no, check membership transfer in usual way. */ @@ -5287,6 +5339,7 @@ select_best_grantor(Oid roleId, AclMode privileges, List *roles_list; int nrights; ListCell *l; + Oid mdb_superuser_roleoid; /* * The object owner is always treated as having all grant options, so if @@ -5301,6 +5354,16 @@ select_best_grantor(Oid roleId, AclMode privileges, return; } + mdb_superuser_roleoid = get_role_oid("mdb_superuser", true /*if nodoby created mdb_superuser role in this database*/); + + if (is_member_of_role(GetUserId(), mdb_superuser_roleoid) + && has_privs_of_role(GetUserId(), ownerId)) { + *grantorId = mdb_superuser_roleoid; + AclMode mdb_superuser_allowed_privs = needed_goptions; + *grantOptions = mdb_superuser_allowed_privs; + return; + } + /* * Otherwise we have to do a careful search to see if roleId has the * privileges of any suitable role. 
Note: we can hang onto the result of @@ -5309,7 +5372,6 @@ select_best_grantor(Oid roleId, AclMode privileges, */ roles_list = roles_is_member_of(roleId, ROLERECURSE_PRIVS, InvalidOid, NULL); - /* initialize candidate result as default */ *grantorId = roleId; *grantOptions = ACL_NO_RIGHTS; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 30e536b84d4..3b9d6da07fb 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -7625,6 +7625,7 @@ set_config_option(const char *name, const char *value, void *newextra = NULL; bool prohibitValueChange = false; bool makeDefault; + Oid role; if (elevel == 0) { @@ -7782,10 +7783,13 @@ set_config_option(const char *name, const char *value, case PGC_SUSET: if (context == PGC_USERSET || context == PGC_BACKEND) { - ereport(elevel, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("permission denied to set parameter \"%s\"", - name))); + role = get_role_oid("mdb_admin", true /*if nodoby created mdb_admin role in this database*/); + if (!(record->mdb_admin_allowed && is_member_of_role(GetUserId(), role))) { + ereport(elevel, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to set parameter \"%s\"", + name))); + } return 0; } break; diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h index 271ac942f6f..49068f04b2f 100644 --- a/src/include/utils/acl.h +++ b/src/include/utils/acl.h @@ -207,6 +207,7 @@ extern AclMode aclmask(const Acl *acl, Oid roleid, Oid ownerId, extern int aclmembers(const Acl *acl, Oid **roleids); extern bool has_privs_of_role(Oid member, Oid role); +extern bool has_privs_of_role_strict(Oid member, Oid role); extern bool is_member_of_role(Oid member, Oid role); extern bool is_member_of_role_nosuper(Oid member, Oid role); extern bool is_admin_of_role(Oid member, Oid role); diff --git a/src/test/regress/expected/mdb_admin.out b/src/test/regress/expected/mdb_admin.out index 5fc2dab10cb..e4dfc436802 100644 --- 
a/src/test/regress/expected/mdb_admin.out +++ b/src/test/regress/expected/mdb_admin.out @@ -1,7 +1,6 @@ CREATE ROLE regress_mdb_admin_user1; CREATE ROLE regress_mdb_admin_user2; CREATE ROLE regress_mdb_admin_user3; -CREATE ROLE mdb_admin; CREATE ROLE regress_superuser WITH SUPERUSER; GRANT mdb_admin TO regress_mdb_admin_user1; GRANT CREATE ON DATABASE regression TO regress_mdb_admin_user2; @@ -24,7 +23,7 @@ ALTER VIEW regress_mdb_admin_view OWNER TO regress_mdb_admin_user3; ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; ALTER TABLE regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_mdb_admin_user3; --- mdb admin fails to transfer ownership to superusers and system roles +-- mdb admin fails to transfer ownership to superusers and particular system roles ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO regress_superuser; ERROR: cannot transfer ownership to superuser "regress_superuser" ALTER VIEW regress_mdb_admin_view OWNER TO regress_superuser; @@ -36,35 +35,55 @@ ERROR: cannot transfer ownership to superuser "regress_superuser" ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_superuser; ERROR: cannot transfer ownership to superuser "regress_superuser" ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_execute_server_program; -ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER VIEW regress_mdb_admin_view OWNER TO pg_execute_server_program; -ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_execute_server_program; -ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud 
ALTER TABLE regress_mdb_admin_table OWNER TO pg_execute_server_program; -ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_execute_server_program; -ERROR: cannot transfer ownership to pg_execute_server_program role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_write_server_files; -ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER VIEW regress_mdb_admin_view OWNER TO pg_write_server_files; -ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_write_server_files; -ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER TABLE regress_mdb_admin_table OWNER TO pg_write_server_files; -ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_write_server_files; -ERROR: cannot transfer ownership to pg_write_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_read_server_files; -ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER VIEW regress_mdb_admin_view OWNER TO pg_read_server_files; -ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER TABLE 
regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_server_files; -ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_server_files; -ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_read_server_files; -ERROR: cannot transfer ownership to pg_read_server_files role in Cloud +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_write_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_read_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER VIEW regress_mdb_admin_view OWNER TO pg_read_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_all_data; +ERROR: forbidden to transfer ownership to this system role in Cloud +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_read_all_data; 
+ERROR: forbidden to transfer ownership to this system role in Cloud -- end tests RESET SESSION AUTHORIZATION; -- @@ -78,4 +97,4 @@ DROP SCHEMA regress_mdb_admin_schema; DROP ROLE regress_mdb_admin_user1; DROP ROLE regress_mdb_admin_user2; DROP ROLE regress_mdb_admin_user3; -DROP ROLE mdb_admin; +DROP ROLE regress_superuser; diff --git a/src/test/regress/expected/mdb_superuser.out b/src/test/regress/expected/mdb_superuser.out new file mode 100644 index 00000000000..21bafb1011b --- /dev/null +++ b/src/test/regress/expected/mdb_superuser.out @@ -0,0 +1,115 @@ +CREATE ROLE regress_mdb_superuser_user1; +CREATE ROLE regress_mdb_superuser_user2; +CREATE ROLE regress_mdb_superuser_user3; +GRANT mdb_admin TO mdb_superuser; +CREATE ROLE regress_superuser WITH SUPERUSER; +GRANT mdb_superuser TO regress_mdb_superuser_user1; +GRANT CREATE ON DATABASE regression TO regress_mdb_superuser_user2; +GRANT CREATE ON DATABASE regression TO regress_mdb_superuser_user3; +SET ROLE regress_mdb_superuser_user2; +CREATE FUNCTION regress_mdb_superuser_add(integer, integer) RETURNS integer + AS 'SELECT $1 + $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; +CREATE SCHEMA regress_mdb_superuser_schema; +CREATE TABLE regress_mdb_superuser_schema.regress_mdb_superuser_table(); +CREATE TABLE regress_mdb_superuser_table(); +CREATE VIEW regress_mdb_superuser_view as SELECT 1; +SET ROLE regress_mdb_superuser_user3; +INSERT INTO regress_mdb_superuser_table SELECT * FROM regress_mdb_superuser_table; +ERROR: permission denied for table regress_mdb_superuser_table +SET ROLE regress_mdb_superuser_user1; +-- mdb_superuser can grant to other role +GRANT USAGE, CREATE ON SCHEMA regress_mdb_superuser_schema TO regress_mdb_superuser_user3; +GRANT ALL PRIVILEGES ON TABLE regress_mdb_superuser_table TO regress_mdb_superuser_user3; +REVOKE ALL PRIVILEGES ON TABLE regress_mdb_superuser_table FROM regress_mdb_superuser_user3; +GRANT INSERT, SELECT ON TABLE regress_mdb_superuser_table TO 
regress_mdb_superuser_user3; +-- grant works +SET ROLE regress_mdb_superuser_user3; +INSERT INTO regress_mdb_superuser_table SELECT * FROM regress_mdb_superuser_table; +SET ROLE mdb_superuser; +-- mdb_superuser drop object of other role +DROP TABLE regress_mdb_superuser_table; +-- mdb admin fails to transfer ownership to superusers and system roles +RESET SESSION AUTHORIZATION; +CREATE TABLE regress_superuser_table(); +SET ROLE pg_read_server_files; +CREATE TABLE regress_pgrsf_table(); +SET ROLE pg_write_server_files; +CREATE TABLE regress_pgwsf_table(); +SET ROLE pg_execute_server_program; +CREATE TABLE regress_pgxsp_table(); +SET ROLE pg_read_all_data; +CREATE TABLE regress_pgrad_table(); +SET ROLE pg_write_all_data; +CREATE TABLE regress_pgrwd_table(); +SET ROLE mdb_superuser; +-- cannot read all data (fail) +SELECT * FROM pg_authid; +ERROR: permission denied for table pg_authid +-- can not drop superuser objects, because does not has_privs_of pg_database_owner +DROP TABLE regress_superuser_table; +ERROR: must be owner of table regress_superuser_table +DROP TABLE regress_pgrsf_table; +ERROR: must be owner of table regress_pgrsf_table +DROP TABLE regress_pgwsf_table; +ERROR: must be owner of table regress_pgwsf_table +DROP TABLE regress_pgxsp_table; +ERROR: must be owner of table regress_pgxsp_table +DROP TABLE regress_pgrad_table; +ERROR: must be owner of table regress_pgrad_table +DROP TABLE regress_pgrwd_table; +ERROR: must be owner of table regress_pgrwd_table +-- does allowed to creare database, role or extension +-- or grant such priviledge +CREATE DATABASE regress_db_fail; +ERROR: permission denied to create database +CREATE ROLE regress_role_fail; +ERROR: permission denied to create role +ALTER ROLE mdb_superuser WITH CREATEROLE; +ERROR: permission denied +ALTER ROLE mdb_superuser WITH CREATEDB; +ERROR: permission denied +ALTER ROLE regress_mdb_superuser_user2 WITH CREATEROLE; +ERROR: permission denied +ALTER ROLE regress_mdb_superuser_user2 WITH 
CREATEDB; +ERROR: permission denied +-- mdb_superuser more powerfull than pg_database_owner +RESET SESSION AUTHORIZATION; +CREATE DATABASE regress_check_owner OWNER regress_mdb_superuser_user2; +\c regress_check_owner; +SET ROLE regress_mdb_superuser_user2; +CREATE SCHEMA regtest; +CREATE TABLE regtest.regtest(); +-- this should fail +SET ROLE regress_mdb_superuser_user3; +GRANT ALL ON TABLE regtest.regtest TO regress_mdb_superuser_user3; +ERROR: permission denied for schema regtest +ALTER TABLE regtest.regtest OWNER TO regress_mdb_superuser_user3; +ERROR: permission denied for schema regtest +SET ROLE regress_mdb_superuser_user1; +GRANT ALL ON TABLE regtest.regtest TO regress_mdb_superuser_user1; +ALTER TABLE regtest.regtest OWNER TO regress_mdb_superuser_user1; +\c regression +DROP DATABASE regress_check_owner; +-- end tests +RESET SESSION AUTHORIZATION; +-- +REVOKE CREATE ON DATABASE regression FROM regress_mdb_superuser_user2; +REVOKE CREATE ON DATABASE regression FROM regress_mdb_superuser_user3; +DROP VIEW regress_mdb_superuser_view; +DROP FUNCTION regress_mdb_superuser_add; +DROP TABLE regress_mdb_superuser_schema.regress_mdb_superuser_table; +DROP TABLE regress_mdb_superuser_table; +ERROR: table "regress_mdb_superuser_table" does not exist +DROP SCHEMA regress_mdb_superuser_schema; +DROP ROLE regress_mdb_superuser_user1; +DROP ROLE regress_mdb_superuser_user2; +DROP ROLE regress_mdb_superuser_user3; +DROP TABLE regress_superuser_table; +DROP TABLE regress_pgrsf_table; +DROP TABLE regress_pgwsf_table; +DROP TABLE regress_pgxsp_table; +DROP TABLE regress_pgrad_table; +DROP TABLE regress_pgrwd_table; diff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out new file mode 100644 index 00000000000..c1cb724ef37 --- /dev/null +++ b/src/test/regress/expected/test_setup.out @@ -0,0 +1,5 @@ +-- +-- TEST_SETUP --- prepare environment expected by regression test scripts +-- +CREATE ROLE mdb_admin; +CREATE ROLE mdb_superuser; diff 
--git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 6ebdd67731e..b2ed818f677 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -6,13 +6,17 @@ # ---------- # mdb admin simple checks - -test: mdb_admin +test: test_setup # run tablespace by itself, and first, because it forces a checkpoint; # we'd prefer not to have checkpoints later in the tests because that # interferes with crash-recovery testing. test: tablespace + +test: mdb_admin + +test: mdb_superuser + # ---------- # The first group of parallel tests # ---------- diff --git a/src/test/regress/sql/mdb_admin.sql b/src/test/regress/sql/mdb_admin.sql index 8552bbdd48a..b6b048e5692 100644 --- a/src/test/regress/sql/mdb_admin.sql +++ b/src/test/regress/sql/mdb_admin.sql @@ -1,7 +1,6 @@ CREATE ROLE regress_mdb_admin_user1; CREATE ROLE regress_mdb_admin_user2; CREATE ROLE regress_mdb_admin_user3; -CREATE ROLE mdb_admin; CREATE ROLE regress_superuser WITH SUPERUSER; @@ -32,7 +31,7 @@ ALTER TABLE regress_mdb_admin_table OWNER TO regress_mdb_admin_user3; ALTER SCHEMA regress_mdb_admin_schema OWNER TO regress_mdb_admin_user3; --- mdb admin fails to transfer ownership to superusers and system roles +-- mdb admin fails to transfer ownership to superusers and particular system roles ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO regress_superuser; ALTER VIEW regress_mdb_admin_view OWNER TO regress_superuser; @@ -58,6 +57,17 @@ ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_se ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_server_files; ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_read_server_files; +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_write_all_data; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_write_all_data; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_write_all_data; +ALTER TABLE regress_mdb_admin_table OWNER TO 
pg_write_all_data; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_write_all_data; + +ALTER FUNCTION regress_mdb_admin_add (integer, integer) OWNER TO pg_read_all_data; +ALTER VIEW regress_mdb_admin_view OWNER TO pg_read_all_data; +ALTER TABLE regress_mdb_admin_schema.regress_mdb_admin_table OWNER TO pg_read_all_data; +ALTER TABLE regress_mdb_admin_table OWNER TO pg_read_all_data; +ALTER SCHEMA regress_mdb_admin_schema OWNER TO pg_read_all_data; -- end tests @@ -74,4 +84,4 @@ DROP SCHEMA regress_mdb_admin_schema; DROP ROLE regress_mdb_admin_user1; DROP ROLE regress_mdb_admin_user2; DROP ROLE regress_mdb_admin_user3; -DROP ROLE mdb_admin; +DROP ROLE regress_superuser; diff --git a/src/test/regress/sql/mdb_superuser.sql b/src/test/regress/sql/mdb_superuser.sql new file mode 100644 index 00000000000..f96338f3aec --- /dev/null +++ b/src/test/regress/sql/mdb_superuser.sql @@ -0,0 +1,144 @@ +CREATE ROLE regress_mdb_superuser_user1; +CREATE ROLE regress_mdb_superuser_user2; +CREATE ROLE regress_mdb_superuser_user3; + +GRANT mdb_admin TO mdb_superuser; + +CREATE ROLE regress_superuser WITH SUPERUSER; + +GRANT mdb_superuser TO regress_mdb_superuser_user1; + +GRANT CREATE ON DATABASE regression TO regress_mdb_superuser_user2; +GRANT CREATE ON DATABASE regression TO regress_mdb_superuser_user3; + + +SET ROLE regress_mdb_superuser_user2; + +CREATE FUNCTION regress_mdb_superuser_add(integer, integer) RETURNS integer + AS 'SELECT $1 + $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; + +CREATE SCHEMA regress_mdb_superuser_schema; +CREATE TABLE regress_mdb_superuser_schema.regress_mdb_superuser_table(); +CREATE TABLE regress_mdb_superuser_table(); +CREATE VIEW regress_mdb_superuser_view as SELECT 1; + +SET ROLE regress_mdb_superuser_user3; +INSERT INTO regress_mdb_superuser_table SELECT * FROM regress_mdb_superuser_table; + +SET ROLE regress_mdb_superuser_user1; + +-- mdb_superuser can grant to other role +GRANT USAGE, CREATE ON SCHEMA regress_mdb_superuser_schema 
TO regress_mdb_superuser_user3; +GRANT ALL PRIVILEGES ON TABLE regress_mdb_superuser_table TO regress_mdb_superuser_user3; +REVOKE ALL PRIVILEGES ON TABLE regress_mdb_superuser_table FROM regress_mdb_superuser_user3; + +GRANT INSERT, SELECT ON TABLE regress_mdb_superuser_table TO regress_mdb_superuser_user3; + +-- grant works +SET ROLE regress_mdb_superuser_user3; +INSERT INTO regress_mdb_superuser_table SELECT * FROM regress_mdb_superuser_table; + +SET ROLE mdb_superuser; + +-- mdb_superuser drop object of other role +DROP TABLE regress_mdb_superuser_table; +-- mdb admin fails to transfer ownership to superusers and system roles + +RESET SESSION AUTHORIZATION; + +CREATE TABLE regress_superuser_table(); + +SET ROLE pg_read_server_files; + +CREATE TABLE regress_pgrsf_table(); + +SET ROLE pg_write_server_files; + +CREATE TABLE regress_pgwsf_table(); + +SET ROLE pg_execute_server_program; + +CREATE TABLE regress_pgxsp_table(); + +SET ROLE pg_read_all_data; + +CREATE TABLE regress_pgrad_table(); + +SET ROLE pg_write_all_data; + +CREATE TABLE regress_pgrwd_table(); + +SET ROLE mdb_superuser; + +-- cannot read all data (fail) +SELECT * FROM pg_authid; + +-- can not drop superuser objects, because does not has_privs_of pg_database_owner +DROP TABLE regress_superuser_table; +DROP TABLE regress_pgrsf_table; +DROP TABLE regress_pgwsf_table; +DROP TABLE regress_pgxsp_table; +DROP TABLE regress_pgrad_table; +DROP TABLE regress_pgrwd_table; + + +-- does allowed to creare database, role or extension +-- or grant such priviledge + +CREATE DATABASE regress_db_fail; +CREATE ROLE regress_role_fail; + +ALTER ROLE mdb_superuser WITH CREATEROLE; +ALTER ROLE mdb_superuser WITH CREATEDB; + +ALTER ROLE regress_mdb_superuser_user2 WITH CREATEROLE; +ALTER ROLE regress_mdb_superuser_user2 WITH CREATEDB; + +-- mdb_superuser more powerfull than pg_database_owner + +RESET SESSION AUTHORIZATION; +CREATE DATABASE regress_check_owner OWNER regress_mdb_superuser_user2; + +\c regress_check_owner; + 
+SET ROLE regress_mdb_superuser_user2; +CREATE SCHEMA regtest; +CREATE TABLE regtest.regtest(); + +-- this should fail + +SET ROLE regress_mdb_superuser_user3; +GRANT ALL ON TABLE regtest.regtest TO regress_mdb_superuser_user3; +ALTER TABLE regtest.regtest OWNER TO regress_mdb_superuser_user3; + +SET ROLE regress_mdb_superuser_user1; +GRANT ALL ON TABLE regtest.regtest TO regress_mdb_superuser_user1; +ALTER TABLE regtest.regtest OWNER TO regress_mdb_superuser_user1; + +\c regression +DROP DATABASE regress_check_owner; + +-- end tests + +RESET SESSION AUTHORIZATION; +-- +REVOKE CREATE ON DATABASE regression FROM regress_mdb_superuser_user2; +REVOKE CREATE ON DATABASE regression FROM regress_mdb_superuser_user3; + +DROP VIEW regress_mdb_superuser_view; +DROP FUNCTION regress_mdb_superuser_add; +DROP TABLE regress_mdb_superuser_schema.regress_mdb_superuser_table; +DROP TABLE regress_mdb_superuser_table; +DROP SCHEMA regress_mdb_superuser_schema; +DROP ROLE regress_mdb_superuser_user1; +DROP ROLE regress_mdb_superuser_user2; +DROP ROLE regress_mdb_superuser_user3; +DROP TABLE regress_superuser_table; +DROP TABLE regress_pgrsf_table; +DROP TABLE regress_pgwsf_table; +DROP TABLE regress_pgxsp_table; +DROP TABLE regress_pgrad_table; +DROP TABLE regress_pgrwd_table; diff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql new file mode 100644 index 00000000000..7ec5ccc7471 --- /dev/null +++ b/src/test/regress/sql/test_setup.sql @@ -0,0 +1,6 @@ +-- +-- TEST_SETUP --- prepare environment expected by regression test scripts +-- + +CREATE ROLE mdb_admin; +CREATE ROLE mdb_superuser; From efee3a6a8f75c2333dd0f158e44b84ebe2aa4b05 Mon Sep 17 00:00:00 2001 From: Leonid Borchuk Date: Tue, 30 Dec 2025 09:15:35 +0000 Subject: [PATCH 115/133] mdb locales disabled by default --- src/test/regress/output/misc.source | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source index 
a0c63418446..f2f7c0dee32 100644 --- a/src/test/regress/output/misc.source +++ b/src/test/regress/output/misc.source @@ -613,6 +613,6 @@ CONTEXT: SQL function "equipment" during startup SELECT mdb_locale_enabled(); mdb_locale_enabled -------------------- - t + f (1 row) From b3628231864eadd34c40dba12bbb449aa7b8a05d Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 10:17:05 +0300 Subject: [PATCH 116/133] [yagp_hooks_collector] Port workfile stats from gpdb Copy of [1] from gpdb to collect workfile stats in yagp-hooks-collector. [1] https://github.com/open-gpdb/gpdb/commit/8813a55193bbce1d16fa0a851692ee5faea0d358 --- .../utils/workfile_manager/workfile_mgr.c | 24 +++++++++++++++++++ src/include/utils/workfile_mgr.h | 4 ++++ 2 files changed, 28 insertions(+) diff --git a/src/backend/utils/workfile_manager/workfile_mgr.c b/src/backend/utils/workfile_manager/workfile_mgr.c index e5b311cf9ba..21b4463e5f1 100644 --- a/src/backend/utils/workfile_manager/workfile_mgr.c +++ b/src/backend/utils/workfile_manager/workfile_mgr.c @@ -192,6 +192,9 @@ static void unpin_workset(workfile_set *work_set); static bool proc_exit_hook_registered = false; +static uint64 total_bytes_written = 0; +static uint64 total_files_created = 0; + Datum gp_workfile_mgr_cache_entries(PG_FUNCTION_ARGS); Datum gp_workfile_mgr_used_diskspace(PG_FUNCTION_ARGS); @@ -371,6 +374,7 @@ RegisterFileWithSet(File file, workfile_set *work_set) localCtl.entries[file].work_set = work_set; work_set->num_files++; work_set->perquery->num_files++; + total_files_created++; /* Enforce the limit on number of files */ if (gp_workfile_limit_files_per_query > 0 && @@ -447,6 +451,7 @@ UpdateWorkFileSize(File file, uint64 newsize) (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("workfile per segment size limit exceeded"))); } + total_bytes_written += diff; } /* @@ -986,3 +991,22 @@ workfile_is_active(workfile_set *workfile) { return workfile ? 
workfile->active : false; } + +uint64 +WorkfileTotalBytesWritten(void) +{ + return total_bytes_written; +} + +uint64 +WorkfileTotalFilesCreated(void) +{ + return total_files_created; +} + +void +WorkfileResetBackendStats(void) +{ + total_bytes_written = 0; + total_files_created = 0; +} diff --git a/src/include/utils/workfile_mgr.h b/src/include/utils/workfile_mgr.h index dfbd17bca57..48c83620610 100644 --- a/src/include/utils/workfile_mgr.h +++ b/src/include/utils/workfile_mgr.h @@ -74,4 +74,8 @@ extern workfile_set *workfile_mgr_cache_entries_get_copy(int* num_actives); extern uint64 WorkfileSegspace_GetSize(void); extern bool workfile_is_active(workfile_set *workfile); +extern uint64 WorkfileTotalBytesWritten(void); +extern uint64 WorkfileTotalFilesCreated(void); +extern void WorkfileResetBackendStats(void); + #endif /* __WORKFILE_MGR_H__ */ From af1092e5b764a110888dddbadfe8eedb72509aae Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 10:21:41 +0300 Subject: [PATCH 117/133] [yagp_hooks_collector] Port YagpQueryState from gpdb Copy of [1] from gpdb to create a global QueryState for unique hashing for yagp-hooks-collector. 
[1] https://github.com/open-gpdb/gpdb/commit/476b54071cc5bf88fd9f396a81688be51688d419 --- src/backend/tcop/pquery.c | 3 +++ src/include/executor/execdesc.h | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 532690f1d51..7c1dbc480bc 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -127,6 +127,9 @@ CreateQueryDesc(PlannedStmt *plannedstmt, if (Gp_role != GP_ROLE_EXECUTE) increment_command_count(); + /* null this field until set by YAGP Hooks collector */ + qd->yagp_query_key = NULL; + return qd; } diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index e3ecf31b664..e469945a4c5 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -22,6 +22,14 @@ struct CdbExplain_ShowStatCtx; /* private, in "cdb/cdbexplain.c" */ +typedef struct YagpQueryKey +{ + int tmid; /* transaction time */ + int ssid; /* session id */ + int ccnt; /* command count */ + int nesting_level; + uintptr_t query_desc_addr; +} YagpQueryKey; /* * SerializedParams is used to serialize external query parameters @@ -330,6 +338,9 @@ typedef struct QueryDesc /* This is always set NULL by the core system, but plugins can change it */ struct Instrumentation *totaltime; /* total time spent in ExecutorRun */ + + /* YAGP Hooks collector */ + YagpQueryKey *yagp_query_key; } QueryDesc; /* in pquery.c */ From 7c6f24dd51b9657a100f940b195fd130ab999f08 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 10:31:30 +0300 Subject: [PATCH 118/133] [yagp_hooks_collector] Use updated names and func's interfaces Update usage in yagp_hooks_collector of - heap_create_with_catalog() - standard_ExecutorRun() - standard_ProcessUtility() - InstrAlloc() - CreateTemplateTupleDesc() - ExplainInitState() -> NewExplainState() - gpmon_gettmid() -> gp_gettmid() - Gp_session_role -> Gp_role - strerror(errno) -> "%m" - Include utils/varlena.h for SplitIdentifierString() in 
gpdbwrappers.cpp. --- .../yagp_hooks_collector/src/EventSender.cpp | 2 +- .../yagp_hooks_collector/src/EventSender.h | 4 +- .../yagp_hooks_collector/src/PgUtils.cpp | 2 +- .../yagp_hooks_collector/src/ProtoUtils.cpp | 12 +++-- .../yagp_hooks_collector/src/UDSConnector.cpp | 10 ++-- .../src/hook_wrappers.cpp | 34 +++++++------ .../yagp_hooks_collector/src/log/LogOps.cpp | 12 ++--- .../src/log/LogSchema.cpp | 2 +- .../src/memory/gpdbwrappers.cpp | 48 +++++++++---------- .../src/memory/gpdbwrappers.h | 2 +- 10 files changed, 67 insertions(+), 61 deletions(-) diff --git a/gpcontrib/yagp_hooks_collector/src/EventSender.cpp b/gpcontrib/yagp_hooks_collector/src/EventSender.cpp index fee435a6dcc..d638d275548 100644 --- a/gpcontrib/yagp_hooks_collector/src/EventSender.cpp +++ b/gpcontrib/yagp_hooks_collector/src/EventSender.cpp @@ -169,7 +169,7 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { if (query_desc->totaltime == NULL) { MemoryContext oldcxt = ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL); + query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL, false); ya_gpdb::mem_ctx_switch_to(oldcxt); } } diff --git a/gpcontrib/yagp_hooks_collector/src/EventSender.h b/gpcontrib/yagp_hooks_collector/src/EventSender.h index 4afdf1e14a4..6e195eeacdf 100644 --- a/gpcontrib/yagp_hooks_collector/src/EventSender.h +++ b/gpcontrib/yagp_hooks_collector/src/EventSender.h @@ -23,6 +23,8 @@ class SetQueryReq; #include +extern void gp_gettmid(int32 *); + struct QueryKey { int tmid; int ssid; @@ -40,7 +42,7 @@ struct QueryKey { query_desc->yagp_query_key = (YagpQueryKey *)ya_gpdb::palloc0(sizeof(YagpQueryKey)); int32 tmid; - gpmon_gettmid(&tmid); + gp_gettmid(&tmid); query_desc->yagp_query_key->tmid = tmid; query_desc->yagp_query_key->ssid = gp_session_id; query_desc->yagp_query_key->ccnt = gp_command_count; diff --git a/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp 
b/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp index fc58112bfaa..96f46429643 100644 --- a/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp +++ b/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp @@ -72,7 +72,7 @@ bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { } bool need_report_nested_query() { - return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; + return Config::report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; } bool filter_query(QueryDesc *query_desc) { diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp index f28714da6ec..aa8632477f5 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp +++ b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp @@ -24,6 +24,8 @@ extern "C" { #include #include +extern void gp_gettmid(int32 *); + namespace { constexpr uint8_t UTF8_CONTINUATION_BYTE_MASK = (1 << 7) | (1 << 6); constexpr uint8_t UTF8_CONTINUATION_BYTE = (1 << 7); @@ -49,7 +51,7 @@ void set_query_key(yagpcc::QueryKey *key) { key->set_ccnt(gp_command_count); key->set_ssid(gp_session_id); int32 tmid = 0; - gpmon_gettmid(&tmid); + gp_gettmid(&tmid); key->set_tmid(tmid); } @@ -81,7 +83,7 @@ std::string trim_str_shrink_utf8(const char *str, size_t len, size_t lim) { } void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { + if (Gp_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { auto qi = req->mutable_query_info(); qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER @@ -106,7 +108,7 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { } void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { + if (Gp_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); *qi->mutable_query_text() = trim_str_shrink_utf8( query_desc->sourceText, strlen(query_desc->sourceText), @@ -118,7 +120,7 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { } void clear_big_fields(yagpcc::SetQueryReq *req) { - if (Gp_session_role == GP_ROLE_DISPATCH) { + if (Gp_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->clear_plan_text(); qi->clear_template_plan_text(); @@ -129,7 +131,7 @@ void clear_big_fields(yagpcc::SetQueryReq *req) { } void set_query_info(yagpcc::SetQueryReq *req) { - if (Gp_session_role == GP_ROLE_DISPATCH) { + if (Gp_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->set_username(get_user_name()); if (IsTransactionState()) diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp index b6af303218d..a7eaed539f7 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp +++ b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp @@ -19,10 +19,9 @@ extern "C" { static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, const std::string &event) { - ereport(LOG, - (errmsg("Query {%d-%d-%d} %s tracing failed with error %s", - req.query_key().tmid(), req.query_key().ssid(), - req.query_key().ccnt(), event.c_str(), strerror(errno)))); + ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %m", + req.query_key().tmid(), req.query_key().ssid(), + req.query_key().ccnt(), event.c_str()))); } bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, @@ -77,8 +76,7 @@ bool UDSConnector::report_query(const 
yagpcc::SetQueryReq &req, // That's a very important error that should never happen, so make it // visible to an end-user and admins. ereport(WARNING, - (errmsg("Unable to create non-blocking socket connection %s", - strerror(errno)))); + (errmsg("Unable to create non-blocking socket connection %m"))); success = false; YagpStat::report_error(); } diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp index 07ac511d546..56c1da9f4f6 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp +++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp @@ -36,7 +36,7 @@ static ProcessUtility_hook_type previous_ProcessUtility_hook = nullptr; static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, - long count); + uint64 count, bool execute_once); static void ya_ExecutorFinish_hook(QueryDesc *query_desc); static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); @@ -45,10 +45,12 @@ static void ya_ic_teardown_hook(ChunkTransportState *transportStates, #ifdef ANALYZE_STATS_COLLECT_HOOK static void ya_analyze_stats_collect_hook(QueryDesc *query_desc); #endif -static void ya_process_utility_hook(Node *parsetree, const char *queryString, +static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, + bool readOnlyTree, ProcessUtilityContext context, - ParamListInfo params, DestReceiver *dest, - char *completionTag); + ParamListInfo params, + QueryEnvironment *queryEnv, + DestReceiver *dest, QueryCompletion *qc); static EventSender *sender = nullptr; @@ -127,14 +129,14 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { } void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, - long count) { + uint64 count, bool execute_once) { get_sender()->incr_depth(); PG_TRY(); { if 
(previous_ExecutorRun_hook) - previous_ExecutorRun_hook(query_desc, direction, count); + previous_ExecutorRun_hook(query_desc, direction, count, execute_once); else - standard_ExecutorRun(query_desc, direction, count); + standard_ExecutorRun(query_desc, direction, count, execute_once); get_sender()->decr_depth(); } PG_CATCH(); @@ -198,10 +200,12 @@ void ya_analyze_stats_collect_hook(QueryDesc *query_desc) { } #endif -static void ya_process_utility_hook(Node *parsetree, const char *queryString, +static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, + bool readOnlyTree, ProcessUtilityContext context, - ParamListInfo params, DestReceiver *dest, - char *completionTag) { + ParamListInfo params, + QueryEnvironment *queryEnv, + DestReceiver *dest, QueryCompletion *qc) { /* Project utility data on QueryDesc to use existing logic */ QueryDesc *query_desc = (QueryDesc *)palloc0(sizeof(QueryDesc)); query_desc->sourceText = queryString; @@ -214,11 +218,11 @@ static void ya_process_utility_hook(Node *parsetree, const char *queryString, PG_TRY(); { if (previous_ProcessUtility_hook) { - (*previous_ProcessUtility_hook)(parsetree, queryString, context, params, - dest, completionTag); + (*previous_ProcessUtility_hook)(pstmt, queryString, readOnlyTree, context, + params, queryEnv, dest, qc); } else { - standard_ProcessUtility(parsetree, queryString, context, params, dest, - completionTag); + standard_ProcessUtility(pstmt, queryString, readOnlyTree, context, params, + queryEnv, dest, qc); } get_sender()->decr_depth(); @@ -264,7 +268,7 @@ Datum yagp_functions_get(FunctionCallInfo fcinfo) { const int ATTNUM = 6; check_stats_loaded(); auto stats = YagpStat::get_stats(); - TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM, false); + TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM); TupleDescInitEntry(tupdesc, (AttrNumber)1, "segid", INT4OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber)2, "total_messages", INT8OID, diff --git 
a/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp b/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp index 0868dd9fc1c..cec9e33693a 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp +++ b/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp @@ -37,12 +37,12 @@ void init_log() { relationId = heap_create_with_catalog( log_relname.data() /* relname */, namespaceId /* namespace */, 0 /* tablespace */, InvalidOid /* relid */, InvalidOid /* reltype oid */, - InvalidOid /* reloftypeid */, GetUserId() /* owner */, - DescribeTuple() /* rel tuple */, NIL, InvalidOid /* relam */, - RELKIND_RELATION, RELPERSISTENCE_PERMANENT, RELSTORAGE_HEAP, false, false, - true, 0, ONCOMMIT_NOOP, NULL /* GP Policy */, (Datum)0, - false /* use_user_acl */, true, true, false /* valid_opts */, - false /* is_part_child */, false /* is part parent */, NULL); + InvalidOid /* reloftypeid */, GetUserId() /* owner */, HEAP_TABLE_AM_OID, + DescribeTuple() /* rel tuple */, NIL, RELKIND_RELATION, + RELPERSISTENCE_PERMANENT, false, false, ONCOMMIT_NOOP, + NULL /* GP Policy */, (Datum)0, false /* use_user_acl */, true, true, + InvalidOid /* relrewrite */, NULL /* typaddress */, + false /* valid_opts */); /* Make the table visible */ CommandCounterIncrement(); diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp index 335a3103cfd..2fadcc46599 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp +++ b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp @@ -19,7 +19,7 @@ const std::unordered_map &proto_name_to_col_idx() { } TupleDesc DescribeTuple() { - TupleDesc tupdesc = CreateTemplateTupleDesc(natts_yagp_log, false); + TupleDesc tupdesc = CreateTemplateTupleDesc(natts_yagp_log); for (size_t anum = 1; anum <= natts_yagp_log; ++anum) { TupleDescInitEntry(tupdesc, anum, log_tbl_desc[anum - 1].pg_att_name.data(), diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp 
b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp index 0824a3a6808..763e32e539c 100644 --- a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp +++ b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp @@ -7,6 +7,7 @@ extern "C" { #include "commands/dbcommands.h" #include "commands/resgroupcmds.h" #include "utils/builtins.h" +#include "utils/varlena.h" #include "nodes/pg_list.h" #include "commands/explain.h" #include "executor/instrument.h" @@ -115,41 +116,40 @@ bool ya_gpdb::split_identifier_string(char *rawstring, char separator, ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, bool costs) noexcept { return wrap_noexcept([&]() { - ExplainState es; - ExplainInitState(&es); - es.costs = costs; - es.verbose = true; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); - ExplainPrintPlan(&es, query_desc); - ExplainEndOutput(&es); - return es; + ExplainState *es = NewExplainState(); + es->costs = costs; + es->verbose = true; + es->format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(es); + ExplainPrintPlan(es, query_desc); + ExplainEndOutput(es); + return *es; }); } ExplainState ya_gpdb::get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept { return wrap_noexcept([&]() { - ExplainState es; - ExplainInitState(&es); - es.analyze = analyze; - es.verbose = true; - es.buffers = es.analyze; - es.timing = es.analyze; - es.summary = es.analyze; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); + ExplainState *es = NewExplainState(); + es->analyze = analyze; + es->verbose = true; + es->buffers = es->analyze; + es->timing = es->analyze; + es->summary = es->analyze; + es->format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(es); if (analyze) { - ExplainPrintPlan(&es, query_desc); - ExplainPrintExecStatsEnd(&es, query_desc); + ExplainPrintPlan(es, query_desc); + ExplainPrintExecStatsEnd(es, query_desc); } - ExplainEndOutput(&es); - return es; + ExplainEndOutput(es); + return *es; }); } -Instrumentation 
*ya_gpdb::instr_alloc(size_t n, int instrument_options) { - return wrap_throw(InstrAlloc, n, instrument_options); +Instrumentation *ya_gpdb::instr_alloc(size_t n, int instrument_options, + bool async_mode) { + return wrap_throw(InstrAlloc, n, instrument_options, async_mode); } HeapTuple ya_gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h index 8f5f146cc67..920fc1ae6e7 100644 --- a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h +++ b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h @@ -32,7 +32,7 @@ bool split_identifier_string(char *rawstring, char separator, List **namelist) noexcept; ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept; -Instrumentation *instr_alloc(size_t n, int instrument_options); +Instrumentation *instr_alloc(size_t n, int instrument_options, bool async_mode); HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull); CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc, From 7b0654e333914cccf911f14c490914f471a41b4d Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 10:39:47 +0300 Subject: [PATCH 119/133] [yagp_hooks_collector] Del redundant funcs Remove unnecessary copies of the core jumbling functions from yagp_hooks_collector/stat_statements_parser. In commit [1] query jumbling moved to core, thus there is no need to keep a copy of jumbling functions in yagp_hooks_collector. 
[1] https://github.com/open-gpdb/cloudberry/commit/5fd9dfa5f50e4906c35133a414ebec5b6d518493 --- .../pg_stat_statements_ya_parser.c | 760 +----------------- 1 file changed, 36 insertions(+), 724 deletions(-) diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c index 1c58d936093..c19805ce506 100644 --- a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -6,689 +6,48 @@ #include #include -#include "access/hash.h" -#include "executor/instrument.h" -#include "executor/execdesc.h" -#include "funcapi.h" +#include "common/hashfn.h" +#include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "miscadmin.h" -#include "parser/analyze.h" -#include "parser/parsetree.h" #include "parser/scanner.h" -#include "parser/gram.h" -#include "pgstat.h" -#include "storage/fd.h" -#include "storage/ipc.h" -#include "storage/spin.h" -#include "tcop/utility.h" #include "utils/builtins.h" #include "utils/memutils.h" +#include "utils/queryjumble.h" #include "pg_stat_statements_ya_parser.h" -static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL; - -#define JUMBLE_SIZE 1024 /* query serialization buffer size */ - -/* - * Struct for tracking locations/lengths of constants during normalization - */ -typedef struct pgssLocationLen -{ - int location; /* start offset in query text */ - int length; /* length in bytes, or -1 to ignore */ -} pgssLocationLen; - -/* - * Working state for computing a query jumble and producing a normalized - * query string - */ -typedef struct pgssJumbleState -{ - /* Jumble of current query tree */ - unsigned char *jumble; - - /* Number of bytes used in jumble[] */ - Size jumble_len; - - /* Array of locations of constants that should be removed */ - pgssLocationLen *clocations; - - 
/* Allocated length of clocations array */ - int clocations_buf_size; - - /* Current number of valid entries in clocations array */ - int clocations_count; - - /* highest Param id we've seen, in order to start normalization correctly */ - int highest_extern_param_id; -} pgssJumbleState; +#ifndef ICONST +#define ICONST 276 +#endif +#ifndef FCONST +#define FCONST 277 +#endif +#ifndef SCONST +#define SCONST 278 +#endif +#ifndef BCONST +#define BCONST 279 +#endif +#ifndef XCONST +#define XCONST 280 +#endif -static void AppendJumble(pgssJumbleState *jstate, - const unsigned char *item, Size size); -static void JumbleQuery(pgssJumbleState *jstate, Query *query); -static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable); -static void JumbleExpr(pgssJumbleState *jstate, Node *node); -static void RecordConstLocation(pgssJumbleState *jstate, int location); -static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query); +static void fill_in_constant_lengths(JumbleState *jstate, const char *query); static int comp_location(const void *a, const void *b); StringInfo gen_normplan(const char *execution_plan); static bool need_replace(int token); -void pgss_post_parse_analyze(ParseState *pstate, Query *query); -static char *generate_normalized_query(pgssJumbleState *jstate, const char *query, +static char *generate_normalized_query(JumbleState *jstate, const char *query, int *query_len_p, int encoding); - void stat_statements_parser_init() -{ - prev_post_parse_analyze_hook = post_parse_analyze_hook; - post_parse_analyze_hook = pgss_post_parse_analyze; -} - -void stat_statements_parser_deinit() +void stat_statements_parser_init(void) { - post_parse_analyze_hook = prev_post_parse_analyze_hook; -} - -/* - * AppendJumble: Append a value that is substantive in a given query to - * the current jumble. 
- */ -static void -AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size) -{ - unsigned char *jumble = jstate->jumble; - Size jumble_len = jstate->jumble_len; - - /* - * Whenever the jumble buffer is full, we hash the current contents and - * reset the buffer to contain just that hash value, thus relying on the - * hash to summarize everything so far. - */ - while (size > 0) - { - Size part_size; - - if (jumble_len >= JUMBLE_SIZE) - { - uint32 start_hash = hash_any(jumble, JUMBLE_SIZE); - - memcpy(jumble, &start_hash, sizeof(start_hash)); - jumble_len = sizeof(start_hash); - } - part_size = Min(size, JUMBLE_SIZE - jumble_len); - memcpy(jumble + jumble_len, item, part_size); - jumble_len += part_size; - item += part_size; - size -= part_size; - } - jstate->jumble_len = jumble_len; + EnableQueryId(); } -/* - * Wrappers around AppendJumble to encapsulate details of serialization - * of individual local variable elements. - */ -#define APP_JUMB(item) \ - AppendJumble(jstate, (const unsigned char *)&(item), sizeof(item)) -#define APP_JUMB_STRING(str) \ - AppendJumble(jstate, (const unsigned char *)(str), strlen(str) + 1) - -/* - * JumbleQuery: Selectively serialize the query tree, appending significant - * data to the "query jumble" while ignoring nonsignificant data. - * - * Rule of thumb for what to include is that we should ignore anything not - * semantically significant (such as alias names) as well as anything that can - * be deduced from child nodes (else we'd just be double-hashing that piece - * of information). 
- */ -void JumbleQuery(pgssJumbleState *jstate, Query *query) +void stat_statements_parser_deinit(void) { - Assert(IsA(query, Query)); - Assert(query->utilityStmt == NULL); - - APP_JUMB(query->commandType); - /* resultRelation is usually predictable from commandType */ - JumbleExpr(jstate, (Node *)query->cteList); - JumbleRangeTable(jstate, query->rtable); - JumbleExpr(jstate, (Node *)query->jointree); - JumbleExpr(jstate, (Node *)query->targetList); - JumbleExpr(jstate, (Node *)query->returningList); - JumbleExpr(jstate, (Node *)query->groupClause); - JumbleExpr(jstate, query->havingQual); - JumbleExpr(jstate, (Node *)query->windowClause); - JumbleExpr(jstate, (Node *)query->distinctClause); - JumbleExpr(jstate, (Node *)query->sortClause); - JumbleExpr(jstate, query->limitOffset); - JumbleExpr(jstate, query->limitCount); - /* we ignore rowMarks */ - JumbleExpr(jstate, query->setOperations); -} - -/* - * Jumble a range table - */ -static void -JumbleRangeTable(pgssJumbleState *jstate, List *rtable) -{ - ListCell *lc; - - foreach (lc, rtable) - { - RangeTblEntry *rte = (RangeTblEntry *)lfirst(lc); - - Assert(IsA(rte, RangeTblEntry)); - APP_JUMB(rte->rtekind); - switch (rte->rtekind) - { - case RTE_RELATION: - APP_JUMB(rte->relid); - break; - case RTE_SUBQUERY: - JumbleQuery(jstate, rte->subquery); - break; - case RTE_JOIN: - APP_JUMB(rte->jointype); - break; - case RTE_FUNCTION: - JumbleExpr(jstate, (Node *)rte->functions); - break; - case RTE_VALUES: - JumbleExpr(jstate, (Node *)rte->values_lists); - break; - case RTE_CTE: - - /* - * Depending on the CTE name here isn't ideal, but it's the - * only info we have to identify the referenced WITH item. 
- */ - APP_JUMB_STRING(rte->ctename); - APP_JUMB(rte->ctelevelsup); - break; - /* GPDB RTEs */ - case RTE_VOID: - break; - case RTE_TABLEFUNCTION: - JumbleQuery(jstate, rte->subquery); - JumbleExpr(jstate, (Node *)rte->functions); - break; - default: - ereport(ERROR, (errmsg("unrecognized RTE kind: %d", (int)rte->rtekind))); - break; - } - } -} - -/* - * Jumble an expression tree - * - * In general this function should handle all the same node types that - * expression_tree_walker() does, and therefore it's coded to be as parallel - * to that function as possible. However, since we are only invoked on - * queries immediately post-parse-analysis, we need not handle node types - * that only appear in planning. - * - * Note: the reason we don't simply use expression_tree_walker() is that the - * point of that function is to support tree walkers that don't care about - * most tree node types, but here we care about all types. We should complain - * about any unrecognized node type. - */ -static void -JumbleExpr(pgssJumbleState *jstate, Node *node) -{ - ListCell *temp; - - if (node == NULL) - return; - - /* Guard against stack overflow due to overly complex expressions */ - check_stack_depth(); - - /* - * We always emit the node's NodeTag, then any additional fields that are - * considered significant, and then we recurse to any child nodes. 
- */ - APP_JUMB(node->type); - - switch (nodeTag(node)) - { - case T_Var: - { - Var *var = (Var *)node; - - APP_JUMB(var->varno); - APP_JUMB(var->varattno); - APP_JUMB(var->varlevelsup); - } - break; - case T_Const: - { - Const *c = (Const *)node; - - /* We jumble only the constant's type, not its value */ - APP_JUMB(c->consttype); - /* Also, record its parse location for query normalization */ - RecordConstLocation(jstate, c->location); - } - break; - case T_Param: - { - Param *p = (Param *)node; - - APP_JUMB(p->paramkind); - APP_JUMB(p->paramid); - APP_JUMB(p->paramtype); - } - break; - case T_Aggref: - { - Aggref *expr = (Aggref *)node; - - APP_JUMB(expr->aggfnoid); - JumbleExpr(jstate, (Node *)expr->aggdirectargs); - JumbleExpr(jstate, (Node *)expr->args); - JumbleExpr(jstate, (Node *)expr->aggorder); - JumbleExpr(jstate, (Node *)expr->aggdistinct); - JumbleExpr(jstate, (Node *)expr->aggfilter); - } - break; - case T_WindowFunc: - { - WindowFunc *expr = (WindowFunc *)node; - - APP_JUMB(expr->winfnoid); - APP_JUMB(expr->winref); - JumbleExpr(jstate, (Node *)expr->args); - JumbleExpr(jstate, (Node *)expr->aggfilter); - } - break; - case T_ArrayRef: - { - ArrayRef *aref = (ArrayRef *)node; - - JumbleExpr(jstate, (Node *)aref->refupperindexpr); - JumbleExpr(jstate, (Node *)aref->reflowerindexpr); - JumbleExpr(jstate, (Node *)aref->refexpr); - JumbleExpr(jstate, (Node *)aref->refassgnexpr); - } - break; - case T_FuncExpr: - { - FuncExpr *expr = (FuncExpr *)node; - - APP_JUMB(expr->funcid); - JumbleExpr(jstate, (Node *)expr->args); - } - break; - case T_NamedArgExpr: - { - NamedArgExpr *nae = (NamedArgExpr *)node; - - APP_JUMB(nae->argnumber); - JumbleExpr(jstate, (Node *)nae->arg); - } - break; - case T_OpExpr: - case T_DistinctExpr: /* struct-equivalent to OpExpr */ - case T_NullIfExpr: /* struct-equivalent to OpExpr */ - { - OpExpr *expr = (OpExpr *)node; - - APP_JUMB(expr->opno); - JumbleExpr(jstate, (Node *)expr->args); - } - break; - case T_ScalarArrayOpExpr: - 
{ - ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *)node; - - APP_JUMB(expr->opno); - APP_JUMB(expr->useOr); - JumbleExpr(jstate, (Node *)expr->args); - } - break; - case T_BoolExpr: - { - BoolExpr *expr = (BoolExpr *)node; - - APP_JUMB(expr->boolop); - JumbleExpr(jstate, (Node *)expr->args); - } - break; - case T_SubLink: - { - SubLink *sublink = (SubLink *)node; - - APP_JUMB(sublink->subLinkType); - JumbleExpr(jstate, (Node *)sublink->testexpr); - JumbleQuery(jstate, (Query *)sublink->subselect); - } - break; - case T_FieldSelect: - { - FieldSelect *fs = (FieldSelect *)node; - - APP_JUMB(fs->fieldnum); - JumbleExpr(jstate, (Node *)fs->arg); - } - break; - case T_FieldStore: - { - FieldStore *fstore = (FieldStore *)node; - - JumbleExpr(jstate, (Node *)fstore->arg); - JumbleExpr(jstate, (Node *)fstore->newvals); - } - break; - case T_RelabelType: - { - RelabelType *rt = (RelabelType *)node; - - APP_JUMB(rt->resulttype); - JumbleExpr(jstate, (Node *)rt->arg); - } - break; - case T_CoerceViaIO: - { - CoerceViaIO *cio = (CoerceViaIO *)node; - - APP_JUMB(cio->resulttype); - JumbleExpr(jstate, (Node *)cio->arg); - } - break; - case T_ArrayCoerceExpr: - { - ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *)node; - - APP_JUMB(acexpr->resulttype); - JumbleExpr(jstate, (Node *)acexpr->arg); - } - break; - case T_ConvertRowtypeExpr: - { - ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *)node; - - APP_JUMB(crexpr->resulttype); - JumbleExpr(jstate, (Node *)crexpr->arg); - } - break; - case T_CollateExpr: - { - CollateExpr *ce = (CollateExpr *)node; - - APP_JUMB(ce->collOid); - JumbleExpr(jstate, (Node *)ce->arg); - } - break; - case T_CaseExpr: - { - CaseExpr *caseexpr = (CaseExpr *)node; - - JumbleExpr(jstate, (Node *)caseexpr->arg); - foreach (temp, caseexpr->args) - { - CaseWhen *when = (CaseWhen *)lfirst(temp); - - Assert(IsA(when, CaseWhen)); - JumbleExpr(jstate, (Node *)when->expr); - JumbleExpr(jstate, (Node *)when->result); - } - JumbleExpr(jstate, (Node 
*)caseexpr->defresult); - } - break; - case T_CaseTestExpr: - { - CaseTestExpr *ct = (CaseTestExpr *)node; - - APP_JUMB(ct->typeId); - } - break; - case T_ArrayExpr: - JumbleExpr(jstate, (Node *)((ArrayExpr *)node)->elements); - break; - case T_RowExpr: - JumbleExpr(jstate, (Node *)((RowExpr *)node)->args); - break; - case T_RowCompareExpr: - { - RowCompareExpr *rcexpr = (RowCompareExpr *)node; - - APP_JUMB(rcexpr->rctype); - JumbleExpr(jstate, (Node *)rcexpr->largs); - JumbleExpr(jstate, (Node *)rcexpr->rargs); - } - break; - case T_CoalesceExpr: - JumbleExpr(jstate, (Node *)((CoalesceExpr *)node)->args); - break; - case T_MinMaxExpr: - { - MinMaxExpr *mmexpr = (MinMaxExpr *)node; - - APP_JUMB(mmexpr->op); - JumbleExpr(jstate, (Node *)mmexpr->args); - } - break; - case T_XmlExpr: - { - XmlExpr *xexpr = (XmlExpr *)node; - - APP_JUMB(xexpr->op); - JumbleExpr(jstate, (Node *)xexpr->named_args); - JumbleExpr(jstate, (Node *)xexpr->args); - } - break; - case T_NullTest: - { - NullTest *nt = (NullTest *)node; - - APP_JUMB(nt->nulltesttype); - JumbleExpr(jstate, (Node *)nt->arg); - } - break; - case T_BooleanTest: - { - BooleanTest *bt = (BooleanTest *)node; - - APP_JUMB(bt->booltesttype); - JumbleExpr(jstate, (Node *)bt->arg); - } - break; - case T_CoerceToDomain: - { - CoerceToDomain *cd = (CoerceToDomain *)node; - - APP_JUMB(cd->resulttype); - JumbleExpr(jstate, (Node *)cd->arg); - } - break; - case T_CoerceToDomainValue: - { - CoerceToDomainValue *cdv = (CoerceToDomainValue *)node; - - APP_JUMB(cdv->typeId); - } - break; - case T_SetToDefault: - { - SetToDefault *sd = (SetToDefault *)node; - - APP_JUMB(sd->typeId); - } - break; - case T_CurrentOfExpr: - { - CurrentOfExpr *ce = (CurrentOfExpr *)node; - - APP_JUMB(ce->cvarno); - if (ce->cursor_name) - APP_JUMB_STRING(ce->cursor_name); - APP_JUMB(ce->cursor_param); - } - break; - case T_TargetEntry: - { - TargetEntry *tle = (TargetEntry *)node; - - APP_JUMB(tle->resno); - APP_JUMB(tle->ressortgroupref); - 
JumbleExpr(jstate, (Node *)tle->expr); - } - break; - case T_RangeTblRef: - { - RangeTblRef *rtr = (RangeTblRef *)node; - - APP_JUMB(rtr->rtindex); - } - break; - case T_JoinExpr: - { - JoinExpr *join = (JoinExpr *)node; - - APP_JUMB(join->jointype); - APP_JUMB(join->isNatural); - APP_JUMB(join->rtindex); - JumbleExpr(jstate, join->larg); - JumbleExpr(jstate, join->rarg); - JumbleExpr(jstate, join->quals); - } - break; - case T_FromExpr: - { - FromExpr *from = (FromExpr *)node; - - JumbleExpr(jstate, (Node *)from->fromlist); - JumbleExpr(jstate, from->quals); - } - break; - case T_List: - foreach (temp, (List *)node) - { - JumbleExpr(jstate, (Node *)lfirst(temp)); - } - break; - case T_SortGroupClause: - { - SortGroupClause *sgc = (SortGroupClause *)node; - - APP_JUMB(sgc->tleSortGroupRef); - APP_JUMB(sgc->eqop); - APP_JUMB(sgc->sortop); - APP_JUMB(sgc->nulls_first); - } - break; - case T_WindowClause: - { - WindowClause *wc = (WindowClause *)node; - - APP_JUMB(wc->winref); - APP_JUMB(wc->frameOptions); - JumbleExpr(jstate, (Node *)wc->partitionClause); - JumbleExpr(jstate, (Node *)wc->orderClause); - JumbleExpr(jstate, wc->startOffset); - JumbleExpr(jstate, wc->endOffset); - } - break; - case T_CommonTableExpr: - { - CommonTableExpr *cte = (CommonTableExpr *)node; - - /* we store the string name because RTE_CTE RTEs need it */ - APP_JUMB_STRING(cte->ctename); - JumbleQuery(jstate, (Query *)cte->ctequery); - } - break; - case T_SetOperationStmt: - { - SetOperationStmt *setop = (SetOperationStmt *)node; - - APP_JUMB(setop->op); - APP_JUMB(setop->all); - JumbleExpr(jstate, setop->larg); - JumbleExpr(jstate, setop->rarg); - } - break; - case T_RangeTblFunction: - { - RangeTblFunction *rtfunc = (RangeTblFunction *)node; - - JumbleExpr(jstate, rtfunc->funcexpr); - } - break; - /* GPDB nodes */ - case T_GroupingClause: - { - GroupingClause *grpnode = (GroupingClause *)node; - - JumbleExpr(jstate, (Node *)grpnode->groupsets); - } - break; - case T_GroupingFunc: - { - 
GroupingFunc *grpnode = (GroupingFunc *)node; - - JumbleExpr(jstate, (Node *)grpnode->args); - } - break; - case T_Grouping: - case T_GroupId: - case T_Integer: - case T_Value: - // TODO:seems like nothing to do with it - break; - /* GPDB-only additions, nothing to do */ - case T_PartitionBy: - case T_PartitionElem: - case T_PartitionRangeItem: - case T_PartitionBoundSpec: - case T_PartitionSpec: - case T_PartitionValuesSpec: - case T_AlterPartitionId: - case T_AlterPartitionCmd: - case T_InheritPartitionCmd: - case T_CreateFileSpaceStmt: - case T_FileSpaceEntry: - case T_DropFileSpaceStmt: - case T_TableValueExpr: - case T_DenyLoginInterval: - case T_DenyLoginPoint: - case T_AlterTypeStmt: - case T_SetDistributionCmd: - case T_ExpandStmtSpec: - break; - default: - /* Only a warning, since we can stumble along anyway */ - ereport(WARNING, (errmsg("unrecognized node type: %d", - (int)nodeTag(node)))); - break; - } -} - -/* - * Record location of constant within query string of query tree - * that is currently being walked. 
- */ -static void -RecordConstLocation(pgssJumbleState *jstate, int location) -{ - /* -1 indicates unknown or undefined location */ - if (location >= 0) - { - /* enlarge array if needed */ - if (jstate->clocations_count >= jstate->clocations_buf_size) - { - jstate->clocations_buf_size *= 2; - jstate->clocations = (pgssLocationLen *) - repalloc(jstate->clocations, - jstate->clocations_buf_size * - sizeof(pgssLocationLen)); - } - jstate->clocations[jstate->clocations_count].location = location; - /* initialize lengths to -1 to simplify fill_in_constant_lengths */ - jstate->clocations[jstate->clocations_count].length = -1; - jstate->clocations_count++; - } + /* NO-OP */ } /* check if token should be replaced by substitute varable */ @@ -768,60 +127,13 @@ gen_normplan(const char *execution_plan) } /* - * Post-parse-analysis hook: mark query with a queryId - */ -void pgss_post_parse_analyze(ParseState *pstate, Query *query) -{ - pgssJumbleState jstate; - - if (prev_post_parse_analyze_hook) - prev_post_parse_analyze_hook(pstate, query); - - /* Assert we didn't do this already */ - Assert(query->queryId == 0); - - /* - * Utility statements get queryId zero. We do this even in cases where - * the statement contains an optimizable statement for which a queryId - * could be derived (such as EXPLAIN or DECLARE CURSOR). For such cases, - * runtime control will first go through ProcessUtility and then the - * executor, and we don't want the executor hooks to do anything, since we - * are already measuring the statement's costs at the utility level. 
- */ - if (query->utilityStmt) - { - query->queryId = 0; - return; - } - - /* Set up workspace for query jumbling */ - jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); - jstate.jumble_len = 0; - jstate.clocations_buf_size = 32; - jstate.clocations = (pgssLocationLen *) - palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); - jstate.clocations_count = 0; - - /* Compute query ID and mark the Query node with it */ - JumbleQuery(&jstate, query); - query->queryId = hash_any(jstate.jumble, jstate.jumble_len); - - /* - * If we are unlucky enough to get a hash of zero, use 1 instead, to - * prevent confusion with the utility-statement case. - */ - if (query->queryId == 0) - query->queryId = 1; -} - -/* - * comp_location: comparator for qsorting pgssLocationLen structs by location + * comp_location: comparator for qsorting LocationLen structs by location */ static int comp_location(const void *a, const void *b) { - int l = ((const pgssLocationLen *) a)->location; - int r = ((const pgssLocationLen *) b)->location; + int l = ((const LocationLen *) a)->location; + int r = ((const LocationLen *) b)->location; if (l < r) return -1; @@ -854,9 +166,9 @@ comp_location(const void *a, const void *b) * reason for a constant to start with a '-'. 
*/ static void -fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) +fill_in_constant_lengths(JumbleState *jstate, const char *query) { - pgssLocationLen *locs; + LocationLen *locs; core_yyscan_t yyscanner; core_yy_extra_type yyextra; core_YYSTYPE yylval; @@ -870,14 +182,14 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) */ if (jstate->clocations_count > 1) qsort(jstate->clocations, jstate->clocations_count, - sizeof(pgssLocationLen), comp_location); + sizeof(LocationLen), comp_location); locs = jstate->clocations; /* initialize the flex scanner --- should match raw_parser() */ yyscanner = scanner_init(query, &yyextra, - ScanKeywords, - NumScanKeywords); + &ScanKeywords, + ScanKeywordTokens); /* Search for each constant, in sequence */ for (i = 0; i < jstate->clocations_count; i++) @@ -957,7 +269,7 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) * Returns a palloc'd string. */ static char * -generate_normalized_query(pgssJumbleState *jstate, const char *query, +generate_normalized_query(JumbleState *jstate, const char *query, int *query_len_p, int encoding) { char *norm_query; @@ -1027,12 +339,12 @@ char *gen_normquery(const char *query) if (!query) { return NULL; } - pgssJumbleState jstate; + JumbleState jstate; jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); jstate.jumble_len = 0; jstate.clocations_buf_size = 32; - jstate.clocations = (pgssLocationLen *) - palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); + jstate.clocations = (LocationLen *) + palloc(jstate.clocations_buf_size * sizeof(LocationLen)); jstate.clocations_count = 0; int query_len = strlen(query); return generate_normalized_query(&jstate, query, &query_len, GetDatabaseEncoding()); From b7f9d2c3a0f2bc99979faefcd7f4f25d746a8a45 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 11:07:06 +0300 Subject: [PATCH 120/133] [yagp_hooks_collector] Change test functions to SRF In yagp_hooks_collector we need control 
over place where function is executed, and Cloudberry supports only set-returning functions to execute on COORDINATOR so change the type of the functions. We can see the error below without this change: ERROR: EXECUTE ON COORDINATOR is only supported for set-returning functions. --- .../expected/yagp_cursors.out | 12 +++---- .../expected/yagp_dist.out | 12 +++---- .../expected/yagp_select.out | 12 +++---- .../expected/yagp_utility.out | 30 ++++++---------- .../src/yagp_hooks_collector.c | 34 +++++++++++++++---- .../yagp_hooks_collector--1.0--1.1.sql | 16 ++++----- .../yagp_hooks_collector--1.0.sql | 6 ++-- .../yagp_hooks_collector--1.1.sql | 16 ++++----- 8 files changed, 70 insertions(+), 68 deletions(-) diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out b/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out index 9587c00b550..d251ddd3e1c 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out @@ -40,8 +40,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- DECLARE WITH HOLD SET yagpcc.logging_mode to 'TBL'; @@ -74,8 +73,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- ROLLBACK SET yagpcc.logging_mode to 'TBL'; @@ -105,8 +103,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- FETCH SET yagpcc.logging_mode to 'TBL'; @@ -155,8 +152,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out 
b/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out index ebaf839601d..5fd5ea5fb3e 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out @@ -46,8 +46,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) SET yagpcc.logging_mode to 'TBL'; -- Scan all segments. @@ -83,8 +82,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Replicated table CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ @@ -128,8 +126,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Partially distributed table (2 numsegments) SET allow_system_table_mods = ON; @@ -167,8 +164,7 @@ SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_statu SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_select.out b/gpcontrib/yagp_hooks_collector/expected/yagp_select.out index 4c4a0218150..b6e18dc862f 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_select.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_select.out @@ -46,8 +46,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Transaction test SET yagpcc.logging_mode to 'TBL'; @@ -72,8 +71,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- CTE test SET yagpcc.logging_mode to 'TBL'; @@ -102,8 +100,7 @@ SELECT segid, query_text, 
query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Prepared statement test SET yagpcc.logging_mode to 'TBL'; @@ -128,8 +125,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out index 03c17713575..d8ab42dd695 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out @@ -17,7 +17,7 @@ SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.logging_mode to 'TBL'; CREATE TABLE test_table (a int, b text); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE INDEX test_idx ON test_table(a); ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; @@ -41,8 +41,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Partitioning SET yagpcc.logging_mode to 'TBL'; @@ -90,8 +89,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Views and Functions SET yagpcc.logging_mode to 'TBL'; @@ -118,8 +116,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Transaction Operations SET yagpcc.logging_mode to 'TBL'; @@ -159,13 +156,12 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- DML Operations SET yagpcc.logging_mode to 'TBL'; CREATE TABLE dml_test (a int, b text); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO dml_test VALUES (1, 'test'); UPDATE dml_test SET b = 'updated' WHERE a = 1; @@ -186,13 +182,12 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- COPY Operations SET yagpcc.logging_mode to 'TBL'; CREATE TABLE copy_test (a int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. 
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. COPY (SELECT 1) TO STDOUT; 1 @@ -214,8 +209,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Prepared Statements and error during execute SET yagpcc.logging_mode to 'TBL'; @@ -240,8 +234,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- GUC Settings SET yagpcc.logging_mode to 'TBL'; @@ -262,8 +255,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c index 9db73638b24..27fd0e04b26 100644 --- a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c +++ b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c @@ -1,5 +1,6 @@ #include "postgres.h" #include "cdb/cdbvars.h" +#include "funcapi.h" #include "utils/builtins.h" #include "hook_wrappers.h" @@ -26,8 +27,15 @@ void _PG_fini(void) { } Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { - yagp_functions_reset(); - PG_RETURN_VOID(); + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + yagp_functions_reset(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } Datum yagp_stat_messages(PG_FUNCTION_ARGS) { @@ -35,11 +43,25 @@ Datum yagp_stat_messages(PG_FUNCTION_ARGS) { } Datum yagp_init_log(PG_FUNCTION_ARGS) { - init_log(); - 
PG_RETURN_VOID(); + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + init_log(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } Datum yagp_truncate_log(PG_FUNCTION_ARGS) { - truncate_log(); - PG_RETURN_VOID(); + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + truncate_log(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql index 959d4f235d1..8684ca73915 100644 --- a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql +++ b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql @@ -23,17 +23,17 @@ DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); -- Recreate functions and view in new schema. CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagpcc.stat_messages_reset() -RETURNS void +RETURNS SETOF void AS $$ SELECT yagpcc.__stat_messages_reset_f_on_master(); @@ -75,12 +75,12 @@ ORDER BY segid; -- Create new objects. 
CREATE FUNCTION yagpcc.__init_log_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_init_log' LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__init_log_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_init_log' LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; @@ -95,17 +95,17 @@ CREATE VIEW yagpcc.log AS ORDER BY tmid, ssid, ccnt; CREATE FUNCTION yagpcc.__truncate_log_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_truncate_log' LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__truncate_log_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_truncate_log' LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagpcc.truncate_log() -RETURNS void AS $$ +RETURNS SETOF void AS $$ BEGIN PERFORM yagpcc.__truncate_log_on_master(); PERFORM yagpcc.__truncate_log_on_segments(); diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql index 7ab4e1b2fb7..270cab92382 100644 --- a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql +++ b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql @@ -4,17 +4,17 @@ \echo Use "CREATE EXTENSION yagp_hooks_collector" to load this file. 
\quit CREATE FUNCTION __yagp_stat_messages_reset_f_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON MASTER; CREATE FUNCTION __yagp_stat_messages_reset_f_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagp_stat_messages_reset() -RETURNS void +RETURNS SETOF void AS $$ SELECT __yagp_stat_messages_reset_f_on_master(); diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql index 657720a88f2..e0e94b51493 100644 --- a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql +++ b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql @@ -6,17 +6,17 @@ CREATE SCHEMA yagpcc; CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagpcc.stat_messages_reset() -RETURNS void +RETURNS SETOF void AS $$ SELECT yagpcc.__stat_messages_reset_f_on_master(); @@ -57,12 +57,12 @@ CREATE VIEW yagpcc.stat_messages AS ORDER BY segid; CREATE FUNCTION yagpcc.__init_log_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_init_log' LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__init_log_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_init_log' LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; @@ -77,17 +77,17 @@ CREATE VIEW yagpcc.log AS ORDER BY tmid, ssid, ccnt; CREATE FUNCTION yagpcc.__truncate_log_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_truncate_log' LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; CREATE FUNCTION 
yagpcc.__truncate_log_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_truncate_log' LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagpcc.truncate_log() -RETURNS void AS $$ +RETURNS SETOF void AS $$ BEGIN PERFORM yagpcc.__truncate_log_on_master(); PERFORM yagpcc.__truncate_log_on_segments(); From 2afca84a39082c59a215980f31ed052057547fd7 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 11:09:32 +0300 Subject: [PATCH 121/133] [yagp_hooks_collector] Change test out for part tbl In gpdb create table was executed for each partition. Now one single create table is executed. Thus only one create table query goes through executor. Change the test accordingly. --- .../expected/yagp_utility.out | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out index d8ab42dd695..057f7d7a556 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out @@ -49,34 +49,16 @@ CREATE TABLE pt_test (a int, b int) DISTRIBUTED BY (a) PARTITION BY RANGE (a) (START (0) END (100) EVERY (50)); -NOTICE: CREATE TABLE will create partition "pt_test_1_prt_1" for table "pt_test" -NOTICE: CREATE TABLE will create partition "pt_test_1_prt_2" for table "pt_test" DROP TABLE pt_test; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT | DISTRIBUTED BY (a) +| | PARTITION BY RANGE (a) +| | (START (0) END (100) EVERY (50)); | - -1 | CREATE TABLE pt_test (a int, b 
int)+| QUERY_STATUS_SUBMIT - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | - -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | - -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | - -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE | DISTRIBUTED BY (a) +| | PARTITION BY RANGE (a) +| @@ -84,7 +66,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT -1 | DROP TABLE pt_test; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT -(10 rows) +(6 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; t From 8f0bf5f66dc07a917adb4e7bb44e8e4bf325dbf2 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 11:13:00 +0300 Subject: [PATCH 122/133] [yagp_hooks_collector] Add CI test and with option Change makefile, test and add it to CI of yagp_hooks_collector. Add option --with-yagp-hooks-collector. Similarly to [1]. 
[1] https://github.com/open-gpdb/gpdb/commit/7be8893ef5453e248bd758a47d847e2217f1ae00 --- .github/workflows/build-cloudberry.yml | 32 ++++++- configure | 28 ++++++ configure.ac | 7 ++ .../scripts/configure-cloudberry.sh | 4 +- gpcontrib/Makefile | 3 + gpcontrib/yagp_hooks_collector/Makefile | 87 ++++++++----------- .../expected/yagp_cursors.out | 10 ++- .../expected/yagp_dist.out | 2 + .../expected/yagp_select.out | 2 + .../expected/yagp_utf8_trim.out | 2 + .../expected/yagp_utility.out | 72 +++++++-------- .../yagp_hooks_collector/sql/yagp_cursors.sql | 2 + .../yagp_hooks_collector/sql/yagp_dist.sql | 2 + .../yagp_hooks_collector/sql/yagp_select.sql | 2 + .../sql/yagp_utf8_trim.sql | 2 + .../yagp_hooks_collector/sql/yagp_utility.sql | 2 + src/Makefile.global.in | 1 + 17 files changed, 167 insertions(+), 93 deletions(-) diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index dcad89f30cb..f2efb231f0e 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -271,6 +271,10 @@ jobs: }, "enable_core_check":false }, + {"test":"gpcontrib-yagp-hooks-collector", + "make_configs":["gpcontrib/yagp_hooks_collector:installcheck"], + "extension":"yagp_hooks_collector" + }, {"test":"ic-expandshrink", "make_configs":["src/test/isolation2:installcheck-expandshrink"] }, @@ -530,10 +534,11 @@ jobs: if: needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + CONFIGURE_EXTRA_OPTS: --with-yagp-hooks-collector run: | set -eo pipefail chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + if ! 
time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi @@ -1390,6 +1395,7 @@ jobs: if: success() && needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + BUILD_DESTINATION: /usr/local/cloudberry-db shell: bash {0} run: | set -o pipefail @@ -1419,6 +1425,30 @@ jobs: PG_OPTS="$PG_OPTS -c optimizer=${{ matrix.pg_settings.optimizer }}" fi + # Create extension if required + if [[ "${{ matrix.extension != '' }}" == "true" ]]; then + case "${{ matrix.extension }}" in + yagp_hooks_collector) + if ! su - gpadmin -c "source ${BUILD_DESTINATION}/greenplum_path.sh && \ + source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ + gpconfig -c shared_preload_libraries -v 'yagp_hooks_collector' && \ + gpstop -ra && \ + echo 'CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; \ + SHOW shared_preload_libraries; \ + TABLE pg_extension;' | \ + psql postgres" + then + echo "Error creating yagp_hooks_collector extension" + exit 1 + fi + ;; + *) + echo "Unknown extension: ${{ matrix.extension }}" + exit 1 + ;; + esac + fi + if [[ "${{ matrix.pg_settings.default_table_access_method != '' }}" == "true" ]]; then PG_OPTS="$PG_OPTS -c default_table_access_method=${{ matrix.pg_settings.default_table_access_method }}" fi diff --git a/configure b/configure index b9371321677..1b06e6965ea 100755 --- a/configure +++ b/configure @@ -722,6 +722,7 @@ with_apr_config with_libcurl with_rt with_zstd +with_yagp_hooks_collector with_libbz2 LZ4_LIBS LZ4_CFLAGS @@ -942,6 +943,7 @@ with_zlib with_lz4 with_libbz2 with_zstd +with_yagp_hooks_collector with_rt with_libcurl with_apr_config @@ -11150,6 +11152,32 @@ $as_echo "yes" >&6; } fi fi +# +# yagp_hooks_collector +# + + + +# Check whether --with-yagp-hooks-collector was given. 
+if test "${with_yagp_hooks_collector+set}" = set; then : + withval=$with_yagp_hooks_collector; + case $withval in + yes) + : + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-yagp-hooks-collector option" "$LINENO" 5 + ;; + esac + +else + with_yagp_hooks_collector=no + +fi + # # Realtime library # diff --git a/configure.ac b/configure.ac index 246edc4846e..792878fde4b 100644 --- a/configure.ac +++ b/configure.ac @@ -1365,6 +1365,13 @@ PGAC_ARG_BOOL(with, zstd, yes, [do not build with Zstandard], AC_MSG_RESULT([$with_zstd]) AC_SUBST(with_zstd) +# +# yagp_hooks_collector +# +PGAC_ARG_BOOL(with, yagp_hooks_collector, no, + [build with YAGP hooks collector extension]) +AC_SUBST(with_yagp_hooks_collector) + if test "$with_zstd" = yes; then dnl zstd_errors.h was renamed from error_public.h in v1.4.0 PKG_CHECK_MODULES([ZSTD], [libzstd >= 1.4.0]) diff --git a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh index bc046695032..0134699b28a 100755 --- a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh +++ b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh @@ -53,6 +53,7 @@ # # Optional Environment Variables: # LOG_DIR - Directory for logs (defaults to ${SRC_DIR}/build-logs) +# CONFIGURE_EXTRA_OPTS - Args to pass to configure command # ENABLE_DEBUG - Enable debug build options (true/false, defaults to # false) # @@ -165,7 +166,8 @@ execute_cmd ./configure --prefix=${BUILD_DESTINATION} \ --with-openssl \ --with-uuid=e2fs \ --with-includes=/usr/local/xerces-c/include \ - --with-libraries=${BUILD_DESTINATION}/lib || exit 4 + --with-libraries=${BUILD_DESTINATION}/lib \ + ${CONFIGURE_EXTRA_OPTS:-""} || exit 4 log_section_end "Configure" # Capture version information diff --git a/gpcontrib/Makefile b/gpcontrib/Makefile index 60fef1778c6..4c92cce4c46 100644 --- a/gpcontrib/Makefile +++ b/gpcontrib/Makefile @@ -33,6 +33,9 @@ else 
pg_hint_plan endif +ifeq "$(with_yagp_hooks_collector)" "yes" + recurse_targets += yagp_hooks_collector +endif ifeq "$(with_zstd)" "yes" recurse_targets += zstd endif diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile index e7132da9b06..be46eb7149c 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -1,58 +1,41 @@ -override CFLAGS = -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=gnu99 -Werror=uninitialized -Werror=implicit-function-declaration -DGPBUILD -override CXXFLAGS = -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation -Wno-stringop-truncation -g -ggdb -std=c++17 -Iinclude -Isrc/protos -Isrc -DGPBUILD -COMMON_CPP_FLAGS := -Isrc -Iinclude -Isrc/stat_statements_parser -PG_CXXFLAGS += $(COMMON_CPP_FLAGS) -SHLIB_LINK += -lprotobuf -lpthread -lstdc++ - -PROTOC = protoc -SRC_DIR = ./src -GEN_DIR = ./src/protos -PROTO_DIR = ./protos -PROTO_GEN_OBJECTS = $(GEN_DIR)/yagpcc_plan.pb.o $(GEN_DIR)/yagpcc_metrics.pb.o \ - $(GEN_DIR)/yagpcc_set_service.pb.o - -$(GEN_DIR)/%.pb.cpp $(GEN_DIR)/%.pb.h: $(PROTO_DIR)/%.proto - sed -i 's/optional //g' $^ - sed -i 's/cloud\/mdb\/yagpcc\/api\/proto\/common\//\protos\//g' $^ - $(PROTOC) --cpp_out=$(SRC_DIR) $^ - mv $(GEN_DIR)/$*.pb.cc $(GEN_DIR)/$*.pb.cpp +MODULE_big = yagp_hooks_collector +EXTENSION = yagp_hooks_collector +DATA = $(wildcard *--*.sql) +REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility -PG_STAT_DIR := $(SRC_DIR)/stat_statements_parser -PG_STAT_OBJS := $(PG_STAT_DIR)/pg_stat_statements_ya_parser.o +PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service 
+PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) -OBJS := $(PG_STAT_OBJS) \ - $(PROTO_GEN_OBJECTS) \ - $(SRC_DIR)/ProcStats.o \ - $(SRC_DIR)/Config.o \ - $(SRC_DIR)/PgUtils.o \ - $(SRC_DIR)/ProtoUtils.o \ - $(SRC_DIR)/YagpStat.o \ - $(SRC_DIR)/UDSConnector.o \ - $(SRC_DIR)/EventSender.o \ - $(SRC_DIR)/hook_wrappers.o \ - $(SRC_DIR)/memory/gpdbwrappers.o \ - $(SRC_DIR)/yagp_hooks_collector.o \ - $(SRC_DIR)/log/LogOps.o \ - $(SRC_DIR)/log/LogSchema.o -EXTRA_CLEAN := $(GEN_DIR) -DATA := $(wildcard *--*.sql) -EXTENSION := yagp_hooks_collector -EXTVERSION := $(shell grep default_version $(EXTENSION).control | \ - sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/") -MODULE_big := yagp_hooks_collector -PG_CONFIG := pg_config -PGXS := $(shell $(PG_CONFIG) --pgxs) -include $(PGXS) +C_OBJS = $(patsubst %.c,%.o,$(wildcard src/*.c src/*/*.c)) +CPP_OBJS = $(patsubst %.cpp,%.o,$(wildcard src/*.cpp src/*/*.cpp)) +OBJS = $(C_OBJS) $(CPP_OBJS) $(PROTO_OBJS) -$(GEN_DIR)/yagpcc_set_service.pb.o: $(GEN_DIR)/yagpcc_metrics.pb.h +override CXXFLAGS = -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels \ + -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv \ + -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation \ + -Wno-stringop-truncation -g -ggdb -std=c++17 -Iinclude -Isrc/protos -Isrc -DGPBUILD -PROTO_INCLUDES = $(GEN_DIR)/yagpcc_set_service.pb.h $(GEN_DIR)/yagpcc_metrics.pb.h $(GEN_DIR)/yagpcc_plan.pb.h -$(SRC_DIR)/UDSConnector.o: $(PROTO_INCLUDES) src/log/LogOps.h -$(SRC_DIR)/ProtoUtils.o: $(PROTO_INCLUDES) -$(SRC_DIR)/EventSender.o: $(PROTO_INCLUDES) -$(SRC_DIR)/ProcStats.o: $(GEN_DIR)/yagpcc_metrics.pb.h -$(SRC_DIR)/log/LogOps.o: $(PROTO_INCLUDES) +PG_CXXFLAGS += -Isrc -Iinclude +SHLIB_LINK += -lprotobuf -lpthread -lstdc++ +EXTRA_CLEAN = src/protos -gen: $(PROTO_GEN_OBJECTS) +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = gpcontrib/yagp_hooks_collector 
+top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +src/protos/%.pb.cpp src/protos/%.pb.h: protos/%.proto + @mkdir -p src/protos + sed -i 's/optional //g' $^ + sed -i 's|cloud/mdb/yagpcc/api/proto/common/|protos/|g' $^ + protoc -I /usr/include -I /usr/local/include -I . --cpp_out=src $^ + mv src/protos/$*.pb.cc src/protos/$*.pb.cpp -.DEFAULT_GOAL := all +$(CPP_OBJS): src/protos/yagpcc_metrics.pb.h src/protos/yagpcc_plan.pb.h src/protos/yagpcc_set_service.pb.h +src/protos/yagpcc_set_service.pb.o: src/protos/yagpcc_metrics.pb.h diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out b/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out index d251ddd3e1c..46e124df5e8 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out @@ -12,6 +12,7 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; @@ -25,7 +26,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT @@ -54,7 +55,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET 
yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT @@ -86,7 +87,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT @@ -129,7 +130,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT @@ -159,3 +160,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out b/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out index 5fd5ea5fb3e..3b1e3504923 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out @@ -12,6 +12,7 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.enable_utility TO FALSE; @@ -171,3 +172,4 @@ DROP EXTENSION 
yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_select.out b/gpcontrib/yagp_hooks_collector/expected/yagp_select.out index b6e18dc862f..af08f2d1def 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_select.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_select.out @@ -12,6 +12,7 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.enable_utility TO FALSE; @@ -132,3 +133,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out b/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out index 194ee6b3609..9de126dd882 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out @@ -7,6 +7,7 @@ RETURNS TEXT AS $$ ORDER BY datetime DESC LIMIT 1 $$ LANGUAGE sql VOLATILE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; -- Test 1: 1 byte chars SET yagpcc.max_text_size to 19; @@ -63,4 +64,5 @@ DROP FUNCTION get_marked_query(TEXT); RESET yagpcc.max_text_size; RESET yagpcc.logging_mode; RESET yagpcc.enable; +RESET yagpcc.ignored_users_list; DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out index 057f7d7a556..0a77859d8d4 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out @@ -12,6 +12,7 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.enable_utility TO TRUE; SET 
yagpcc.report_nested_queries TO TRUE; @@ -26,7 +27,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT @@ -83,7 +84,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+------------------------------------------------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT @@ -113,26 +114,26 @@ BEGIN; ROLLBACK; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | query_text | query_status --------+----------------------------+--------------------- - -1 | | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE - -1 | COMMIT; | QUERY_STATUS_SUBMIT - -1 | COMMIT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - 
-1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | ROLLBACK; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + segid | query_text | query_status +-------+-----------------------------------+--------------------- + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT (18 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -153,7 +154,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT @@ -176,16 +177,16 @@ COPY (SELECT 1) TO STDOUT; DROP TABLE copy_test; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, 
yagp_status_order(query_status) ASC; - segid | query_text | query_status --------+---------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE - -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT - -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + segid | query_text | query_status +-------+-----------------------------------+--------------------- + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE + -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT (8 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -203,7 +204,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT @@ -226,7 +227,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status 
-------+--------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_SUBMIT @@ -244,3 +245,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql index 5d5bde58110..f56351e0d43 100644 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql @@ -14,6 +14,7 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; @@ -81,3 +82,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql index b837ef05335..d5519d0cd96 100644 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql @@ -14,6 +14,7 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.enable_utility TO FALSE; @@ -84,3 +85,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql index 4038c6b7b63..90e972ae4c1 100644 --- 
a/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql @@ -14,6 +14,7 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.enable_utility TO FALSE; @@ -65,3 +66,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql index c0fdcce24a5..c3053e4af0c 100644 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql @@ -9,6 +9,7 @@ RETURNS TEXT AS $$ LIMIT 1 $$ LANGUAGE sql VOLATILE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; -- Test 1: 1 byte chars @@ -39,5 +40,6 @@ DROP FUNCTION get_marked_query(TEXT); RESET yagpcc.max_text_size; RESET yagpcc.logging_mode; RESET yagpcc.enable; +RESET yagpcc.ignored_users_list; DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql index b4cca6f5421..cf9c1d253d0 100644 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql @@ -14,6 +14,7 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; @@ -131,3 +132,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 062ec75b039..edc49b72e05 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -271,6 +271,7 @@ with_zstd = @with_zstd@ ZSTD_CFLAGS = 
@ZSTD_CFLAGS@ ZSTD_LIBS = @ZSTD_LIBS@ EVENT_LIBS = @EVENT_LIBS@ +with_yagp_hooks_collector = @with_yagp_hooks_collector@ ########################################################################## # From 9be5f14bfe68fa1c9edf40cb0b1f833f233fd3de Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 11:56:15 +0300 Subject: [PATCH 123/133] [yagp_hooks_collector] Change greenplum_path.sh to cloudberry-env.sh Correct CI for yagp_hooks_collector to use correct env script. --- .github/workflows/build-cloudberry.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index f2efb231f0e..27590205548 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -1429,7 +1429,7 @@ jobs: if [[ "${{ matrix.extension != '' }}" == "true" ]]; then case "${{ matrix.extension }}" in yagp_hooks_collector) - if ! su - gpadmin -c "source ${BUILD_DESTINATION}/greenplum_path.sh && \ + if ! 
su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ gpconfig -c shared_preload_libraries -v 'yagp_hooks_collector' && \ gpstop -ra && \ From b0b2e7b0e9ab3af066193ef9f18a41e4b8f0522b Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 17:05:08 +0300 Subject: [PATCH 124/133] [yagp_hooks_collector] Add comments for func args --- gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp b/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp index cec9e33693a..56bdf1dca62 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp +++ b/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp @@ -38,9 +38,9 @@ void init_log() { log_relname.data() /* relname */, namespaceId /* namespace */, 0 /* tablespace */, InvalidOid /* relid */, InvalidOid /* reltype oid */, InvalidOid /* reloftypeid */, GetUserId() /* owner */, HEAP_TABLE_AM_OID, - DescribeTuple() /* rel tuple */, NIL, RELKIND_RELATION, - RELPERSISTENCE_PERMANENT, false, false, ONCOMMIT_NOOP, - NULL /* GP Policy */, (Datum)0, false /* use_user_acl */, true, true, + DescribeTuple() /* rel tuple */, NIL /* cooked_constraints */, RELKIND_RELATION, + RELPERSISTENCE_PERMANENT, false /* shared_relation */, false /* mapped_relation */, ONCOMMIT_NOOP, + NULL /* GP Policy */, (Datum)0 /* reloptions */, false /* use_user_acl */, true /* allow_system_table_mods */, true /* is_internal */, InvalidOid /* relrewrite */, NULL /* typaddress */, false /* valid_opts */); From 7cff09c9ade4bc71afedd7f69a1b6d5b28e899c7 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 17:07:41 +0300 Subject: [PATCH 125/133] [yagp_hooks_collector] Correct tokens from gram.y Correct defines for token ids copied from gram.y. 
--- .../pg_stat_statements_ya_parser.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c index c19805ce506..54c8b2cf59f 100644 --- a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -17,20 +17,20 @@ #include "pg_stat_statements_ya_parser.h" -#ifndef ICONST -#define ICONST 276 -#endif #ifndef FCONST -#define FCONST 277 +#define FCONST 260 #endif #ifndef SCONST -#define SCONST 278 +#define SCONST 261 #endif #ifndef BCONST -#define BCONST 279 +#define BCONST 263 #endif #ifndef XCONST -#define XCONST 280 +#define XCONST 264 +#endif +#ifndef ICONST +#define ICONST 266 #endif static void fill_in_constant_lengths(JumbleState *jstate, const char *query); From f3015f790f3108354c796009e8fd6556603a2159 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 20 Jan 2026 17:45:15 +0300 Subject: [PATCH 126/133] [yagp_hooks_collector] Add consistent filtering Full copy of [1] for yagp_hooks_collector. 
[1] https://github.com/open-gpdb/gpdb/commit/845278f7bb7f72d1b1bc2cfd5fc0e7aef88610b2#diff-fa2654417413bbb37d47ecf1644dc5af90c76c77f2a90e05c27107967b5f6fd8 --- gpcontrib/yagp_hooks_collector/Makefile | 2 +- .../expected/yagp_cursors.out | 8 +- .../expected/yagp_guc_cache.out | 57 ++++++++++++ .../expected/yagp_utility.out | 72 +++++++-------- .../sql/yagp_guc_cache.sql | 43 +++++++++ gpcontrib/yagp_hooks_collector/src/Config.cpp | 90 +++++++++---------- gpcontrib/yagp_hooks_collector/src/Config.h | 49 +++++++--- .../yagp_hooks_collector/src/EventSender.cpp | 68 ++++++++------ .../yagp_hooks_collector/src/EventSender.h | 10 ++- .../yagp_hooks_collector/src/PgUtils.cpp | 14 --- gpcontrib/yagp_hooks_collector/src/PgUtils.h | 3 - .../yagp_hooks_collector/src/ProtoUtils.cpp | 28 +++--- .../yagp_hooks_collector/src/ProtoUtils.h | 13 ++- .../yagp_hooks_collector/src/UDSConnector.cpp | 5 +- .../yagp_hooks_collector/src/UDSConnector.h | 6 +- .../src/hook_wrappers.cpp | 2 +- 16 files changed, 301 insertions(+), 169 deletions(-) create mode 100644 gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out create mode 100644 gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile index be46eb7149c..79f5401c8d1 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -1,7 +1,7 @@ MODULE_big = yagp_hooks_collector EXTENSION = yagp_hooks_collector DATA = $(wildcard *--*.sql) -REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility +REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out b/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out index 46e124df5e8..df12e3e1b66 100644 --- 
a/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out @@ -26,7 +26,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT @@ -36,6 +35,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | COMMIT; | QUERY_STATUS_SUBMIT -1 | COMMIT; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (10 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -55,7 +55,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT @@ -69,6 +68,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | COMMIT; | QUERY_STATUS_SUBMIT -1 | COMMIT; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (14 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -87,7 +87,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, 
yagp_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT @@ -99,6 +98,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | ROLLBACK; | QUERY_STATUS_SUBMIT -1 | ROLLBACK; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (12 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -130,7 +130,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT @@ -148,6 +147,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | COMMIT; | QUERY_STATUS_SUBMIT -1 | COMMIT; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (18 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out b/gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out new file mode 100644 index 00000000000..3085cfa42e1 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out @@ -0,0 +1,57 @@ +-- +-- Test GUC caching for query lifecycle consistency. +-- +-- The extension logs SUBMIT and DONE events for each query. 
+-- GUC values that control logging (enable_utility, ignored_users_list, ...) +-- must be cached at SUBMIT time to ensure DONE uses the same filtering +-- criteria. Otherwise, a SET command that modifies these GUCs would +-- have its DONE event rejected, creating orphaned SUBMIT entries. +-- This is due to query being actually executed between SUBMIT and DONE. +-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +SELECT yagpcc.truncate_log(); +-- end_ignore +CREATE OR REPLACE FUNCTION print_last_query(query text) +RETURNS TABLE(query_status text) AS $$ + SELECT query_status + FROM yagpcc.log + WHERE segid = -1 AND query_text = query + ORDER BY ccnt DESC +$$ LANGUAGE sql; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.logging_mode TO 'TBL'; +-- SET below disables utility logging and DONE must still be logged. +SET yagpcc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET yagpcc.enable_utility TO FALSE;'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_DONE +(2 rows) + +-- SELECT below adds current user to ignore list and DONE must still be logged. 
+-- start_ignore +SELECT set_config('yagpcc.ignored_users_list', current_user, false); + set_config +------------ + gpadmin +(1 row) + +-- end_ignore +SELECT * FROM print_last_query('SELECT set_config(''yagpcc.ignored_users_list'', current_user, false);'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_START + QUERY_STATUS_END + QUERY_STATUS_DONE +(4 rows) + +DROP FUNCTION print_last_query(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out index 0a77859d8d4..7df1d2816eb 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out @@ -27,7 +27,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT @@ -37,6 +36,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DROP TABLE test_table; | QUERY_STATUS_SUBMIT -1 | DROP TABLE test_table; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (10 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -55,7 +55,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) 
ASC; segid | query_text | query_status -------+-------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT | DISTRIBUTED BY (a) +| | PARTITION BY RANGE (a) +| @@ -67,6 +66,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT -1 | DROP TABLE pt_test; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (6 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -84,7 +84,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+------------------------------------------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT @@ -94,6 +93,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_SUBMIT -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (10 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -114,26 +114,26 @@ BEGIN; ROLLBACK; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | query_text | query_status --------+-----------------------------------+--------------------- - -1 | SET 
yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE - -1 | COMMIT; | QUERY_STATUS_SUBMIT - -1 | COMMIT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | ROLLBACK; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + segid | query_text | query_status +-------+----------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (18 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -154,12 +154,12 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE dml_test (a int, b text); | 
QUERY_STATUS_SUBMIT -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT -1 | DROP TABLE dml_test; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (6 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -177,16 +177,16 @@ COPY (SELECT 1) TO STDOUT; DROP TABLE copy_test; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | query_text | query_status --------+-----------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE - -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT - -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + segid | query_text | query_status +-------+---------------------------------+--------------------- + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE + -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (8 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -204,7 +204,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status 
-------+-------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT @@ -212,6 +211,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DEALLOCATE test_prep; | QUERY_STATUS_SUBMIT -1 | DEALLOCATE test_prep; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (8 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -227,12 +227,12 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+--------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_SUBMIT -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (6 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql new file mode 100644 index 00000000000..9e6de69d61e --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql @@ -0,0 +1,43 @@ +-- +-- Test GUC caching for query lifecycle consistency. +-- +-- The extension logs SUBMIT and DONE events for each query. +-- GUC values that control logging (enable_utility, ignored_users_list, ...) 
+-- must be cached at SUBMIT time to ensure DONE uses the same filtering +-- criteria. Otherwise, a SET command that modifies these GUCs would +-- have its DONE event rejected, creating orphaned SUBMIT entries. +-- This is due to query being actually executed between SUBMIT and DONE. +-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +SELECT yagpcc.truncate_log(); +-- end_ignore + +CREATE OR REPLACE FUNCTION print_last_query(query text) +RETURNS TABLE(query_status text) AS $$ + SELECT query_status + FROM yagpcc.log + WHERE segid = -1 AND query_text = query + ORDER BY ccnt DESC +$$ LANGUAGE sql; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.logging_mode TO 'TBL'; + +-- SET below disables utility logging and DONE must still be logged. +SET yagpcc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET yagpcc.enable_utility TO FALSE;'); + +-- SELECT below adds current user to ignore list and DONE must still be logged. 
+-- start_ignore +SELECT set_config('yagpcc.ignored_users_list', current_user, false); +-- end_ignore +SELECT * FROM print_last_query('SELECT set_config(''yagpcc.ignored_users_list'', current_user, false);'); + +DROP FUNCTION print_last_query(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/src/Config.cpp b/gpcontrib/yagp_hooks_collector/src/Config.cpp index dbd7e25b483..4fb58677018 100644 --- a/gpcontrib/yagp_hooks_collector/src/Config.cpp +++ b/gpcontrib/yagp_hooks_collector/src/Config.cpp @@ -27,45 +27,13 @@ static const struct config_enum_entry logging_mode_options[] = { {"tbl", LOG_MODE_TBL, false}, {NULL, 0, false}}; -static std::unique_ptr> ignored_users_set = - nullptr; static bool ignored_users_guc_dirty = false; -static void update_ignored_users(const char *new_guc_ignored_users) { - auto new_ignored_users_set = - std::make_unique>(); - if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { - /* Need a modifiable copy of string */ - char *rawstring = ya_gpdb::pstrdup(new_guc_ignored_users); - List *elemlist; - ListCell *l; - - /* Parse string into list of identifiers */ - if (!ya_gpdb::split_identifier_string(rawstring, ',', &elemlist)) { - /* syntax error in list */ - ya_gpdb::pfree(rawstring); - ya_gpdb::list_free(elemlist); - ereport( - LOG, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg( - "invalid list syntax in parameter yagpcc.ignored_users_list"))); - return; - } - foreach (l, elemlist) { - new_ignored_users_set->insert((char *)lfirst(l)); - } - ya_gpdb::pfree(rawstring); - ya_gpdb::list_free(elemlist); - } - ignored_users_set = std::move(new_ignored_users_set); -} - static void assign_ignored_users_hook(const char *, void *) { ignored_users_guc_dirty = true; } -void Config::init() { +void Config::init_gucs() { DefineCustomStringVariable( "yagpcc.uds_path", "Sets filesystem path 
of the agent socket", 0LL, &guc_uds_path, "/tmp/yagpcc_agent.sock", PGC_SUSET, @@ -128,22 +96,40 @@ void Config::init() { GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); } -std::string Config::uds_path() { return guc_uds_path; } -bool Config::enable_analyze() { return guc_enable_analyze; } -bool Config::enable_cdbstats() { return guc_enable_cdbstats; } -bool Config::enable_collector() { return guc_enable_collector; } -bool Config::enable_utility() { return guc_enable_utility; } -bool Config::report_nested_queries() { return guc_report_nested_queries; } -size_t Config::max_text_size() { return guc_max_text_size; } -size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } -int Config::min_analyze_time() { return guc_min_analyze_time; }; -int Config::logging_mode() { return guc_logging_mode; } - -bool Config::filter_user(std::string username) { - if (!ignored_users_set) { +void Config::update_ignored_users(const char *new_guc_ignored_users) { + auto new_ignored_users_set = std::make_unique(); + if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { + /* Need a modifiable copy of string */ + char *rawstring = ya_gpdb::pstrdup(new_guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!ya_gpdb::split_identifier_string(rawstring, ',', &elemlist)) { + /* syntax error in list */ + ya_gpdb::pfree(rawstring); + ya_gpdb::list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter yagpcc.ignored_users_list"))); + return; + } + foreach (l, elemlist) { + new_ignored_users_set->insert((char *)lfirst(l)); + } + ya_gpdb::pfree(rawstring); + ya_gpdb::list_free(elemlist); + } + ignored_users_ = std::move(new_ignored_users_set); +} + +bool Config::filter_user(const std::string &username) const { + if (!ignored_users_) { return true; } - return ignored_users_set->find(username) != ignored_users_set->end(); + return 
ignored_users_->find(username) != ignored_users_->end(); } void Config::sync() { @@ -151,4 +137,14 @@ void Config::sync() { update_ignored_users(guc_ignored_users); ignored_users_guc_dirty = false; } + uds_path_ = guc_uds_path; + enable_analyze_ = guc_enable_analyze; + enable_cdbstats_ = guc_enable_cdbstats; + enable_collector_ = guc_enable_collector; + enable_utility_ = guc_enable_utility; + report_nested_queries_ = guc_report_nested_queries; + max_text_size_ = static_cast(guc_max_text_size); + max_plan_size_ = static_cast(guc_max_plan_size); + min_analyze_time_ = guc_min_analyze_time; + logging_mode_ = guc_logging_mode; } diff --git a/gpcontrib/yagp_hooks_collector/src/Config.h b/gpcontrib/yagp_hooks_collector/src/Config.h index 7501c727a44..b4a393b0383 100644 --- a/gpcontrib/yagp_hooks_collector/src/Config.h +++ b/gpcontrib/yagp_hooks_collector/src/Config.h @@ -1,23 +1,44 @@ #pragma once +#include #include +#include #define LOG_MODE_UDS 0 #define LOG_MODE_TBL 1 +using IgnoredUsers = std::unordered_set; + class Config { public: - static void init(); - static std::string uds_path(); - static bool enable_analyze(); - static bool enable_cdbstats(); - static bool enable_collector(); - static bool enable_utility(); - static bool filter_user(std::string username); - static bool report_nested_queries(); - static size_t max_text_size(); - static size_t max_plan_size(); - static int min_analyze_time(); - static int logging_mode(); - static void sync(); -}; \ No newline at end of file + static void init_gucs(); + + void sync(); + + const std::string &uds_path() const { return uds_path_; } + bool enable_analyze() const { return enable_analyze_; } + bool enable_cdbstats() const { return enable_cdbstats_; } + bool enable_collector() const { return enable_collector_; } + bool enable_utility() const { return enable_utility_; } + bool report_nested_queries() const { return report_nested_queries_; } + size_t max_text_size() const { return max_text_size_; } + size_t 
max_plan_size() const { return max_plan_size_ * 1024; } + int min_analyze_time() const { return min_analyze_time_; } + int logging_mode() const { return logging_mode_; } + bool filter_user(const std::string &username) const; + +private: + void update_ignored_users(const char *new_guc_ignored_users); + + std::unique_ptr ignored_users_; + std::string uds_path_; + bool enable_analyze_; + bool enable_cdbstats_; + bool enable_collector_; + bool enable_utility_; + bool report_nested_queries_; + size_t max_text_size_; + size_t max_plan_size_; + int min_analyze_time_; + int logging_mode_; +}; diff --git a/gpcontrib/yagp_hooks_collector/src/EventSender.cpp b/gpcontrib/yagp_hooks_collector/src/EventSender.cpp index d638d275548..853a0c43fb9 100644 --- a/gpcontrib/yagp_hooks_collector/src/EventSender.cpp +++ b/gpcontrib/yagp_hooks_collector/src/EventSender.cpp @@ -1,4 +1,3 @@ -#include "Config.h" #include "UDSConnector.h" #include "memory/gpdbwrappers.h" #include "log/LogOps.h" @@ -22,10 +21,8 @@ extern "C" { #include "ProtoUtils.h" #define need_collect_analyze() \ - (Gp_role == GP_ROLE_DISPATCH && Config::min_analyze_time() >= 0 && \ - Config::enable_analyze()) - -static bool enable_utility = Config::enable_utility(); + (Gp_role == GP_ROLE_DISPATCH && config.min_analyze_time() >= 0 && \ + config.enable_analyze()) bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, bool utility) { @@ -38,16 +35,16 @@ bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, switch (state) { case QueryState::SUBMIT: - // Cache enable_utility at SUBMIT to ensure consistent behavior at DONE. - // Without caching, a query that sets enable_utility to false from true - // would be accepted at SUBMIT (guc is true) but rejected at DONE (guc - // is false), causing a leak. - enable_utility = Config::enable_utility(); - if (utility && enable_utility == false) { + // Cache GUCs once at SUBMIT. Synced GUCs are visible to all subsequent + // states. 
Without caching, a query that unsets/sets filtering GUCs would + // see different filter criteria at DONE, because at SUBMIT the query was + // not executed yet, causing DONE to be skipped/added. + config.sync(); + + if (utility && !config.enable_utility()) { return false; } - // Sync config in case current query changes it. - Config::sync(); + // Register qkey for a nested query we won't report, // so we can detect nesting_level > 0 and skip reporting at end/done. if (!need_report_nested_query() && nesting_level > 0) { @@ -65,7 +62,7 @@ bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, } break; case QueryState::DONE: - if (utility && enable_utility == false) { + if (utility && !config.enable_utility()) { return false; } default: @@ -85,9 +82,9 @@ bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, bool EventSender::log_query_req(const yagpcc::SetQueryReq &req, const std::string &event, bool utility) { bool clear_big_fields = false; - switch (Config::logging_mode()) { + switch (config.logging_mode()) { case LOG_MODE_UDS: - clear_big_fields = UDSConnector::report_query(req, event); + clear_big_fields = UDSConnector::report_query(req, event, config); break; case LOG_MODE_TBL: ya_gpdb::insert_log(req, utility); @@ -135,12 +132,12 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { return; } - if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && + if (Gp_role == GP_ROLE_DISPATCH && config.enable_analyze() && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; - if (Config::enable_cdbstats()) { + if (config.enable_cdbstats()) { query_desc->instrument_options |= INSTRUMENT_CDB; if (!query_desc->showstatctx) { instr_time starttime; @@ -161,7 +158,7 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { auto query_msg = 
query.message.get(); *query_msg->mutable_start_time() = current_ts(); update_query_state(query, QueryState::START, false /* utility */); - set_query_plan(query_msg, query_desc); + set_query_plan(query_msg, query_desc, config); if (need_collect_analyze()) { // Set up to track total elapsed time during query run. // Make sure the space is allocated in the per-query @@ -214,7 +211,7 @@ void EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) { set_query_info(query_msg); set_qi_nesting_level(query_msg, nesting_level); set_qi_slice_id(query_msg); - set_query_text(query_msg, query_desc); + set_query_text(query_msg, query_desc, config); if (log_query_req(*query_msg, "submit", utility)) { clear_big_fields(query_msg); } @@ -271,8 +268,8 @@ void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, ereport(DEBUG3, (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); } else { - set_qi_error_message(query_msg, - error_flushed ? edata->message : elog_message()); + set_qi_error_message( + query_msg, error_flushed ? 
edata->message : elog_message(), config); } } if (prev_state == START) { @@ -331,8 +328,8 @@ void EventSender::ic_metrics_collect() { if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) { return; } - if (!proto_verified || gp_command_count == 0 || !Config::enable_collector() || - Config::filter_user(get_user_name())) { + if (!proto_verified || gp_command_count == 0 || !config.enable_collector() || + config.filter_user(get_user_name())) { return; } // we also would like to know nesting level here and filter queries BUT we @@ -374,15 +371,18 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { ya_gpdb::instr_end_loop(query_desc->totaltime); double ms = query_desc->totaltime->total * 1000.0; - if (ms >= Config::min_analyze_time()) { + if (ms >= config.min_analyze_time()) { auto &query = get_query(query_desc); auto *query_msg = query.message.get(); - set_analyze_plan_text(query_desc, query_msg); + set_analyze_plan_text(query_desc, query_msg, config); } } EventSender::EventSender() { - if (Config::enable_collector()) { + // Perform initial sync to get default GUC values + config.sync(); + + if (config.enable_collector()) { try { GOOGLE_PROTOBUF_VERIFY_VERSION; proto_verified = true; @@ -486,5 +486,19 @@ bool EventSender::qdesc_submitted(QueryDesc *query_desc) { return queries.find(QueryKey::from_qdesc(query_desc)) != queries.end(); } +bool EventSender::nesting_is_valid(QueryDesc *query_desc, int nesting_level) { + return need_report_nested_query() || + is_top_level_query(query_desc, nesting_level); +} + +bool EventSender::need_report_nested_query() { + return config.report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; +} + +bool EventSender::filter_query(QueryDesc *query_desc) { + return gp_command_count == 0 || query_desc->sourceText == nullptr || + !config.enable_collector() || config.filter_user(get_user_name()); +} + EventSender::QueryItem::QueryItem(QueryState st) : message(std::make_unique()), state(st) {} diff --git 
a/gpcontrib/yagp_hooks_collector/src/EventSender.h b/gpcontrib/yagp_hooks_collector/src/EventSender.h index 6e195eeacdf..e9acb04422b 100644 --- a/gpcontrib/yagp_hooks_collector/src/EventSender.h +++ b/gpcontrib/yagp_hooks_collector/src/EventSender.h @@ -14,6 +14,7 @@ extern "C" { #undef typeid #include "memory/gpdbwrappers.h" +#include "Config.h" class UDSConnector; struct QueryDesc; @@ -108,8 +109,8 @@ class EventSender { explicit QueryItem(QueryState st); }; - static bool log_query_req(const yagpcc::SetQueryReq &req, - const std::string &event, bool utility); + bool log_query_req(const yagpcc::SetQueryReq &req, const std::string &event, + bool utility); bool verify_query(QueryDesc *query_desc, QueryState state, bool utility); void update_query_state(QueryItem &query, QueryState new_state, bool utility, bool success = true); @@ -123,6 +124,9 @@ class EventSender { QueryMetricsStatus status, ErrorData *edata = NULL); void update_nested_counters(QueryDesc *query_desc); bool qdesc_submitted(QueryDesc *query_desc); + bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); + bool need_report_nested_query(); + bool filter_query(QueryDesc *query_desc); bool proto_verified = false; int nesting_level = 0; @@ -132,4 +136,6 @@ class EventSender { ICStatistics ic_statistics; #endif std::unordered_map queries; + + Config config; }; \ No newline at end of file diff --git a/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp b/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp index 96f46429643..7e53abdabbf 100644 --- a/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp +++ b/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp @@ -65,17 +65,3 @@ bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { } return query_desc->yagp_query_key->nesting_level == 0; } - -bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { - return need_report_nested_query() || - is_top_level_query(query_desc, nesting_level); -} - -bool need_report_nested_query() { - return 
Config::report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; -} - -bool filter_query(QueryDesc *query_desc) { - return gp_command_count == 0 || query_desc->sourceText == nullptr || - !Config::enable_collector() || Config::filter_user(get_user_name()); -} diff --git a/gpcontrib/yagp_hooks_collector/src/PgUtils.h b/gpcontrib/yagp_hooks_collector/src/PgUtils.h index 02f084c597a..e9715ce10f4 100644 --- a/gpcontrib/yagp_hooks_collector/src/PgUtils.h +++ b/gpcontrib/yagp_hooks_collector/src/PgUtils.h @@ -9,6 +9,3 @@ std::string get_user_name(); std::string get_db_name(); std::string get_rg_name(); bool is_top_level_query(QueryDesc *query_desc, int nesting_level); -bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); -bool need_report_nested_query(); -bool filter_query(QueryDesc *query_desc); diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp index aa8632477f5..8ebbe19e289 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp +++ b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp @@ -82,7 +82,8 @@ std::string trim_str_shrink_utf8(const char *str, size_t len, size_t lim) { return std::string(str, cut_pos); } -void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config) { if (Gp_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { auto qi = req->mutable_query_info(); qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER @@ -93,10 +94,10 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { ExplainState es = ya_gpdb::get_explain_state(query_desc, true); if (es.str) { *qi->mutable_plan_text() = trim_str_shrink_utf8(es.str->data, es.str->len, - Config::max_plan_size()); + config.max_plan_size()); StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); *qi->mutable_template_plan_text() = trim_str_shrink_utf8( - norm_plan->data, norm_plan->len, 
Config::max_plan_size()); + norm_plan->data, norm_plan->len, config.max_plan_size()); qi->set_plan_id( hash_any((unsigned char *)norm_plan->data, norm_plan->len)); qi->set_query_id(query_desc->plannedstmt->queryId); @@ -107,15 +108,16 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { } } -void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config) { if (Gp_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); *qi->mutable_query_text() = trim_str_shrink_utf8( query_desc->sourceText, strlen(query_desc->sourceText), - Config::max_text_size()); + config.max_text_size()); char *norm_query = ya_gpdb::gen_normquery(query_desc->sourceText); *qi->mutable_template_query_text() = trim_str_shrink_utf8( - norm_query, strlen(norm_query), Config::max_text_size()); + norm_query, strlen(norm_query), config.max_text_size()); } } @@ -150,10 +152,11 @@ void set_qi_slice_id(yagpcc::SetQueryReq *req) { aqi->set_slice_id(currentSliceId); } -void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg) { +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg, + const Config &config) { auto aqi = req->mutable_add_info(); *aqi->mutable_error_message() = - trim_str_shrink_utf8(err_msg, strlen(err_msg), Config::max_text_size()); + trim_str_shrink_utf8(err_msg, strlen(err_msg), config.max_text_size()); } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, @@ -257,7 +260,8 @@ double protots_to_double(const google::protobuf::Timestamp &ts) { return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; } -void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req) { +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req, + const Config &config) { // Make sure it is a valid txn and it is not an utility // statement for ExplainPrintPlan() later. 
if (!IsTransactionState() || !query_desc->plannedstmt) { @@ -266,15 +270,15 @@ void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req) { MemoryContext oldcxt = ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); ExplainState es = ya_gpdb::get_analyze_state( - query_desc, query_desc->instrument_options && Config::enable_analyze()); + query_desc, query_desc->instrument_options && config.enable_analyze()); ya_gpdb::mem_ctx_switch_to(oldcxt); if (es.str) { // Remove last line break. if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { es.str->data[--es.str->len] = '\0'; } - auto trimmed_analyze = trim_str_shrink_utf8(es.str->data, es.str->len, - Config::max_plan_size()); + auto trimmed_analyze = + trim_str_shrink_utf8(es.str->data, es.str->len, config.max_plan_size()); req->mutable_query_info()->set_analyze_text(trimmed_analyze); ya_gpdb::pfree(es.str->data); } diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h index 725a634f765..37b7e4a8a29 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h +++ b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h @@ -4,19 +4,24 @@ struct QueryDesc; struct ICStatistics; +class Config; google::protobuf::Timestamp current_ts(); -void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc); -void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc); +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config); +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config); void clear_big_fields(yagpcc::SetQueryReq *req); void set_query_info(yagpcc::SetQueryReq *req); void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level); void set_qi_slice_id(yagpcc::SetQueryReq *req); -void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg); +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg, + const Config 
&config); void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, int nested_calls, double nested_time); void set_ic_stats(yagpcc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics); yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); double protots_to_double(const google::protobuf::Timestamp &ts); -void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *message); \ No newline at end of file +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *message, + const Config &config); diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp index a7eaed539f7..74fd57a3ac0 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp +++ b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp @@ -25,10 +25,11 @@ static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, } bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, - const std::string &event) { + const std::string &event, + const Config &config) { sockaddr_un address; address.sun_family = AF_UNIX; - std::string uds_path = Config::uds_path(); + const std::string &uds_path = config.uds_path(); if (uds_path.size() >= sizeof(address.sun_path)) { ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); YagpStat::report_error(); diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.h b/gpcontrib/yagp_hooks_collector/src/UDSConnector.h index f0dfcb77a3f..9483407159d 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.h +++ b/gpcontrib/yagp_hooks_collector/src/UDSConnector.h @@ -2,8 +2,10 @@ #include "protos/yagpcc_set_service.pb.h" +class Config; + class UDSConnector { public: bool static report_query(const yagpcc::SetQueryReq &req, - const std::string &event); -}; \ No newline at end of file + const std::string &event, const Config &config); +}; diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp 
b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp index 56c1da9f4f6..8cf74641c29 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp +++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp @@ -71,7 +71,7 @@ R cpp_call(T *obj, R (T::*func)(Args...), Args... args) { } void hooks_init() { - Config::init(); + Config::init_gucs(); YagpStat::init(); previous_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = ya_ExecutorStart_hook; From 7ab24edd4151090650e8c31b45a3a6b829bb9e7f Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 20 Jan 2026 16:34:37 +0300 Subject: [PATCH 127/133] [yagp_hooks_collector] Add submit & done hooks Similarly to [1] add missing executor query info hooks. [1] https://github.com/open-gpdb/gpdb/pull/253/changes/87fc05dc4e6455ecde42c9e0d9ba33dc40d5e9fe --- src/backend/commands/createas.c | 8 ++++---- src/backend/commands/matview.c | 5 +++++ src/backend/commands/portalcmds.c | 5 +++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 6822032fe0d..a3d2f155fd8 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -478,10 +478,6 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, dest, params, queryEnv, 0); } - /* GPDB hook for collecting query info */ - if (query_info_collect_hook) - (*query_info_collect_hook)(METRICS_QUERY_SUBMIT, queryDesc); - if (into->skipData) { /* @@ -495,6 +491,10 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, } else { + /* GPDB hook for collecting query info */ + if (query_info_collect_hook) + (*query_info_collect_hook)(METRICS_QUERY_SUBMIT, queryDesc); + check_and_unassign_from_resgroup(queryDesc->plannedstmt); queryDesc->plannedstmt->query_mem = ResourceManagerGetQueryMemoryLimit(queryDesc->plannedstmt); diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 1555ea9d334..dc8efd4d892 100644 --- 
a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -63,6 +63,7 @@ #include "tcop/tcopprot.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/metrics_utils.h" #include "utils/rel.h" #include "utils/snapmgr.h" #include "utils/syscache.h" @@ -842,6 +843,10 @@ refresh_matview_datafill(DestReceiver *dest, Query *query, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); + /* GPDB hook for collecting query info */ + if (query_info_collect_hook) + (*query_info_collect_hook)(METRICS_QUERY_SUBMIT, queryDesc); + RestoreOidAssignments(saved_dispatch_oids); /* call ExecutorStart to prepare the plan for execution */ diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 4817c14f07d..553830e8599 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -35,6 +35,7 @@ #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "utils/memutils.h" +#include "utils/metrics_utils.h" #include "utils/snapmgr.h" #include "cdb/cdbendpoint.h" @@ -373,6 +374,10 @@ PortalCleanup(Portal portal) FreeQueryDesc(queryDesc); CurrentResourceOwner = saveResourceOwner; + } else { + /* GPDB hook for collecting query info */ + if (queryDesc->yagp_query_key && query_info_collect_hook) + (*query_info_collect_hook)(METRICS_QUERY_ERROR, queryDesc); } } From 00dff379bf0d9a24f6f25d2836fa9050b2f9c595 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 20 Jan 2026 17:03:53 +0300 Subject: [PATCH 128/133] [yagp_hooks_collector] Add test for UDS sending Copy of [1]; the additional changes needed for Cloudberry are described below: The testing C functions have been changed to set-returning ones compared with [1], because we need control over the place where each function is executed - either on master or on segments - and in Cloudberry such functions must return a set of values, so they were changed to return SETOF. 
[1] https://github.com/open-gpdb/gpdb/commit/989ca069f19bf582a51b7bb98dbf200a4e251e62 --- gpcontrib/yagp_hooks_collector/Makefile | 2 +- .../expected/yagp_uds.out | 42 +++++++++ .../yagp_hooks_collector/sql/yagp_uds.sql | 31 +++++++ .../src/hook_wrappers.cpp | 86 +++++++++++++++++++ .../yagp_hooks_collector/src/hook_wrappers.h | 4 + .../src/yagp_hooks_collector.c | 58 +++++++++++++ .../yagp_hooks_collector--1.1.sql | 15 ++++ 7 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 gpcontrib/yagp_hooks_collector/expected/yagp_uds.out create mode 100644 gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile index 79f5401c8d1..eb6541b7687 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -1,7 +1,7 @@ MODULE_big = yagp_hooks_collector EXTENSION = yagp_hooks_collector DATA = $(wildcard *--*.sql) -REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache +REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache yagp_uds PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out b/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out new file mode 100644 index 00000000000..d04929ffb4a --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out @@ -0,0 +1,42 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +-- end_ignore +\set UDS_PATH '/tmp/yagpcc_test.sock' +-- Configure extension to send via UDS +SET yagpcc.uds_path TO :'UDS_PATH'; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.logging_mode TO 'UDS'; +-- Start receiver +SELECT yagpcc.__test_uds_start_server(:'UDS_PATH'); + __test_uds_start_server +------------------------- +(0 rows) + +-- Send 
+SELECT 1; + ?column? +---------- + 1 +(1 row) + +-- Receive +SELECT yagpcc.__test_uds_receive() > 0 as received; + received +---------- + t +(1 row) + +-- Stop receiver +SELECT yagpcc.__test_uds_stop_server(); + __test_uds_stop_server +------------------------ +(0 rows) + +-- Cleanup +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.uds_path; +RESET yagpcc.ignored_users_list; +RESET yagpcc.enable; +RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql new file mode 100644 index 00000000000..3eef697a4e7 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql @@ -0,0 +1,31 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +-- end_ignore + +\set UDS_PATH '/tmp/yagpcc_test.sock' + +-- Configure extension to send via UDS +SET yagpcc.uds_path TO :'UDS_PATH'; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.logging_mode TO 'UDS'; + +-- Start receiver +SELECT yagpcc.__test_uds_start_server(:'UDS_PATH'); + +-- Send +SELECT 1; + +-- Receive +SELECT yagpcc.__test_uds_receive() > 0 as received; + +-- Stop receiver +SELECT yagpcc.__test_uds_stop_server(); + +-- Cleanup +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.uds_path; +RESET yagpcc.ignored_users_list; +RESET yagpcc.enable; +RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp index 8cf74641c29..4a032178237 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp +++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp @@ -11,6 +11,12 @@ extern "C" { #include "cdb/ml_ipc.h" #include "tcop/utility.h" #include "stat_statements_parser/pg_stat_statements_ya_parser.h" + +#include +#include +#include +#include +#include } #undef typeid @@ -52,6 +58,13 @@ static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, QueryEnvironment 
*queryEnv, DestReceiver *dest, QueryCompletion *qc); +#define TEST_MAX_CONNECTIONS 4 +#define TEST_RCV_BUF_SIZE 8192 +#define TEST_POLL_TIMEOUT_MS 200 + +static int test_server_fd = -1; +static char *test_sock_path = NULL; + static EventSender *sender = nullptr; static inline EventSender *get_sender() { @@ -294,4 +307,77 @@ Datum yagp_functions_get(FunctionCallInfo fcinfo) { HeapTuple tuple = ya_gpdb::heap_form_tuple(tupdesc, values, nulls); Datum result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); +} + +void test_uds_stop_server() { + if (test_server_fd >= 0) { + close(test_server_fd); + test_server_fd = -1; + } + if (test_sock_path) { + unlink(test_sock_path); + pfree(test_sock_path); + test_sock_path = NULL; + } +} + +void test_uds_start_server(const char *path) { + struct sockaddr_un addr = {.sun_family = AF_UNIX}; + + if (strlen(path) >= sizeof(addr.sun_path)) + ereport(ERROR, (errmsg("path too long"))); + + test_uds_stop_server(); + + strlcpy(addr.sun_path, path, sizeof(addr.sun_path)); + test_sock_path = MemoryContextStrdup(TopMemoryContext, path); + unlink(path); + + if ((test_server_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0 || + bind(test_server_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 || + listen(test_server_fd, TEST_MAX_CONNECTIONS) < 0) { + test_uds_stop_server(); + ereport(ERROR, (errmsg("socket setup failed: %m"))); + } +} + +int64 test_uds_receive(int timeout_ms) { + char buf[TEST_RCV_BUF_SIZE]; + int rc; + struct pollfd pfd = {.fd = test_server_fd, .events = POLLIN}; + int64 total = 0; + + if (test_server_fd < 0) + ereport(ERROR, (errmsg("server not started"))); + + for (;;) { + CHECK_FOR_INTERRUPTS(); + rc = poll(&pfd, 1, Min(timeout_ms, TEST_POLL_TIMEOUT_MS)); + if (rc > 0) + break; + if (rc < 0 && errno != EINTR) + ereport(ERROR, (errmsg("poll: %m"))); + timeout_ms -= TEST_POLL_TIMEOUT_MS; + if (timeout_ms <= 0) + return total; + } + + if (pfd.revents & POLLIN) { + int client = accept(test_server_fd, NULL, NULL); + ssize_t n; + + 
if (client < 0) + ereport(ERROR, (errmsg("accept: %m"))); + + while ((n = recv(client, buf, sizeof(buf), 0)) != 0) { + if (n > 0) + total += n; + else if (errno != EINTR) + break; + } + + close(client); + } + + return total; } \ No newline at end of file diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h index cfabf39485e..236c6eb9d79 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h +++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h @@ -12,6 +12,10 @@ extern Datum yagp_functions_get(FunctionCallInfo fcinfo); extern void init_log(); extern void truncate_log(); +extern void test_uds_start_server(const char *path); +extern int64_t test_uds_receive(int timeout_ms); +extern void test_uds_stop_server(); + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c index 27fd0e04b26..fc035679bfe 100644 --- a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c +++ b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c @@ -14,6 +14,10 @@ PG_FUNCTION_INFO_V1(yagp_stat_messages); PG_FUNCTION_INFO_V1(yagp_init_log); PG_FUNCTION_INFO_V1(yagp_truncate_log); +PG_FUNCTION_INFO_V1(yagp_test_uds_start_server); +PG_FUNCTION_INFO_V1(yagp_test_uds_receive); +PG_FUNCTION_INFO_V1(yagp_test_uds_stop_server); + void _PG_init(void) { if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { hooks_init(); @@ -65,3 +69,57 @@ Datum yagp_truncate_log(PG_FUNCTION_ARGS) { funcctx = SRF_PERCALL_SETUP(); SRF_RETURN_DONE(funcctx); } + +Datum yagp_test_uds_start_server(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + char *path = text_to_cstring(PG_GETARG_TEXT_PP(0)); + test_uds_start_server(path); + pfree(path); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum 
yagp_test_uds_receive(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + int64 *result; + + if (SRF_IS_FIRSTCALL()) { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + result = (int64 *) palloc(sizeof(int64)); + funcctx->user_fctx = result; + funcctx->max_calls = 1; + MemoryContextSwitchTo(oldcontext); + + int timeout_ms = PG_GETARG_INT32(0); + *result = test_uds_receive(timeout_ms); + } + + funcctx = SRF_PERCALL_SETUP(); + + if (funcctx->call_cntr < funcctx->max_calls) { + result = (int64 *) funcctx->user_fctx; + SRF_RETURN_NEXT(funcctx, Int64GetDatum(*result)); + } + + SRF_RETURN_DONE(funcctx); +} + +Datum yagp_test_uds_stop_server(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + test_uds_stop_server(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql index e0e94b51493..83bfb553638 100644 --- a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql +++ b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql @@ -93,3 +93,18 @@ BEGIN PERFORM yagpcc.__truncate_log_on_segments(); END; $$ LANGUAGE plpgsql VOLATILE; + +CREATE FUNCTION yagpcc.__test_uds_start_server(path text) +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_test_uds_start_server' +LANGUAGE C STRICT EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__test_uds_receive(timeout_ms int DEFAULT 2000) +RETURNS SETOF bigint +AS 'MODULE_PATHNAME', 'yagp_test_uds_receive' +LANGUAGE C STRICT EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__test_uds_stop_server() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_test_uds_stop_server' +LANGUAGE C EXECUTE ON MASTER; From 38dd15536a18876c4486773bbdce3d08d6da0a58 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 20 Jan 2026 18:49:58 +0300 Subject: [PATCH 
129/133] [yagp_hooks_collector] Correct add of bytes sent Copy of [1] - send() may return -1 in case of an error, do not add -1 to total_bytes sent. [1] https://github.com/open-gpdb/gpdb/commit/e1f6c08698e76e77e742d961aa86af7cdf77008d --- gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp index 74fd57a3ac0..ad10a663e12 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp +++ b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp @@ -51,7 +51,8 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, do { sent = send(sockfd, buf + sent_total, total_size - sent_total, MSG_DONTWAIT); - sent_total += sent; + if (sent > 0) + sent_total += sent; } while ( sent > 0 && size_t(sent_total) != total_size && // the line below is a small throttling hack: From 90803d1bb2a1dab4963dc038611ae44960609998 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 20 Jan 2026 19:12:07 +0300 Subject: [PATCH 130/133] [yagp_hooks_collector] Refactor Similarly to [1]. 
[1] https://github.com/open-gpdb/gpdb/commit/bdfabcaa41605c720dfd2bf0ce5127765fc92eb8#diff-99e92ab48c7310c149ea2bd3414a20a1edf31f26823253906aae1981e088775aR39-R79 --- gpcontrib/yagp_hooks_collector/src/Config.cpp | 10 +- gpcontrib/yagp_hooks_collector/src/Config.h | 8 +- .../yagp_hooks_collector/src/UDSConnector.cpp | 118 ++++++++++-------- .../src/hook_wrappers.cpp | 10 +- .../src/yagp_hooks_collector.c | 14 +-- 5 files changed, 85 insertions(+), 75 deletions(-) diff --git a/gpcontrib/yagp_hooks_collector/src/Config.cpp b/gpcontrib/yagp_hooks_collector/src/Config.cpp index 4fb58677018..2c2032ebb03 100644 --- a/gpcontrib/yagp_hooks_collector/src/Config.cpp +++ b/gpcontrib/yagp_hooks_collector/src/Config.cpp @@ -16,9 +16,9 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; -static int guc_max_text_size = 1 << 20; // in bytes (1MB) -static int guc_max_plan_size = 1024; // in KB -static int guc_min_analyze_time = 10000; // in ms +static int guc_max_text_size = 1 << 20; // in bytes (1MB) +static int guc_max_plan_size = 1024; // in KB +static int guc_min_analyze_time = 10000; // in ms static int guc_logging_mode = LOG_MODE_UDS; static bool guc_enable_utility = false; @@ -143,8 +143,8 @@ void Config::sync() { enable_collector_ = guc_enable_collector; enable_utility_ = guc_enable_utility; report_nested_queries_ = guc_report_nested_queries; - max_text_size_ = static_cast(guc_max_text_size); - max_plan_size_ = static_cast(guc_max_plan_size); + max_text_size_ = guc_max_text_size; + max_plan_size_ = guc_max_plan_size; min_analyze_time_ = guc_min_analyze_time; logging_mode_ = guc_logging_mode; } diff --git a/gpcontrib/yagp_hooks_collector/src/Config.h b/gpcontrib/yagp_hooks_collector/src/Config.h index b4a393b0383..aa6b5bdc0ba 100644 --- a/gpcontrib/yagp_hooks_collector/src/Config.h +++ b/gpcontrib/yagp_hooks_collector/src/Config.h @@ -21,8 +21,8 @@ class 
Config { bool enable_collector() const { return enable_collector_; } bool enable_utility() const { return enable_utility_; } bool report_nested_queries() const { return report_nested_queries_; } - size_t max_text_size() const { return max_text_size_; } - size_t max_plan_size() const { return max_plan_size_ * 1024; } + int max_text_size() const { return max_text_size_; } + int max_plan_size() const { return max_plan_size_ * 1024; } int min_analyze_time() const { return min_analyze_time_; } int logging_mode() const { return logging_mode_; } bool filter_user(const std::string &username) const; @@ -37,8 +37,8 @@ class Config { bool enable_collector_; bool enable_utility_; bool report_nested_queries_; - size_t max_text_size_; - size_t max_plan_size_; + int max_text_size_; + int max_plan_size_; int min_analyze_time_; int logging_mode_; }; diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp index ad10a663e12..ea118fca783 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp +++ b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp @@ -27,67 +27,77 @@ static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, const std::string &event, const Config &config) { - sockaddr_un address; + sockaddr_un address{}; address.sun_family = AF_UNIX; - const std::string &uds_path = config.uds_path(); + const auto &uds_path = config.uds_path(); + if (uds_path.size() >= sizeof(address.sun_path)) { ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); YagpStat::report_error(); return false; } strcpy(address.sun_path, uds_path.c_str()); - bool success = true; - auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); - if (sockfd != -1) { - if (fcntl(sockfd, F_SETFL, O_NONBLOCK) != -1) { - if (connect(sockfd, (sockaddr *)&address, sizeof(address)) != -1) { - auto data_size = req.ByteSize(); - auto total_size = data_size + 
sizeof(uint32_t); - uint8_t *buf = (uint8_t *)ya_gpdb::palloc(total_size); - uint32_t *size_payload = (uint32_t *)buf; - *size_payload = data_size; - req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); - int64_t sent = 0, sent_total = 0; - do { - sent = send(sockfd, buf + sent_total, total_size - sent_total, - MSG_DONTWAIT); - if (sent > 0) - sent_total += sent; - } while ( - sent > 0 && size_t(sent_total) != total_size && - // the line below is a small throttling hack: - // if a message does not fit a single packet, we take a nap - // before sending the next one. - // Otherwise, MSG_DONTWAIT send might overflow the UDS - (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true)); - if (sent < 0) { - log_tracing_failure(req, event); - success = false; - YagpStat::report_bad_send(total_size); - } else { - YagpStat::report_send(total_size); - } - ya_gpdb::pfree(buf); - } else { - // log the error and go on - log_tracing_failure(req, event); - success = false; - YagpStat::report_bad_connection(); - } - } else { - // That's a very important error that should never happen, so make it - // visible to an end-user and admins. - ereport(WARNING, - (errmsg("Unable to create non-blocking socket connection %m"))); - success = false; - YagpStat::report_error(); - } - close(sockfd); - } else { - // log the error and go on + + const auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd == -1) { log_tracing_failure(req, event); - success = false; YagpStat::report_error(); + return false; } - return success; -} \ No newline at end of file + + // Close socket automatically on error path. + struct SockGuard { + int fd; + ~SockGuard() { close(fd); } + } sock_guard{sockfd}; + + if (fcntl(sockfd, F_SETFL, O_NONBLOCK) == -1) { + // That's a very important error that should never happen, so make it + // visible to an end-user and admins. 
+ ereport(WARNING, + (errmsg("Unable to create non-blocking socket connection %m"))); + YagpStat::report_error(); + return false; + } + + if (connect(sockfd, reinterpret_cast(&address), + sizeof(address)) == -1) { + log_tracing_failure(req, event); + YagpStat::report_bad_connection(); + return false; + } + + const auto data_size = req.ByteSize(); + const auto total_size = data_size + sizeof(uint32_t); + auto *buf = static_cast(ya_gpdb::palloc(total_size)); + // Free buf automatically on error path. + struct BufGuard { + void *p; + ~BufGuard() { ya_gpdb::pfree(p); } + } buf_guard{buf}; + + *reinterpret_cast(buf) = data_size; + req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); + + int64_t sent = 0, sent_total = 0; + do { + sent = + send(sockfd, buf + sent_total, total_size - sent_total, MSG_DONTWAIT); + if (sent > 0) + sent_total += sent; + } while (sent > 0 && size_t(sent_total) != total_size && + // the line below is a small throttling hack: + // if a message does not fit a single packet, we take a nap + // before sending the next one. 
+ // Otherwise, MSG_DONTWAIT send might overflow the UDS + (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true)); + + if (sent < 0) { + log_tracing_failure(req, event); + YagpStat::report_bad_send(total_size); + return false; + } + + YagpStat::report_send(total_size); + return true; +} diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp index 4a032178237..602a2470805 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp +++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp @@ -239,8 +239,9 @@ static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, } get_sender()->decr_depth(); - cpp_call(get_sender(), &EventSender::query_metrics_collect, METRICS_QUERY_DONE, - (void *)query_desc, true /* utility */, (ErrorData *)NULL); + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_DONE, (void *)query_desc, true /* utility */, + (ErrorData *)NULL); pfree(query_desc); } @@ -255,8 +256,9 @@ static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, MemoryContextSwitchTo(oldctx); get_sender()->decr_depth(); - cpp_call(get_sender(), &EventSender::query_metrics_collect, METRICS_QUERY_ERROR, - (void *)query_desc, true /* utility */, edata); + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_ERROR, (void *)query_desc, true /* utility */, + edata); pfree(query_desc); ReThrowError(edata); diff --git a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c index fc035679bfe..f7863a38921 100644 --- a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c +++ b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c @@ -19,15 +19,13 @@ PG_FUNCTION_INFO_V1(yagp_test_uds_receive); PG_FUNCTION_INFO_V1(yagp_test_uds_stop_server); void _PG_init(void) { - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + if (Gp_role 
== GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) hooks_init(); - } } void _PG_fini(void) { - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) hooks_deinit(); - } } Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { @@ -72,7 +70,7 @@ Datum yagp_truncate_log(PG_FUNCTION_ARGS) { Datum yagp_test_uds_start_server(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; - + if (SRF_IS_FIRSTCALL()) { funcctx = SRF_FIRSTCALL_INIT(); char *path = text_to_cstring(PG_GETARG_TEXT_PP(0)); @@ -93,7 +91,7 @@ Datum yagp_test_uds_receive(PG_FUNCTION_ARGS) { funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - result = (int64 *) palloc(sizeof(int64)); + result = (int64 *)palloc(sizeof(int64)); funcctx->user_fctx = result; funcctx->max_calls = 1; MemoryContextSwitchTo(oldcontext); @@ -105,7 +103,7 @@ Datum yagp_test_uds_receive(PG_FUNCTION_ARGS) { funcctx = SRF_PERCALL_SETUP(); if (funcctx->call_cntr < funcctx->max_calls) { - result = (int64 *) funcctx->user_fctx; + result = (int64 *)funcctx->user_fctx; SRF_RETURN_NEXT(funcctx, Int64GetDatum(*result)); } @@ -114,7 +112,7 @@ Datum yagp_test_uds_receive(PG_FUNCTION_ARGS) { Datum yagp_test_uds_stop_server(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; - + if (SRF_IS_FIRSTCALL()) { funcctx = SRF_FIRSTCALL_INIT(); test_uds_stop_server(); From 5f0eb8210ce35abd3ad1120bcb310d7dc9a10ac5 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Wed, 21 Jan 2026 12:53:47 +0000 Subject: [PATCH 131/133] [yagp_hooks_collector] Make gen of norm plan/query noexcept The extension generates normalized query text and plan using jumbling functions. Those functions may fail when translating to wide character if the current locale cannot handle the character set. The fix changes the functions that generate normalized query text/plan to noexcept versions so we can check whether an error occurred and continue execution. 
The test checks that even when those functions fail, the plan is still executed. This test is partially taken from src/test/regress/gp_locale.sql. --- gpcontrib/yagp_hooks_collector/Makefile | 2 +- .../expected/yagp_locale.out | 23 +++++++++++++++ .../yagp_hooks_collector/sql/yagp_locale.sql | 29 +++++++++++++++++++ .../yagp_hooks_collector/src/ProtoUtils.cpp | 19 +++++++----- .../src/memory/gpdbwrappers.cpp | 11 +++---- .../src/memory/gpdbwrappers.h | 4 +-- 6 files changed, 71 insertions(+), 17 deletions(-) create mode 100644 gpcontrib/yagp_hooks_collector/expected/yagp_locale.out create mode 100644 gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile index eb6541b7687..d145ae46dbe 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -1,7 +1,7 @@ MODULE_big = yagp_hooks_collector EXTENSION = yagp_hooks_collector DATA = $(wildcard *--*.sql) -REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache yagp_uds +REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache yagp_uds yagp_locale PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out b/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out new file mode 100644 index 00000000000..6689b6a4ed3 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out @@ -0,0 +1,23 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. This test is partially taken from +-- gp_locale. 
+-- start_ignore +DROP DATABASE IF EXISTS yagp_test_locale; +-- end_ignore +CREATE DATABASE yagp_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c yagp_test_locale +CREATE EXTENSION yagp_hooks_collector; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.enable TO TRUE; +CREATE TABLE yagp_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO yagp_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE yagp_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +DROP TABLE yagp_hi_안녕세계; +DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql new file mode 100644 index 00000000000..65d867d1680 --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql @@ -0,0 +1,29 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. This test is partially taken from +-- gp_locale. 
+ +-- start_ignore +DROP DATABASE IF EXISTS yagp_test_locale; +-- end_ignore + +CREATE DATABASE yagp_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c yagp_test_locale + +CREATE EXTENSION yagp_hooks_collector; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.enable TO TRUE; + +CREATE TABLE yagp_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO yagp_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE yagp_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; + +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +DROP TABLE yagp_hi_안녕세계; +DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp index 8ebbe19e289..f9119ca4b14 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp +++ b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp @@ -96,13 +96,15 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, *qi->mutable_plan_text() = trim_str_shrink_utf8(es.str->data, es.str->len, config.max_plan_size()); StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = trim_str_shrink_utf8( - norm_plan->data, norm_plan->len, config.max_plan_size()); - qi->set_plan_id( - hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + if (norm_plan) { + *qi->mutable_template_plan_text() = trim_str_shrink_utf8( + norm_plan->data, norm_plan->len, config.max_plan_size()); + qi->set_plan_id( + hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + ya_gpdb::pfree(norm_plan->data); + } qi->set_query_id(query_desc->plannedstmt->queryId); ya_gpdb::pfree(es.str->data); - ya_gpdb::pfree(norm_plan->data); } ya_gpdb::mem_ctx_switch_to(oldcxt); } @@ -116,8 +118,11 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, 
query_desc->sourceText, strlen(query_desc->sourceText), config.max_text_size()); char *norm_query = ya_gpdb::gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = trim_str_shrink_utf8( - norm_query, strlen(norm_query), config.max_text_size()); + if (norm_query) { + *qi->mutable_template_query_text() = trim_str_shrink_utf8( + norm_query, strlen(norm_query), config.max_text_size()); + ya_gpdb::pfree(norm_query); + } } } diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp index 763e32e539c..8cc483a39de 100644 --- a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp +++ b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp @@ -204,15 +204,12 @@ void ya_gpdb::instr_end_loop(Instrumentation *instr) { wrap_throw(::InstrEndLoop, instr); } -char *ya_gpdb::gen_normquery(const char *query) { - return wrap_throw(::gen_normquery, query); +char *ya_gpdb::gen_normquery(const char *query) noexcept { + return wrap_noexcept(::gen_normquery, query); } -StringInfo ya_gpdb::gen_normplan(const char *exec_plan) { - if (!exec_plan) - throw std::runtime_error("Invalid execution plan string"); - - return wrap_throw(::gen_normplan, exec_plan); +StringInfo ya_gpdb::gen_normplan(const char *exec_plan) noexcept { + return wrap_noexcept(::gen_normplan, exec_plan); } char *ya_gpdb::get_rg_name_for_id(Oid group_id) { diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h index 920fc1ae6e7..e080ef5cdd4 100644 --- a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h +++ b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h @@ -38,8 +38,8 @@ HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc, instr_time starttime); void instr_end_loop(Instrumentation *instr); -char *gen_normquery(const char *query); 
-StringInfo gen_normplan(const char *executionPlan); +char *gen_normquery(const char *query) noexcept; +StringInfo gen_normplan(const char *executionPlan) noexcept; char *get_rg_name_for_id(Oid group_id); void insert_log(const yagpcc::SetQueryReq &req, bool utility); From 17923145aa7a86a5d785fe929ba627d8644f9215 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Wed, 21 Jan 2026 13:52:56 +0000 Subject: [PATCH 132/133] [yagp_hooks_collector] Fix warnings and error them Cloudberry builds treat compiler warnings as errors. For consistency, this behavior has been enabled in yagp_hooks_collector. This commit also fixes the warnings in yagp_hooks_collector. --- gpcontrib/yagp_hooks_collector/Makefile | 2 +- gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp | 2 +- gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile index d145ae46dbe..49825c55f35 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -10,7 +10,7 @@ C_OBJS = $(patsubst %.c,%.o,$(wildcard src/*.c src/*/*.c)) CPP_OBJS = $(patsubst %.cpp,%.o,$(wildcard src/*.cpp src/*/*.cpp)) OBJS = $(C_OBJS) $(CPP_OBJS) $(PROTO_OBJS) -override CXXFLAGS = -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels \ +override CXXFLAGS = -Werror -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels \ -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv \ -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation \ -Wno-stringop-truncation -g -ggdb -std=c++17 -Iinclude -Isrc/protos -Isrc -DGPBUILD diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp index ea118fca783..f312a357a4f 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp +++ b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp @@ -67,7 +67,7 @@ bool UDSConnector::report_query(const 
yagpcc::SetQueryReq &req, return false; } - const auto data_size = req.ByteSize(); + const auto data_size = req.ByteSizeLong(); const auto total_size = data_size + sizeof(uint32_t); auto *buf = static_cast(ya_gpdb::palloc(total_size)); // Free buf automatically on error path. diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp index 602a2470805..d55e89b3b26 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp +++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp @@ -46,8 +46,10 @@ static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, static void ya_ExecutorFinish_hook(QueryDesc *query_desc); static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); +#ifdef IC_TEARDOWN_HOOK static void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors); +#endif #ifdef ANALYZE_STATS_COLLECT_HOOK static void ya_analyze_stats_collect_hook(QueryDesc *query_desc); #endif @@ -195,14 +197,14 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { } } +#ifdef IC_TEARDOWN_HOOK void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { cpp_call(get_sender(), &EventSender::ic_metrics_collect); -#ifdef IC_TEARDOWN_HOOK if (previous_ic_teardown_hook) { (*previous_ic_teardown_hook)(transportStates, hasErrors); } -#endif } +#endif #ifdef ANALYZE_STATS_COLLECT_HOOK void ya_analyze_stats_collect_hook(QueryDesc *query_desc) { From 351edaeb9fdb17c668ab48bd5e3ee2d2d59a9519 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Thu, 22 Jan 2026 17:29:21 +0300 Subject: [PATCH 133/133] [yagp_hooks_collector] Add licence --- gpcontrib/yagp_hooks_collector/src/Config.cpp | 27 +++++++++++++++++++ gpcontrib/yagp_hooks_collector/src/Config.h | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/EventSender.cpp | 27 +++++++++++++++++++ 
.../yagp_hooks_collector/src/EventSender.h | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/PgUtils.cpp | 27 +++++++++++++++++++ gpcontrib/yagp_hooks_collector/src/PgUtils.h | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/ProcStats.cpp | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/ProcStats.h | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/ProtoUtils.cpp | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/ProtoUtils.h | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/UDSConnector.cpp | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/UDSConnector.h | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/YagpStat.cpp | 27 +++++++++++++++++++ gpcontrib/yagp_hooks_collector/src/YagpStat.h | 27 +++++++++++++++++++ .../src/hook_wrappers.cpp | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/hook_wrappers.h | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/log/LogOps.cpp | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/log/LogOps.h | 27 +++++++++++++++++++ .../src/log/LogSchema.cpp | 27 +++++++++++++++++++ .../yagp_hooks_collector/src/log/LogSchema.h | 27 +++++++++++++++++++ .../src/memory/gpdbwrappers.cpp | 27 +++++++++++++++++++ .../src/memory/gpdbwrappers.h | 27 +++++++++++++++++++ .../{README.MD => README.md} | 0 .../pg_stat_statements_ya_parser.c | 27 +++++++++++++++++++ .../pg_stat_statements_ya_parser.h | 27 +++++++++++++++++++ .../src/yagp_hooks_collector.c | 27 +++++++++++++++++++ pom.xml | 7 +++++ 27 files changed, 682 insertions(+) rename gpcontrib/yagp_hooks_collector/src/stat_statements_parser/{README.MD => README.md} (100%) diff --git a/gpcontrib/yagp_hooks_collector/src/Config.cpp b/gpcontrib/yagp_hooks_collector/src/Config.cpp index 2c2032ebb03..62c16e91d1f 100644 --- a/gpcontrib/yagp_hooks_collector/src/Config.cpp +++ b/gpcontrib/yagp_hooks_collector/src/Config.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * Config.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/Config.cpp + * + *------------------------------------------------------------------------- + */ + #include "Config.h" #include "memory/gpdbwrappers.h" #include diff --git a/gpcontrib/yagp_hooks_collector/src/Config.h b/gpcontrib/yagp_hooks_collector/src/Config.h index aa6b5bdc0ba..01ae5ea328e 100644 --- a/gpcontrib/yagp_hooks_collector/src/Config.h +++ b/gpcontrib/yagp_hooks_collector/src/Config.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * Config.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/Config.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/gpcontrib/yagp_hooks_collector/src/EventSender.cpp b/gpcontrib/yagp_hooks_collector/src/EventSender.cpp index 853a0c43fb9..f1cc0cc6ea1 100644 --- a/gpcontrib/yagp_hooks_collector/src/EventSender.cpp +++ b/gpcontrib/yagp_hooks_collector/src/EventSender.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * EventSender.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/EventSender.cpp + * + *------------------------------------------------------------------------- + */ + #include "UDSConnector.h" #include "memory/gpdbwrappers.h" #include "log/LogOps.h" diff --git a/gpcontrib/yagp_hooks_collector/src/EventSender.h b/gpcontrib/yagp_hooks_collector/src/EventSender.h index e9acb04422b..ef7dcb0bf8c 100644 --- a/gpcontrib/yagp_hooks_collector/src/EventSender.h +++ b/gpcontrib/yagp_hooks_collector/src/EventSender.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * EventSender.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/EventSender.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp b/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp index 7e53abdabbf..ed4bf4d7e64 100644 --- a/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp +++ b/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * PgUtils.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/PgUtils.cpp + * + *------------------------------------------------------------------------- + */ + #include "PgUtils.h" #include "Config.h" #include "memory/gpdbwrappers.h" diff --git a/gpcontrib/yagp_hooks_collector/src/PgUtils.h b/gpcontrib/yagp_hooks_collector/src/PgUtils.h index e9715ce10f4..5113fadbff2 100644 --- a/gpcontrib/yagp_hooks_collector/src/PgUtils.h +++ b/gpcontrib/yagp_hooks_collector/src/PgUtils.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * PgUtils.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/PgUtils.h + * + *------------------------------------------------------------------------- + */ + extern "C" { #include "postgres.h" #include "commands/explain.h" diff --git a/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp b/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp index 5c09fa0bce4..72a12e8ca00 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp +++ b/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProcStats.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProcStats.cpp + * + *------------------------------------------------------------------------- + */ + #include "ProcStats.h" #include "yagpcc_metrics.pb.h" #include diff --git a/gpcontrib/yagp_hooks_collector/src/ProcStats.h b/gpcontrib/yagp_hooks_collector/src/ProcStats.h index 30a90a60519..7629edd0aea 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProcStats.h +++ b/gpcontrib/yagp_hooks_collector/src/ProcStats.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProcStats.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProcStats.h + * + *------------------------------------------------------------------------- + */ + #pragma once namespace yagpcc { diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp index f9119ca4b14..b449ae20900 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp +++ b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProtoUtils.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp + * + *------------------------------------------------------------------------- + */ + #include "ProtoUtils.h" #include "PgUtils.h" #include "ProcStats.h" diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h index 37b7e4a8a29..c954545494f 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h +++ b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProtoUtils.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProtoUtils.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include "protos/yagpcc_set_service.pb.h" diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp index f312a357a4f..d13a82a5ca9 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp +++ b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * UDSConnector.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp + * + *------------------------------------------------------------------------- + */ + #include "UDSConnector.h" #include "Config.h" #include "YagpStat.h" diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.h b/gpcontrib/yagp_hooks_collector/src/UDSConnector.h index 9483407159d..be5ab1ef413 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.h +++ b/gpcontrib/yagp_hooks_collector/src/UDSConnector.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * UDSConnector.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/UDSConnector.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include "protos/yagpcc_set_service.pb.h" diff --git a/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp b/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp index 879cde85212..3a760b6ea97 100644 --- a/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp +++ b/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * YagpStat.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/YagpStat.cpp + * + *------------------------------------------------------------------------- + */ + #include "YagpStat.h" #include diff --git a/gpcontrib/yagp_hooks_collector/src/YagpStat.h b/gpcontrib/yagp_hooks_collector/src/YagpStat.h index 110b1fdcbb1..57fc90cd4d1 100644 --- a/gpcontrib/yagp_hooks_collector/src/YagpStat.h +++ b/gpcontrib/yagp_hooks_collector/src/YagpStat.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * YagpStat.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/YagpStat.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp index d55e89b3b26..cb4970d60d9 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp +++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * hook_wrappers.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp + * + *------------------------------------------------------------------------- + */ + #define typeid __typeid extern "C" { #include "postgres.h" diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h index 236c6eb9d79..443406a5259 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h +++ b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * hook_wrappers.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/hook_wrappers.h + * + *------------------------------------------------------------------------- + */ + #pragma once #ifdef __cplusplus diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp b/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp index 56bdf1dca62..e8c927ece84 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp +++ b/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * LogOps.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp + * + *------------------------------------------------------------------------- + */ + #include "protos/yagpcc_set_service.pb.h" #include "LogOps.h" diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogOps.h b/gpcontrib/yagp_hooks_collector/src/log/LogOps.h index bad03d09a8f..1fc30c21030 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogOps.h +++ b/gpcontrib/yagp_hooks_collector/src/log/LogOps.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * LogOps.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogOps.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp index 2fadcc46599..a391b1a2209 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp +++ b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * LogSchema.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp + * + *------------------------------------------------------------------------- + */ + #include "google/protobuf/reflection.h" #include "google/protobuf/descriptor.h" #include "google/protobuf/timestamp.pb.h" diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h index f713c1e9b0e..f78acec7ce9 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h +++ b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * LogSchema.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogSchema.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp index 8cc483a39de..22083e8bdaf 100644 --- a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp +++ b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * gpdbwrappers.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp + * + *------------------------------------------------------------------------- + */ + #include "gpdbwrappers.h" #include "log/LogOps.h" diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h index e080ef5cdd4..fe9b3ba0487 100644 --- a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h +++ b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * gpdbwrappers.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h + * + *------------------------------------------------------------------------- + */ + #pragma once extern "C" { diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.MD b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md similarity index 100% rename from gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.MD rename to gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c index 54c8b2cf59f..7404208055f 100644 --- a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * pg_stat_statements_ya_parser.c + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c + * + *------------------------------------------------------------------------- + */ + // NOTE: this file is just a bunch of code borrowed from pg_stat_statements for PG 9.4 // and from our own inhouse implementation of pg_stat_statements for managed PG diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h index b08e8533992..96c6a776dba 100644 --- a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h +++ b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * pg_stat_statements_ya_parser.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h + * + *------------------------------------------------------------------------- + */ + #pragma once #ifdef __cplusplus diff --git a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c index f7863a38921..271bceee178 100644 --- a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c +++ b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * yagp_hooks_collector.c + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c + * + *------------------------------------------------------------------------- + */ + #include "postgres.h" #include "cdb/cdbvars.h" #include "funcapi.h" diff --git a/pom.xml b/pom.xml index 6d33d7e9de5..35bd5b13442 100644 --- a/pom.xml +++ b/pom.xml @@ -153,6 +153,13 @@ code or new licensing patterns. 
gpcontrib/gp_exttable_fdw/data/** gpcontrib/gp_exttable_fdw/gp_exttable_fdw.control + gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control + gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto + gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto + gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto + gpcontrib/yagp_hooks_collector/.clang-format + gpcontrib/yagp_hooks_collector/Makefile + getversion .git-blame-ignore-revs .dir-locals.el