Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,41 @@ logs/scdbserver.log
*.DS_Store

# ignore bazelrc for remote cache
.remote.bazelrc
.remote.bazelrc
# 本地bazel配置覆盖文件
.bazelrc.user

# scdb-tutorial 配置和日志文件
examples/scdb-tutorial/scdb/conf/config.yml
examples/scdb-tutorial/scdb/conf/config_local.yml
examples/scdb-tutorial/scdb/conf/config_hive.yml
examples/scdb-tutorial/logs/
examples/scdb-tutorial/hive/logs/

# 本地配置文件(包含密码等敏感信息)
examples/scdb-tutorial/engine/*/conf/gflags_local.conf

# scdb-tutorial 数据库文件
examples/scdb-tutorial/scdb/*.db

# 根目录编译生成的可执行文件
/scdbclient
/scqltool
/brokerctl

# 根目录日志文件
/logs/

# 本地临时测试文件和脚本
/create_*.sql
/test_*.sh
/setup_local_env.py

# tutorial 本地配置和脚本文件
examples/scdb-tutorial/client/users_local.json
examples/scdb-tutorial/configure_local.py
examples/scdb-tutorial/init_database_local.sh
examples/scdb-tutorial/start_all.sh
examples/scdb-tutorial/start_all_hive.sh
examples/scdb-tutorial/stop_all.sh
examples/scdb-tutorial/stop_all_hive.sh
3 changes: 2 additions & 1 deletion engine/datasource/datasource.proto
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ enum DataSourceKind {
ARROWSQL = 5;
GRPC = 6;
DATAPROXY = 7;
HIVE = 8;
}

message DataSource {
Expand All @@ -38,4 +39,4 @@ message DataSource {
// concrete data source connection string
// Its format is understood by the corresponding data source adaptor.
string connection_str = 4;
};
};
8 changes: 5 additions & 3 deletions engine/datasource/datasource_adaptor_mgr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ void DatasourceAdaptorMgr::RegisterBuiltinAdaptorFactories() {
{DataSourceKind::DATAPROXY, std::make_shared<DpAdaptorFactory>()});
factory_maps_.insert(
{DataSourceKind::CSVDB, std::make_shared<CsvdbAdaptorFactory>()});
factory_maps_.insert(
{DataSourceKind::ARROWSQL, std::make_shared<ArrowSqlAdaptorFactory>()});
auto arrow_sql_adaptor_factory = std::make_shared<ArrowSqlAdaptorFactory>();
factory_maps_.insert({DataSourceKind::ARROWSQL, arrow_sql_adaptor_factory});
// Hive uses Arrow Flight SQL protocol for better performance and native columnar support
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里不修改,在配置文件里 kind 填 ARROWSQL 也是能运行的

factory_maps_.insert({DataSourceKind::HIVE, arrow_sql_adaptor_factory});
}

} // namespace scql::engine
} // namespace scql::engine
14 changes: 14 additions & 0 deletions examples/scdb-tutorial/client/users_local.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"alice": {
"UserName": "alice",
"Password": "some_password"
},
"bob": {
"UserName": "bob",
"Password": "another_password"
},
"root": {
"UserName": "root",
"Password": "root"
}
}
Comment on lines +1 to +14
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

Hardcoding default credentials, even for examples, poses a security risk as this pattern might be copied into production environments. It's recommended to use placeholder values that are clearly not real passwords and instruct the user to replace them, or to read them from a configuration source that is not checked into version control.

86 changes: 86 additions & 0 deletions examples/scdb-tutorial/configure_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import os
import sys
import glob

# Configuration
# Every path used below is relative, so the script only works when it is
# launched from the tutorial directory; refuse to run from anywhere else.
WORK_DIR = os.getcwd()
if not WORK_DIR.endswith("examples/scdb-tutorial"):
    print("Error: Please run this script from 'examples/scdb-tutorial' directory")
    sys.exit(1)

# User input for MySQL password
mysql_password = input("Please enter your local MySQL root password: ").strip()
if not mysql_password:
    print("Error: Password cannot be empty")
    sys.exit(1)

# Paths of the config files to generate (relative to the working directory)
alice_conf = "engine/alice/conf/gflags.conf"
bob_conf = "engine/bob/conf/gflags.conf"
scdb_conf = "scdb/conf/config.yml"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The variable scdb_host is assigned but never used. Unused code should be removed to improve code clarity and maintainability.


def update_file(filepath, replacements):
    """Rewrite ``filepath`` in place, applying literal substring replacements.

    Args:
        filepath: Path of the text file to modify.
        replacements: Mapping of ``old -> new`` substrings. Entries are applied
            in the mapping's iteration order, each to the result of the
            previous one, via ``str.replace`` (all occurrences, no regex).

    Returns:
        None. A confirmation line is printed to stdout.

    Raises:
        OSError: If the file cannot be read or written.
    """
    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    for old, new in replacements.items():
        content = content.replace(old, new)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"Updated {filepath}")
Comment on lines +23 to +32
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The function update_file is defined but is never called. The script uses process_template instead. This unused function should be removed to keep the codebase clean.


# 1. Update Alice Config
# The live config files may already contain a random password written by
# setup.sh, which generates them from the corresponding .template files.
# Instead of pattern-matching the existing connection string, each config
# is simply regenerated from its .template.

def process_template(template_path, output_path, replacements):
    """Generate ``output_path`` from ``template_path`` with placeholders filled in.

    Args:
        template_path: Path of the template file to read.
        output_path: Path of the file to write; overwritten if it exists.
        replacements: Mapping of ``old -> new`` substrings. Entries are applied
            in the mapping's iteration order, each to the result of the
            previous one, via ``str.replace`` (all occurrences, no regex).

    Returns:
        None. If the template does not exist, a warning is printed and
        nothing is written.

    Raises:
        OSError: If the template cannot be read or the output cannot be written.
    """
    if not os.path.exists(template_path):
        print(f"Warning: Template {template_path} not found, skipping.")
        return

    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(template_path, 'r', encoding='utf-8') as f:
        content = f.read()

    for old, new in replacements.items():
        content = content.replace(old, new)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"Generated {output_path} from template")

# Replacements for Alice
# Configs are regenerated from the .template files, so any random password
# left behind by an earlier setup.sh run does not need to be undone here.
alice_replacements = {
    "__MYSQL_ROOT_PASSWD__": mysql_password,
    "/home/admin/engine/conf": f"{WORK_DIR}/engine/alice/conf",
    "host=mysql": "host=127.0.0.1",
}
process_template("engine/alice/conf/gflags.conf.template", alice_conf, alice_replacements)

# Replacements for Bob
bob_replacements = {
    "__MYSQL_ROOT_PASSWD__": mysql_password,
    "/home/admin/engine/conf": f"{WORK_DIR}/engine/bob/conf",
    "host=mysql": "host=127.0.0.1",
    # Alice keeps the template's 8003; move Bob to 8004.
    "--listen_port=8003": "--listen_port=8004",
}
process_template("engine/bob/conf/gflags.conf.template", bob_conf, bob_replacements)

# Replacements for SCDB
scdb_replacements = {
    "__MYSQL_ROOT_PASSWD__": mysql_password,
    "mysql:3306": "127.0.0.1:3306",
}
process_template("scdb/conf/config.yml.template", scdb_conf, scdb_replacements)

# Plain string literals: these messages have no placeholders, so no f-prefix.
print("\nConfiguration files updated successfully!")
print("Alice DB Port: 8003 (default)")
print("Bob DB Port: 8004 (modified)")
print("SCDB Port: 8080 (default)")

20 changes: 20 additions & 0 deletions examples/scdb-tutorial/engine/alice/conf/gflags_hive.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
--listen_port=8003
--datasource_router=embed
--enable_driver_authorization=false
--server_enable_ssl=false
--driver_enable_ssl_as_client=false
--peer_engine_enable_ssl_as_client=false
# Hive configuration using Arrow Flight SQL protocol
# Connection string format: grpc+tcp://<host>:<port> or grpc+tcp://<host>:<port>@<user>:<password>
# (the grpc:// scheme used below is an equivalent Arrow Flight scheme for unencrypted TCP)
# Note: Requires Arrow Flight SQL server running (e.g., Hive with Arrow Flight support, Spark Thrift Server with Arrow, etc.)
--embed_router_conf={"datasources":[{"id":"ds001","name":"hive db","kind":"HIVE","connection_str":"grpc://localhost:8815"}],"rules":[{"db":"*","table":"*","datasource_id":"ds001"}]}
# Arrow Flight SQL TLS configuration (optional)
--arrow_client_disable_server_verification=true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

Disabling server verification with --arrow_client_disable_server_verification=true introduces a significant security vulnerability, making the connection susceptible to man-in-the-middle (MITM) attacks. Even for local testing, it is better practice to use a self-signed certificate and configure the client to trust it. This encourages secure development habits.

# --arrow_cert_pem_path=/path/to/ca.pem
# --arrow_client_key_pem_path=/path/to/client-key.pem
# --arrow_client_cert_pem_path=/path/to/client-cert.pem
# party authentication flags (disabled for testing)
--enable_self_auth=false
--enable_peer_auth=false
# --private_key_pem_path=./examples/scdb-tutorial/engine/alice/conf/ed25519key.pem
# --authorized_profile_path=./examples/scdb-tutorial/engine/alice/conf/authorized_profile.json
20 changes: 20 additions & 0 deletions examples/scdb-tutorial/engine/bob/conf/gflags_hive.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
--listen_port=8004
--datasource_router=embed
--enable_driver_authorization=false
--server_enable_ssl=false
--driver_enable_ssl_as_client=false
--peer_engine_enable_ssl_as_client=false
# Hive configuration using Arrow Flight SQL protocol
# Connection string format: grpc+tcp://<host>:<port> or grpc+tcp://<host>:<port>@<user>:<password>
# (the grpc:// scheme used below is an equivalent Arrow Flight scheme for unencrypted TCP)
# Note: Requires Arrow Flight SQL server running (e.g., Hive with Arrow Flight support, Spark Thrift Server with Arrow, etc.)
--embed_router_conf={"datasources":[{"id":"ds001","name":"hive db","kind":"HIVE","connection_str":"grpc://localhost:8816"}],"rules":[{"db":"*","table":"*","datasource_id":"ds001"}]}
# Arrow Flight SQL TLS configuration (optional)
--arrow_client_disable_server_verification=true
# --arrow_cert_pem_path=/path/to/ca.pem
# --arrow_client_key_pem_path=/path/to/client-key.pem
# --arrow_client_cert_pem_path=/path/to/client-cert.pem
# party authentication flags (disabled for testing)
--enable_self_auth=false
--enable_peer_auth=false
# --private_key_pem_path=./examples/scdb-tutorial/engine/bob/conf/ed25519key.pem
# --authorized_profile_path=./examples/scdb-tutorial/engine/bob/conf/authorized_profile.json
Loading
Loading