diff --git a/bob.txt b/bob.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/bob.txt @@ -0,0 +1 @@ + diff --git a/btrdb/__init__.py b/btrdb/__init__.py index a7019c7..cd14662 100644 --- a/btrdb/__init__.py +++ b/btrdb/__init__.py @@ -20,7 +20,9 @@ from btrdb.exceptions import ConnectionError from btrdb.version import get_version from btrdb.utils.credentials import credentials_by_profile, credentials +from btrdb.utils.ray import register_serializer from btrdb.stream import MINIMUM_TIME, MAXIMUM_TIME +from warnings import warn ########################################################################## ## Module Variables @@ -40,7 +42,7 @@ def _connect(endpoints=None, apikey=None): return BTrDB(Endpoint(Connection(endpoints, apikey=apikey).channel)) -def connect(conn_str=None, apikey=None, profile=None): +def connect(conn_str=None, apikey=None, profile=None, shareable=False): """ Connect to a BTrDB server. @@ -57,6 +59,12 @@ def connect(conn_str=None, apikey=None, profile=None): The name of a profile containing the required connection information as found in the user's predictive grid credentials file `~/.predictivegrid/credentials.yaml`. + shareable: bool, default=False + Whether or not the connection can be "shared" in a distributed setting such + as Ray workers. If set to True, the connection can be serialized and sent + to other workers so that data can be retrieved in parallel; **however**, this + is less secure because it is possible for other users of the Ray cluster to + use your API key to fetch data. Returns ------- @@ -68,6 +76,11 @@ def connect(conn_str=None, apikey=None, profile=None): if conn_str and profile: raise ValueError("Received both conn_str and profile arguments.") + # check shareable flag and register custom serializer if necessary + if shareable: + warn("a shareable connection is potentially insecure; other users of the same cluster may be able to access your API key") + register_serializer(conn_str=conn_str, apikey=apikey, profile=profile) + # use specific profile if requested if profile: return _connect(**credentials_by_profile(profile)) diff --git a/btrdb/conn.py b/btrdb/conn.py index 55058a4..bb79f2f 100644 --- a/btrdb/conn.py +++ b/btrdb/conn.py @@ -18,6 +18,7 @@ import os import re import json +import certifi import uuid as uuidlib import grpc @@ -65,12 +66,25 @@ def __init__(self, addrportstr, apikey=None): # grpc bundles its own CA certs which will work for all normal SSL # certificates but will fail for custom CA certs. Allow the user # to specify a CA bundle via env var to overcome this - ca_bundle = os.getenv("BTRDB_CA_BUNDLE","") - if ca_bundle != "": + env_bundle = os.getenv("BTRDB_CA_BUNDLE", "") + + # certifi certs are provided as part of this package install + # https://github.com/certifi/python-certifi + lib_certs = certifi.where() + + ca_bundle = env_bundle + + if ca_bundle == "": + ca_bundle = lib_certs + try: with open(ca_bundle, "rb") as f: contents = f.read() - else: - contents = None + except Exception: + if env_bundle != "": + # The user has given us something but we can't use it, we need to make noise + raise Exception("BTRDB_CA_BUNDLE(%s) env is defined but could not read file" % ca_bundle) + else: + contents = None if apikey is None: self.channel = grpc.secure_channel( diff --git a/btrdb/transformers.py b/btrdb/transformers.py index 12f7979..1cf346c 100644 --- a/btrdb/transformers.py +++ b/btrdb/transformers.py @@ -145,13 +145,15 @@ def to_dataframe(streamset, columns=None, agg="mean", name_callable=None): df = pd.DataFrame(to_dict(streamset,agg=agg)) - df = df.set_index("time") - if agg == "all" and not streamset.allow_window: - stream_names = [[s.collection, s.name, prop] for s in streamset._streams for prop in _STAT_PROPERTIES] - df.columns=pd.MultiIndex.from_tuples(stream_names) - else: - df.columns = columns if columns else _stream_names(streamset, name_callable) + if not df.empty: + df = df.set_index("time") + + if agg == "all" and not streamset.allow_window: + stream_names = [[s.collection, s.name, prop] for s in streamset._streams for prop in _STAT_PROPERTIES] + df.columns=pd.MultiIndex.from_tuples(stream_names) + else: + df.columns = columns if columns else _stream_names(streamset, name_callable) return df diff --git a/btrdb/utils/ray.py b/btrdb/utils/ray.py index 8df9527..674ab64 100644 --- a/btrdb/utils/ray.py +++ b/btrdb/utils/ray.py @@ -1,9 +1,6 @@ -from functools import partial - -import ray - import btrdb from btrdb.conn import BTrDB +from functools import partial def register_serializer(conn_str=None, apikey=None, profile=None): """ @@ -22,8 +19,27 @@ def register_serializer(conn_str=None, apikey=None, profile=None): found in the user's predictive grid credentials file `~/.predictivegrid/credentials.yaml`. """ - ray.register_custom_serializer( - BTrDB, serializer=btrdb_serializer, deserializer=partial(btrdb_deserializer, conn_str=conn_str, apikey=apikey, profile=profile)) + try: + import ray + except ImportError: + raise ImportError("must pip install ray to register custom serializer") + try: + import semver + except ImportError: + raise ImportError("must pip install semver to register custom serializer") + + assert ray.is_initialized(), "Need to call ray.init() before registering custom serializer" + # TODO: check the version using the 'semver' package? + ver = semver.VersionInfo.parse(ray.__version__) + if ver.major == 0: + ray.register_custom_serializer( + BTrDB, serializer=btrdb_serializer, deserializer=partial(btrdb_deserializer, conn_str=conn_str, apikey=apikey, profile=profile)) + elif ver.major == 1 and ver.minor in range(2, 4): + # TODO: check different versions of ray? + ray.util.register_serializer( + BTrDB, serializer=btrdb_serializer, deserializer=partial(btrdb_deserializer, conn_str=conn_str, apikey=apikey, profile=profile)) + else: + raise Exception("Ray version %s does not have custom serialization. Please upgrade to >= 1.2.0" % ray.__version__) def btrdb_serializer(_): """ diff --git a/btrdb/version.py b/btrdb/version.py index 6df466a..4bff3e4 100644 --- a/btrdb/version.py +++ b/btrdb/version.py @@ -15,7 +15,7 @@ ## Module Info ########################################################################## -__version_info__ = { 'major': 5, 'minor': 11, 'micro': 0, 'releaselevel': 'final'} +__version_info__ = { 'major': 5, 'minor': 11, 'micro': 7, 'releaselevel': 'final'} ########################################################################## ## Helper Functions diff --git a/docs/source/conf.py b/docs/source/conf.py index e221fcb..79f76de 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,8 +26,8 @@ # -- Project information ----------------------------------------------------- project = 'btrdb' -copyright = '2019, Michael P. Andersen' -author = 'Michael P. Andersen' +copyright = '2021, Ping Things, Inc.' +author = 'PingThingsIO' # The short X.Y version version = get_version() @@ -93,7 +93,7 @@ # html_theme_options = { 'show_powered_by': False, - 'github_user': 'BTrDB', + 'github_user': 'PingThingsIO', 'github_repo': 'btrdb-python', 'travis_button': False, 'github_banner': False, @@ -159,7 +159,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'btrdb-python.tex', 'btrdb-python Documentation', - 'Michael Andersen', 'manual'), + 'PingThingsIO', 'manual'), ] diff --git a/docs/source/index.rst b/docs/source/index.rst index 66d2de0..da1b71e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,8 +1,8 @@ Welcome to btrdb docs! ====================== -.. image:: https://img.shields.io/travis/BTrDB/btrdb-python/master.svg - :target: https://travis-ci.org/BTrDB/btrdb-python +.. image:: https://github.com/PingThingsIO/btrdb-python/actions/workflows/release.yaml/badge.svg + :target: https://github.com/PingThingsIO/btrdb-python/actions .. image:: https://readthedocs.org/projects/btrdb/badge/?version=latest :target: https://btrdb.readthedocs.io/en/latest/ diff --git a/release.sh b/release.sh index 6f6595a..9104416 100755 --- a/release.sh +++ b/release.sh @@ -35,8 +35,8 @@ fi echo "Setting version to v$1.$2.$3" -VERION_CODE="__version_info__ = { 'major': $1, 'minor': $2, 'micro': $3, 'releaselevel': 'final'}" -sed -i "s/^__version_info__.*$/${VERION_CODE}/g" btrdb/version.py +VERSION_CODE="__version_info__ = { 'major': $1, 'minor': $2, 'micro': $3, 'releaselevel': 'final'}" +sed -i.bak "s/^__version_info__.*$/${VERSION_CODE}/g" btrdb/version.py git add btrdb/version.py git commit -m "Release v$1.$2.$3" diff --git a/requirements.txt b/requirements.txt index b12c1a6..0d89f56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ grpcio-tools>=1.19.0 pytz # Misc libraries -pyyaml \ No newline at end of file +pyyaml +certifi \ No newline at end of file diff --git a/setup.py b/setup.py index 0e515f2..2997b26 100644 --- a/setup.py +++ b/setup.py @@ -29,11 +29,11 @@ ## Basic information NAME = "btrdb" DESCRIPTION = "Bindings to interact with the Berkeley Tree Database using gRPC." -AUTHOR = "Michael Andersen, Allen Leis" -EMAIL = "michael@steelcode.com" -MAINTAINER = "Michael Andersen" +AUTHOR = "PingThingsIO" +EMAIL = "support@pingthings.io" +MAINTAINER = "PingThingsIO" LICENSE = "BSD-3-Clause" -REPOSITORY = "https://github.com/BTrDB/btrdb-python" +REPOSITORY = "https://github.com/PingThingsIO/btrdb-python" PACKAGE = "btrdb" URL = "http://btrdb.io/" DOCS_URL = "https://btrdb.readthedocs.io/en/latest/"