From 7781fd48c65625e11928e735af7143dec8b63a15 Mon Sep 17 00:00:00 2001 From: Marvin Frick Date: Fri, 25 Dec 2015 12:09:36 +0100 Subject: [PATCH] adds tests for variable extraction WIP implementing variableExtractor adds section about variable manipulation --- README.md | 43 +++++++++++++++++- .../__modules__/extract_from_id/__init__.py | 7 +++ examples/stack/tld/com.yaml | 2 + examples/varstack_with_extractors.yaml | 7 +++ test/varstack_test.py | 26 ++++++++++- varstack/__init__.py | 44 ++++++++++++++++++- 6 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 examples/__modules__/extract_from_id/__init__.py create mode 100644 examples/stack/tld/com.yaml create mode 100644 examples/varstack_with_extractors.yaml diff --git a/README.md b/README.md index 3a53a8e..97a697f 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ stack: The 'datadir' setting defines the base directory which is searched for the configuration files specified in the 'stack' setting. The 'stack' setting is a list of configuration file candidates that will be read in the order they are specified if they exist. If a file doesn't exist the evaluation will continue with the next candidate. The path names may contain any number of variables in the form '%{variable}' and the values for these variables can be specified when varstack is run to select the desired files. The parsing will start with an empty set of settings and the contents of each file in the list that exists will be merged into this set if settings until all candidates have been read at which point the finalized set of settings is returned. -Note that if the type of a variable is a list then the path containing such a variable in its name will be duplicated for each value in the list. If for example the variable "tags" is a list [mysql, apache] then the evaluation of the path "tags/%{tags}" will result in two paths "tags/mysql.yaml" and "tags/apache.yaml". 
This feature can only be utilized using the python interface right now and not from the command line. +Note that if the type of a variable is a list then the path containing such a variable in its name will be duplicated for each value in the list. If for example the variable "tags" is a list [mysql, apache] then the evaluation of the path "tags/%{tags}" will result in two paths "tags/mysql.yaml" and "tags/apache.yaml". This feature can only be utilized using the python interface right now and not from the command line. The way the data from a new file is merged with the existing data can be controlled by specifying a combination mode. Right now this mode can either be 'merge' or 'replace'. When 'replace' is specified if Varstack encounters a hash or array/list variable the content from previous definitions of this variable is replaced with the content in the new file. This allows one to override variables from previous definitions. If the mode 'merge' is selected (the default) then content of hash or array/list variables is merged with previous definitions of this variable. This allows for extending previously defined data. @@ -226,6 +226,47 @@ Inside this encrypted value, dicts and lists can exist. This will be parsed thro The default gnupgdir is '_$HOME/.gnupg_'. If you want to chose another path, put _gnupghome: PATH_TO_GNUPG_FOLDER_ inside your varstack.yaml config file +## Manipulating variables with custom python modules +When using variables to stack your data, it can be advantageous to manipulate and/or generate new variables on the fly. This can be done by providing custom python modules that implement a single function `extractVariables`. +### Example +Include at last one entry to an `extractors` list in your varstack.yaml: +``` +--- +extractors: + - extract_from_id +stack: + - defaults + - tld/%{_tld} +``` +and put a python module (eg. 
a directory with at least the `__init__.py` file in it) in `__modules__` next to your varstack.yaml file: + +(note that you can provide an absolute path to the python module as well). +``` +example +├── __modules__ +│   └── extract_from_id +│   └── __init__.py +├── stack +│   ├── crypted.yaml +│   ├── defaults.yaml +│   ├── overwrites.yaml +│   └── tld +│   └── com.yaml +└── varstack.yaml +``` +The `extract_from_id` module used in this example simply splits an FQDN into its different domain zones: +```python +def extractVariables(variables): + if variables['id']: + variables['_tld'] = variables['id'].split('.')[-1] + variables['_sld'] = variables['id'].split('.')[-2] + variables['_host'] = variables['id'].split('.')[0] + + return variables +``` +The function will be passed the current variables dict and is expected to return the expanded dictionary. +If you have multiple extractor functions, they are applied in order, sorted by their module name. Although it is possible to not only increase but also decrease the number of variables this way, it is not recommended to do so. 
+ ## Running development tests ``` diff --git a/examples/__modules__/extract_from_id/__init__.py b/examples/__modules__/extract_from_id/__init__.py new file mode 100644 index 0000000..861c268 --- /dev/null +++ b/examples/__modules__/extract_from_id/__init__.py @@ -0,0 +1,7 @@ +def extractVariables(variables): + if variables['id']: + variables['_tld'] = variables['id'].split('.')[-1] + variables['_sld'] = variables['id'].split('.')[-2] + variables['_host'] = variables['id'].split('.')[0] + + return variables \ No newline at end of file diff --git a/examples/stack/tld/com.yaml b/examples/stack/tld/com.yaml new file mode 100644 index 0000000..335c036 --- /dev/null +++ b/examples/stack/tld/com.yaml @@ -0,0 +1,2 @@ +--- +my_tld: com \ No newline at end of file diff --git a/examples/varstack_with_extractors.yaml b/examples/varstack_with_extractors.yaml new file mode 100644 index 0000000..508f9ee --- /dev/null +++ b/examples/varstack_with_extractors.yaml @@ -0,0 +1,7 @@ +--- +extractors: + - extract_from_id + - /etc/varstack/absolute_path_extractor +stack: + - defaults + - tld/%{_tld} diff --git a/test/varstack_test.py b/test/varstack_test.py index b8363bd..1e72fbb 100644 --- a/test/varstack_test.py +++ b/test/varstack_test.py @@ -24,7 +24,7 @@ def test_evaluate_with_non_existing_datadir_fails_and_returns_empty(self, log): assert_in('not found, skipping', str(log)) def test_evaluate_merges_and_replaces(self): - v = Varstack(os.path.dirname(__file__)+"/../examples/varstack_no_datadir.yaml") + v = Varstack(os.path.dirname(__file__)+"/../examples/varstack_no_datadir.yaml", {}) evaluated = v.evaluate({}) assert_equal(3, len(evaluated['an_array'])) assert_equal(2, len(evaluated['a_dict'])) @@ -78,3 +78,27 @@ def test_evaluate_cant_decrypt(self, log): assert_is_instance(logged_evaluated['secret_that_was_encrypted_with_another_key'], str) assert_in('BEGIN PGP MESSAGE', logged_evaluated['secret_that_was_encrypted_with_another_key']) assert_in('could not decrypt string', str(log)) 
+ +class TestVarstackVariableExtractor(object): + @log_capture(level=logging.DEBUG) + def test_can_load_py_files_from_working_dir(self, log): + v = Varstack(os.path.dirname(__file__)+"/../examples/varstack_with_extractors.yaml") + v.evaluate({}) + assert_in('successfully loaded "extract_from_id"', str(log)) + assert_in('extract_from_id', v.config['extractor_functions']) + + @log_capture(level=logging.ERROR) + def test_can_load_py_files_with_absolute_path(self, log): + v = Varstack(os.path.dirname(__file__)+"/../examples/varstack_with_extractors.yaml") + v.evaluate({}) + assert_in('Could not load extractor function from', str(log)) + assert_not_in('absolute_path_extractor', v.config['extractor_functions']) + + @log_capture(level=logging.DEBUG) + def test_extractor_function_is_called(self, log): + v = Varstack(os.path.dirname(__file__)+"/../examples/varstack_with_extractors.yaml", {}) + evaluated = v.evaluate({'id': 'some1-extractortest1.varstack.example.com'}) + assert_equal(evaluated['my_tld'], 'com') + + evaluated = v.evaluate({'id': 'some1-extractortest1.varstack.example.net'}) + assert_in('tld/net.yaml" not found', str(log)) diff --git a/varstack/__init__.py b/varstack/__init__.py index 04f81ca..e9f928b 100644 --- a/varstack/__init__.py +++ b/varstack/__init__.py @@ -5,7 +5,7 @@ __all__ = [ "Varstack" ] -import logging, re, yaml, os +import logging, re, yaml, os, sys from pprint import pprint try: @@ -23,6 +23,8 @@ def __init__(self, config_filename='/etc/varstack.yaml', config={}): self.log.addHandler(NullHandler()) self.data = {} self.config = config + self.config['extractors'] = [] + self.config['extractor_functions'] = {} if not 'gnupghome' in self.config: if 'HOME' in os.environ: self.config['gnupghome'] = os.environ['HOME']+'/.gnupg' @@ -42,6 +44,8 @@ def evaluate(self, variables, init_data=None): return {} self.config.update(yaml.safe_load(cfh)) cfh.close() + self.__loadExtractorFunctions() + variables = self.__extractVariables(variables) for path in 
self.config['stack']: fullpaths = self.__substitutePathVariables(self.config['datadir']+'/'+path+'.yaml', variables) if not fullpaths: @@ -58,6 +62,44 @@ def evaluate(self, variables, init_data=None): rawdata = self.data return self.__cleanupData(rawdata) + """Loads the extractVariables function from each module listed in the 'extractors' config setting.""" + def __loadExtractorFunctions(self): + for filename_or_path in self.config['extractors']: + if not os.path.isabs(filename_or_path): + file_to_load = os.path.abspath(os.path.join(os.path.dirname(self.config_filename), '__modules__', filename_or_path)) + else: + file_to_load = filename_or_path + + module_name = os.path.basename(file_to_load) + try: + sys.path.insert(0, os.path.dirname(file_to_load)) + module = __import__(module_name) + try: + self.config['extractor_functions'][module_name] = module.extractVariables + except AttributeError, e: + raise ImportError('no function named \'extractVariables\'') + else: + self.log.debug('successfully loaded "{0}" with an extractor function'.format(module_name)) + except ImportError, e: + self.log.error('Could not load extractor function from {0}: {1}'.format(file_to_load, e)) + finally: + del sys.path[0] + + """Extracts variables from other variables using custom functions, loaded from extractor modules.""" + def __extractVariables(self, variables): + for function_name in sorted(self.config['extractor_functions'].keys()): + function = self.config['extractor_functions'][function_name] + + self.log.debug('applying {0} to variables ({1} entries so far)'.format(function_name, len(variables))) + try: + variables = function(variables) + except Exception, e: + self.log.warn('Exception caught while running variable extractor {0}: {1}'.format(function_name, e)) + import traceback + self.log.debug(traceback.format_exc().replace("\n"," \ ")) + + return variables + """Replace variables in a path with their respective values.""" def __substitutePathVariables(self, path, variables): 
new_paths = [path]