Source code for pydatasentry.config

#!/usr/bin/env python 
"""

Maintains the overall configuration. Any overrides provided by the
user are incorporated into the configuration. The configuration is
combined with run-specific data and passed to the post-processing function.

The default configuration is 
:: 
     {
        'debug': False, 
        'spec': {         

            # High level experiment information
            'experiment': { 
                'scope': 'offers',
                'run': 'conditional',
                'version': 1
            },

            # Which modules should be instrumented 
            'instrumentation': {
                'modules': ['statsmodels.formula.api']
            },

            # What should be captured 
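            'output': {
                'params': [
                    {
                        'content': 'attributes.output.default-signature',
                        'path': 'attributes.output.relative-path',
                        'filename': 'signature.json',
                        'format': 'JSON'
                    },
                    {
                        'content': 'attributes.output.full-pickle',
                        'path': 'attributes.output.relative-path',
                        'filename': 'full.pickle',
                        'format': 'JSON'
                    },
                    {
                        'content': 'attributes.output.summary-pickle',
                        'path': 'attributes.output.relative-path',
                        'filename': 'summary.pickle',
                        'format': 'JSON'
                    }
                ]
            },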

            # Where should they be stored and how 
            'store': {
                'params': ['attributes.storage.local']
            },

        }
     }

"""
import os, sys  
import copy 
import json
from .helpers import dumper, merge
from .attributes import attribute_overlay

def initialize_config(update=None):
    """
    Initialize the configuration of pydatasentry and override it with
    any user or run-specific parameters.

    The module-level ``config`` is rebuilt from the built-in defaults on
    every call; ``attribute_overlay`` is merged in first and ``update``
    is merged in last.

    :param update: Dict that overrides the basic configuration. ``None``
                   (the default) is treated as an empty dict, i.e. no
                   overrides.
    """
    global config

    # A None sentinel replaces the former mutable ``{}`` default so that no
    # dict object is shared between successive calls.
    if update is None:
        update = {}

    config = {
        'debug': False,
        'spec': {

            # High level experiment information
            'experiment': {
                'scope': 'offers',
                'run': 'conditional',
                'version': 1
            },

            # Which modules should be instrumented
            'instrumentation': {
                'modules': ['statsmodels.formula.api']
            },

            # What should be captured and where
            # NOTE(review): the two pickle outputs are also tagged with
            # 'format': 'JSON' -- confirm this is intentional.
            'output': {
                'params': [
                    {
                        'content': 'attributes.output.default-signature',
                        'path': 'attributes.output.relative-path',
                        'filename': 'signature.json',
                        'format': 'JSON'
                    },
                    {
                        'content': 'attributes.output.full-pickle',
                        'path': 'attributes.output.relative-path',
                        'filename': 'full.pickle',
                        'format': 'JSON'
                    },
                    {
                        'content': 'attributes.output.summary-pickle',
                        'path': 'attributes.output.relative-path',
                        'filename': 'summary.pickle',
                        'format': 'JSON'
                    },
                ]
            },

            # Storage engine
            'store': {
                'params': ['attributes.storage.local']
            },
        }  # spec
    }

    # The return value of merge() is discarded, so it presumably updates its
    # first argument in place -- verify against helpers.merge.

    # Include the attribute map...
    merge(config, attribute_overlay)

    # Override the helper functions and spec if needed...
    merge(config, update)
def get_config():
    """
    Hand out the current configuration.

    A deep copy of the module-level ``config`` is returned, so changes made
    by the caller to the result do not affect the stored configuration.

    :returns: deep copy of the current configuration
    """
    # Reading a module-level name needs no ``global`` declaration.
    snapshot = copy.deepcopy(config)
    return snapshot
def validate_config():
    """
    Check that the current configuration has the essential fields.

    At present this verifies that ``config['spec']`` has an
    ``'experiment'`` entry and that the entry contains ``'scope'``,
    ``'run'`` and ``'version'``. A description of the problem is printed
    to stderr before the exception is raised. More checks will be added
    over time.

    :raises Exception: "Invalid configuration" if a required field is
                       missing
    """
    spec = config['spec']

    if 'experiment' not in spec:
        print("pydatasentry requires specification of "
              "'experiment'. Please check documentation", file=sys.stderr)
        raise Exception("Invalid configuration")

    required = ['scope', 'run', 'version']
    missing = [r for r in required if r not in spec['experiment']]
    if missing:
        # Built from adjacent literals only: a backslash-newline inside a
        # string literal would embed the source indentation in the message.
        print("pydatasentry requires at least modeling scope (e.g., "
              "offers), run (e.g., regional model), and version (e.g., "
              "v1). Missing: " + ", ".join(missing), file=sys.stderr)
        raise Exception("Invalid configuration")