# Source code for pydatasentry.config

#!/usr/bin/env python 
"""

Maintains the overall configuration. Any overrides provided by the
user are incorporated into the configuration. The configuration is
combined with run-specific data and passed to the post-processing
function.

The default configuration is 
:: 
     {
        'debug': False, 
        'spec': {         

            # High level experiment information
            'experiment': { 
                'scope': 'offers',
                'run': 'conditional',
                'version': 1
            },

            # Which modules should be instrumented 
            'instrumentation': {
                'modules': ['statsmodels.formula.api']
            },

            # What should be captured 
            'output': {
                'params': [ 
                    {
                        'content': 'attributes.output.default-signature',
                        'path': 'attributes.output.relative-path',
                        'filename': 'signature.json'
                    }
                ]
            },

            # Where should they be stored and how 
            'store': {
                'params': ['attributes.storage.local']
            },
        }
     }

"""
import os, sys  
import copy 
import json
from .helpers import dumper, merge
from .attributes import attribute_overlay

def initialize_config(update=None):
    """
    Initialize the configuration of pydatasentry and over-ride it
    with any user or run specific parameters.

    Every call rebuilds the module-level ``config`` dict from the
    built-in defaults below, then hands it to ``merge`` twice: first
    with ``attribute_overlay`` and then with ``update``.

    :param update: Dict that over-rides the basic configuration.
                   ``None`` (the default) is treated as an empty dict.
    """
    global config
    config = {
        'debug': False,
        'spec': {
            # High level experiment information
            'experiment': {
                'scope': 'offers',
                'run': 'conditional',
                'version': 1
            },
            # Which modules should be instrumented
            'instrumentation': {
                'modules': ['statsmodels.formula.api']
            },
            # What should be captured and where
            # NOTE(review): the two pickle outputs below also declare
            # 'format': 'JSON' -- confirm this is intended and not a
            # copy-paste slip; the values are left untouched here.
            'output': {
                'params': [
                    {
                        'content': 'attributes.output.default-signature',
                        'path': 'attributes.output.relative-path',
                        'filename': 'signature.json',
                        'format': 'JSON'
                    },
                    {
                        'content': 'attributes.output.full-pickle',
                        'path': 'attributes.output.relative-path',
                        'filename': 'full.pickle',
                        'format': 'JSON'
                    },
                    {
                        'content': 'attributes.output.summary-pickle',
                        'path': 'attributes.output.relative-path',
                        'filename': 'summary.pickle',
                        'format': 'JSON'
                    },
                ]
            },
            # Storage engine
            'store': {
                'params': ['attributes.storage.local']
            },
        }  # spec
    }

    # Include the attribute map...
    merge(config, attribute_overlay)

    # Override the helper functions and spec if needed...
    # A fresh dict stands in for None so that no mutable default object
    # is shared between calls.
    merge(config, {} if update is None else update)

def get_config():
    """
    Read the configuration.

    A deep copy is returned, so changes made by the caller to the
    returned dict do not affect the module-level configuration.

    NOTE(review): no module-level default for ``config`` is visible in
    this file, so calling this before ``initialize_config`` presumably
    raises ``NameError`` -- confirm against callers.

    :returns: deep copy of the current configuration
    """
    # Reading a module-level name needs no ``global`` declaration.
    return copy.deepcopy(config)

def validate_config():
    """
    Checks whether the specified configuration has all the essential
    fields such as the experiment details. More checks will be added
    over time.

    The check passes when ``config['spec']['experiment']`` exists and
    contains the keys ``scope``, ``run`` and ``version``. On failure an
    explanation is printed to stderr before the exception is raised.

    :returns: None when the configuration is valid
    :raises Exception: "Invalid configuration" if the 'spec' or
                       'experiment' section, or any required experiment
                       field, is missing
    """
    global config

    # A missing or empty 'spec' section is reported the same way as a
    # missing 'experiment' section instead of surfacing as a KeyError.
    spec = config.get('spec') or {}

    if 'experiment' not in spec:
        print("pydatasentry requires specification of "
              "'experiment'. Please check documentation", file=sys.stderr)
        raise Exception("Invalid configuration")

    required = ['scope', 'run', 'version']
    missing = [r for r in required if r not in spec['experiment']]
    if missing:
        # Adjacent string literals are used (not a backslash inside the
        # literal) so that no source indentation leaks into the message.
        print("pydatasentry requires at least modeling scope (e.g., "
              "offers), run (e.g., regional model), and version (e.g., "
              "v1). Missing: " + ", ".join(missing), file=sys.stderr)
        raise Exception("Invalid configuration")