Source code for pydatasentry.process

#!/usr/bin/env python

import json 
import pickle 
from .helpers import dumper
from .lineage import get_lineage

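# Sample of a run configuration, shown as a JSON-style dump (callables appear
# as their repr strings). Presumably the shape of the `run` argument used below:
#    "spec": {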
#        "storage": [
#            "local"
#        ],
#        "experiment": {
#            "scope": "test",
#            "version": 1,
#            "run": "test"
#        },
#        "output": [
#            "output.default-signature"
#        ],
#        "instrumentation": {
#            "modules": [
#                "statsmodels.formula.api"
#            ]
#        }
#    },
#    "attributes": {
#        "": {
#            "compute": "<function <lambda> at 0x7fd373569268>"
#        },
#        "model.common.timestamp": {
#            "compute": "<function <lambda> at 0x7fd373569510>"
#        },
#        "dataset.transformations": {
#            "compute": "<function <lambda> at 0x7fd373569400>"
#        },
#        "output.default-signature": {
#            "compute": "<function compute_default_signature at 0x7fd3735691e0>",
#            "params": {
#                "format": "JSON"
#            },
#            "inputs": {
#                "modeling-function": "model.function",
#                "columns": "",
#                "uuid": "uuid",
#                "modeling-module": "model.module",
#                "data-dimensions": ""
#            }
#        },
#        "": {
#            "compute": "<function <lambda> at 0x7fd373569598>",
#            "inputs": {
#                "dataset": ""
#            }
#        },
#        "dataset.relativepath": {
#            "compute": "<function dataset_relpath at 0x7fd3735690d0>"
#        },
#        "model.modname": {
#            "compute": "<function <lambda> at 0x7fd373569488>"
#        },
#        "storage.local": {
#            "store": "<function local_storage at 0x7fd373569158>",
#            "params": {
#                "relative-path": [
#                    "model-output",
#                    "spec.scope",
#                    "",
#                    "spec.version",
#                    "model.common.timestamp",
#                    "model.common.formula"
#                ]
#            }
#        },
#        "dataset.hash": {
#            "compute": "<function <lambda> at 0x7fd3735692f0>"
#        },
#        "": {
#            "compute": "<function dataset_basename at 0x7fd373564f28>"
#        },
#        "dataset.timestamp": {
#            "compute": "<function <lambda> at 0x7fd373569378>"
#        },
#        "": {
#            "compute": "<function <lambda> at 0x7fd373569620>",
#            "inputs": {
#                "dataset": ""
#            }
#        }
#    },
#    "datasets": [],
#    "debug": true

def lookup_attribute(name, run):
    """
    Look up the run configuration for the value of a given attribute.

    Two strategies are tried, in order, before giving up:

    1. ``name`` is used directly as a key of ``run``.
    2. ``name`` is treated as a dotted path, so ``"model.function"``
       is resolved as ``run['model']['function']``.

    The default is to return the name unmodified.

    :param name: name of the attribute
    :param run: Combination of configuration and run-specific
                information (internally generated)
    :returns attribute: value found for the attribute, or ``name``
                        itself when neither strategy succeeds
    """
    print("Default lookup", name)

    # See if a simple lookup will work..
    if name in run:
        print("Default lookup. Basic", name, run[name])
        return run[name]

    # May be the name is nested. So try that as well..
    # The path is walked key by key instead of being eval()'d, so a name
    # containing quotes or other Python syntax can never be executed as
    # code; it is only ever used as a dictionary key.
    alt = name  # label for the messages; replaced by the bracketed form below
    try:
        parts = name.split(".")
        # Same human-readable form the messages have always shown,
        # e.g. run['model']['function']
        alt = "run['" + "']['".join(parts) + "']"
        res = run
        for part in parts:
            res = res[part]
    except (AttributeError, IndexError, KeyError, TypeError):
        # AttributeError: name is not a string (no .split)
        # KeyError/IndexError/TypeError: the path does not exist in run
        print("Default lookup. Bracketed", alt, "Didnt work")
    else:
        print("Default lookup. Bracketed", alt, res)
        return res

    # Nothing worked. Simply return the name
    print("Default lookup. Nothing worked", name)
    return name
def evaluate_attribute(name, run, form=str, depth=0):
    """
    Evaluate the signature and other attributes specified by the
    configuration.

    Dicts and lists are evaluated element by element (recursively).
    Anything else is resolved with ``lookup_attribute``; if the result
    is a dict carrying a ``params`` and/or ``compute`` entry, its params
    are evaluated recursively and handed to its ``compute`` callable.

    :param name: Name of the attribute (or a dict/list of names)
    :param run: Combination of configuration and run-specific
                information (internally generated)
    :param form: not used by this function itself; only passed along
                 to the recursive calls
    :param depth: <internal parameter to track recursion>
    :returns: a dict/list of evaluated members, the looked-up value
              when it does not look like an attribute, or the result
              of the attribute's ``compute(run, args)``
    """
    debug = run.get('debug', False)
    if debug:
        print("Evaluate ", name, "Depth", depth)

    # Containers: evaluate every member one level deeper
    if isinstance(name, dict):
        return {
            key: evaluate_attribute(name[key], run, form, depth + 1)
            for key in name
        }

    if isinstance(name, list):
        return [
            evaluate_attribute(item, run, form, depth + 1)
            for item in name
        ]

    # The result may be a simple string...
    attribute = lookup_attribute(name, run)

    # We may not have found any attribute to process. So simply return
    # the same..
    looks_like_attribute = (
        isinstance(attribute, dict)
        and ('params' in attribute or 'compute' in attribute)
    )
    if not looks_like_attribute:
        print("Attribute name", name, "not found or the data does not "
              "look like an attribute.",
              "So returning the attribute", attribute)
        return attribute

    # Turn params into args. form and depth are propagated so that the
    # nested evaluation behaves like the dict/list branches above
    # (previously both were silently reset to their defaults here).
    params = attribute.get('params', {})
    args = evaluate_attribute(params, run, form, depth + 1)

    # Gather the computation; without one the evaluated args are the result
    compute = attribute.get('compute', lambda run, args: args)

    print("Found ", json.dumps(args, default=dumper, indent=4))
    print("Calling compute of ", name)
    return compute(run, args)
def summarize_run(run):
    """
    Post-process the input and output data from the run.

    When the run's ``debug`` flag is set, the whole run document is
    printed as JSON first. The attribute evaluation is then triggered.

    :param run: Combination of configuration and run-specific
                information (internally generated)
    """
    # A missing 'debug' key means "not debugging", matching how
    # evaluate_attribute reads the same flag (previously a KeyError here).
    if run.get('debug', False):
        print("Document")
        print(json.dumps(run, default=dumper, indent=4))

    # Gather what should be computed...
    # NOTE(review): the attribute name passed here is the empty string;
    # confirm this is the intended entry point and not a lost name.
    evaluate_attribute("", run)