Simple graph object describing the links between executors. More...

Inheritance diagram for python.trfGraph.executorGraph:

Collaboration diagram for python.trfGraph.executorGraph:

Public Member Functions
def	__init__ (self, executorSet, inputData=set([]), outputData=set([]))
	Initialise executor graph. More...

def	inputData (self)

def	inputData (self, inputData)

def	outputData (self)

def	outputData (self, outputData)

def	execution (self)
	Return a list of execution nodes with their data inputs/outputs. More...

def	data (self)
	Return a list of all data used in this execution. More...

def	addNode (self, executor)
	Add an executor node to the graph. More...

def	deleteNote (self, executor)
	Remove an executor node from the graph. More...

def	findConnections (self)
	Look at executor nodes and work out how they are connected. More...

def	doToposort (self)
	Find a topologically sorted list of the graph nodes. More...

def	findExecutionPath (self)
	Find the graph's execution nodes, from input to output data types with each activated step and the inputs/outputs. More...

def	__str__ (self)
	Nodes in topologically sorted order, if available, else sorted name order. More...

def	__repr__ (self)
	Nodes in topologically sorted order, if available, else sorted name order. More...

Private Member Functions
def	_resetConnections (self)

def	_bestPath (self, data, dataAvailable, startNodeName='_start', endNodeName=None)
	Find the best path from an end node back to a start node, producing a certain type of data given the set of currently available data and the current set of activated nodes. More...

def	_extendPath (self, path, currentNodeName, nextNodeName)
	Connect a path to a particular node. More...

Private Attributes
	_nodeDict

	_inputData

	_outputData

	_toposort

	_toposortData

	_execution

Detailed Description

Simple graph object describing the links between executors.

Definition at line 42 of file trfGraph.py.

Constructor & Destructor Documentation

◆ __init__()

def python.trfGraph.executorGraph.__init__	(	self,
		executorSet,
		inputData = `set([])`,
		outputData = `set([])`
	)

Initialise executor graph.

Parameters

executorSet	Set of executor instances
inputData	Iterable with input data for this transform's execution
outputData	Iterable with output data for this transform's execution

Definition at line 48 of file trfGraph.py.

     def __init__(self, executorSet, inputData = set([]), outputData = set([])):
         
         # Set basic node list
         self._nodeDict = {}
         
         msg.info('Transform graph input data: {0}; output data {1}'.format(inputData, outputData))
         
         if len(executorSet) == 1:
             # Single executor - in this case inData/outData is not mandatory, so we set them to the 
             # input/output data of the transform
             executor = list(executorSet)[0]
             if len(executor._inData) == 0 and len(executor._outData) == 0:
                 executor.inData = inputData
                 executor.outData = outputData
         
         for executor in executorSet:
             self.addNode(executor)
             
         self._inputData = set(inputData)
         self._outputData = set(outputData)
         
         # It's forbidden for a transform to consume and produce the same datatype
         dataOverlap = self._inputData & self._outputData
         if len(dataOverlap) > 0:
             raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                                                         'Transform definition error, you cannot produce and consume the same datatypes in a transform. Duplicated input/output types {0}.'.format(' '.join(dataOverlap)))
  
         # Add a pseudo-start/stop nodes, from which input data flows and output data finally arrives
         # This makes the graph 'concrete' for this job
         # This is useful as then data edges all connect properly to a pair of nodes
         # We add a node for every possible output as this enables topo sorting of the graph
         # nodes for any intermediate data end nodes as well
         pseudoNodes = dict()
         pseudoNodes['_start'] = graphNode(name='_start', inData=[], outData=self._inputData, weight = 0)
         for node in self._nodeDict.values():
             for dataType in node.outputDataTypes:
                 endNodeName = '_end_{0}'.format(dataType)
                 pseudoNodes[endNodeName] = graphNode(name=endNodeName, inData=[dataType], outData=[], weight = 0)
         self._nodeDict.update(pseudoNodes)
         
         # Toposort not yet done
         self._toposort = []
         self._toposortData = []
  
         # Now find connections between nodes
         self.findConnections()
     

Member Function Documentation

◆ __repr__()

def python.trfGraph.executorGraph.__repr__ ( self )

Nodes in topologically sorted order, if available, else sorted name order.

Definition at line 416 of file trfGraph.py.

     def __repr__(self):
         nodeStrList = []
         if len(self._toposort) > 0:
             nodeNames = self._toposort
         else:
             nodeNames = list(self._nodeDict)
             nodeNames.sort()
         for nodeName in nodeNames:
             nodeStrList.append(repr(self._nodeDict[nodeName]))
         return os.linesep.join(nodeStrList)
  
  

◆ __str__()

def python.trfGraph.executorGraph.__str__ ( self )

Nodes in topologically sorted order, if available, else sorted name order.

Definition at line 402 of file trfGraph.py.

     def __str__(self):
         nodeStrList = []
         if len(self._toposort) > 0:
             nodeNames = self._toposort
         else:
             nodeNames = list(self._nodeDict)
             nodeNames.sort()
         for nodeName in nodeNames:
             if not nodeName.startswith('_'): 
                 nodeStrList.append(str(self._nodeDict[nodeName]))
         return os.linesep.join(nodeStrList)
     
  

◆ _bestPath()

def python.trfGraph.executorGraph._bestPath	(	self,
		data,
		dataAvailable,
		startNodeName = `'_start'`,
		endNodeName = `None`
	)

private

Find the best path from an end node back to a start node, producing a certain type of data given the set of currently available data and the current set of activated nodes.

Parameters

data	Data to produce
dataAvailable	Data types which can be used as sources
startNodeName	Find the path to this node (default '_start')
endNodeName	Find the path from this node (default '_end_DATATYPE')

We can always ask the algorithm to trace the path from end to start for this data type (this data is in the end node by construction). If we have to go along an edge where the data is not yet available then we need to add this data to our list of data to produce.

Definition at line 299 of file trfGraph.py.

     def _bestPath(self, data, dataAvailable, startNodeName = '_start', endNodeName = None):
         """Find the lowest-cost path producing ``data``, traced from its end node back to the start node.

         The search is seeded with a single graphPath at the end node and is
         extended backwards along each node's 'in' connections until exactly one
         path remains and that path begins at startNodeName.

         Parameters:
             data: Data to produce
             dataAvailable: Data types which can be used as sources (note: this
                 argument is not referenced anywhere in the body below)
             startNodeName: Find the path to this node (default '_start')
             endNodeName: Find the path from this node (default '_end_DATATYPE')

         Returns:
             The single surviving graphPath (pathSet[0]).

         Raises:
             trfExceptions.TransformGraphException: if the end node is not in the
                 graph, or if every candidate path turns out to be a dead end.
         """
         
         if endNodeName is None:
             endNodeName = '_end_{0}'.format(data)
         
         if endNodeName not in self._nodeDict:
             raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                 'Node {0} was not found - the transform data connection definition is broken'.format(endNodeName))
  
         
         # Set of all considered paths
         # Initialise this with our endNode name - algorithm works back to the start
         pathSet = [graphPath(endNodeName, data),]
         
         msg.debug('Started path finding with seed path {0}'.format(pathSet[0]))
         
         # Halting condition - only one path and its first element is startNodeName
         while len(pathSet) > 1 or pathSet[0].path[0] != startNodeName:
             msg.debug('Starting best path iteration with {0} paths in {1}'.format(len(pathSet), pathSet))
             # Copy the pathSet to do this, as we will update it
             for path in pathSet[:]:
                 msg.debug('Continuing path finding with path {0}'.format(path))
                 # The head of the path (element 0) is the node closest to the start
                 currentNodeName = path.path[0]
                 if currentNodeName == startNodeName:
                     msg.debug('Path {0} has reached the start node - finished'.format(path))
                     continue
                 # We walk backwards, so the "exits" of a node are its 'in' connections
                 # If there are no paths out of this node then it's a dead end - kill it
                 if len(self._nodeDict[currentNodeName].connections['in']) == 0:
                     msg.debug('Path {0} is a dead end - removing'.format(path))
                     pathSet.remove(path)
                     continue
                 # If there is only one path out of this node, we extend it
                 if len(self._nodeDict[currentNodeName].connections['in']) == 1:
                     msg.debug('Single exit from path {0} - adding connection to {1}'.format(path, list(self._nodeDict[currentNodeName].connections['in'])[0]))
                     self._extendPath(path, currentNodeName, list(self._nodeDict[currentNodeName].connections['in'])[0])
                     continue
                 # Else we need to clone the path for each possible exit
                 msg.debug('Multiple exits from path {0} - will clone for each extra exit'.format([path]))
                 # Clones take every exit but the first; they must be made before the
                 # original path is extended, so each clone starts from the same head
                 for nextNodeName in list(self._nodeDict[currentNodeName].connections['in'])[1:]:
                     newPath = copy.deepcopy(path)
                     msg.debug('Cloned exit from path {0} to {1}'.format(newPath, nextNodeName))             
                     self._extendPath(newPath, currentNodeName, nextNodeName)
                     pathSet.append(newPath)
                 # Finally, use the original path to extend along the first node exit
                 msg.debug('Adding exit from original path {0} to {1}'.format(path, list(self._nodeDict[currentNodeName].connections['in'])[0]))
                 self._extendPath(path, currentNodeName, list(self._nodeDict[currentNodeName].connections['in'])[0])
  
             # Now compare paths which made it to the start node - only keep the cheapest
             # (on equal cost the path encountered first in pathSet is the one kept)
             lowestCostPath = None
             for path in pathSet[:]:
                 currentNodeName = path.path[0]
                 if currentNodeName == startNodeName:
                     if lowestCostPath is None:
                         lowestCostPath = path
                         continue
                     if path.cost >= lowestCostPath.cost:
                         msg.debug('Path {0} is no cheaper than best path {1} - removing'.format(path, lowestCostPath))
                         pathSet.remove(path)
                     else:
                         msg.debug('Path {0} is cheaper than previous best path {1} - removing previous'.format(path, lowestCostPath))
                         pathSet.remove(lowestCostPath)
                         lowestCostPath = path
     
             # Emergency break - every candidate was a dead end; raising here also
             # prevents the loop condition from indexing into an empty pathSet
             if len(pathSet) == 0:
                 raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                                                             'No path found between {0} and {1} for {2}'.format(startNodeName, endNodeName, data))
         return pathSet[0]
  
     

◆ _extendPath()

def python.trfGraph.executorGraph._extendPath	(	self,
		path,
		currentNodeName,
		nextNodeName
	)

private

Connect a path to a particular node.

Parameters

path	graphPath instance
nextNodeName	Node to connect to

Definition at line 372 of file trfGraph.py.

     def _extendPath(self, path, currentNodeName, nextNodeName):
         edgeData = self._nodeDict[currentNodeName].connections['in'][nextNodeName]
         msg.debug('Connecting {0} to {1} with data {2}'.format(currentNodeName, nextNodeName, edgeData))
         
         extraData = set()
         if self._execution[currentNodeName]['enabled'] is True:
             extraCost = 0
         else:
             for edgeDataElement in edgeData:
                 # Simple case - one data connection only
                 if edgeDataElement in self._nodeDict[currentNodeName].inData:
                     extraCost = self._nodeDict[currentNodeName].weights[edgeDataElement]
                 else:
                     # Complex case - the start requirement for this node must be multi-data
                     # Only the first match in the dataIn lists is considered
                     # This will break if there are multiple overlapping dataIn requirements
                     for nodeStartData in self._nodeDict[currentNodeName].inData:
                         if isinstance(nodeStartData, (list, tuple)) and edgeDataElement in nodeStartData:
                             extraCost = self._nodeDict[currentNodeName].weights[nodeStartData]
                             msg.debug('Found multi-data exit from {0} to {1} - adding {2} to data requirements'.format(currentNodeName, nextNodeName, nodeStartData))
                             extraData.update(nodeStartData)
                             break
             # Remove data which is on the edge itself
             extraData.difference_update(edgeData)
             
         msg.debug('Updating path {0} with {1}, {2}, {3}, {4}'.format(path, nextNodeName, edgeData, extraData, extraCost))
         path.addToPath(nextNodeName, edgeData, extraData, extraCost)
  
                     

◆ _resetConnections()

def python.trfGraph.executorGraph._resetConnections ( self )

private

Definition at line 148 of file trfGraph.py.

     def _resetConnections(self):
         for node in self._nodeDict.values():
             node.resetConnections()
     

◆ addNode()

def python.trfGraph.executorGraph.addNode	(	self,
		executor
	)

Add an executor node to the graph.

Definition at line 138 of file trfGraph.py.

     def addNode(self, executor):
         self._nodeDict[executor.name] = executorNode(executor)

◆ data()

def python.trfGraph.executorGraph.data ( self )

Return a list of all data used in this execution.

Definition at line 126 of file trfGraph.py.

     def data(self):
         dataset = set()
         for nodeName in self._toposort:
             # Start and end nodes are not real - they never actually execute
             if nodeName.startswith(('_start', '_end')):
                 continue
             if self._execution[nodeName]['enabled'] is True:
                 dataset.update(self._execution[nodeName]['input'])
                 dataset.update(self._execution[nodeName]['output'])
         return dataset
     

◆ deleteNote()

def python.trfGraph.executorGraph.deleteNote	(	self,
		executor
	)

Remove an executor node from the graph.

Definition at line 143 of file trfGraph.py.

     def deleteNote(self, executor):
         if executor.name in self._nodeDict:
             del(self._nodeDict[executor.name])
     
     

◆ doToposort()

def python.trfGraph.executorGraph.doToposort ( self )

Find a topologically sorted list of the graph nodes.

Note: If this is not possible, the graph is not a DAG - not supported; See http://en.wikipedia.org/wiki/Topological_sorting

Definition at line 171 of file trfGraph.py.

     def doToposort(self):
         # We will manipulate the graph, so deepcopy it
         graphCopy = copy.deepcopy(self._nodeDict)
         # Find all valid start nodes in this graph - ones with no data dependencies themselves
         startNodeNames = []
         for nodeName, node in graphCopy.items():
             if len(node.connections['in']) == 0:
                 startNodeNames.append(nodeName)
  
         if len(startNodeNames) == 0:
             raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                                                         'There are no starting nodes in this graph - non-DAG graphs are not supported')
  
         msg.debug('Found this list of start nodes for toposort: {0}'.format(startNodeNames))
  
         # The startNodeNames holds the list of nodes with their dependencies now satisfied (no input edges anymore)
         while len(startNodeNames) > 0:
             # Take the next startNodeName and zap it from the graph
             theNodeName = startNodeNames.pop()
             theNode = graphCopy[theNodeName]
             self._toposort.append(theNodeName)
             del graphCopy[theNodeName]
             
             # Now delete the edges this node was a source for
             msg.debug('Considering connections from node {0}'.format(theNodeName))
             for connectedNodeName in theNode.connections['out']:
                 graphCopy[connectedNodeName].delConnection(toExe = theNodeName, direction = 'in')
                 # Look for nodes which now have their dependencies satisfied
                 if len(graphCopy[connectedNodeName].connections['in']) == 0:
                     startNodeNames.append(connectedNodeName)
         
         # If there are nodes left then the graph has cycles, which means it's not a DAG        
         if len(graphCopy) > 0:
             raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                                                         'Graph topological sort had no more start nodes, but nodes were left {0} - non-DAG graphs are not supported'.format(list(graphCopy)))
             
         msg.debug('Topologically sorted node order: {0}'.format(self._toposort))
         
         # Now toposort the input data for nodes
         self._toposortData = []
         for nodeName in self._toposort:
             # First add input data, then output data
             for dataType in self._nodeDict[nodeName].inputDataTypes:
                 if dataType not in self._toposortData:
                     self._toposortData.append(dataType)
             for dataType in self._nodeDict[nodeName].outputDataTypes:
                 if dataType not in self._toposortData:
                     self._toposortData.append(dataType)
                     
         msg.debug('Topologically sorted data order: {0}'.format(self._toposortData))
     
     

◆ execution()

def python.trfGraph.executorGraph.execution ( self )

Return a list of execution nodes with their data inputs/outputs.

Definition at line 113 of file trfGraph.py.

     def execution(self):
         exeList = []
         for nodeName in self._toposort:
             # Start and end nodes are not real - they never actually execute
             if nodeName.startswith(('_start', '_end')):
                 continue
             if self._execution[nodeName]['enabled'] is True:
                 exeList.append({'name': nodeName, 'input': self._execution[nodeName]['input'], 
                                 'output': self._execution[nodeName]['output']})
         return exeList
     

◆ findConnections()

def python.trfGraph.executorGraph.findConnections ( self )

Look at executor nodes and work out how they are connected.

Note: Anything better than n^2? Should be ok for our low numbers of nodes, but could be optimised

Definition at line 154 of file trfGraph.py.

     def findConnections(self):
         self._resetConnections()
         for nodeNameA, nodeA in self._nodeDict.items():
             for nodeNameB, nodeB in self._nodeDict.items():
                 if nodeNameA == nodeNameB:
                     continue
                 dataIntersection = list(set(nodeA.outputDataTypes) & set(nodeB.inputDataTypes))
                 msg.debug('Data connections between {0} and {1}: {2}'.format(nodeNameA, nodeNameB, dataIntersection))
                 if len(dataIntersection) > 0:
                     nodeA.addConnection(nodeNameB, dataIntersection, direction='out')
                     nodeB.addConnection(nodeNameA, dataIntersection, direction='in')
                     
         msg.debug('Graph connections are: \n{0}'.format(self))
                     

◆ findExecutionPath()

def python.trfGraph.executorGraph.findExecutionPath ( self )

Find the graph's execution nodes, from input to output data types with each activated step and the inputs/outputs.

Parameters

Definition at line 227 of file trfGraph.py.

     def findExecutionPath(self):        
         # Switch off all nodes, except if we have a single node which is not data driven...
         self._execution = {}
         for nodeName, node in self._nodeDict.items():
             if len(self._nodeDict) == 1 and node.inputDataTypes == set() and node.inputDataTypes == set():
                 self._execution[nodeName] = {'enabled' : True, 'input' : set(), 'output' : set()}
             else:
                 self._execution[nodeName] = {'enabled' : False, 'input' : set(), 'output' : set()}
  
         dataToProduce = copy.deepcopy(self._outputData)
         dataAvailable = copy.deepcopy(self._inputData)
                 
         # Consider the next data type in topo order
         while len(dataToProduce) > 0:
             nextDataType = None
             for dataType in self._toposortData:
                 if dataType in dataToProduce:
                     nextDataType = dataType
                     dataToProduce.remove(nextDataType)
                     dataAvailable.update([nextDataType])
                     break
  
             if not nextDataType:
                 msg.error('Still have to produce data type(s) {0}, but did not find anything in the toposorted data list ({1}).' 
                           ' Transform parameters/graph are broken so aborting.'.format(dataToProduce, self._toposortData))
                 raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                                                             'Data type graph error')
  
             msg.debug('Next data type to try is {0}'.format(nextDataType))
             bestPath = self._bestPath(nextDataType, dataAvailable)
             
             msg.debug('Found best path for {0}: {1}'.format(nextDataType, bestPath))
  
             
             modPath = bestPath.path + [None]
             for (nodeName, nextNodeName) in [ (n, modPath[modPath.index(n)+1]) for n in bestPath.path ]:
                 self._execution[nodeName]['enabled'] = True
                 # Add the necessary data types to the output of the first node and the input of the next
                 if nodeName in bestPath.newData:
                     self._execution[nodeName]['output'].update(bestPath.newData[nodeName])
                     for newData in bestPath.newData[nodeName]:
                         if newData not in dataAvailable:
                             dataToProduce.update([newData])
                 if nextNodeName:
                     self._execution[nextNodeName]['input'].update(bestPath.newData[nodeName])
                     if nextNodeName in bestPath.extraData:
                         self._execution[nextNodeName]['input'].update(bestPath.extraData[nodeName])
                 # Add any extra data we need (from multi-exit nodes) to the data to produce list
                 for extraNodeData in bestPath.extraData.values():
                     for extra in extraNodeData:
                         if extra not in dataAvailable:
                             dataToProduce.update([extra])
                             
         # Now remove the fake data objects from activated nodes
         for node, props in self._execution.items():
             msg.debug('Removing fake data from node {0}'.format(node))
             props['input'] -= set(['inNULL', 'outNULL'])
             props['output'] -= set(['inNULL', 'outNULL'])
  
         msg.debug('Execution dictionary: {0}'.format(self._execution))
     
     

◆ inputData() [1/2]

def python.trfGraph.executorGraph.inputData ( self )

Definition at line 96 of file trfGraph.py.

     def inputData(self):
         """Return the input data held by this graph (self._inputData)."""
         # NOTE(review): a same-named definition taking a value also exists, which
         # suggests a property getter/setter pair; the decorators are not visible
         # in this listing - confirm in trfGraph.py
         return self._inputData
     

◆ inputData() [2/2]

def python.trfGraph.executorGraph.inputData	(	self,
		inputData
	)

Definition at line 100 of file trfGraph.py.

     def inputData(self, inputData):
         """Replace the graph's input data with a new set built from inputData."""
         # set() copies the iterable, so later changes to the caller's object
         # do not affect the graph
         self._inputData = set(inputData)
     

◆ outputData() [1/2]

def python.trfGraph.executorGraph.outputData ( self )

Definition at line 104 of file trfGraph.py.

     def outputData(self):
         """Return the output data held by this graph (self._outputData)."""
         # NOTE(review): a same-named definition taking a value also exists, which
         # suggests a property getter/setter pair; the decorators are not visible
         # in this listing - confirm in trfGraph.py
         return self._outputData
     

◆ outputData() [2/2]

def python.trfGraph.executorGraph.outputData	(	self,
		outputData
	)

Definition at line 108 of file trfGraph.py.

     def outputData(self, outputData):
         """Replace the graph's output data with a new set built from outputData."""
         # set() copies the iterable, so later changes to the caller's object
         # do not affect the graph
         self._outputData = set(outputData)
         

Member Data Documentation

◆ _execution

python.trfGraph.executorGraph._execution

private

Definition at line 229 of file trfGraph.py.

◆ _inputData

python.trfGraph.executorGraph._inputData

private

Definition at line 66 of file trfGraph.py.

◆ _nodeDict

python.trfGraph.executorGraph._nodeDict

private

Definition at line 51 of file trfGraph.py.

◆ _outputData

python.trfGraph.executorGraph._outputData

private

Definition at line 67 of file trfGraph.py.

◆ _toposort

python.trfGraph.executorGraph._toposort

private

Definition at line 89 of file trfGraph.py.

◆ _toposortData

python.trfGraph.executorGraph._toposortData

private

Definition at line 90 of file trfGraph.py.

The documentation for this class was generated from the following file:

trfGraph.py

Public Member Functions

Private Member Functions

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ __init__()

Member Function Documentation

◆ __repr__()

◆ __str__()

◆ _bestPath()

◆ _extendPath()

◆ _resetConnections()

◆ addNode()

◆ data()

◆ deleteNote()

◆ doToposort()

◆ execution()

◆ findConnections()

◆ findExecutionPath()

◆ inputData() [1/2]

◆ inputData() [2/2]

◆ outputData() [1/2]

◆ outputData() [2/2]

Member Data Documentation

◆ _execution

◆ _inputData

◆ _nodeDict

◆ _outputData

◆ _toposort

◆ _toposortData

◆ init()

◆ repr()

◆ str()