Python Client for fault tolerance
An ecFlow Python client can be used to implement a fault-tolerance requirement, for example when it is enough for three out of five families or tasks to complete in order to carry on submitting new jobs.
Such a client can be used as an ecFlow task wrapper with only a few more lines of code.
#!/usr/bin/env python
""" ./ecflow3of5.py 63 /e_41r2/main/12/prod """
import ecflow
import os
import sys
import time
# --- Command line ---------------------------------------------------------
# Two positional arguments are required: the number of children that must be
# complete, and the absolute path of the parent node to inspect.
if len(sys.argv) < 3:
    sys.exit("usage: %s <required_complete_count> <node_path>" % sys.argv[0])

# --- Server connection ----------------------------------------------------
# Host and port are taken from the environment when set, with local defaults
# otherwise. Environment values are always strings, so the port default is a
# string too and the client receives the same type on both paths.
host = os.getenv("ECF_HOST", "localhost")
port = os.getenv("ECF_PORT", "31415")
client = ecflow.Client(host, port)

# --- Polling behaviour ----------------------------------------------------
# wait=True: keep polling until the outcome is decided.
# wait=False: check once and exit with status -1 if it is still undecided.
wait = True
interval = 30  # seconds slept between two polls

# Minimum number of complete children needed for success (the name is
# historical: the threshold is not restricted to "out of five").
outof5 = int(sys.argv[1])
# Absolute path of the node whose direct children are counted.
node_path = sys.argv[2]
def stop(msg, num):
    """Print *msg* on standard output and terminate the script.

    Args:
        msg: Text to print before exiting.
        num: Exit status handed to ``sys.exit``.

    Raises:
        SystemExit: always, carrying *num* as its code.
    """
    # A single-argument print(...) call produces the same output under
    # Python 2 and Python 3, unlike the Python 2 only print statement.
    print(msg)
    sys.exit(num)
# Poll the server until the outcome for node_path is decided.
#
# Exit paths (all through stop()):
#   0  - at least `outof5` direct children are complete        ("# OK")
#   1  - node missing, fewer children than required, or every
#        child is complete/aborted without reaching the target ("# KO")
#   -1 - outcome still undecided and `wait` is False
while True:
    # Fetch the current definition before each evaluation.
    client.sync_local()
    node = client.get_defs().find_abs_node(node_path)
    if node is None:
        stop("node not found!!!", 1)

    tot = 0      # children in state "complete"
    count = 0    # all direct children seen
    done = True  # stays True only if every child is complete or aborted
    for item in node.nodes:
        count += 1
        status = "%s" % item.get_state()
        if status == "complete":
            tot += 1
            if tot >= outof5:
                stop("# OK", 0)
        elif status != "aborted":
            # Any other state means this child may still change.
            done = False

    # The target can never be met when there are not enough children at all.
    if count < outof5:
        stop("# Impossible: %d < %d" % (count, outof5), 1)
    # Nothing left to wait for and the target was not reached.
    if done:
        stop("# KO %d" % tot, 1)
    # NOTE(review): success is already out of reach once
    # tot + (children neither complete nor aborted) < outof5, yet the loop
    # keeps waiting until all children settle - confirm whether an earlier
    # exit is wanted.

    if not wait:
        stop("# still possible", -1)
    # Messages are formatted up front so the call is valid on Python 2 and 3.
    print("# still possible ... %d %d %d" % (tot, outof5, count))
    time.sleep(interval)