2323import threading
2424from socketserver import BaseRequestHandler , ThreadingTCPServer
2525
26- FARM_MAX_PARALLEL_TASKS = 2
26+ FARM_MAX_PARALLEL_TASKS = 10
2727MAX_BYTES_REQUEST = 4096 # 8192 / 65536 if needed
2828
2929PathLike = Union [str , Path ]
@@ -48,14 +48,15 @@ class Status(Enum):
4848
4949
5050class Task :
51- def __init__ (self , jid : str , tid : str , label : str , command : str , metadata : dict , jobDir : PathLike ):
51+ def __init__ (self , jid : str , tid : str , label : str , command : str , metadata : dict , jobDir : PathLike , env : dict = None ):
5252 self .jid : str = jid
5353 self .tid : str = tid
5454 self .parentTids = [] # Tasks that must be completed before this one
5555 self .childTids = [] # Task that depend on this one
5656 self .label : str = label
5757 self .command : str = command
58- self .metadata : dict = metadata
58+ self .metadata : dict = metadata or {}
59+ self .env : dict = env or {}
5960 self .taskDir : Path = Path (jobDir ) / "tasks"
6061 self .taskDir .mkdir (parents = True , exist_ok = True )
6162 self .status : Status = Status .NONE
@@ -73,6 +74,7 @@ def to_dict(self):
7374 "label" : self .label ,
7475 "command" : self .command ,
7576 "metadata" : self .metadata ,
77+ "env" : self .env ,
7678 "status" : self .status .name ,
7779 "created_at" : self .created_at .isoformat (),
7880 "started_at" : self .started_at .isoformat () if self .started_at else None ,
@@ -300,14 +302,7 @@ def processJobs(self):
300302 # Check if process finished
301303 returncode = task .process .poll ()
302304 if returncode is not None :
303- task .finished_at = datetime .now ()
304- task .return_code = returncode
305- if returncode == 0 :
306- task .status = Status .SUCCESS
307- logger .info (f"Task { task .tid } completed" )
308- else :
309- task .status = Status .ERROR
310- logger .error (f"Task { task .tid } failed with code { returncode } " )
305+ self .finishTask (task , returncode )
311306
312307 # Check if job is complete
313308 if any (t .status in [Status .ERROR , Status .STOPPED , Status .KILLED ] for t in job .tasks ):
@@ -342,20 +337,48 @@ def startTask(self, task: Task):
342337 task .status = Status .RUNNING
343338 task .started_at = datetime .now ()
344339 # Create log file
340+ additional_env = {
341+ "LOCALFARM_CURRENT_JID" : str (task .jid ),
342+ "LOCALFARM_CURRENT_TID" : str (task .tid ),
343+ "MR_LOCAL_FARM_PATH" : str (self .root )
344+ }
345+ additional_env .update (task .env )
346+ process_env = os .environ .copy ()
347+ process_env .update (additional_env )
345348 try :
349+
346350 with open (task .logFile , "w" ) as log :
351+ log .write (f"# ========== Starting task { task .tid } at { task .started_at .isoformat ()} " \
352+ f" (command=\" { task .command } \" ) ==========\n " )
353+ log .write (f"# Additional env variables:\n " )
354+ for _k , _v in additional_env .items ():
355+ log .write (f"# - { str (_k )} ={ str (_v )} \n " )
356+ log .write (f"\n " )
347357 task .process = subprocess .Popen (
348358 task .command ,
349359 # shlex.split(task.command),
350360 stdout = log ,
351361 stderr = log ,
352362 cwd = task .taskDir ,
363+ env = process_env ,
353364 shell = True
354365 )
355366 except Exception as e :
356367 logger .error (f"Failed to start task { task .tid } : { e } " )
357368 task .status = "error"
358369 task .finished_at = datetime .now ()
370+
371+ def finishTask (self , task : Task , returncode : int ):
372+ task .finished_at = datetime .now ()
373+ task .return_code = returncode
374+ if returncode == 0 :
375+ task .status = Status .SUCCESS
376+ logger .info (f"Task { task .tid } completed" )
377+ else :
378+ task .status = Status .ERROR
379+ logger .error (f"Task { task .tid } failed with code { returncode } " )
380+ with open (task .logFile , "a" ) as log :
381+ log .write (f"\n # ========== Task { task .tid } finished at { task .finished_at .isoformat ()} ==========\n " )
359382
360383 def cleanup (self ):
361384 logger .info ("Cleaning up..." )
@@ -393,15 +416,15 @@ def create_job(self, name):
393416 logger .info (f"Created job { jid } " )
394417 return {"success" : True , "jid" : jid }
395418
396- def create_task (self , jid , name , command , metadata , dependencies ):
419+ def create_task (self , jid , name , command , metadata , dependencies , env = None ):
397420 """Add a task to a job"""
398421 with self .lock :
399422 if jid not in self .jobs :
400423 return {"success" : False , "error" : "Job not found" }
401424 job = self .jobs [jid ]
402425 job .lastJid += 1
403426 tid = job .lastJid
404- task = Task (jid , tid , name , command , metadata , job .jobDir )
427+ task = Task (jid , tid , name , command , metadata , job .jobDir , env = env )
405428 job .tasks .append (task )
406429 for parentTid in dependencies :
407430 parentTask = next ((t for t in job .tasks if t .tid == parentTid ), None )
@@ -412,14 +435,15 @@ def create_task(self, jid, name, command, metadata, dependencies):
412435 logger .info (f"Added task { tid } to job { jid } " )
413436 return {"success" : True , "tid" : tid }
414437
415- def expand_task (self , jid , name , command , metadata , parentTid ):
438+ def expand_task (self , jid , name , command , metadata , parentTid , env = None ):
416439 with self .lock :
417440 if jid not in self .jobs :
441+ logger .info (f"Available jobs: { list (self .jobs .keys ())} " )
418442 return {"success" : False , "error" : "Job not found" }
419443 job = self .jobs [jid ]
420444 job .lastJid += 1
421445 tid = job .lastJid
422- task = Task (jid , tid , name , command , metadata , job .jobDir )
446+ task = Task (jid , tid , name , command , metadata , job .jobDir , env = env )
423447 task .status = Status .SUBMITTED
424448 job .tasks .append (task )
425449 parentTask = next ((t for t in job .tasks if t .tid == parentTid ), None )
@@ -456,7 +480,6 @@ def get_job_infos(self, jid):
456480 if jid not in self .jobs :
457481 return {'success' : False , "error" : "Job not found" }
458482 job = self .jobs [jid ]
459- logger .info (f"Job infos : { job .to_dict ()} " )
460483 return {"success" : True , "result" : job .to_dict ()}
461484
462485 def pause_job (self , jid ):
@@ -538,7 +561,7 @@ def list_jobs(self):
538561 with self .lock :
539562 return {
540563 "success" : True ,
541- "jobs" : { jid : job .to_dict () for jid , job in self .jobs .items ()}
564+ "jobs" : [ job .to_dict () for job in self .jobs .values ()]
542565 }
543566
544567
@@ -609,6 +632,9 @@ def main(root):
609632
610633if __name__ == "__main__" :
611634 parser = argparse .ArgumentParser (description = 'Execute a Graph of processes.' )
612- parser .add_argument ('farm_root ' , type = str , help = 'Root path for the farm.' )
635+ parser .add_argument ('--root ' , type = str , required = False , help = 'Root path for the farm.' )
613636 args = parser .parse_args ()
614- main (args .farm_root )
637+ root = args .root
638+ if not root :
639+ root = os .getenv ("MR_LOCAL_FARM_PATH" , os .path .join (os .path .expanduser ("~" ), ".local_farm" ))
640+ main (root )
0 commit comments