winnet_monitor_failed.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import numpy as np
3 import matplotlib.pyplot as plt
4 import pandas as pd
5 from tqdm import tqdm
6 from datetime import datetime,time,timedelta
7 from winnet_class import winnet
8 import sys
9 import subprocess
10 import os
11 import h5py
12 
# User name used to filter the Slurm queue listing
user = "mreicher"

# When False, elapsed times of finished runs are read through the winnet class
use_slurm_times = False

# Directory to scan: first command-line argument, or the current directory
path = sys.argv[1] if len(sys.argv) >= 2 else "."
23 
# Get number of running slurm jobs of the user.
# squeue does the filtering itself: -u restricts to the user, -t RUNNING to
# the job state, -h suppresses the header and -o %i prints one job id per
# line. Grepping the default listing for "R" also matched pending jobs whose
# reason column reads "(Resources)" and any job name containing an "R".
try:
    x = subprocess.check_output(
        ["squeue", "-h", "-u", user, "-t", "RUNNING", "-o", "%i"]
    )
except (OSError, subprocess.CalledProcessError):
    # squeue missing or failing: report zero jobs, which is what the former
    # "... | wc -l" shell pipeline produced in that situation as well.
    x = b""
nr_process = len(x.splitlines())
28 
# Counters over the sub-directories of `path`
allTotal = 0  # directories containing a "winnet" or "blocked" file
allRem = 0    # "winnet" and "OUT" both present
allFin = 0    # finished: finab.dat exists or the HDF5 file has a "finab/" entry
allFail = 0   # neither of the above although the "winnet" file is gone

eltime = []
all_folders = os.listdir(path)
faillist = []
ecodelist = []


def _read_error_code(err_file):
    """Return the error code found in a run's ERR file, or "-" if unavailable.

    The code is taken to be the second whitespace-separated token on the
    second line of the file. A missing or unreadable file, or one that is too
    short, yields "-".
    """
    try:
        with open(err_file, "r") as f:
            lines = f.readlines()
        return lines[1].split()[1].strip()
    except (OSError, IndexError, ValueError):
        return "-"


def _hdf5_has_finab(h5_file):
    """Return True if the HDF5 file can be opened and contains "finab/".

    A file that cannot be opened is reported as not finished, so a single
    unreadable output does not abort the whole scan.
    """
    try:
        # The context manager closes the handle even if the lookup raises
        with h5py.File(h5_file, "r") as ftmp:
            return "finab/" in ftmp
    except OSError:
        return False


for p in all_folders:
    tot_path = os.path.join(path, p)
    if not os.path.isdir(tot_path):
        continue

    w_path = os.path.join(tot_path, "winnet")
    b_path = os.path.join(tot_path, "blocked")
    f_path = os.path.join(tot_path, "finab.dat")
    h_path = os.path.join(tot_path, "WinNet_data.h5")
    o_path = os.path.join(tot_path, "OUT")
    e_path = os.path.join(tot_path, "ERR")

    if not os.path.isfile(w_path):
        # Some other folder
        if not os.path.isfile(b_path):
            continue

        # Either Fail or finished
        if os.path.isfile(f_path):
            # Read elapsed time of finished runs
            if not use_slurm_times:
                w = winnet(tot_path)
                w.read_OUT()
                eltime.append(w.elapsed_time)
            # NOTE(review): counted as finished regardless of use_slurm_times;
            # confirm this matches the intended nesting.
            finished = True
        elif os.path.isfile(h_path):
            finished = _hdf5_has_finab(h_path)
        else:
            finished = False

        if finished:
            allFin += 1
        else:
            allFail += 1
            faillist.append(p)
            # Try to read the error code
            ecodelist.append(_read_error_code(e_path))
    elif os.path.isfile(o_path):
        allRem += 1

    allTotal += 1
93 
94 
95 
# Assemble the report as a fixed-width table. Every table line is 36
# characters wide (plus a trailing blank), so the header cells are padded
# with the same column widths as the data rows to keep the borders aligned.
report = [
    " WinNet monitoring " + "\n",
    "====================================" + "\n",
    "| " + "List of failed runs:".ljust(32) + " | \n",
    "|----------------------------------| \n",
    "| " + "Name".ljust(19) + " | " + "Error code".ljust(10) + " | \n",
]

# One row per failed run; names are cut to the width of the first column
for p, ecode in zip(faillist, ecodelist):
    report.append("| " + p[:19].ljust(19) + " | " + ecode.ljust(10) + " | \n")

report.append("|" + "_" * 34 + "| \n")

# Sorted so that the list of distinct codes is identical from run to run
report.append('Different error codes: \n')
report.append(' '.join(sorted(set(ecodelist))))

outstr = "".join(report)

# Output
print(outstr)