summarize.py
# Author: M. Reichert
# Date  : 25.09.2024
import numpy as np
import h5py
import os
import optparse
import logging
import re
from tqdm import tqdm
from src_files.wreader import wreader
from src_files.template_class import template
from src_files.nucleus_multiple_class import nucleus_multiple


#--- define options ----------------------------------------------------------
p = optparse.OptionParser()
p.add_option("-i", "--input", action="store", dest="rundir", default='.',
             help="Simulation directory to summarize (default: current directory)")
p.add_option("-o", "--output", action="store", dest="outdir", default='./summary.hdf5',
             help="Output path (default: ./summary.hdf5)")
p.add_option("-b", "--buf", action="store", dest="buffersize", default='500',
             help="Buffer size before writing to the file (default: 500)")
p.add_option("-f", "--force", action="store_true", dest="force", default=False,
             help="Force overwrite of the output file (default: False)")
p.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
             help="Enable verbose output. If enabled, output is written to 'debug.log' (default: False)")
p.add_option("--time_file", action="store", dest="time_file", default=None,
             help="File containing a time grid in seconds to map the data to (default: None)")
p.add_option("--time_final", action="store", dest="time_final", default=None,
             help="Final time for the time grid. Only used if '--time_file' "
                  "is not given (default: Read from template file)")
p.add_option("--time_initial", action="store", dest="time_initial", default=None,
             help="Initial time for the time grid. Only used if '--time_file' "
                  "is not given (default: 1e-5)")
p.add_option("--time_number", action="store", dest="time_number", default=None,
             help="Number of time steps for the time grid. Only used if '--time_file' "
                  "is not given (default: 200)")
p.add_option("--sunet_path", action="store", dest="sunet_path", default=None,
             help="Path to the sunet file (default: Read from template)")
p.add_option("--disable_mainout", action="store_true", dest="disable_mainout", default=False,
             help="Disable the summary of the mainout output (default: False)")
p.add_option("--disable_energy", action="store_true", dest="disable_energy", default=False,
             help="Disable the summary of the energy output (default: False)")
p.add_option("--disable_timescales", action="store_true", dest="disable_timescales", default=False,
             help="Disable the summary of the timescales output (default: False)")
p.add_option("--disable_tracked_nuclei", action="store_true", dest="disable_tracked_nuclei", default=False,
             help="Disable the summary of the tracked_nuclei output (default: False)")
p.add_option("--disable_nuloss", action="store_true", dest="disable_nuloss", default=False,
             help="Disable the summary of the nuloss output (default: False)")
p.add_option("--disable_snapshots", action="store_true", dest="disable_snapshots", default=False,
             help="Disable the summary of the snapshots output (default: False)")
p.set_usage("""

    Usage: ./summarize.py -i <rundir>
    Example: ./summarize.py -i runs/test""")

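# Example invocation (illustrative paths):
#   ./summarize.py -i runs/test -o runs/test_summary.hdf5 -b 1000 -f
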
#--- parse options -----------------------------------------------------------
(options, args) = p.parse_args()
run_path = options.rundir


# Number of runs to hold in memory before flushing the buffers to the file
buffsize = int(options.buffersize)


# Verbose mode or not?
if options.verbose:
    # Set up a logger to trace the progress
    logging.basicConfig(
        format="%(asctime)s - %(levelname)-10s - %(message)s",
        style="%",
        datefmt="%Y-%m-%d %H:%M",
        level=logging.DEBUG,
    )
    file_handler = logging.FileHandler("debug.log", mode="w", encoding="utf-8")
    logger = logging.getLogger(__name__)
    # Set the style also for the file handler
    file_handler.setFormatter(logging.Formatter(
        fmt="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M",
        style="%",
    ))
    logger.addHandler(file_handler)
else:
    # Set up a logger to trace the progress
    logging.basicConfig(level=logging.ERROR)
    logger = logging.getLogger(__name__)

# Disable the logging in the terminal
logger.propagate = False
# Say something
logger.info(f"Started summarizing run at {run_path}.")

# Get a list of all directories in the run_path. Ignore the "network_data" directory.
dirs = [d for d in os.listdir(run_path) if os.path.isdir(os.path.join(run_path, d)) and d != "network_data"]

# Say something
logger.info(f"Found {len(dirs)} directories in {run_path}.")


# Create the output hdf5 file
output_file = options.outdir
# Check if the file already exists
if os.path.exists(output_file) and not options.force:
    logger.error(f"Output file {output_file} already exists. Exiting.")
    raise ValueError(f"Output file {output_file} already exists. Either delete it or use the -f option to overwrite.")
elif os.path.exists(output_file) and options.force:
    logger.warning(f"Output file {output_file} already exists. Overwriting it.")
f_hdf = h5py.File(output_file, 'w')


# Inspect one run to see what output it contains.
# Find a run that didn't crash.
found = False
for d in dirs:
    data = wreader(os.path.join(run_path, d))
    if not data.is_crashed:
        logger.info(f"Found {d} to look up the output.")
        found = True
        break
if not found:
    # Raise an error if all runs crashed
    logger.error("All runs are crashed!")
    raise ValueError("All runs are crashed!")

# Get the template name (ends with .par) from the non-crashed run found above
template_name = [f for f in os.listdir(os.path.join(run_path, d)) if f.endswith('.par')][0]
t = template(os.path.join(run_path, d, template_name))

# Check if the sunet is given in the template
if options.sunet_path is not None:
    net_source = options.sunet_path
else:
    if "net_source" not in t.entries:
        # Raise an error if the net_source is not given
        raise ValueError("net_source not given in the template file.")
    else:
        # Get the net_source
        net_source = t["net_source"]

# Read the net_source file
logger.info(f"Using sunet file from {net_source}.")
nuclei = np.loadtxt(net_source, dtype=str)
nuclei_data = nucleus_multiple(nuclei)
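
# Note: a sunet file is a plain-text list of nucleus names, one per line
# (e.g. "n", "p", "he4"). nucleus_multiple resolves each name to its mass
# number A, charge number Z, and neutron number N, which are used below.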

# Create the time grid
if options.time_file is not None:
    logger.info(f"Using time grid from {options.time_file}.")
    mainout_time = np.loadtxt(options.time_file, dtype=float, unpack=True)
else:
    if options.time_final is not None:
        final_time = float(options.time_final)
    else:
        # Make some reasonable default and check whether the template
        # specifies a final time (termination_criterion = 1)
        final_time = 3.15e16
        if "termination_criterion" in t.entries:
            if t["termination_criterion"] == "1":
                if "final_time" in t.entries:
                    final_time = float(t["final_time"])

    if options.time_initial is not None:
        initial_time = float(options.time_initial)
    else:
        initial_time = 1e-5

    if options.time_number is not None:
        time_number = int(options.time_number)
    else:
        time_number = 200

    mainout_time = np.logspace(np.log10(initial_time), np.log10(final_time), time_number)

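# With the defaults this yields 200 logarithmically spaced points between
# 1e-5 s and 3.15e16 s (roughly 1 Gyr).
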
# Possible entries in the data
possible_entries = []
if not options.disable_mainout:
    possible_entries.append("mainout")
else:
    logger.info("Ignoring mainout output.")
if not options.disable_energy:
    possible_entries.append("energy")
else:
    logger.info("Ignoring energy output.")
if not options.disable_timescales:
    possible_entries.append("timescales")
else:
    logger.info("Ignoring timescales output.")
if not options.disable_tracked_nuclei:
    possible_entries.append("tracked_nuclei")
else:
    logger.info("Ignoring tracked_nuclei output.")
if not options.disable_nuloss:
    possible_entries.append("nuloss")
else:
    logger.info("Ignoring nuloss output.")


entry_dict = {}
for entry in possible_entries:
    # Check existence
    if data.check_existence(entry) != 0:
        entry_dict[entry] = {}
        for key in data[entry].keys():
            # Ignore the iteration and time keys as well as nucleus metadata
            if ((key == "iteration") or (key == "time") or (key == "A") or (key == "Z") or (key == "N")
                    or (key == "names") or (key == "latex_names")):
                continue
            # Ignore temperature, density, and radius for nuloss
            if (entry == "nuloss") and ((key == "temp") or (key == "dens") or (key == "rad")):
                continue
            entry_dict[entry][key] = np.zeros((len(mainout_time), buffsize))

        # Write the time already
        f_hdf[entry+"/time"] = mainout_time

        # Say something
        logger.info(f"Found {entry} in the data.")

        # Check if it is the tracked nuclei and put A, Z, and N
        # if entry == "tracked_nuclei":
        #     f_hdf[entry+"/A"] = data[entry]["A"]
        #     f_hdf[entry+"/Z"] = data[entry]["Z"]
        #     f_hdf[entry+"/N"] = data[entry]["N"]
        #     f_hdf[entry+"/names"] = data[entry]["names"]

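# At this point entry_dict maps e.g. "mainout" -> {"temp": array, ...}, where
# each array has shape (len(mainout_time), buffsize): one row per point of the
# common time grid and one column per run in the current buffer.
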
# Take care of snapshots; check whether they are custom or not
if (data.check_existence("snapshot") != 0) and (not options.disable_snapshots):
    if ("custom_snapshots" in t.entries) or ("h_custom_snapshots" in t.entries):
        # Check if either ascii or hdf5 custom snapshots are given
        summarize_snapshots = False
        if "custom_snapshots" in t.entries:
            summarize_snapshots = (t["custom_snapshots"].strip().lower() == "yes")
            # Debug statement
            if t["custom_snapshots"].strip().lower() == "yes":
                logger.info("Found custom snapshots in ascii format.")
        if "h_custom_snapshots" in t.entries:
            summarize_snapshots = (summarize_snapshots or (t["h_custom_snapshots"].strip().lower() == "yes"))
            # Debug statement
            if t["h_custom_snapshots"].strip().lower() == "yes":
                logger.info("Found custom snapshots in hdf5 format.")

        # Read the time for the custom snapshots
        if summarize_snapshots:
            if "snapshot_file" not in t.entries:
                raise ValueError("Invalid template file. snapshot_file not given in the template file.")
            snapshot_time = np.loadtxt(os.path.join(t["snapshot_file"]), dtype=float)
            # Convert from days to seconds
            snapshot_time *= 24*3600
            # Write the time already
            f_hdf["snapshots/time"] = snapshot_time
            # Write the A, Z, and N data to the hdf5 file
            f_hdf["snapshots/A"] = nuclei_data.A
            f_hdf["snapshots/Z"] = nuclei_data.Z
            f_hdf["snapshots/N"] = nuclei_data.N
            # Create an array to buffer the data
            snapshot_data = np.zeros((len(nuclei), len(snapshot_time), buffsize))
            logger.info("Summarizing custom snapshots as well.")
    else:
        summarize_snapshots = False
else:
    summarize_snapshots = False
    if not options.disable_snapshots:
        logger.info("Ignoring snapshots output.")


# Finab should always be present
finab_data_Y = np.zeros((len(nuclei), buffsize))
finab_data_X = np.zeros((len(nuclei), buffsize))
# Write the A, Z, and N data to the hdf5 file already;
# this is the same for all runs
f_hdf["finab/A"] = nuclei_data.A
f_hdf["finab/Z"] = nuclei_data.Z
f_hdf["finab/N"] = nuclei_data.N

# Prepare for efficient mapping
dtype_nuclei = np.dtype([('A', int), ('Z', int)])
nuclei_struct = np.array(list(zip(nuclei_data.A.astype(int), nuclei_data.Z.astype(int))), dtype=dtype_nuclei)
# Sort the structured nuclei array and get the sorting indices
nuclei_sorted_idx = np.argsort(nuclei_struct)
sorted_nuclei_struct = nuclei_struct[nuclei_sorted_idx]
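
# How the mapping works: np.searchsorted on the sorted (A, Z) pairs locates
# each nucleus in O(log n), and nuclei_sorted_idx translates the position in
# the sorted copy back to the row in the original network ordering. This
# assumes every (A, Z) pair that occurs in a run is present in the network.
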
# Create arrays that will contain the names of the runs (fixed-width byte
# strings, "S100") and numeric ids parsed from the names
run_names = np.zeros(buffsize, dtype="S100")
run_ids = np.zeros(buffsize, dtype=int)

# Loop over all directories
ind = -1
for counter, d in enumerate(tqdm(dirs)):
    # Load the data
    data = wreader(os.path.join(run_path, d), silent=True)

    if data.is_crashed:
        logger.warning(f"Run {d} crashed. Skipping it.")
        continue

    # Increase the index
    ind += 1

    #### Finab ####
    ###############
    # Put the data into finab_data. Note that it has to go to the right A and Z
    # position: A is contained in data.finab["A"] and Z in data.finab["Z"], and
    # both have to be matched against nuclei_data.A and nuclei_data.Z
    # (all of them are 1D arrays).
    # A slow but straightforward way to get the matching indices would be:
    # indices = [(np.where((nuclei_data.A.astype(int) == A) & (nuclei_data.Z.astype(int) == Z))[0][0])
    #            for A, Z in zip(data.finab["A"].astype(int), data.finab["Z"].astype(int))]

    # Check if the finab buffer is full and write it to the hdf5 file
    if ind % buffsize == 0 and ind != 0:
        # Check if the dataset has already been created and if not create it
        if "finab/Y" not in f_hdf:
            f_hdf.create_dataset("finab/Y", (len(nuclei), ind+1), maxshape=(len(nuclei), None))
            f_hdf.create_dataset("finab/X", (len(nuclei), ind+1), maxshape=(len(nuclei), None))
        # If necessary, extend the dataset
        if ind > buffsize:
            f_hdf["finab/Y"].resize((len(nuclei), ind+1))
            f_hdf["finab/X"].resize((len(nuclei), ind+1))
        # Write the data to the hdf5 file
        f_hdf["finab/Y"][:, ind-buffsize:ind] = finab_data_Y
        f_hdf["finab/X"][:, ind-buffsize:ind] = finab_data_X

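    # Note the ordering: the flush above runs before this iteration's data is
    # stored, so at ind = k*buffsize the buffer still holds exactly the runs
    # (k-1)*buffsize .. k*buffsize-1, which land in precisely those columns.
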
    # Convert the finab data to a structured array
    finab_struct = np.array(list(zip(data.finab["A"].astype(int), data.finab["Z"].astype(int))), dtype=dtype_nuclei)
    # Use np.searchsorted to find the matching indices
    matching_idx = np.searchsorted(sorted_nuclei_struct, finab_struct)
    # Recover the original indices from the sorted index
    indices = nuclei_sorted_idx[matching_idx]

    # Zero the column first (a run may not contain every nucleus of the network)
    finab_data_Y[:, ind % buffsize] = 0
    finab_data_X[:, ind % buffsize] = 0
    finab_data_Y[indices, ind % buffsize] = data.finab["Y"][:]
    finab_data_X[indices, ind % buffsize] = data.finab["X"][:]

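    # Reminder: Y denotes abundances and X = A * Y mass fractions; the finab
    # output provides both directly.
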
    #### Run name ####
    ##################

    # Check if the run_names buffer is full and write it to the hdf5 file
    if ind % buffsize == 0 and ind != 0:
        # Check if the dataset has already been created and if not create it
        if "run_names" not in f_hdf:
            f_hdf.create_dataset("run_names", (ind+1,), maxshape=(None,), dtype="S100")
        # If necessary, extend the dataset
        if ind > buffsize:
            f_hdf["run_names"].resize((ind+1,))
        # Write the data to the hdf5 file
        f_hdf["run_names"][ind-buffsize:ind] = run_names

    # Save the run name
    run_names[ind % buffsize] = d


    # Get numbers out of the run name
    # Check if the run_ids buffer is full and write it to the hdf5 file
    if ind % buffsize == 0 and ind != 0:
        # Check if the dataset has already been created and if not create it
        if "run_ids" not in f_hdf:
            f_hdf.create_dataset("run_ids", (ind+1,), maxshape=(None,), dtype=int)
        # If necessary, extend the dataset
        if ind > buffsize:
            f_hdf["run_ids"].resize((ind+1,))
        # Write the data to the hdf5 file
        f_hdf["run_ids"][ind-buffsize:ind] = run_ids

    try:
        run_ids[ind % buffsize] = int(re.findall(r'\d+', d)[-1])
    except (IndexError, ValueError):
        run_ids[ind % buffsize] = -1

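    # Example: for a directory named "run_0042_v3" the last number wins, so
    # the run id becomes 3; names without any digits get the id -1.
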
    #### Custom snapshots ####
    ##########################

    if summarize_snapshots:
        # Get the times of the snapshots
        snapstime = data.snapshot_time

        # Now get the indexes of the entries that agree with snapshot_time
        indexes = np.searchsorted(snapstime, snapshot_time)
        # Guard against times beyond the last snapshot of the run: there
        # np.searchsorted returns len(snapstime), which would be out of bounds.
        # Such entries are masked with NaN below anyway.
        indexes = np.minimum(indexes, len(snapstime) - 1)

        # In case snapstime is shorter than snapshot_time, we need a mask to set missing entries to nan
        if (len(snapstime) < len(snapshot_time)):
            mask = np.zeros(len(snapshot_time), dtype=bool)
            # Check where the time deviates by more than 1e-5
            mask[np.min(np.abs(snapstime - snapshot_time[:, np.newaxis]), axis=1) > 1e-5] = True
        else:
            mask = None


        # Convert the snapshot nuclei to a structured array
        snap_struct = np.array(list(zip(data.A.astype(int), data.Z.astype(int))), dtype=dtype_nuclei)
        # Use np.searchsorted to find the matching indices
        matching_idx = np.searchsorted(sorted_nuclei_struct, snap_struct)
        # Recover the original indices from the sorted index
        indices_nuclei = nuclei_sorted_idx[matching_idx]

        # Check if the snapshot buffer is full and write it to the hdf5 file
        if ind % buffsize == 0 and ind != 0:
            # Check if the dataset has already been created and if not create it
            if "snapshots/Y" not in f_hdf:
                f_hdf.create_dataset("snapshots/Y", (len(nuclei), len(snapshot_time), ind+1), maxshape=(len(nuclei), len(snapshot_time), None))
                f_hdf.create_dataset("snapshots/X", (len(nuclei), len(snapshot_time), ind+1), maxshape=(len(nuclei), len(snapshot_time), None))
            # If necessary, extend the dataset
            if ind > buffsize:
                f_hdf["snapshots/Y"].resize((len(nuclei), len(snapshot_time), ind+1))
                f_hdf["snapshots/X"].resize((len(nuclei), len(snapshot_time), ind+1))
            # Write the data to the hdf5 file
            f_hdf["snapshots/Y"][:, :, ind-buffsize:ind] = snapshot_data
            f_hdf["snapshots/X"][:, :, ind-buffsize:ind] = snapshot_data * nuclei_data.A[:, np.newaxis, np.newaxis]

        # Store it
        snapshot_data[:, :, ind % buffsize] = 0
        snapshot_data[indices_nuclei, :, ind % buffsize] = data.Y[indexes][:, :].T

        # Set entries to nan if necessary (e.g., if the run does not contain a time at the beginning or end)
        if mask is not None:
            snapshot_data[:, mask, ind % buffsize] = np.nan


    #### Other entries ####
    #######################

    for entry in entry_dict.keys():
        # Check if the buffer is full and write it to the hdf5 file
        if ind % buffsize == 0 and ind != 0:
            for key in entry_dict[entry].keys():
                # Check if the dataset has already been created and if not create it
                if entry+"/"+key not in f_hdf:
                    f_hdf.create_dataset(entry+"/"+key, (len(mainout_time), ind+1), maxshape=(len(mainout_time), None))
                # If necessary, extend the dataset
                if ind > buffsize:
                    f_hdf[entry+"/"+key].resize((len(mainout_time), ind+1))
                # Write the data to the hdf5 file
                f_hdf[entry+"/"+key][:, ind-buffsize:ind] = entry_dict[entry][key]

        # Put the data into the buffer
        for key in entry_dict[entry].keys():
            entry_dict[entry][key][:, ind % buffsize] = np.interp(mainout_time, data[entry]["time"], data[entry][key], left=np.nan, right=np.nan)


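# Note on the loop above: each quantity is linearly interpolated onto the
# common time grid via np.interp; points outside a run's own time range are
# set to NaN (left=np.nan, right=np.nan) rather than extrapolated.
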
# Say something
logger.info("Finished looping over all directories; writing the remaining buffered data to the hdf5 file.")


# Write the last buffered data to the hdf5 file

#### Finab ####
###############

# The buffers are used in a circular fashion: since the last flush, the fresh
# data occupy buffer columns 0 .. ind % buffsize, so only those are written.
n_fresh = ind % buffsize + 1

if "finab/Y" not in f_hdf:
    f_hdf.create_dataset("finab/Y", (len(nuclei), ind+1), maxshape=(len(nuclei), None))
    f_hdf.create_dataset("finab/X", (len(nuclei), ind+1), maxshape=(len(nuclei), None))
else:
    f_hdf["finab/Y"].resize((len(nuclei), ind+1))
    f_hdf["finab/X"].resize((len(nuclei), ind+1))
# Write the missing entries
f_hdf["finab/Y"][:, ind-n_fresh+1:ind+1] = finab_data_Y[:, :n_fresh]
f_hdf["finab/X"][:, ind-n_fresh+1:ind+1] = finab_data_X[:, :n_fresh]


#### Run name ####
##################

if "run_names" not in f_hdf:
    f_hdf.create_dataset("run_names", (ind+1,), maxshape=(None,), dtype="S100")
else:
    f_hdf["run_names"].resize((ind+1,))
# Write the missing entries
f_hdf["run_names"][ind-n_fresh+1:ind+1] = run_names[:n_fresh]


if "run_ids" not in f_hdf:
    f_hdf.create_dataset("run_ids", (ind+1,), maxshape=(None,), dtype=int)
else:
    f_hdf["run_ids"].resize((ind+1,))
# Write the missing entries
f_hdf["run_ids"][ind-n_fresh+1:ind+1] = run_ids[:n_fresh]


#### Custom snapshots ####
##########################

if summarize_snapshots:
    if "snapshots/Y" not in f_hdf:
        f_hdf.create_dataset("snapshots/Y", (len(nuclei), len(snapshot_time), ind+1), maxshape=(len(nuclei), len(snapshot_time), None))
        f_hdf.create_dataset("snapshots/X", (len(nuclei), len(snapshot_time), ind+1), maxshape=(len(nuclei), len(snapshot_time), None))
    else:
        f_hdf["snapshots/Y"].resize((len(nuclei), len(snapshot_time), ind+1))
        f_hdf["snapshots/X"].resize((len(nuclei), len(snapshot_time), ind+1))
    # Write the missing entries
    f_hdf["snapshots/Y"][:, :, ind-n_fresh+1:ind+1] = snapshot_data[:, :, :n_fresh]
    f_hdf["snapshots/X"][:, :, ind-n_fresh+1:ind+1] = snapshot_data[:, :, :n_fresh] * nuclei_data.A[:, np.newaxis, np.newaxis]


#### Other entries ####
#######################

for entry in entry_dict.keys():
    for key in entry_dict[entry].keys():
        if entry+"/"+key not in f_hdf:
            f_hdf.create_dataset(entry+"/"+key, (len(mainout_time), ind+1), maxshape=(len(mainout_time), None))
        else:
            f_hdf[entry+"/"+key].resize((len(mainout_time), ind+1))
        # Write the missing entries
        f_hdf[entry+"/"+key][:, ind-n_fresh+1:ind+1] = entry_dict[entry][key][:, :n_fresh]



# Say something
logger.info(f"Finished summarizing run at {run_path}.")

# Close the hdf5 file
f_hdf.close()