Source code for ijazz.config

import argparse
import sys
import yaml
import numpy as np
from pathlib import Path
from copy import deepcopy



def merge_dicts(dict1, dict2):
    """Recursively merge ``dict2`` into ``dict1`` in place.

    Nested dictionaries are merged key by key; any other value from
    ``dict2`` overrides the one in ``dict1``. Two keys are special-cased:

    * ``'cut'``: unless ``dict2`` (at the same nesting level) sets
      ``use_year_selection`` to a falsy value, the new cut is appended to
      the existing one with ``' and '``. If ``dict1`` has no cut yet (or an
      empty one), the new cut is used as is, so the result never starts
      with a dangling ``' and '``.
    * ``'use_year_selection'``: only steers the ``'cut'`` handling above and
      is never copied into ``dict1``.

    Args:
        dict1 (dict): Destination dictionary, modified in place.
        dict2 (dict): Dictionary whose content is merged into ``dict1``.

    Returns:
        None
    """
    for key, value in dict2.items():
        # Only recurse when both sides are dictionaries; a non-dict value in
        # dict1 is simply replaced by the incoming dictionary.
        if isinstance(value, dict) and isinstance(dict1.get(key), dict):
            merge_dicts(dict1[key], value)
        elif key == 'use_year_selection':
            # Control flag only: it must not leak into the merged config.
            continue
        elif key == 'cut' and dict2.get('use_year_selection', True):
            base_cut = dict1.get(key)
            # Combine with the existing selection only when there is one.
            dict1[key] = base_cut + ' and ' + value if base_cut else value
        else:
            dict1[key] = value
def ijazz_config_sas():
    """Entry point for the IJazZ Scale and Smearing config generator.

    Parses the command line, loads the base YAML config (positional
    ``config``) and the step YAML config (``--cfg``), and hands both to
    ``config_sas``.

    ``--cfg`` is required: the file is always opened, so a missing option is
    reported as an argparse usage error instead of failing later in
    ``open(None)``.
    """
    parser = argparse.ArgumentParser(description='IJazZ Scale and Smearing fit')
    parser.add_argument('config', type=str, help='yaml config file')
    parser.add_argument('--cfg', type=str, required=True,
                        help='path to the yaml config with steps')
    args = parser.parse_args(sys.argv[1:])

    with open(args.config, 'r') as fcfg:
        config = yaml.safe_load(fcfg)
    with open(args.cfg, 'r') as fcfg:
        cfg = yaml.safe_load(fcfg)

    config_sas(config, cfg)
def _as_file_list(files):
    """Return ``files`` as a list, wrapping a single scalar path in a list."""
    return [files] if np.isscalar(files) else files


def config_sas(config: dict, cfg: dict):
    """Create per-step SAS YAML configs from a base config and step config.

    For every entry of ``cfg['steps']`` one YAML file is written per dataset
    (when the step sets ``split``) or a single YAML file covering all
    datasets (otherwise). From the second step on, the data files point to
    the output of the previous step (``.parquet`` becomes
    ``.<object_type><previous step name>Corr.parquet``) unless the previous
    step set ``sas.correct_data`` to a falsy value. The renaming is
    cumulative across steps.

    Note:
        Both inputs are modified in place: ``dataset['file_dt']`` of every
        entry in ``config['datasets']`` is rewritten at each step, and
        ``cfg['sas']['cut']`` is set when ``config['sas']['cut']`` exists.

    Args:
        config (dict): Base configuration with dataset info, SAS options, and output paths.
        cfg (dict): Step configuration containing a ``steps`` list and SAS overrides.

    Returns:
        None: Writes YAML files to ``dir_yaml``.
    """
    datasets_sas = config['datasets']
    dir_yaml = Path(config.get('dir_yaml', '.'))
    dir_yaml.mkdir(parents=True, exist_ok=True)

    sas_cut = config.get('sas', {}).get('cut', None)
    if sas_cut is not None:
        print(f'Applying cut: {sas_cut}')
        # Create the 'sas' section on demand rather than failing on a missing key.
        cfg.setdefault('sas', {})['cut'] = sas_cut

    steps = cfg['steps']
    for i, step in enumerate(steps):
        print(f'Processing step: {step["name"]}')
        split = step.get('split', False)

        # From the second step on, read the files corrected by the previous
        # step, unless that step did not correct the data.
        suffix = '.parquet'
        if i:
            previous = steps[i - 1]
            if previous.get('sas', {}).get('correct_data', True):
                suffix = '.' + config['object_type'] + previous['name'] + 'Corr' + '.parquet'

        for dataset in datasets_sas:
            # Normalise first: iterating a bare string would split it into characters.
            dataset['file_dt'] = [
                file.replace('.parquet', suffix)
                for file in _as_file_list(dataset['file_dt'])
            ]

        if split:
            print('Used split datasets')
            datasets = datasets_sas
        else:
            # Merge all datasets into a single one without sub-year label.
            files_dt = []
            files_mc = []
            for dataset in datasets_sas:
                files_dt += _as_file_list(dataset['file_dt'])
                files_mc += _as_file_list(dataset['file_mc'])
            datasets = [{"subyear": '', 'file_dt': files_dt, 'file_mc': files_mc}]

        for dataset in datasets:
            subyear = dataset.get('subyear', '')
            cset_name = config['object_type'] + step['name']
            dset_name = config['year'] + str(subyear)
            dir_results = Path(config['dir_results']) / step['name']

            # Start from the common settings and overlay the step-specific ones.
            config_step = deepcopy(cfg)
            merge_dicts(config_step, step)
            config_step['file_dt'] = _as_file_list(dataset['file_dt'])
            config_step['file_mc'] = _as_file_list(dataset['file_mc'])
            config_step['dir_results'] = str(dir_results)
            config_step['dset_name'] = dset_name
            config_step['cset_name'] = cset_name
            config_step.pop('steps', None)

            name_yaml = f'sas_{step["name"]}{"_" if subyear else ""}{subyear}.yaml'
            print(f'Writing yaml file: {dir_yaml / name_yaml}')
            with open(dir_yaml / name_yaml, 'w') as yaml_file:
                yaml.dump(config_step, yaml_file, default_flow_style=False, sort_keys=False)