Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ESGF config files #179

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions docs/esg.cordex-cmip6.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
[project:cordex-cmip6]

# Define the categories to be used for this project:
# name | category_type | is_mandatory | is_thredds_property | display_order
categories =
mip_era | enum | true | true | 0
activity_id | enum | true | true | 1
domain_id | enum | true | true | 2
driving_source_id | enum | true | true | 3
driving_experiment_id | enum | true | true | 4
driving_variant_label | string | false | true | 5
institution_id | enum | true | true | 6
source_id | enum | true | true | 7
version_realization | enum | true | true | 8
variable_id | string | true | true | 9
frequency | string | false | true | 10
product | string | false | true | 11
source_type | string | false | true | 12
grid | string | false | true | 13
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure whether grid and the other CMIP6 DRS elements that are only global attributes in CORDEX should remain here as categories.

creation_date | string | false | true | 14
activity_id | string | false | true | 15
experiment_title | string | false | true | 16
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably add the driving prefix to this entry (i.e. driving_experiment_title) for consistency.

model_cohort | string | true | true | 17
project | string | false | true | 18
description | text | false | false | 99

category_defaults =
project_id | CORDEX

filename_format = %(variable_id)s_%(domain_id)s_%(driving_source_id)s_%(driving_experiment_id)s_%(driving_variant_label)s_%(institution_id)s_%(source_id)s_%(version_realization)s_%(frequency)s[_%(time_range)s].nc

directory_format = %(project_id)s/%(mip_era)s/%(activity_id)s/%(domain_id)s/%(institution_id)s/%(driving_source_id)s/%(driving_experiment_id)s/%(driving_variant_label)s/%(source_id)s/%(version_realization)s/%(frequency)s/%(variable_id)s/%(version)s

dataset_id = cordex.%(activity_id)s.%(domain_id)s.%(institution_id)s.%(driving_source_id)s.%(driving_experiment_id)s.%(driving_variant_label)s.%(source_id)s.%(version_realization)s.%(frequency)s.%(variable_id)s

dataset_name_format = mip_era=%(mip_era)s, source_id=%(source_id)s, experiment=%(experiment_title)s, member_id=%(member_id)s, variable=%(variable_id)s, version=%(version)s
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This entry still needs adapting. I am not sure which criteria are used to select the DRS elements that appear in dataset_name_format; for now it remains as in the CMIP6 ini file.


mip_era_options = CMIP6

activity_id_options = DD, ESD

driving_source_id_options = 4AOP-v1-5, ACCESS-CM2, ACCESS-ESM1-5, ACCESS-OM2, ACCESS-OM2-025, ARTS-2-3, AWI-CM-1-1-HR, AWI-CM-1-1-LR, AWI-CM-1-1-MR, AWI-ESM-1-1-LR, AWI-ESM-2-1-LR, BCC-CSM2-HR, BCC-CSM2-MR, BCC-ESM1, CAM-MPAS-HR, CAM-MPAS-LR, CAMS-CSM1-0, CAS-ESM2-0, CESM1-1-CAM5-CMIP5, CESM1-CAM5-SE-HR, CESM1-CAM5-SE-LR, CESM1-WACCM-SC, CESM2, CESM2-FV2, CESM2-WACCM, CESM2-WACCM-FV2, CIESM, CMCC-CM2-HR4, CMCC-CM2-SR5, CMCC-CM2-VHR4, CMCC-ESM2, CNRM-CM6-1, CNRM-CM6-1-HR, CNRM-ESM2-1, CanESM5, CanESM5-1, CanESM5-CanOE, E3SM-1-0, E3SM-1-1, E3SM-1-1-ECA, E3SM-2-0, E3SM-2-0-NARRM, EC-Earth3, EC-Earth3-AerChem, EC-Earth3-CC, EC-Earth3-GrIS, EC-Earth3-HR, EC-Earth3-LR, EC-Earth3-Veg, EC-Earth3-Veg-LR, EC-Earth3P, EC-Earth3P-HR, EC-Earth3P-VHR, ECMWF-IFS-HR, ECMWF-IFS-LR, ECMWF-IFS-MR, ERA5, FGOALS-f3-H, FGOALS-f3-L, FGOALS-g3, FIO-ESM-2-0, GFDL-AM4, GFDL-CM4, GFDL-CM4C192, GFDL-ESM2M, GFDL-ESM4, GFDL-GRTCODE, GFDL-OM4p5B, GFDL-RFM-DISORT, GISS-E2-1-G, GISS-E2-1-G-CC, GISS-E2-1-H, GISS-E2-2-G, GISS-E2-2-H, GISS-E3-G, HadGEM3-GC31-HH, HadGEM3-GC31-HM, HadGEM3-GC31-LL, HadGEM3-GC31-LM, HadGEM3-GC31-MH, HadGEM3-GC31-MM, HiRAM-SIT-HR, HiRAM-SIT-LR, ICON-ESM-LR, IITM-ESM, INM-CM4-8, INM-CM5-0, INM-CM5-H, IPSL-CM5A2-INCA, IPSL-CM6A-ATM-HR, IPSL-CM6A-ATM-ICO-HR, IPSL-CM6A-ATM-ICO-LR, IPSL-CM6A-ATM-ICO-MR, IPSL-CM6A-ATM-ICO-VHR, IPSL-CM6A-ATM-LR-REPROBUS, IPSL-CM6A-LR, IPSL-CM6A-LR-INCA, IPSL-CM6A-MR1, KACE-1-0-G, KIOST-ESM, LBLRTM-12-8, MCM-UA-1-0, MIROC-ES2H, MIROC-ES2H-NB, MIROC-ES2L, MIROC6, MPI-ESM-1-2-HAM, MPI-ESM1-2-HR, MPI-ESM1-2-LR, MPI-ESM1-2-XR, MRI-AGCM3-2-H, MRI-AGCM3-2-S, MRI-ESM2-0, NESM3, NICAM16-7S, NICAM16-8S, NICAM16-9S, NorCPM1, NorESM1-F, NorESM2-LM, NorESM2-MM, PCMDI-test-1-0, RRTMG-LW-4-91, RRTMG-SW-4-02, RTE-RRTMGP-181204, SAM0-UNICON, TaiESM1, TaiESM1-TIMCOM, TaiESM1-TIMCOM2, UKESM1-0-LL, UKESM1-1-LL, UKESM1-ice-LL

driving_experiment_id_options = evaluation, historical, ssp119, ssp126, ssp245, ssp370, ssp585

institution_id_options = BCCR-UCAN, BOM, CCCma, CLMcom-DWD, CLMcom-KIT, CNRM-MF, GERICS, HCLIMcom-DMI, HCLIMcom-METNo, HCLIMcom-SMHI, ICTP, MOHC, OURANOS, UBA-CIMA-IFAECI, UQ-DEC

source_id_options = CCAM-v2105, CCAM-v2112, CCAMoc-v2112, CNRM-ALADIN64E1, CRCM5-SN, CanRCM5-SN, HCLIM43-ALADIN, HadREM3-GA7-05, REMO2020, RegCM5-0, WRF451Q

domain_id_options = AFR-12, AFR-25, AFR-50, AFR-50i, ANT-12, ANT-50, ANT-50i, ARC-12, ARC-50, ARC-50i, AUS-12, AUS-25, AUS-50, AUS-50i, CAM-12, CAM-25, CAM-50, CAM-50i, CAS-12, CAS-25, CAS-50, CAS-50i, EAS-12, EAS-25, EAS-50, EAS-50i, EUR-12, EUR-12i, EUR-25, EUR-50, EUR-50i, MED-12, MED-50, MED-50i, MNA-12, MNA-25, MNA-25i, MNA-50, MNA-50i, NAM-12, NAM-25, NAM-50, NAM-50i, SAM-12, SAM-25, SAM-50, SAM-50i, SEA-25, SEA-25i, WAS-12, WAS-25, WAS-50, WAS-50i

maps = experiment_title_map, model_cohort_map, las_time_delta_map

experiment_title_map = map(experiment_id : experiment_title)
evaluation | reanalysis simulation of the recent past
historical | all-forcing simulation of the recent past
ssp119 | low-end scenario reaching 1.9 W m-2, based on SSP1
ssp126 | update of RCP2.6 based on SSP1
ssp245 | update of RCP4.5 based on SSP2
ssp370 | gap-filling scenario reaching 7.0 based on SSP3
ssp585 | update of RCP8.5 based on SSP5

variable_id_pattern = %(string)s

model_cohort_map = map(source_id : model_cohort)
CCAM-v2105 | Registered
CCAM-v2112 | Registered
CCAMoc-v2112 | Registered
CNRM-ALADIN64E1 | Registered
CRCM5-SN | Registered
CanRCM5-SN | Registered
HCLIM43-ALADIN | Registered
HadREM3-GA7-05 | Registered
REMO2020 | Registered
RegCM5-0 | Registered
WRF451Q | Registered

project_options = CORDEX

driving_variant_label_pattern = r%(digit)si%(digit)sp%(digit)sf%(digit)s

frequency_options = 1hr, 3hr, 6hr, day, fx, mon, yr

version_pattern = v%(digit)s

version_realization_pattern = v%(digit)s-r%(digit)s

las_time_delta_map = map(frequency : las_time_delta)
1hr | 1 hour
3hr | 3 hours
6hr | 6 hours
day | 1 day
fx | fixed
mon | 1 month
yr | 1 year

handler = esgcet.config.cmip6_handler:CMIP6Handler

min_cmor_version = 3.2.4

min_cf_version = 1.11

create_cim = true

source_type_delimiter = space

activity_id_delimiter = space

realm_delimiter = space

model_cohort_delimiter = space

las_configure = false

extract_global_attrs = frequency, product, source_type, grid, creation_date, variant_label, activity_id
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure of the meaning of extract_global_attrs


thredds_exclude_variables = a, a_bnds, alev1, alevel, alevhalf, alt40, b, b_bnds, bnds, bounds_lat, bounds_lon, dbze, depth, depth0m, depth100m, depth_bnds, geo_region, height, height10m, height2m, lat, lat_bnds, latitude, latitude_bnds, layer, lev, lev_bnds, location, lon, lon_bnds, longitude, longitude_bnds, olayer100m, olevel, oline, p0, p220, p500, p560, p700, p840, plev, plev3, plev7, plev8, plev_bnds, plevs, pressure1, region, rho, scatratio, sdepth, sdepth1, sza5, time, time1, time2, time_bnds, vegtype, i, j, rlat, rlat_bnds, sector, type, vertices_latitude, vertices_longitude

variable_locate = ps, ps_ | tau, tau_

variable_per_file = true

version_by_date = true

33 changes: 33 additions & 0 deletions scripts/esg-ini-sources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Mapping consumed by scripts/update-esg-ini.py.
#
# Each top-level key is the name of an option in docs/esg.cordex-cmip6.ini
# whose value the script regenerates. The settings under a key are:
#   file_label - selects the JSON source file ../CORDEX-CMIP6_<file_label>.json
#   field      - top-level key to read from that JSON file
#                (the script falls back to file_label when it is omitted)
#   is_map     - when true, the option is written as one "name | value" line
#                per JSON entry instead of a comma-separated list of names
#   is_cohort  - with is_map, every entry gets the constant value "Registered"
activity_id_options:
file_label: activity_id

domain_id_options:
file_label: domain_id

driving_experiment_id_options:
file_label: driving_experiment_id

driving_source_id_options:
file_label: driving_source_id

experiment_title_map:
file_label: driving_experiment_id
is_map: true

frequency_options:
file_label: frequency

institution_id_options:
file_label: institution_id

mip_era_options:
file_label: fixed
field: mip_era

model_cohort_map:
file_label: source_id
is_map: true
is_cohort: true

source_id_options:
file_label: source_id
72 changes: 72 additions & 0 deletions scripts/update-esg-ini.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import yaml
import json
from icecream import ic


def get_field(file_label, field, is_map=False, is_cohort=False, *, base_dir=".."):
    """
    Read one field of a CORDEX-CMIP6 JSON file and format it for the ini file.

    Parameters:
        file_label: variable part of the file name; the file read is
            ``<base_dir>/CORDEX-CMIP6_<file_label>.json``.
        field: top-level key of the JSON document to extract.
        is_map: for a dict-valued field, return one formatted ini line per
            entry instead of a comma-separated list of the keys.
        is_cohort: only meaningful together with ``is_map``; every entry is
            given the constant value ``Registered`` instead of its JSON value.
        base_dir: directory holding the JSON files. The default ``..`` keeps
            the previous behaviour (parent of the current working directory).

    Returns:
        - dict field, ``is_map`` false: the sorted keys joined with ", ".
        - dict field, ``is_map`` true: a list of newline-terminated
          ``name | value`` lines, in the order of the JSON document.
        - list field: its first element.
        - string field: the string itself.

    Raises:
        KeyError: ``field`` is not present in the JSON document.
        IndexError: the field is an empty list.
        TypeError: the field holds any other JSON type. Previously this case
            silently returned ``None``, which ended up in the ini file as the
            literal text "None".
    """
    path = f"{base_dir}/CORDEX-CMIP6_{file_label}.json"
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(path, "r", encoding="utf-8") as file:
        data = json.load(file)
    rval = data[field]

    if isinstance(rval, dict):
        if not is_map:
            return ", ".join(sorted(rval))
        if is_cohort:
            # Cohort maps only need the names; the JSON values are ignored.
            return [f" {name:<27} | Registered\n" for name in rval]
        return [f" {name:<27} | {value}\n" for name, value in rval.items()]
    if isinstance(rval, list):
        return rval[0]
    if isinstance(rval, str):
        return rval
    raise TypeError(
        f"Field {field!r} in {path} has unsupported type "
        f"{type(rval).__name__}; expected a dict, list or string"
    )


def read_map(map_file):
    """
    Parse the YAML file at ``map_file`` and return the loaded object.
    """
    # NOTE(review): FullLoader is kept unchanged here; if the file only ever
    # holds plain mappings and scalars, yaml.safe_load would be sufficient.
    with open(map_file, mode="r") as stream:
        parsed = yaml.load(stream, Loader=yaml.FullLoader)
    return parsed


def update_ini_file(ini_content, map_data):
    """
    Return a new list of ini lines in which every option named in the
    YAML config has been regenerated from its JSON source.

    Plain options are rewritten as ``key = value``. Map options keep their
    header line, get the freshly generated entries appended, and the old
    ``name | value`` rows that follow the header are dropped.
    """
    refreshed = []
    dropping_old_rows = False
    for line in ini_content:
        if "=" not in line:
            # Only rows containing "|" are discarded, and only while the
            # most recent option was a regenerated map.
            if not ("|" in line and dropping_old_rows):
                refreshed.append(line)
            continue

        # Any line containing "=" starts a new option and ends row dropping.
        dropping_old_rows = False
        key = line.partition("=")[0].strip()
        if key not in map_data:
            refreshed.append(line)
            continue

        ic(key)  # icecream trace call, kept from the original
        source = map_data[key]
        is_map = source.get("is_map", False)
        is_cohort = source.get("is_cohort", False)
        label = source["file_label"]
        new_value = get_field(label, source.get("field", label), is_map, is_cohort)

        if not is_map:
            refreshed.append(f"{key} = {new_value}\n")
            continue

        # Keep the "name = map(...)" header, then emit the new entries.
        refreshed.append(line)
        refreshed.extend(new_value)
        dropping_old_rows = True
    return refreshed


if __name__ == "__main__":
    # Rewrite the ini file in place. Both paths are relative to the current
    # working directory.
    ini_path = "../docs/esg.cordex-cmip6.ini"
    with open(ini_path, "r") as source:
        original_lines = list(source)
    refreshed_lines = update_ini_file(
        original_lines, read_map("esg-ini-sources.yaml")
    )
    with open(ini_path, "w") as target:
        target.writelines(refreshed_lines)