def check_sorted(dataset: xr.Dataset) -> bool:
    """Check that the dataset is sorted by the rules in :func:`sort_ds`.

    A deep copy of ``dataset`` is passed through :func:`sort_ds` and the
    ``pressure``, ``time``, ``latitude`` and ``longitude`` variables of the
    result are compared element-wise against those of the input.

    :param dataset: the dataset to check; the copy is what gets sorted
    :returns: ``True`` when all four variables compare equal before and after
        sorting (NaN/NaT in the same position count as equal)
    """
    sorted_ds = sort_ds(dataset.copy(deep=True))

    # NaT never compares equal to NaT, so a position only counts as unchanged
    # when the values are equal OR both sides are NaT.  Comparing the two
    # isnat masks with ``==`` would also accept any pair of valid (non-NaT)
    # timestamps, even if they differ, which defeats the check.
    both_nat = np.isnat(sorted_ds.time) & np.isnat(dataset.time)
    time_unchanged = (sorted_ds.time == dataset.time) | both_nat

    return all(
        [
            np.allclose(sorted_ds.pressure, dataset.pressure, equal_nan=True),
            np.all(time_unchanged),
            np.allclose(sorted_ds.latitude, dataset.latitude, equal_nan=True),
            np.allclose(sorted_ds.longitude, dataset.longitude, equal_nan=True),
        ]
    )
def check_ancillary_variables(ds: xr.Dataset):
    """Validate the ``ancillary_variables`` bookkeeping of a dataset.

    Two things are checked, in this order:

    * every name listed in any ``ancillary_variables`` attribute is itself a
      variable of ``ds``
    * every variable whose name ends in ``_qc`` or ``_error`` is listed in at
      least one ``ancillary_variables`` attribute

    :param ds: the dataset whose variables and attributes are inspected
    :raises ValueError: if a variable name is not a string, or with the set of
        offending names when either of the checks above fails
    """
    ancillary_suffixes = ("_qc", "_error")

    referenced = set()  # names mentioned in some ancillary_variables attr
    suffixed = set()  # names that look ancillary because of their suffix

    for var_name, var in ds.variables.items():
        if not isinstance(var_name, str):
            raise ValueError(f"variable names must be strings not {var_name}")

        if var_name.endswith(ancillary_suffixes):
            suffixed.add(var_name)

        attr = var.attrs.get("ancillary_variables")
        if attr is not None:
            referenced.update(attr.split())

    # referenced but not present in the dataset
    dangling = referenced.difference(ds.variables)
    if dangling:
        raise ValueError(dangling)

    # looks ancillary but nothing refers to it
    orphaned = suffixed - referenced
    if orphaned:
        raise ValueError(orphaned)
def check_flags(dataset: xr.Dataset, raises=True):
    """Check WOCE flag values against their param and ensure that the param either has a value or is "nan" depending on the flag definition.

    Flag variables are those with both a ``standard_name`` containing
    ``status_flag`` and a ``conventions`` attribute naming one of the known
    WOCE flag schemes. For each of them:

    * flag values outside the scheme's defined values are reported (missing
      flags are treated as ``9`` for this check), and the data check is
      skipped for that flag variable
    * otherwise, every numeric variable listing the flag variable in its
      ``ancillary_variables`` attribute is checked so that finite data only
      occurs where the flag says there is a value, and vice versa

    :param dataset: the dataset to check
    :param raises: when truthy, raise instead of returning if anything was found
    :returns: an :class:`xr.Dataset` of boolean arrays named
        ``<variable>_value_errors`` which are truthy where invalid values exist;
        it has no data variables when nothing was found
    :raises ExchangeDataFlagPairError: when ``raises`` is truthy and at least
        one error array was recorded
    """
    # NOTE(review): by name, WOCESAMPLE -> ExchangeBottleFlag and
    # WOCEBOTTLE -> ExchangeSampleFlag look crossed; confirm this pairing is
    # intentional before relying on it.
    woce_flags = {
        "WOCESAMPLE": ExchangeBottleFlag,
        "WOCECTD": ExchangeCTDFlag,
        "WOCEBOTTLE": ExchangeSampleFlag,
    }
    # per scheme: flag value -> whether data with that flag carries a value
    flag_has_value = {
        scheme: {flag.value: flag.has_value for flag in flag_enum}
        for scheme, flag_enum in woce_flags.items()
    }

    # In some cases, a coordinate variable might have flags, so we are not using filter_by_attrs
    # get all the flag vars (that also have conventions)
    flag_vars = []
    for var_name in dataset.variables:
        # do not replace the above with .items() it will give you xr.Variable objects that you don't want to use
        # the following gets a real xr.DataArray
        data = dataset[var_name]
        if not {"standard_name", "conventions"} <= data.attrs.keys():
            continue
        if not any(flag in data.attrs["conventions"] for flag in woce_flags):
            continue
        if "status_flag" in data.attrs["standard_name"]:
            flag_vars.append(var_name)

    # match flags with their data vars
    # it is legal in CF for one set of flags to apply to multiple vars
    flag_errors = {}
    for flag_var in flag_vars:
        # get the flag and check attrs for defs
        flag_da = dataset[flag_var]
        conventions = None
        for flag in woce_flags:
            if flag_da.attrs.get("conventions", "").startswith(flag):
                conventions = flag
                break

        # we don't know these flags, skip the check
        if not conventions:
            continue

        has_value = flag_has_value[conventions]

        allowed_values = np.array(list(has_value))
        illegal_flags = ~flag_da.fillna(9).isin(allowed_values)
        if np.any(illegal_flags):
            illegal_flags.attrs["comments"] = (
                f"This is a boolean array in the same shape as '{flag_da.name}' which is truthy where invalid values exist"
            )
            flag_errors[f"{flag_da.name}_value_errors"] = illegal_flags
            # the flags themselves are bad, comparing data against them is pointless
            continue

        # flag values which mean "no data value here"; this only depends on
        # the flag scheme so it is built once per flag variable
        has_fill_f = [flag for flag, value in has_value.items() if value is False]

        for var_name in dataset.variables:
            data = dataset[var_name]
            if "ancillary_variables" not in data.attrs:
                continue
            # split on any run of whitespace, the same way
            # check_ancillary_variables parses this attribute, so tab or
            # newline separated lists are not silently skipped
            if flag_var not in data.attrs["ancillary_variables"].split():
                continue

            # check data against flags
            has_fill = flag_da.isin(has_fill_f) | np.isnan(flag_da)

            # TODO deal with strs
            if np.issubdtype(data.values.dtype, np.number):
                # valid positions have exactly one of "data is finite" and
                # "flag says fill"; the negated xor marks everything else
                fill_value_mismatch: xr.DataArray = ~(np.isfinite(data) ^ has_fill)  # type: ignore[assignment]
                if np.any(fill_value_mismatch):
                    fill_value_mismatch.attrs["comments"] = (
                        f"This is a boolean array in the same shape as '{data.name}' which is truthy where invalid values exist"
                    )
                    flag_errors[f"{data.name}_value_errors"] = fill_value_mismatch

    flag_errors_ds = xr.Dataset(flag_errors)
    if raises and any(flag_errors_ds):
        raise ExchangeDataFlagPairError(flag_errors_ds)

    return flag_errors_ds