QC flags

Contents

QC flags

By default, the QC flags are applied. This means that for numeric data columns that have a QC flag column, values where the QC flag is not “0” are set to NaN.

See Select sites for more information about selecting sites, and see Daily data / uscrn.get_data() and NRT data / uscrn.get_nrt_data() for more information about loading data.

import pandas as pd

import uscrn

# Boulder, CO
station_id = "1045"

# Load the same year of hourly data twice: once with the QC flags applied
# (the default) and once with the values left untouched.
df = uscrn.get_data(2019, "hourly", station_id=station_id, n_jobs=1)
df_no_qc = uscrn.get_data(2019, "hourly", station_id=station_id, apply_qc=False)

# Map each variable that has a QC flag column to the name of that column.
flag_columns = {
    name: meta["qc_flag_name"]
    for name, meta in df.attrs["attrs"].items()
    if meta["qc_flag_name"]
}
qc_vns = list(flag_columns)

# One row per variable, one column per QC flag value that occurs.
counts = pd.DataFrame(
    [
        df[flag_col].value_counts().convert_dtypes().rename(name)
        for name, flag_col in flag_columns.items()
    ]
)
counts

	0	3
solarad	8756.0	4.0
solarad_max	8750.0	10.0
solarad_min	8756.0	4.0
sur_temp	8756.0	4.0
sur_temp_max	8756.0	4.0
sur_temp_min	8756.0	4.0
rh_hr_avg	8760.0	NaN

# Pick the variable with the fewest "0" QC flags, i.e. the first row after
# sorting the counts table in ascending order of its "0" column.
ranked = counts.sort_values(by="0")
vn = ranked.index[0]

# Compare how many values of that variable are null with and without QC.
nulls_qc = df[vn].isnull().value_counts().rename("qc")
nulls_no_qc = df_no_qc[vn].isnull().value_counts().rename("no qc")
pd.concat([nulls_qc, nulls_no_qc], axis=1)

	qc	no qc
solarad_max
False	8749	8759
True	11	1

# Tally how often each surface temperature type code occurs.
df["sur_temp_type"].value_counts()

sur_temp_type
C    8759
U       1
Name: count, dtype: int64

IR surface measurement type

NRT data are (presumably) more likely to have non-corrected values present.

# Load recent hourly NRT data.
# NOTE(review): (-4, None) is presumably a relative period covering the most
# recent 4 hours -- confirm against the uscrn.get_nrt_data() documentation.
df = uscrn.get_nrt_data((-4, None), "hourly", n_jobs=2)

# Tally how often each surface temperature type code occurs in the NRT data.
df["sur_temp_type"].value_counts()

sur_temp_type
C    564
U     60
Name: count, dtype: int64

# Rows whose surface temperature type is 'U', then the distinct WBAN
# identifiers of the stations reporting them, in sorted order.
rows_type_u = df.query("sur_temp_type == 'U'")
wbans = sorted(rows_type_u.wban.unique())
print(wbans)
print(len(wbans))

['23801', '23802', '63862', '63867', '63868', '63891', '63892', '63893', '63894', '63895', '63897', '63899', '73801', '73802', '73803']
15