NRT data
import time
import matplotlib.pyplot as plt
import pandas as pd
import uscrn
Recent hourly data
With uscrn.get_nrt_data(), we can load recent (near-real-time) data from USCRN
by specifying the period we want and the dataset to load it from.
Here, we request the 6 most recent hourly files.
# Current UTC time, printed for comparison with the times in the loaded data.
now = pd.Timestamp.now(tz="UTC")
print(now)

# (-6, None) with "hourly": the 6 most recent hourly files.
df = uscrn.get_nrt_data((-6, None), "hourly", n_jobs=2)
df
| wban | utc_time | lst_time | crx_vn | longitude | latitude | t_calc | t_hr_avg | t_max | t_min | ... | soil_moisture_5 | soil_moisture_10 | soil_moisture_20 | soil_moisture_50 | soil_moisture_100 | soil_temp_5 | soil_temp_10 | soil_temp_20 | soil_temp_50 | soil_temp_100 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 03047 | 2026-06-01 19:00:00 | 2026-06-01 13:00:00 | 2.622 | -102.809998 | 31.620001 | 32.700001 | 32.299999 | 33.099998 | 31.200001 | ... | 0.037 | 0.039 | 0.051 | 0.066 | 0.062 | 40.599998 | 32.500000 | 30.700001 | 28.799999 | 28.400000 |
| 1 | 03048 | 2026-06-01 19:00:00 | 2026-06-01 12:00:00 | 2.622 | -106.889999 | 34.360001 | 32.000000 | 31.000000 | 32.400002 | 29.500000 | ... | 0.047 | 0.059 | 0.059 | 0.059 | 0.066 | 40.099998 | 30.299999 | 26.600000 | 25.700001 | 24.799999 |
| 2 | 03054 | 2026-06-01 19:00:00 | 2026-06-01 13:00:00 | 2.622 | -102.769997 | 33.959999 | 32.099998 | 32.099998 | 33.000000 | 31.400000 | ... | 0.168 | 0.198 | 0.152 | 0.108 | 0.133 | 31.600000 | 26.100000 | 23.799999 | 23.700001 | 20.900000 |
| 3 | 03055 | 2026-06-01 19:00:00 | 2026-06-01 13:00:00 | 2.622 | -101.589996 | 36.599998 | 32.799999 | 32.299999 | 32.900002 | 31.500000 | ... | 0.201 | 0.234 | 0.200 | 0.232 | 0.160 | 24.400000 | 22.400000 | 20.400000 | 19.900000 | 19.299999 |
| 4 | 03060 | 2026-06-01 19:00:00 | 2026-06-01 12:00:00 | 2.622 | -107.690002 | 38.540001 | 20.799999 | 20.299999 | 21.100000 | 19.500000 | ... | 0.080 | 0.144 | 0.194 | 0.258 | 0.199 | 23.799999 | 17.500000 | 15.200000 | 14.500000 | 12.100000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1372 | 96404 | 2026-06-02 00:00:00 | 2026-06-01 15:00:00 | 2.514 | -141.210007 | 62.740002 | 15.700000 | 17.500000 | 18.700001 | 15.500000 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1373 | 96405 | 2026-06-02 00:00:00 | 2026-06-01 15:00:00 | 2.514 | -145.350006 | 60.470001 | 13.600000 | 13.600000 | 14.100000 | 13.300000 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1374 | 96407 | 2026-06-02 00:00:00 | 2026-06-01 15:00:00 | 2.515 | -159.000000 | 66.559998 | 13.700000 | 13.700000 | 14.300000 | 12.600000 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1375 | 96408 | 2026-06-02 00:00:00 | 2026-06-01 15:00:00 | 2.515 | -150.869995 | 63.450001 | 12.300000 | 12.200000 | 12.600000 | 11.900000 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1376 | 96409 | 2026-06-02 00:00:00 | 2026-06-01 15:00:00 | 2.514 | -149.399994 | 68.650002 | 6.300000 | 6.600000 | 8.300000 | 5.500000 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1377 rows × 36 columns
Sometimes, a few sites can have times an hour earlier than the others.
See the notes in uscrn.get_nrt_data() for more details.
def func(x):
    """Summarize the values of one group for compact display.

    Parameters
    ----------
    x
        The values of a single group; must support ``len()`` and ``.unique()``
        (e.g. a pandas Series, as passed by ``groupby(...)[col].apply``).

    Returns
    -------
    str or list
        ``""`` for an empty group, the sorted unique values when the group has
        fewer than 10 entries, and the placeholder ``"..."`` otherwise.
    """
    nx = len(x)
    if nx == 0:
        return ""
    if nx < 10:
        return sorted(x.unique())
    # Too many entries to list individually.
    return "..."
# Number of records per UTC time (sorted by time), alongside a summary of the
# WBAN IDs in each time group as produced by `func`.
(
    df["utc_time"]
    .value_counts()
    .sort_index()
    .to_frame()
    .assign(wbans=df.groupby("utc_time")["wban"].apply(func))
)
| count | wbans | |
|---|---|---|
| utc_time | ||
| 2026-05-27 20:00:00 | 1 | [53878] |
| 2026-05-27 21:00:00 | 3 | [53878, 63855, 94995] |
| 2026-05-27 22:00:00 | 4 | [53878, 63855, 94995] |
| 2026-05-27 23:00:00 | 4 | [53878, 63855, 94995] |
| 2026-05-28 00:00:00 | 4 | [53878, 63855, 94995] |
| ... | ... | ... |
| 2026-06-01 20:00:00 | 152 | ... |
| 2026-06-01 21:00:00 | 150 | ... |
| 2026-06-01 22:00:00 | 150 | ... |
| 2026-06-01 23:00:00 | 150 | ... |
| 2026-06-02 00:00:00 | 148 | ... |
125 rows × 2 columns
For example, in the following files, site WBAN 13301 includes data for multiple earlier days:
https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/updates/2024/CRN60H0203-202402212000.txt
https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/updates/2024/CRN60H0203-202402222000.txt
Plot temperature change time series
fig, ax = plt.subplots(figsize=(7, 4))

# Keep only the rows within 7 hours of the latest time in `df`.
# Filter first and copy the result, so that the column assignments below
# operate on an independent frame rather than on a slice of another frame.
tmax = df.utc_time.max()
df_ = df[df.utc_time.between(tmax - pd.Timedelta("7h"), tmax)].copy()

df_["utc_time_mid"] = df_["utc_time"] + pd.Timedelta("30min")
df_["t_hr_avg_k"] = df_["t_hr_avg"].add(273.15)
# Anomaly: each record's value minus the mean of its own site (WBAN) over the window.
df_["dt_hr_avg"] = df_["t_hr_avg_k"].sub(df_.groupby("wban")["t_hr_avg_k"].transform("mean"))

# Thick dark line: mean anomaly across all sites at each time.
df_[["utc_time", "dt_hr_avg"]].groupby("utc_time").mean().plot(
    color="0.3",
    linewidth=3,
    zorder=10,
    legend=False,
    ax=ax,
)

# Thin translucent lines: one per site.
df_.groupby("wban").plot(
    x="utc_time",
    y="dt_hr_avg",
    color="0.5",
    linewidth=1,
    alpha=0.4,
    legend=False,
    xlabel="Time (UTC)",
    # Raw string: `\D` is not a valid escape sequence in a normal string literal.
    ylabel=r"NRT temperature anomaly $\Delta T$ (°C)",
    ax=ax,
)

# The trailing semicolon suppresses the notebook's display of the return value.
ax.set_title(df.attrs["title"], loc="left", size=8);
Plot current temperature
fig, ax = plt.subplots(figsize=(7, 4.5))
# Convert the DataFrame with `uscrn.to_xarray` (presumably to an xarray Dataset -- TODO confirm).
ds = uscrn.to_xarray(df)
# Scatter the last entry along `time`, positioned by longitude/latitude and colored by `t_hr_avg`.
# The trailing semicolon suppresses the notebook's display of the return value.
ds.isel(time=-1).plot.scatter(x="longitude", y="latitude", hue="t_hr_avg", ax=ax);
Specific period of hourly data
Date selection works by file, not by the data inside the file.
In general, the data are an hour behind the file date/time.
See the notes in uscrn.get_nrt_data() for more details.
def get_nrt_hourly_period(period):
    """Load NRT data and keep only the records whose ``utc_time`` is in `period`.

    The file request is shifted one hour later than `period`. A short summary
    is printed: how many loaded records fall inside the period, the counts per
    time of those outside it, and how many records are duplicated on
    ``(wban, utc_time)``.

    Parameters
    ----------
    period
        Two-element ``(start, end)`` of the desired data times, in a form
        accepted by ``pd.to_datetime`` (e.g. ``"2024-02-09 16"``).

    Returns
    -------
    DataFrame
        The in-period records, with duplicates on ``(wban, utc_time)`` dropped
        (keeping the last occurrence) and a fresh integer index.
    """
    a, b = period

    # Request files one hour later than the desired data times.
    ap1 = pd.to_datetime(a) + pd.Timedelta(hours=1)
    bp1 = pd.to_datetime(b) + pd.Timedelta(hours=1)
    df = uscrn.get_nrt_data((ap1, bp1))
    time.sleep(0.5)  # for prints

    in_period = df.utc_time.between(a, b)
    n_in = in_period.sum()
    n_total = len(df)
    print(
        f"Got {n_in}/{n_total} ({n_in / n_total:.1%}) records "
        f"in desired period {a} to {b}"
    )

    # Loop variable is named `utc_time` so it does not shadow the `time` module.
    outside = df.loc[~in_period, "utc_time"].value_counts()
    print(
        "Outside counts:",
        ", ".join(f"{utc_time:%Y-%m-%d %H} ({count})" for utc_time, count in outside.items()),
    )

    # keep=False flags every member of a duplicated (wban, utc_time) group.
    dupe = df.duplicated(["wban", "utc_time"], keep=False)
    n_dupe = dupe.sum()
    print(f"Got {n_dupe} ({n_dupe / n_total:.1%}) duplicates")

    return (
        df[in_period]
        .drop_duplicates(["wban", "utc_time"], keep="last")
        .reset_index(drop=True)
    )
# Request the records with UTC times from 2024-02-09 16:00 to 20:00; the helper
# also prints how many loaded records fall outside this period or are duplicated.
df = get_nrt_hourly_period(("2024-02-09 16", "2024-02-09 20"))
df
| wban | utc_time | lst_time | crx_vn | longitude | latitude | t_calc | t_hr_avg | t_max | t_min | ... | soil_moisture_5 | soil_moisture_10 | soil_moisture_20 | soil_moisture_50 | soil_moisture_100 | soil_temp_5 | soil_temp_10 | soil_temp_20 | soil_temp_50 | soil_temp_100 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 03047 | 2024-02-09 16:00:00 | 2024-02-09 10:00:00 | 2.622 | -102.809998 | 31.620001 | 11.300000 | 10.0 | 11.300000 | 8.300000 | ... | 0.048 | 0.038 | 0.056 | 0.045 | 0.035 | 9.3 | 9.1 | 9.5 | 12.1 | 13.3 |
| 1 | 03048 | 2024-02-09 16:00:00 | 2024-02-09 09:00:00 | 2.622 | -106.889999 | 34.360001 | 3.000000 | 2.3 | 3.100000 | 0.900000 | ... | 0.101 | 0.140 | 0.109 | 0.056 | 0.057 | 3.1 | 4.4 | 6.2 | 8.2 | 9.2 |
| 2 | 03054 | 2024-02-09 16:00:00 | 2024-02-09 10:00:00 | 2.622 | -102.769997 | 33.959999 | 6.700000 | 6.5 | 6.700000 | 5.900000 | ... | 0.132 | 0.160 | 0.164 | 0.105 | 0.125 | 5.0 | 5.8 | 6.9 | 9.0 | 10.3 |
| 3 | 03055 | 2024-02-09 16:00:00 | 2024-02-09 10:00:00 | 2.622 | -101.589996 | 36.599998 | 8.700000 | 6.7 | 8.700000 | 3.900000 | ... | 0.250 | 0.306 | 0.324 | 0.412 | 0.217 | 4.5 | 5.0 | 5.6 | 6.4 | 6.6 |
| 4 | 03060 | 2024-02-09 16:00:00 | 2024-02-09 09:00:00 | 2.622 | -107.690002 | 38.540001 | -7.400000 | -8.4 | -7.400000 | -10.400000 | ... | NaN | NaN | NaN | 0.173 | 0.152 | -0.3 | -0.3 | -0.1 | 0.7 | 2.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 761 | 94996 | 2024-02-09 20:00:00 | 2024-02-09 14:00:00 | 2.622 | -96.849998 | 40.700001 | 8.200000 | 7.8 | 8.400000 | 7.000000 | ... | 0.290 | 0.330 | 0.287 | 0.260 | 0.322 | 5.1 | 4.4 | 4.8 | 5.2 | 5.5 |
| 762 | 96404 | 2024-02-09 20:00:00 | 2024-02-09 11:00:00 | 2.514 | -141.210007 | 62.740002 | -21.700001 | -22.9 | -21.700001 | -24.200001 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 763 | 96405 | 2024-02-09 20:00:00 | 2024-02-09 11:00:00 | 2.514 | -145.350006 | 60.470001 | 3.000000 | 2.9 | 3.200000 | 2.600000 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 764 | 96407 | 2024-02-09 20:00:00 | 2024-02-09 11:00:00 | 2.515 | -159.000000 | 66.559998 | -9.700000 | -9.3 | -8.700000 | -11.100000 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 765 | 96409 | 2024-02-09 20:00:00 | 2024-02-09 11:00:00 | 2.514 | -149.399994 | 68.650002 | -10.800000 | -9.6 | -8.000000 | -11.100000 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
766 rows × 36 columns
Recent daily data
Here, we load the most recent daily data file.
# -1 with "daily": the most recent daily data file.
df = uscrn.get_nrt_data(-1, "daily")
df
| wban | lst_date | crx_vn | longitude | latitude | t_daily_max | t_daily_min | t_daily_mean | t_daily_avg | p_daily_calc | ... | soil_moisture_5_daily | soil_moisture_10_daily | soil_moisture_20_daily | soil_moisture_50_daily | soil_moisture_100_daily | soil_temp_5_daily | soil_temp_10_daily | soil_temp_20_daily | soil_temp_50_daily | soil_temp_100_daily | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 03047 | 2026-05-31 | 2.622 | -102.809998 | 31.620001 | 36.299999 | 20.6 | 28.500000 | 26.799999 | 0.0 | ... | 0.045 | 0.044 | 0.052 | 0.068 | 0.064 | 31.6 | 31.000000 | 30.400000 | 30.0 | 28.299999 |
| 1 | 03048 | 2026-05-31 | 2.622 | -106.889999 | 34.360001 | 33.500000 | 10.0 | 21.700001 | 23.600000 | 0.0 | ... | 0.043 | 0.060 | 0.059 | 0.059 | 0.065 | 30.5 | 29.200001 | 27.500000 | 25.5 | 24.500000 |
| 2 | 03054 | 2026-05-31 | 2.622 | -102.769997 | 33.959999 | 36.000000 | 12.9 | 24.500000 | 25.500000 | 0.0 | ... | 0.172 | 0.191 | 0.155 | 0.107 | 0.133 | 25.5 | 24.799999 | 23.700001 | 22.9 | 20.799999 |
| 3 | 03055 | 2026-05-31 | 2.622 | -101.589996 | 36.599998 | 33.099998 | 12.5 | 22.799999 | 24.000000 | 0.0 | ... | 0.205 | 0.243 | 0.197 | 0.231 | 0.160 | 21.9 | 21.600000 | 20.600000 | 19.6 | 19.299999 |
| 4 | 03060 | 2026-05-31 | 2.622 | -107.690002 | 38.540001 | 19.500000 | 3.9 | 11.700000 | 12.300000 | 0.0 | ... | 0.079 | 0.145 | 0.196 | 0.259 | 0.201 | 18.1 | 16.799999 | 15.700000 | 14.2 | 11.900000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 145 | 96404 | 2026-05-31 | 2.514 | -141.210007 | 62.740002 | 14.800000 | -1.4 | 6.700000 | 8.400000 | 3.5 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 146 | 96405 | 2026-05-31 | 2.514 | -145.350006 | 60.470001 | 11.300000 | 2.5 | 6.900000 | 7.000000 | 9.1 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 147 | 96407 | 2026-05-31 | 2.515 | -159.000000 | 66.559998 | 11.200000 | 2.2 | 6.700000 | 6.900000 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 148 | 96408 | 2026-05-31 | 2.515 | -150.869995 | 63.450001 | 11.400000 | 3.7 | 7.500000 | 7.200000 | 1.5 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 149 | 96409 | 2026-05-31 | 2.514 | -149.399994 | 68.650002 | 7.500000 | -3.9 | 1.800000 | 1.800000 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
150 rows × 28 columns
fig, ax = plt.subplots(figsize=(7, 4.5))
# Convert the DataFrame with `uscrn.to_xarray`, then call `.squeeze()` on the result
# (presumably dropping the length-1 time dimension of the single daily file -- TODO confirm).
ds = uscrn.to_xarray(df).squeeze()
# Scatter positioned by longitude/latitude and colored by `t_daily_max`.
# The trailing semicolon suppresses the notebook's display of the return value.
ds.plot.scatter(x="longitude", y="latitude", hue="t_daily_max", ax=ax);