NRT data#

import time

import matplotlib.pyplot as plt
import pandas as pd

import uscrn

Recent hourly data#

With uscrn.get_nrt_data(), we can load recent (near-real-time) data from USCRN by specifying the period we want and which dataset to draw from. Here, we request the 6 most recent hourly files.

now = pd.Timestamp.now("UTC")
print(now)

df = uscrn.get_nrt_data(
    (-6, None),
    "hourly",
    n_jobs=2,
)
2024-05-09 23:42:13.962376+00:00
Discovering files...
  Looking for files in these years
  - 2024
Found 6 file(s) to load
https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/updates/2024/CRN60H0203-202405091700.txt
...
https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/updates/2024/CRN60H0203-202405092200.txt
Reading files...
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   1 tasks      | elapsed:    1.9s
[Parallel(n_jobs=2)]: Done   4 out of   6 | elapsed:    2.2s remaining:    1.1s
[Parallel(n_jobs=2)]: Done   6 out of   6 | elapsed:    2.3s finished
df
wban utc_time lst_time crx_vn longitude latitude t_calc t_hr_avg t_max t_min ... soil_moisture_5 soil_moisture_10 soil_moisture_20 soil_moisture_50 soil_moisture_100 soil_temp_5 soil_temp_10 soil_temp_20 soil_temp_50 soil_temp_100
0 03047 2024-05-09 16:00:00 2024-05-09 10:00:00 2.622 -102.809998 31.620001 28.900000 27.799999 29.000000 26.700001 ... 0.044 0.027 0.042 0.032 0.031 27.799999 27.299999 25.000000 27.299999 25.900000
1 03048 2024-05-09 16:00:00 2024-05-09 09:00:00 2.622 -106.889999 34.360001 18.200001 16.900000 18.200001 15.300000 ... 0.047 0.075 0.072 0.067 0.068 21.500000 19.900000 21.299999 22.100000 21.799999
2 03054 2024-05-09 16:00:00 2024-05-09 10:00:00 2.622 -102.769997 33.959999 20.900000 19.600000 21.000000 18.600000 ... 0.063 0.111 0.134 0.112 0.134 21.700001 20.299999 21.299999 21.500000 18.600000
3 03055 2024-05-09 16:00:00 2024-05-09 10:00:00 2.622 -101.589996 36.599998 18.799999 18.299999 19.400000 17.000000 ... 0.082 0.138 0.140 0.234 0.144 15.900000 16.000000 16.100000 15.900000 15.600000
4 03060 2024-05-09 16:00:00 2024-05-09 09:00:00 2.622 -107.690002 38.540001 5.200000 3.600000 5.200000 1.700000 ... 0.216 0.292 0.295 0.336 0.367 5.300000 5.300000 6.000000 6.200000 5.700000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
930 96405 2024-05-09 21:00:00 2024-05-09 12:00:00 2.514 -145.350006 60.470001 6.200000 6.100000 6.400000 5.700000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
931 96406 2024-05-09 21:00:00 2024-05-09 12:00:00 2.515 -154.130005 64.500000 4.500000 3.900000 5.000000 2.900000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
932 96407 2024-05-09 21:00:00 2024-05-09 12:00:00 2.515 -159.000000 66.559998 -0.800000 -1.000000 -0.700000 -1.400000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
933 96408 2024-05-09 21:00:00 2024-05-09 12:00:00 2.515 -150.869995 63.450001 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
934 96409 2024-05-09 21:00:00 2024-05-09 12:00:00 2.514 -149.399994 68.650002 5.500000 5.000000 5.500000 4.200000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

935 rows × 36 columns

Sometimes, a few sites can have times an hour earlier than the others. See the notes in uscrn.get_nrt_data() for more details.

def func(x):
    """Summarize a group of WBAN IDs: list them if there are few, otherwise elide."""
    nx = len(x)
    if nx == 0:
        return ""
    elif nx < 10:
        return sorted(x.unique())
    else:
        return "..."

# Count records per UTC time, and summarize which WBANs report at each time
(
    df.utc_time
    .value_counts()
    .sort_index()
    .to_frame()
    .assign(
        wbans=df.groupby("utc_time")["wban"].apply(func),
    )
)
count wbans
utc_time
2024-05-09 16:00:00 156 ...
2024-05-09 17:00:00 157 ...
2024-05-09 18:00:00 157 ...
2024-05-09 19:00:00 155 ...
2024-05-09 20:00:00 155 ...
2024-05-09 21:00:00 155 ...

In these files, for example, site WBAN 13301 has data included for multiple earlier days.
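
One way to spot-check this is to look at the distinct UTC times reported for that station in the current download (a rough sketch; 13301 may not appear in every batch of files, and wban is assumed to be a zero-padded string as in the frame above):

# Distinct UTC times for one station in the loaded NRT files
df.loc[df["wban"] == "13301", "utc_time"].drop_duplicates().sort_values()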

Plot temperature change time series#

fig, ax = plt.subplots(figsize=(7, 4))

df_ = df.copy()

# Limit to times within 7 hours of the latest time in the data
tmax = df_.utc_time.max()
df_ = df_[df_.utc_time.between(tmax - pd.Timedelta("7h"), tmax)]

# Mid-hour timestamps for plotting, plus each station's hourly-average temperature
# anomaly relative to its own mean over the window
df_["utc_time_mid"] = df_["utc_time"] + pd.Timedelta("30min")
df_["t_hr_avg_k"] = df_["t_hr_avg"].add(273.15)
df_["dt_hr_avg"] = df_["t_hr_avg_k"].sub(df_.groupby("wban")["t_hr_avg_k"].transform("mean"))

df_[["utc_time", "dt_hr_avg"]].groupby("utc_time").mean().plot(
    color="0.3",
    linewidth=3,
    zorder=10,
    legend=False,
    ax=ax,
)

df_.groupby("wban").plot(
    x="utc_time",
    y="dt_hr_avg",
    color="0.5",
    linewidth=1,
    alpha=0.4,
    legend=False,
    xlabel="Time (UTC)",
    ylabel="NRT temperature anomaly  $\Delta T$  (°C)",
    ax=ax,
)

ax.set_title(df.attrs["title"], loc="left", size=8);
[Figure: per-station NRT hourly temperature anomaly (ΔT) time series, with the all-station mean overlaid]

Plot current temperature#

fig, ax = plt.subplots(figsize=(7, 4.5))

ds = uscrn.to_xarray(df)

ds.isel(time=-1).plot.scatter(x="longitude", y="latitude", hue="t_hr_avg", ax=ax);
[Figure: latest hourly average temperature (t_hr_avg) at each station, plotted by longitude and latitude]
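
For a non-graphical view of the same latest-hour snapshot, we could summarize the distribution of hourly average temperatures across stations (a sketch using only pandas; output not shown):

# Distribution of t_hr_avg for the most recent hour in the loaded data
df[df["utc_time"] == df["utc_time"].max()]["t_hr_avg"].describe()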

Specific period of hourly data#

Date selection works by file (the timestamp in the file name), not by the data inside the file. In general, the data are an hour behind the file date/time. See the notes in uscrn.get_nrt_data() for more details.

def get_nrt_hourly_period(period):
    """Fetch hourly NRT data for a UTC time period, trimming to the requested times."""
    a, b = period
    # Request files one hour later than the desired data times,
    # since the data in a file generally lag the file time by an hour
    ap1 = pd.to_datetime(a) + pd.Timedelta(hours=1)
    bp1 = pd.to_datetime(b) + pd.Timedelta(hours=1)

    df = uscrn.get_nrt_data((ap1, bp1))

    time.sleep(0.5)  # for prints
    in_period = df.utc_time.between(a, b)
    print(
        f"Got {in_period.sum()}/{len(df)} ({in_period.sum() / len(df):.1%}) records "
        f"in desired period {a} to {b}"
    )
    outside = df.loc[~in_period, "utc_time"].value_counts()
    print(
        "Outside counts:",
        ", ".join(f"{t:%Y-%m-%d %H} ({count})" for t, count in outside.items()),
    )

    dupe = df.duplicated(["wban", "utc_time"], keep=False)
    print(f"Got {dupe.sum()} ({dupe.sum() / len(df):.1%}) duplicates")

    # Keep only rows in the desired period, dropping duplicate (wban, utc_time) records
    return (
        df[in_period]
        .drop_duplicates(["wban", "utc_time"], keep="last")
        .reset_index(drop=True)
    )


df = get_nrt_hourly_period(("2024-02-09 16", "2024-02-09 20"))
/home/docs/checkouts/readthedocs.org/user_builds/uscrn/envs/latest/lib/python3.10/site-packages/uscrn/data.py:712: UserWarning: Timestamp 2024-02-09 17:00:00 has no timezone, assuming UTC.
  warnings.warn(f"Timestamp {ts} has no timezone, assuming UTC.")
/home/docs/checkouts/readthedocs.org/user_builds/uscrn/envs/latest/lib/python3.10/site-packages/uscrn/data.py:712: UserWarning: Timestamp 2024-02-09 21:00:00 has no timezone, assuming UTC.
  warnings.warn(f"Timestamp {ts} has no timezone, assuming UTC.")
Discovering files...
  Looking for files in these years
  - 2024
Found 5 file(s) to load
https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/updates/2024/CRN60H0203-202402091700.txt
...
https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/updates/2024/CRN60H0203-202402092100.txt
Reading files...
[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done   4 tasks      | elapsed:    0.8s
Got 769/771 (99.7%) records in desired period 2024-02-09 16 to 2024-02-09 20
Outside counts: 2024-02-09 15 (2)
Got 6 (0.8%) duplicates
df
wban utc_time lst_time crx_vn longitude latitude t_calc t_hr_avg t_max t_min ... soil_moisture_5 soil_moisture_10 soil_moisture_20 soil_moisture_50 soil_moisture_100 soil_temp_5 soil_temp_10 soil_temp_20 soil_temp_50 soil_temp_100
0 03047 2024-02-09 16:00:00 2024-02-09 10:00:00 2.622 -102.809998 31.620001 11.300000 10.0 11.300000 8.300000 ... 0.048 0.038 0.056 0.045 0.035 9.3 9.1 9.5 12.1 13.3
1 03048 2024-02-09 16:00:00 2024-02-09 09:00:00 2.622 -106.889999 34.360001 3.000000 2.3 3.100000 0.900000 ... 0.101 0.140 0.109 0.056 0.057 3.1 4.4 6.2 8.2 9.2
2 03054 2024-02-09 16:00:00 2024-02-09 10:00:00 2.622 -102.769997 33.959999 6.700000 6.5 6.700000 5.900000 ... 0.132 0.160 0.164 0.105 0.125 5.0 5.8 6.9 9.0 10.3
3 03055 2024-02-09 16:00:00 2024-02-09 10:00:00 2.622 -101.589996 36.599998 8.700000 6.7 8.700000 3.900000 ... 0.250 0.306 0.324 0.412 0.217 4.5 5.0 5.6 6.4 6.6
4 03060 2024-02-09 16:00:00 2024-02-09 09:00:00 2.622 -107.690002 38.540001 -7.400000 -8.4 -7.400000 -10.400000 ... NaN NaN NaN 0.173 0.152 -0.3 -0.3 -0.1 0.7 2.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
761 94996 2024-02-09 20:00:00 2024-02-09 14:00:00 2.622 -96.849998 40.700001 8.200000 7.8 8.400000 7.000000 ... 0.290 0.330 0.287 0.260 0.322 5.1 4.4 4.8 5.2 5.5
762 96404 2024-02-09 20:00:00 2024-02-09 11:00:00 2.514 -141.210007 62.740002 -21.700001 -22.9 -21.700001 -24.200001 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
763 96405 2024-02-09 20:00:00 2024-02-09 11:00:00 2.514 -145.350006 60.470001 3.000000 2.9 3.200000 2.600000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
764 96407 2024-02-09 20:00:00 2024-02-09 11:00:00 2.515 -159.000000 66.559998 -9.700000 -9.3 -8.700000 -11.100000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
765 96409 2024-02-09 20:00:00 2024-02-09 11:00:00 2.514 -149.399994 68.650002 -10.800000 -9.6 -8.000000 -11.100000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

766 rows × 36 columns
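
As a quick sanity check on the trimmed result, we could count how many stations report in each retained hour (a sketch using only pandas; output not shown):

# Number of distinct stations per retained hour
df.groupby("utc_time")["wban"].nunique()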

Recent daily data#

Here, we load the most recent daily data file.

df = uscrn.get_nrt_data(
    -1,
    "daily",
    n_jobs=1,
)
Discovering files...
  Looking for files in these years
  - 2024
Found 1 file(s) to load
https://www.ncei.noaa.gov/pub/data/uscrn/products/daily01/updates/2024/CRND0103-202405082359.txt
Reading files...
[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    0.2s
df
wban lst_date crx_vn longitude latitude t_daily_max t_daily_min t_daily_mean t_daily_avg p_daily_calc ... soil_moisture_5_daily soil_moisture_10_daily soil_moisture_20_daily soil_moisture_50_daily soil_moisture_100_daily soil_temp_5_daily soil_temp_10_daily soil_temp_20_daily soil_temp_50_daily soil_temp_100_daily
0 03047 2024-05-08 2.622 -102.809998 31.620001 34.700001 18.4 26.600000 27.000000 0.0 ... 0.050 0.032 0.040 0.033 0.031 30.000000 30.200001 29.700001 27.200001 25.5
1 03048 2024-05-08 2.622 -106.889999 34.360001 26.700001 12.1 19.400000 19.299999 0.0 ... 0.050 0.078 0.074 0.068 0.069 25.900000 24.900000 23.900000 22.000000 21.6
2 03054 2024-05-08 2.622 -102.769997 33.959999 30.200001 9.5 19.799999 20.600000 0.0 ... 0.068 0.114 0.135 0.112 0.134 24.000000 23.200001 22.299999 21.200001 18.6
3 03055 2024-05-08 2.622 -101.589996 36.599998 25.200001 6.4 15.800000 16.000000 0.0 ... 0.088 0.142 0.142 0.243 0.146 17.700001 17.600000 16.500000 15.800000 15.5
4 03060 2024-05-08 2.622 -107.690002 38.540001 6.600000 -6.2 0.200000 0.400000 0.0 ... 0.224 0.297 0.296 0.325 0.366 7.700000 7.200000 6.500000 6.000000 5.7
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
151 96405 2024-05-08 2.514 -145.350006 60.470001 7.400000 3.3 5.400000 5.200000 10.5 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
152 96406 2024-05-08 2.515 -154.130005 64.500000 14.500000 -0.9 6.800000 6.500000 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
153 96407 2024-05-08 2.515 -159.000000 66.559998 0.600000 -7.2 -3.300000 -2.900000 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
154 96408 2024-05-08 2.515 -150.869995 63.450001 NaN NaN NaN NaN 0.7 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
155 96409 2024-05-08 2.514 -149.399994 68.650002 -7.300000 -9.9 -8.600000 -8.400000 0.2 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

156 rows × 28 columns

fig, ax = plt.subplots(figsize=(7, 4.5))

ds = uscrn.to_xarray(df).squeeze()

ds.plot.scatter(x="longitude", y="latitude", hue="t_daily_max", ax=ax);
[Figure: daily maximum temperature (t_daily_max) at each station, plotted by longitude and latitude]
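
Beyond the map, we might also want to pick out extremes, for example the five stations with the highest daily maximum temperature (a sketch; output not shown):

# Five warmest stations by daily maximum temperature in the latest daily file
df.nlargest(5, "t_daily_max")[["wban", "lst_date", "t_daily_max"]]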