Data Download 2

This sample notebook demonstrates how to programmatically download and visualize air quality data stored on the Open Storage Network. In this second version, we demonstrate the use of the AWS.jl and AWSS3.jl client libraries.

using CSV, DataFrames 
using Plots
using Markdown
using AWS, AWSS3
include("./osn_anonymous.jl")

Now that we have the requisite packages loaded, let’s set up our AWS configuration information using our custom AnonymousOSN configuration function.

We now can use AWSS3.jl to explore the files stored on OSN for central node 8.

# Root prefix on OSN to explore (per the path: raw data, Central_Hub_1, 2023-03-02).
p = S3Path(
    joinpath(bucket, "AirQualityNetwork/data/raw/Central_Hub_1/2023/03/02/"),
)

# Walk everything beneath `p` and record the full path of each file encountered.
# Sub-directory names yielded by `walkdir` are not needed here, hence the `_`.
df_paths = Any[]
for (folder, _, filenames) in walkdir(p), filename in filenames
    push!(df_paths, joinpath(folder, filename))
end

# Evaluate the collection last so the notebook displays it.
df_paths

15-element Vector{Any}:
 "s3://ees230012-bucket01/AirQual" ⋯ 56 bytes ⋯ "6318c91_APDS9002_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 54 bytes ⋯ "e06318c91_BME280_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 54 bytes ⋯ "e06318c91_BME680_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 53 bytes ⋯ "1e06318c91_GL001_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 54 bytes ⋯ "e06318c91_GUV001_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 54 bytes ⋯ "e06318c91_HM3301_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 55 bytes ⋯ "06318c91_IPS7100_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 50 bytes ⋯ "_001e06318c91_IP_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 54 bytes ⋯ "e06318c91_LIBRAD_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 53 bytes ⋯ "1e06318c91_SCD30_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 54 bytes ⋯ "e06318c91_TB108L_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 55 bytes ⋯ "06318c91_TMG3993_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 55 bytes ⋯ "06318c91_TSL2591_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 56 bytes ⋯ "6318c91_VEML6075_2023_03_02.csv"
 "s3://ees230012-bucket01/AirQual" ⋯ 59 bytes ⋯ "ff2037bc_Summary_2023_03_02.csv"

We can now load the files directly into dataframes for continued analysis.

# Select the IPS7100 file by sensor name instead of by position, so this cell keeps
# loading the intended file even if the bucket listing changes order or size.
sensor = "IPS7100"
sensor_idx = findfirst(path -> occursin("_$(sensor)_", string(path)), df_paths)
sensor_idx === nothing && error("no $(sensor) file found in df_paths")

# CSV.File is handed the S3 path directly; the parsed table is materialized as a DataFrame.
df = CSV.File(S3Path(df_paths[sensor_idx])) |> DataFrame

9715×15 DataFrame

9690 rows omitted

Row	dateTime	pc0_1	pc0_3	pc0_5	pc1_0	pc2_5	pc5_0	pc10_0	pm0_1	pm0_3	pm0_5	pm1_0	pm2_5	pm5_0	pm10_0
	String31	Int64	Int64	Int64	Int64	Int64	Int64	Int64	Float64	Float64	Float64	Float64	Float64	Float64	Float64
1	2023-03-02 21:18:03.686980	186348	101116	61137	6795	1036	10	0	0.155705	2.4369	8.82213	14.4999	28.0323	29.1087	29.1087
2	2023-03-02 21:18:04.756650	185877	100817	60893	6789	1036	10	0	0.155312	2.42976	8.78947	14.4623	28.0025	29.149	29.149
3	2023-03-02 21:18:05.682043	185455	100555	60617	6783	1040	16	0	0.154959	2.4235	8.75437	14.4225	28.0176	29.7319	29.7319
4	2023-03-02 21:18:06.679952	185011	100310	60232	6760	1041	17	0	0.154589	2.4176	8.70829	14.357	27.959	29.8246	29.8246
5	2023-03-02 21:18:07.677839	184486	99999	59783	6724	1042	17	0	0.154149	2.41013	8.65389	14.2727	27.8843	29.7314	29.7314
6	2023-03-02 21:18:08.674922	184024	99727	59374	6693	1041	18	0	0.153763	2.40361	8.60465	14.1975	27.7991	29.7457	29.7457
7	2023-03-02 21:18:09.672863	183592	99472	58951	6660	1038	19	0	0.153403	2.39749	8.55434	14.1197	27.6844	29.7084	29.7084
8	2023-03-02 21:18:10.670898	183114	99167	58509	6623	1035	16	0	0.153003	2.39022	8.50093	14.0356	27.5565	29.3077	29.3077
9	2023-03-02 21:18:11.667976	182595	98817	58055	6587	1032	10	0	0.152569	2.3819	8.44521	13.9493	27.4383	28.5185	28.5185
10	2023-03-02 21:18:12.666070	182069	98491	57559	6542	1028	7	0	0.15213	2.37411	8.38565	13.8521	27.2784	28.071	28.071
11	2023-03-02 21:18:13.663993	181435	98084	56997	6480	1013	5	0	0.1516	2.36438	8.31723	13.7318	26.9703	27.5012	27.5012
12	2023-03-02 21:18:14.661008	180749	97619	56413	6414	995	2	0	0.151027	2.35333	8.24519	13.6052	26.6081	26.8255	26.8255
13	2023-03-02 21:18:15.658345	180081	97165	55831	6350	973	0	0	0.150469	2.34253	8.17358	13.4801	26.198	26.2421	26.2421
⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮
9704	2023-03-02 23:59:48.539369	121501	59068	11955	2241	270	0	0	0.101522	1.43411	2.68273	4.55575	8.08815	8.08815	8.08815
9705	2023-03-02 23:59:49.537345	121060	58771	11910	2222	264	0	0	0.101154	1.42705	2.67102	4.52807	7.9793	7.9793	7.9793
9706	2023-03-02 23:59:50.535837	120637	58478	11872	2205	258	0	0	0.1008	1.42007	2.66001	4.50269	7.88151	7.88151	7.88151
9707	2023-03-02 23:59:51.534259	120133	58104	11802	2183	252	0	0	0.100379	1.41122	2.64386	4.46834	7.76922	7.76922	7.76922
9708	2023-03-02 23:59:52.532713	119602	57736	11719	2158	246	0	0	0.0999347	1.40247	2.62647	4.43038	7.64456	7.64456	7.64456
9709	2023-03-02 23:59:53.530269	119180	57461	11664	2140	240	0	0	0.0995824	1.39592	2.61415	4.4026	7.54067	7.54067	7.54067
9710	2023-03-02 23:59:54.528673	118754	57177	11616	2121	234	0	0	0.0992267	1.38914	2.60234	4.37504	7.43885	7.43885	7.43885
9711	2023-03-02 23:59:55.526219	118367	56925	11570	2102	229	0	0	0.0989031	1.38314	2.59152	4.34813	7.3423	7.3423	7.3423
9712	2023-03-02 23:59:56.524240	118005	56683	11505	2081	224	0	0	0.0986006	1.37738	2.57906	4.31805	7.24628	7.24628	7.24628
9713	2023-03-02 23:59:57.522713	117585	56398	11414	2056	218	0	0	0.0982495	1.37061	2.56273	4.28131	7.13573	7.13573	7.13573
9714	2023-03-02 23:59:58.520251	117207	56148	11337	2039	213	0	0	0.097934	1.36463	2.54869	4.25284	7.03624	7.03624	7.03624
9715	2023-03-02 23:59:59.518143	116909	55961	11277	2027	208	0	0	0.0976852	1.36018	2.53798	4.2321	6.95184	6.95184	6.95184

Now that we have the data loaded, we can visualize it:

That plot looks great! Let’s now demonstrate the use of notebook parameters with papermill. In the first cell we set the variable test_parameter to the value 3.14. When the notebook is executed through papermill, that parameter is overridden, so at execution time the value is now 1.21.