# 1. Importing Libraries
import sys
from pathlib import Path
# 2. Setting project paths
project_root = Path.cwd()
sys.path.append(str(project_root))
# Define standard data subdirectories for easy access later
RAW_DATA_DIR = project_root / "data" / "raw"
PROCESSED_DATA_DIR = project_root / "data" / "processed"
ASSETS_DIR = project_root / "assets" / "3D_Objects"AKR Map Visualisation from Wind Satellite Data
1 Nitty-gritties
This study utilises archived observations of Auroral Kilometric Radiation (AKR) bursts, individually selected from the Wind/WAVES (1995–2004). The raw data were previously processed by Fogg et al. (2022) to isolate distinct AKR emission clusters (link to archive).
The dataset consists of discrete AKR burst events. The following parameters are defined for each burst event:
Temporal details:
- STIME : burst start in universal time (YYYY-MM-DDTHH:MM:SS.SSSSSSSSS)
- ETIME : burst end in universal time (DD/MM/YYYY HH:MM:SS.SSSSSSSSS)
- BURST_TIMESTAMP: list of universal times defining burst window, format YYYY-MM-DDTHH:MM:SS.SSSSSSSSS
Spectral features:
- MIN_F_BOUND: list of the lower frequency bound
- MAX_F_BOUND: list of the upper frequency bound
Coordiantes (at each BURST_TIMESTAMP):
- Spehircal/local:
- LT_GSE: list of spacecraft local times (hours)
- RADIUS: list of spacecraft radial distances (Earth radius)
- LAT_GSE: list of spacecraft latitude (degrees, in GSE coordinates)
- LT_GSE: list of spacecraft local times (hours)
- Cartesian (GSE):
- X_GSE: list of spacecraft position in X (Earth radii, in GSE coordinates)
- Y_GSE: list of spacecraft position in Y (Earth radii, GSE coordinates)
- X_GSE: list of spacecraft position in X (Earth radii, in GSE coordinates)
- Z_GSE: list of spacecraft position in Z (Earth radii, GSE coordinates)
- Spehircal/local:
. Empty values are represented as “nan” or “NaT” depending to numeric (integer or float) or, datetime, respectively.
. Frequency data and Coordinates as a function of BURST_TIMESTAMP.
2 Objective:
The goal is to quantify AKR activity by mapping discrete satellite data into a 3D grid, focusing on four key variables:
Observation Time: The cumulative duration spent actually detecting AKR in each bin (the sum of the burst windows),
Normalised Observation Time: The ratio of observation time to residence time. This removes orbital bias to provide the true probability of AKR occurrence per location.
Residence Time: The total time the Wind spacecraft spent in each bin. This measures the cumulative “exposure” time per location.
Number of Bursts: The count of unique burst events in each bin, determined by the number of recorded burst start times (STIME).
3 Project Environment Setup
4 Data Ingestion & Pre-processing
4.1 AKR burst data processing
Reading csv file and applying data type schema
# 1. Importing functions from Package_Name
from akr_3d_map.wind_data_reading import (
exploding_saving_wind_data,
load_apply_schema_wind_csv,
)
# 2. Loading and applying data type schema to wind data from CSV
wind_data = load_apply_schema_wind_csv(
f"{RAW_DATA_DIR}/fogg_akr_burst_list_1995_2004.csv",
)Processing: Exploding the data and filtering
# 1. Exploding nested data and filtering NaNs
wind_data_processed = exploding_saving_wind_data(wind_data)Saving as parquet or json
# 1. Processed data name
processed_data_base_name = "01_processed_wind_data_fogg_akr_burst_list_1995_2004"
# 2. Saving in parquet format (memory efficient)
wind_data_processed.to_parquet(
f"{PROCESSED_DATA_DIR}/{processed_data_base_name}.parquet",
engine="pyarrow",
index=False,
)
# 3. Saving in json format
wind_data_processed.to_json(
f"{PROCESSED_DATA_DIR}/{processed_data_base_name}.json",
index=False,
)
print(f"Data successfully saved to: {PROCESSED_DATA_DIR}")4.2 Residence time data processing
# 1. library
from akr_3d_map.wind_data_reading import vstack_residence_data
# 2. residence data in txt format for individual years
target_path_obj = f"{RAW_DATA_DIR}/residence_data"
output_path = f"{RAW_DATA_DIR}/residence_data.parquet"
# 3. processing to parquet and saving in output_path
vstack_residence_data(target_path_obj, output_path)5 Importing Pre-processed Parquet Data for Analysis
5.1 Importing AKR burst data
# 1. Importing libraries
import pandas as pd
# 2. Importing parquet file
wind_data = pd.read_parquet(
f"{project_root}/data/processed/01_processed_wind_data_fogg_akr_burst_list_1995_2004.parquet",
)
# 3. Descriptory analysis
print("__________ Data type of columns __________")
display(wind_data.describe().T)
print("\n")
print("__________ Data summary __________")
display(wind_data.groupby("original_burst_id").size().describe().to_frame().T)__________ Data type of columns __________
| count | mean | min | 25% | 50% | 75% | max | std | |
|---|---|---|---|---|---|---|---|---|
| original_burst_id | 245918.0 | 4848.044137 | 0.0 | 2591.25 | 4878.0 | 7572.0 | 9030.0 | 2671.844472 |
| stime | 245918 | 2001-02-07 07:16:30.291876736 | 1995-04-23 00:45:38.992827520 | 1999-03-25 16:22:10.384550272 | 2001-02-02 08:54:09.537061632 | 2003-11-19 03:33:15.836880768 | 2004-12-30 15:46:08.170306176 | NaN |
| etime | 245918 | 2001-02-07 12:35:57.797255424 | 1995-04-23 01:07:01.840782848 | 1999-03-25 18:05:18.880834816 | 2001-02-02 09:15:32.384987136 | 2003-11-19 17:39:19.960755200 | 2004-12-30 16:53:19.977978496 | NaN |
| burst_timestamp | 245918 | 2001-02-07 09:56:14.222162176 | 1995-04-23 00:45:38.992827520 | 1999-03-25 17:14:53.393762304 | 2001-02-02 09:07:54.225013760 | 2003-11-19 05:46:53.636229120 | 2004-12-30 16:53:19.977978496 | NaN |
| min_f_bound | 245918.0 | 148.125961 | 20.0 | 80.0 | 136.0 | 196.0 | 940.0 | 91.904463 |
| max_f_bound | 245918.0 | 613.193796 | 24.0 | 388.0 | 624.0 | 804.0 | 1040.0 | 275.538172 |
| LT_gse | 245918.0 | 11.694959 | 0.000065 | 4.562738 | 12.093581 | 18.063398 | 23.999895 | 7.701588 |
| radius | 245918.0 | 114.096905 | 2.875516 | 44.794718 | 84.45 | 199.374477 | 322.43 | 81.946968 |
| lat_gse | 245918.0 | 0.672692 | -72.02 | -3.85 | 0.27 | 4.08 | 72.02 | 13.251037 |
| lon_gse | 245918.0 | 172.644148 | 0.002375 | 89.755961 | 178.673378 | 246.427507 | 360.0 | 94.950115 |
| x_gse | 245918.0 | -20.006167 | -236.21 | -59.400516 | -8.294 | 27.377561 | 260.9 | 106.923148 |
| y_gse | 245918.0 | 9.708635 | -315.350602 | -31.979141 | 1.191208 | 38.456754 | 320.9 | 87.52806 |
| z_gse | 245918.0 | 0.579817 | -36.78 | -6.19 | 0.229377 | 7.38 | 54.14 | 12.037289 |
__________ Data summary __________
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| 0 | 9031.0 | 27.230429 | 46.197366 | 5.0 | 7.0 | 12.0 | 26.0 | 753.0 |
5.2 Importing residence time data
# 1. Importing libraries
import pandas as pd
# 2. Importing parquet file
residence_data = pd.read_parquet(
f"{project_root}/data/processed/02_processed_wind_data_residence_time_1995_2004.parquet",
)
# 3. Descriptory analysis
print("__________ Data type of columns __________")
display(residence_data.describe().T)
print("\n")__________ Data type of columns __________
| count | mean | min | 25% | 50% | 75% | max | std | |
|---|---|---|---|---|---|---|---|---|
| time_stamp | 706728 | 2002-12-08 21:07:07.538970496 | 1994-11-16 00:00:00 | 1998-11-27 08:21:00 | 2002-12-08 16:42:00 | 2006-12-20 15:27:00 | 2010-12-31 23:48:00 | NaN |
| x_gse | 706728.0 | 139.54227 | -236.21 | 49.07 | 197.48 | 224.81 | 264.66 | 107.635665 |
| y_gse | 706728.0 | -3.736369 | -322.85 | -52.6925 | -5.4 | 41.96 | 321.03 | 88.307216 |
| z_gse | 706728.0 | 1.743739 | -36.78 | -8.54 | 1.09 | 12.22 | 54.14 | 13.313019 |
| lat_gse | 706728.0 | 0.237641 | -72.02 | -3.41 | 0.47 | 3.77 | 72.02 | 6.777547 |
| lon_gse | 706728.0 | 184.891433 | 0.0 | 19.1675 | 230.06 | 339.41 | 360.0 | 150.500187 |
| LT_gse | 706728.0 | 11.631802 | 0.0 | 10.4975 | 11.850833 | 13.100556 | 23.999444 | 3.576659 |
| x_gsm | 706728.0 | 139.54227 | -236.21 | 49.07 | 197.48 | 224.81 | 264.66 | 107.635665 |
| y_gsm | 706728.0 | -3.540354 | -323.17 | -50.15 | -5.56 | 39.31 | 321.17 | 84.285711 |
| z_gsm | 706728.0 | 2.556153 | -141.44 | -13.37 | 2.83 | 20.8 | 152.11 | 29.483111 |
| lat_gsm | 706728.0 | 0.596474 | -77.57 | -5.1 | 1.21 | 5.99 | 74.05 | 10.828076 |
| lon_gsm | 706728.0 | 185.714125 | 0.0 | 18.28 | 232.55 | 340.64 | 360.0 | 151.056704 |
| radius | 706728.0 | 182.170192 | 1.44 | 110.48 | 208.27 | 245.97 | 323.64 | 76.576022 |
6 Analysis & 3D Grid Generation
6.1 Burst counts
Cartesian coordinates
Calculating observation time and preaping plot file
# 1. Importing Cartesian class from Package_Name
from akr_3d_map.grid_3d import Cartesian
# 2. Path to save the file
plot_save_path = f"{ASSETS_DIR}/cartesian_grid_with_burst_counts.json"
# 3. Performing analysis and generating plot
cart = (
# Provide bin size for the 3D plot
Cartesian()
# In case you are not sure about the extreme points
# .decide_boundaries(wind_data)
# Create grid points
.create_grid()
# Calculate burst count given wind satellite data
.add_burst_count(wind_data, coord_colnames=("x_gse", "y_gse", "z_gse"))
# Creating plot and saving as a json file (HPC save for big data)
.plot_3d(
variable="burst_count",
path=plot_save_path,
show_earth=True,
earth_image_path_str=f"{project_root}/assets/temp.jpg",
show_sun=False,
)
)
# 4. Validation: Total time should be > 0
total_burst_count = cart.grid.burst_count.sum().item()
print(f"Total observations time logged: {(total_burst_count):.1f} seconds")Update in progress... processed 0 bins.
Grid populated: 29 bins updated.
Total observations time logged: 346.0 seconds
Interactive Visualisation
import plotly.io as pio
# Reload and display the 3D Interactive Map
if Path(plot_save_path).exists():
cartesian_fig = pio.read_json(str(plot_save_path))
cartesian_fig.show()
else:
print(f"Error: Plot file not found at {plot_save_path}")LTRMlat coordinates
Calculating residence time and preaping plot file
# 1. Importing Cartesian class from Package_Name
from akr_3d_map.grid_3d import LTRMLat
# 2. Path to save the file
plot_save_path = f"{ASSETS_DIR}/LTRMlat_grid_with_burst_counts.json"
# 3. Performing analysis and generating plot
ltrmat = (
# Provide bin size for the 3D plot
LTRMLat()
# In case you are not sure about the extreme points
# .decide_boundaries(wind_data)
# Create grid points
.create_grid()
# Calculate burst count given wind satellite data
.add_burst_count(wind_data, coord_colnames=("LT_gse", "radius", "lat_gse"))
# Creating plot and saving as a json file (HPC save for big data)
.plot_3d(
variable="burst_count",
path=plot_save_path,
show_earth=True,
earth_image_path_str=f"{project_root}/assets/temp.jpg",
show_sun=False,
)
)
# 4. Validation: Total time should be > 0.
total_burst_count = ltrmat.grid.burst_count.sum().item()
print(f"Total observations time logged: {(total_burst_count):.1f} seconds")Update in progress... processed 0 bins.
Grid populated: 184 bins updated.
Total observations time logged: 252.0 seconds
Interactive Visualisation
import plotly.io as pio
# Reload and display the 3D Interactive Map
if Path(plot_save_path).exists():
LTRMLat_fig = pio.read_json(str(plot_save_path))
LTRMLat_fig.show()
else:
print(f"Error: Plot file not found at {plot_save_path}")6.2 Observation time
Cartesian coordinates
Calculating observation time and preaping plot file
# 1. Importing Cartesian class from Package_Name
from akr_3d_map.grid_3d import Cartesian
# 2. Path to save the file
plot_save_path = f"{ASSETS_DIR}/cartesian_grid_with_observation_time.json"
# 3. Performing analysis and generating plot
cart = (
# Provide bin size for the 3D plot
Cartesian()
# In case you are not sure about the extreme points
# .decide_boundaries(wind_data)
# Create grid points
.create_grid()
# Calculate observations time given wind satellite data
.add_observation_time(wind_data, coord_colnames=("x_gse", "y_gse", "z_gse"))
# Creating plot and saving as a json file (HPC save for big data)
.plot_3d(
variable="observation_time",
path=plot_save_path,
show_earth=True,
earth_image_path_str=f"{project_root}/assets/temp.jpg",
show_sun=False,
)
)
# 4. Validation: Total time should be > 0
total_observation_time = cart.grid.observation_time.sum().item()
print(f"Total observations time logged: {(total_observation_time):.1f} seconds")Update in progress... processed 0 bins.
Grid populated: 31 bins updated.
Total observations time logged: 2988857.8 seconds
Interactive Visualisation
import plotly.io as pio
# Reload and display the 3D Interactive Map
if Path(plot_save_path).exists():
cartesian_fig = pio.read_json(str(plot_save_path))
cartesian_fig.show()
else:
print(f"Error: Plot file not found at {plot_save_path}")LTRMlat coordinates
Calculating residence time and preaping plot file
# 1. Importing Cartesian class from Package_Name
from akr_3d_map.grid_3d import LTRMLat
# 2. Path to save the file
plot_save_path = f"{ASSETS_DIR}/LTRMlat_grid_with_observation_time.json"
# 3. Performing analysis and generating plot
ltrmat = (
# Provide bin size for the 3D plot
LTRMLat()
# In case you are not sure about the extreme points
# .decide_boundaries(wind_data)
# Create grid points
.create_grid()
# Calculate observations time given wind satellite data
.add_observation_time(wind_data, coord_colnames=("LT_gse", "radius", "lat_gse"))
# Creating plot and saving as a json file (HPC save for big data)
.plot_3d(
variable="observation_time",
path=plot_save_path,
show_earth=True,
earth_image_path_str=f"{project_root}/assets/temp.jpg",
show_sun=False,
)
)
# 4. Validation: Total time should be > 0
total_observation_time = ltrmat.grid.observation_time.sum().item()
print(f"Total observations time logged: {(total_observation_time):.1f} seconds")Update in progress... processed 0 bins.
Update in progress... processed 500 bins.
Grid populated: 531 bins updated.
Total observations time logged: 2170704.8 seconds
Interactive Visualisation
import plotly.io as pio
# Reload and display the 3D Interactive Map
if Path(plot_save_path).exists():
LTRMLat_fig = pio.read_json(str(plot_save_path))
LTRMLat_fig.show()
else:
print(f"Error: Plot file not found at {plot_save_path}")6.3 Residence Time
Cartesian coordinates
Calculating residence time and preaping plot file
# 1. Importing Cartesian class from Package_Name
from akr_3d_map.grid_3d import Cartesian
# 2. Path to save the file
plot_save_path = f"{ASSETS_DIR}/cartesian_grid_with_residence_data.json"
# 3. Performing analysis and generating plot
cart = (
# Provide bin size for the 3D plot
Cartesian()
# In case you are not sure about the extreme points
# .decide_boundaries(wind_data)
# Create grid points
.create_grid()
# Calculate residence time given wind satellite data
.add_residence_time(residence_data, coord_colnames=("x_gse", "y_gse", "z_gse"))
# Creating plot and saving as a json file (HPC save for big data)
.plot_3d(
variable="residence_time",
path=plot_save_path,
show_earth=True,
earth_image_path_str=f"{project_root}/assets/temp.jpg",
show_sun=False,
)
)
# 4. Validation: Total time should be > 0
total_residence_time = cart.grid.residence_time.sum().item()
print(f"Total observations time logged: {(total_residence_time):.1f} seconds")Residence Update: processed 0 bins.
Grid populated: 31 bins updated with residence time.
Total observations time logged: 5050080.0 seconds
Interactive Visualisation
import plotly.io as pio
# Reload and display the 3D Interactive Map
if Path(plot_save_path).exists():
cartesian_fig = pio.read_json(str(plot_save_path))
cartesian_fig.show()
else:
print(f"Error: Plot file not found at {plot_save_path}")LTRMlat coordinates
Calculating residence time and preaping plot file
# 1. Importing Cartesian class from Package_Name
from akr_3d_map.grid_3d import LTRMLat
# 2. Path to save the file
plot_save_path = f"{ASSETS_DIR}/LTRMLat_grid_with_residence_data.json"
# 3. Performing analysis and generating plot
ltrmat = (
# Provide bin size for the 3D plot
LTRMLat()
# In case you are not sure about the extreme points
# .decide_boundaries(wind_data)
# Create grid points
.create_grid()
# Calculate residence time given wind satellite data
.add_residence_time(residence_data, coord_colnames=("LT_gse", "radius", "lat_gse"))
# Creating plot and saving as a json file (HPC save for big data)
.plot_3d(
variable="residence_time",
path=plot_save_path,
show_earth=True,
earth_image_path_str=f"{project_root}/assets/temp.jpg",
show_sun=False,
)
)
# 4. Validation: Total time should be > 0
total_burst_count = ltrmat.grid.burst_count.sum().item()
print(f"Total observations time logged: {(total_burst_count):.1f} seconds")Residence Update: processed 0 bins.
Residence Update: processed 500 bins.
Grid populated: 662 bins updated with residence time.
Total observations time logged: 0.0 seconds
Interactive Visualisation
import plotly.io as pio
# Reload and display the 3D Interactive Map
if Path(plot_save_path).exists():
LTRMLat_fig = pio.read_json(str(plot_save_path))
LTRMLat_fig.show()
else:
print(f"Error: Plot file not found at {plot_save_path}")6.4 Normalised Observation Time
Cartesian coordinates
Calculating normalised observation time and preaping plot file
# 1. Importing Cartesian class from Package_Name
from akr_3d_map.grid_3d import Cartesian
# 2. Path to save the file
plot_save_path = f"{ASSETS_DIR}/cartesian_grid_with_normalised_observation_data.json"
# 3. Performing analysis and generating plot
cart = (
# Provide bin size for the 3D plot
Cartesian()
# In case you are not sure about the extreme points
# .decide_boundaries(wind_data)
# Create grid points
.create_grid()
# Calculate normalised observation time given wind satellite data
.add_normalised_observation_time(
akr_df=wind_data,
satellite_residence_df=residence_data,
coord_colnames=("x_gse", "y_gse", "z_gse"),
akr_timestamp_colname="burst_timestamp",
residence_timestamp_colname="time_stamp",
)
# Creating plot and saving as a json file (HPC save for big data)
.plot_3d(
variable="normalised_observation_time",
path=plot_save_path,
show_earth=True,
earth_image_path_str=f"{project_root}/assets/temp.jpg",
show_sun=False,
)
)
# 4. Validation: Total time should be > 0
total_normalised_observation_time = ltrmat.grid.normalised_observation_time.sum().item()
print(
f"Total observations time logged: {(total_normalised_observation_time):.1f} seconds"
)Residence time grid empty. Populating from satellite_residence_df...
Residence Update: processed 0 bins.
Grid populated: 31 bins updated with residence time.
Observation time grid empty. Populating from akr_df...
Update in progress... processed 0 bins.
Grid populated: 31 bins updated.
Normalistion complete. Activity rates calculated for 31 active grid cells.
Total observations time logged: 0.0 seconds
Interactive Visualisation
import plotly.io as pio
# Reload and display the 3D Interactive Map
if Path(plot_save_path).exists():
cartesian_fig = pio.read_json(str(plot_save_path))
cartesian_fig.show()
else:
print(f"Error: Plot file not found at {plot_save_path}")LTRMlat coordinates
Calculating normalised observation time and preaping plot file
# 1. Importing Cartesian class from Package_Name
from akr_3d_map.grid_3d import LTRMLat
# 2. Path to save the file
plot_save_path = f"{ASSETS_DIR}/LTRMLat_grid_with_normalised_observation_data.json"
# 3. Performing analysis and generating plot
ltrmat = (
# Provide bin size for the 3D plot
LTRMLat()
# In case you are not sure about the extreme points
# .decide_boundaries(wind_data)
# Create grid points
.create_grid()
# Calculate normalised observation time given wind satellite data
.add_normalised_observation_time(
akr_df=wind_data,
satellite_residence_df=residence_data,
coord_colnames=("LT_gse", "radius", "lat_gse"),
akr_timestamp_colname="burst_timestamp",
residence_timestamp_colname="time_stamp",
)
# Creating plot and saving as a json file (HPC save for big data)
.plot_3d(
variable="normalised_observation_time",
path=plot_save_path,
show_earth=True,
earth_image_path_str=f"{project_root}/assets/temp.jpg",
show_sun=False,
)
)
# 4. Validation: Total time should be > 0
total_normalised_observation_time = ltrmat.grid.normalised_observation_time.sum().item()
print(
f"Total observations time logged: {(total_normalised_observation_time):.1f} seconds"
)Residence time grid empty. Populating from satellite_residence_df...
Residence Update: processed 0 bins.
Residence Update: processed 500 bins.
Grid populated: 662 bins updated with residence time.
Observation time grid empty. Populating from akr_df...
Update in progress... processed 0 bins.
Update in progress... processed 500 bins.
Grid populated: 531 bins updated.
Normalistion complete. Activity rates calculated for 513 active grid cells.
Total observations time logged: 340.3 seconds
Interactive Visualisation
import plotly.io as pio
# Reload and display the 3D Interactive Map
if Path(plot_save_path).exists():
LTRMLat_fig = pio.read_json(str(plot_save_path))
LTRMLat_fig.show()
else:
print(f"Error: Plot file not found at {plot_save_path}")