Download SUVI HDR composites


Download SUVI composite files from the NOAA webserver. Composite files are high dynamic range images composed from long and short exposure L1b files to avoid saturation.

First of all: if you have wget and want an easy solution outside of Python, here are a few bash one-liner examples (remove the #) that can be applied to GOES-16, GOES-17, and different wavelengths with minor changes:

Download an entire day of 171 data to the current directory:

#wget -nH -nd -r -np -A *.fits

Download only the 171 data between 1 and 2 pm that day to the current directory:

#wget -nH -nd -r -np -A dr_suvi-l2-ci171_g16_s20210218T13*.fits

Same as above, but for all SUVI wavelengths, downloaded into their respective subdirectories:

#for w in ci094 ci131 ci171 ci195 ci284 ci304; do wget -nH -nd -r -np --directory-prefix=$w -A dr_suvi-l2-${w}_g16_s20210218T13*.fits$w/2021/02/18/; done;

Now let’s use Python. Import the necessary libraries:

__author__ = "cbethge"

from bs4 import BeautifulSoup
from astropy.time import Time, TimeDelta
import requests, os
import numpy as np

Define parser for the SUVI websites using BeautifulSoup:

def list_url_directory(url, ext=''):
    """Return the URLs of all links on a directory-listing page.

    Fetches ``url``, parses the returned HTML, and collects every anchor
    whose ``href`` ends with ``ext``.

    Parameters
    ----------
    url : str
        Address of the listing page. Each ``href`` is appended to it
        verbatim, so it should end with a ``/``.
    ext : str, optional
        Suffix an ``href`` must end with to be included. The default
        (empty string) matches every link.

    Returns
    -------
    list of str
        ``url + href`` for each matching anchor.
    """
    page = requests.get(url).text
    soup = BeautifulSoup(page, 'html.parser')
    # ``href=True`` skips anchors that have no href attribute (e.g. named
    # anchors). For those, ``node.get('href')`` is None and calling
    # ``.endswith`` on it would raise AttributeError.
    return [url + node['href']
            for node in soup.find_all('a', href=True)
            if node['href'].endswith(ext)]

Now we define the variable date_time. The general template is ‘YYYY-MM-DDThh:mm:ss’, but it can have the following formats:

Single date/time:

'2020-01-05T12:30:00'

Several dates/times:

'2020-01-05T12:30:00, 2020-04-23T11:43:00, 2020-05-11T17:05:00'

JSOC-style with start time, timespan, and cadence (image every 20 min for 1 hour in this example):

'2020-01-05T12:30:00/1h@20m'

If a single or several explicit date_times are given, the code will only download the data closest to those timestamps. For the JSOC-style, it will download everything in the given range with the given cadence. Note that SUVI has an imaging cadence of 4 minutes, so any given cadence should be a multiple of 4 minutes. An exception is the 195 channel, where images are taken more frequently. Accepted units for the timespan and cadence are: ‘d’ (days), ‘h’ (hours), and ‘m’ (minutes).

# JSOC-style query string: '<start time>/<timespan>@<cadence>'.
date_time = '2020-01-05T12:30:00/1h@20m'

A few other definitions:

spacecraft  = 16              # GOES spacecraft number. Valid values: 16, 17.
wavelengths = [171,195]       # Wavelengths to fetch. Valid values: 93, 94, 131, 171, 195, 284, 304, 305
                              # (93/94 and 304/305 each map to the same channel directory).
outdir      = './composites'  # The download path. Subdirectories for the wavelengths will be created.
query_only  = False           # If True, then the filenames are printed only, nothing is downloaded.
verbose     = True            # If True, then print the filenames when downloading.

Run the code:

# Main loop: for each requested wavelength, turn `date_time` into a list of
# timestamps and directory URLs, list the remote files, and then fetch (or
# just report) the file whose start time is closest to each timestamp.
# NOTE(review): this listing is not valid Python as shown -- several
# statements (`try:`/`else:` lines, loop bodies) appear to be missing. Each
# suspected gap is flagged below; confirm against the upstream script.
for wavelength in wavelengths:
    # Split the date argument at the commas (if applicable)
    # NOTE(review): `date_time` is rebound here on every wavelength pass. If
    # several dates were given it stays a list after the first pass, and
    # `.replace` on the next pass would fail because lists have no such
    # method. Parsing once before the loop would avoid this.
    date_time = date_time.replace(" ","").split(',')
    if len(date_time) == 1:
        # If it is not several dates, take only the first item. That way,
        # we can distinguish between lists and strings below.
        date_time = date_time[0]

    # this should stay the same for now
    # NOTE(review): baseurl1 is an empty string here, so the URLs built below
    # start with the spacecraft number; the host part looks missing -- confirm.
    baseurl1 = ''
    baseurl2 = '/l2/data/'
    ext = '.fits'

    # check for existing output directory and correct spacecraft and wavelength numbers
    if not query_only:
        # Create the output directory if it does not exist
        # NOTE(review): the `try:` and the directory-creating call that this
        # `except` belongs to are not present, nor is the handler body.
        except FileExistsError:
            # directory already exists

    spacecraft_numbers = [16,17]

    if spacecraft not in spacecraft_numbers:
        raise Exception('Invalid spacecraft number: '+str(spacecraft)+'. Valid values are: 16, 17.')

    # Map each accepted wavelength to its remote directory name; 93/94 and
    # 304/305 are aliases for the same directory.
    wvln_path = dict({ 93:'suvi-l2-ci094',  94:'suvi-l2-ci094', 131:'suvi-l2-ci131', 171:'suvi-l2-ci171', \
                      195:'suvi-l2-ci195', 284:'suvi-l2-ci284', 304:'suvi-l2-ci304', 305:'suvi-l2-ci304' })

    if wavelength not in wvln_path:
        raise Exception('Invalid wavelength: '+str(wavelength)+'. Valid values are: 93, 94, 131, 171, 195, 284, 304, 305.')

    # Figure out what kind of date_time was given.
    if isinstance(date_time, str):
        # Check if it is a JSOC-style query
        if len(date_time.split('/')) == 2:
            if len(date_time.split('@')) == 2:
                # Form 'start/timespan@cadence': the last character of each
                # part is the unit, the rest is the number.
                cadence_string = date_time.split('@')[1]
                timespan_string = date_time.split('@')[0].split('/')[1]
                cadence = float(cadence_string[:-1])
                cadence_unit = cadence_string[-1]
                # Convert the cadence to seconds.
                if cadence_unit == 'm':
                    cadence = cadence*60.
                elif cadence_unit == 'h':
                    cadence = cadence*60.*60.
                elif cadence_unit == 'd':
                    cadence = cadence*60.*60*24.
                    # NOTE(review): as shown, this message is printed for the
                    # valid unit 'd'; an `else:` before it looks missing.
                    print('Not a valid time unit (must be m, h, or d).')
                # NOTE(review): as shown, these two lines always run and
                # overwrite the parsed cadence with 240 s. They look like the
                # body of a missing `else:` for queries without '@' -- confirm.
                cadence = 240.
                timespan_string = date_time.split('/')[1]

            # Convert the timespan to seconds.
            timespan = float(timespan_string[:-1])
            timespan_unit = timespan_string[-1]
            if timespan_unit == 'm':
                timespan = timespan*60.
            elif timespan_unit == 'h':
                timespan = timespan*60.*60.
            elif timespan_unit == 'd':
                timespan = timespan*60.*60*24.
                # NOTE(review): same pattern as above -- an `else:` before
                # this message looks missing.
                print('Not a valid time unit (must be m, h, or d).')

            t0 = Time(date_time.split('/')[0], scale='utc', format='isot')
            tmp_timestamp = []
            counter = 0
            # NOTE(review): nothing is appended to tmp_timestamp in this loop
            # as shown, so it stays empty; presumably each offset
            # counter*cadence should be collected here -- confirm.
            while counter*cadence <= timespan:
                counter += 1

            # Offsets in seconds from t0.
            timestamp = t0+TimeDelta(tmp_timestamp, format='sec')
            urls = []
            # NOTE(review): this loop has no body in the listing; presumably
            # it should add one directory URL per timestamp to `urls`.
            for time in timestamp:

            # Only one date, and no JSOC-style query
            # NOTE(review): an `else:` introducing this branch looks missing;
            # as shown, these lines overwrite the JSOC-style results above.
            timestamp = [Time(date_time, scale='utc', format='isot')]
            # The first ten characters 'YYYY-MM-DD' become the path 'YYYY/MM/DD'.
            urls = [baseurl1+str(spacecraft)+baseurl2+wvln_path[wavelength]+'/'+date_time[0:10].replace('-','/')+'/']

    elif isinstance(date_time, list):
        # if the argument was a list of dates
        timestamp = []
        urls = []
        # NOTE(review): `urls` is never filled in this branch as shown, so no
        # directory would be queried for a list of dates -- confirm.
        for this_date in date_time:
            timestamp.append(Time(this_date, scale='utc', format='isot'))

    # Before we run, check if all of the websites are there.
    # Cook the urls down to unique values. To do that, convert
    # to a numpy array, use np.unique, and then convert back
    # to a list. Tried by using conversion to a set first,
    # but that doesn't keep the correct order for the dates.
    urls_arr = np.array(urls)
    urls_unique = np.unique(urls_arr).tolist()
    all_files  = []
    start_time = []
    end_time   = []
    for url in urls_unique:
        request = requests.get(url)
        if not request.status_code == 200:
            raise Exception('Website not found: '+url)
            # NOTE(review): at this indentation everything below follows the
            # `raise` and cannot run; an `else:` looks missing. Nothing shown
            # here fills all_files, start_time or end_time either.
            # If all of the websites were found, go ahead and make lists of files and dates.
            print('Querying', url, 'for SUVI files...')
            for file in list_url_directory(url, ext):
                file_base = os.path.basename(file)

    # Create the subdirectory for the current wavelength
    this_outdir = os.path.join(outdir, str(wavelength))
    # NOTE(review): the `try:` and the directory-creating call for this
    # `except` are not present, nor is the handler body.
    except FileExistsError:
        # directory already exists

    # Make astropy time objects from the start times
    start_time = Time(start_time, scale='utc', format='isot')

    these_files = np.array(all_files)

    # Now go through all of the requested times and download/print the files.
    for time in timestamp:
        # Pick the file whose start time is closest to the requested time.
        delta_t = time-start_time
        which_file = np.abs(delta_t).argmin()
        # NOTE(review): the bodies of `if query_only:` and `if verbose:` and
        # the `else:` between them look missing; as shown, the download lines
        # below sit under `if query_only:` -- confirm.
        if query_only:
            if verbose:
            f = requests.get(these_files[which_file])
            open(os.path.join(this_outdir, os.path.basename(these_files[which_file])), 'wb').write(f.content)
Querying for SUVI files...
Querying for SUVI files...

Total running time of the script: (0 minutes 14.203 seconds)

