Fix the SUVI L1b headers (astropy >=4.2.1)

Purpose:

Correct the issue with the wrong CONTINUE implementation in the SUVI L1b fits headers.

Import the necessary libraries:

__author__ = "cbethge"

from astropy.io import fits
import requests
import os, gzip, re

Define function that only uses native Python to correct the corrupt SUVI L1b fits headers. Input is a filename (unzipped fits file), output is the corrected header:

def fix_suvi_l1b_header(in_filename):
    # Read the file manually as bytes until we hit a UnicodeDecodeError, i.e.
    # until we reach the data part. Since astropy version 4.2.1, we can't use
    # the .to_string() method anymore because of FITS header consistency checks
    # that cannot be overridden.
    header = ''
    with open(in_filename, 'rb') as in_file:
        counter = 1
        while True:
            try:
                this_line = in_file.read(counter)
                this_str = this_line.decode("utf-8")
                header += this_str
                counter += 1
            except UnicodeDecodeError:
                break
    in_file.close()
    hdr_list = [header[i:i+80] for i in range(0, len(header), 80)]

    # Remove all the empty entries
    while(" "*80 in hdr_list) :
        hdr_list.remove(" "*80)

    # Make a new string list where we put all the information together correctly
    hdr_list_new = []
    for count, item in enumerate(hdr_list):
        if count <= len(hdr_list)-2:
            if hdr_list[count][0:8] != 'CONTINUE' and hdr_list[count+1][0:8] != 'CONTINUE':
                hdr_list_new.append(hdr_list[count])
            else:
                if hdr_list[count][0:8] != 'CONTINUE' and hdr_list[count+1][0:8] == 'CONTINUE':
                    ampersand_pos = hdr_list[count].find('&')
                    if ampersand_pos != -1:
                        new_entry = hdr_list[count][0:ampersand_pos]
                    else:
                        # Raise exception here because there should be an ampersand at the end of a CONTINUE'd keyword
                        raise Exception("There should be an ampersand at the end of a CONTINUE'd keyword.")

                    tmp_count = 1
                    while hdr_list[count+tmp_count][0:8] == 'CONTINUE':
                        ampersand_pos = hdr_list[count+tmp_count].find('&')
                        if ampersand_pos != -1:
                            first_sq_pos = hdr_list[count+tmp_count].find("'")
                            if first_sq_pos != -1:
                                new_entry = new_entry+hdr_list[count+tmp_count][first_sq_pos+1:ampersand_pos]
                            else:
                                # Raise exception here because there should be a single quote after CONTINUE
                                raise Exception("There should be two single quotes after CONTINUE. Did not find any.")

                        else:
                            # If there is no ampersand at the end anymore, it means the entry ends here.
                            # Read from the first to the second single quote in this case.
                            first_sq_pos = hdr_list[count+tmp_count].find("'")
                            if first_sq_pos != -1:
                                second_sq_pos = hdr_list[count+tmp_count][first_sq_pos+1:].find("'")
                                if second_sq_pos != -1:
                                    new_entry = new_entry+hdr_list[count+tmp_count][first_sq_pos+1:second_sq_pos+1+first_sq_pos].rstrip()+"'"
                                else:
                                    # Raise exception here because there should be a second single quote after CONTINUE
                                    raise Exception("There should be two single quotes after CONTINUE. Found the first, but not the second.")

                            else:
                                # Raise exception here because there should be a (first) single quote after CONTINUE
                                raise Exception("There should be two single quotes after CONTINUE. Did not find any.")

                        tmp_count += 1

                    hdr_list_new.append(new_entry)

                else:
                    continue

        else:
            # Add END at the end of the header
            hdr_list_new.append(hdr_list[count])

    # Now we stitch together the CONTINUE information correctly,
    # with a "\n" at the end that we use as a separator later on
    # when we convert from a string to an astropy header.
    for count, item in enumerate(hdr_list_new):
        if len(item) > 80:
            this_entry = item[0:78]+"&'\n"
            rest       = "CONTINUE  '"+item[78:]
            while len(rest) > 80:
                this_entry = this_entry + rest[0:78] + "&'\n"
                rest       = "CONTINUE  '"+ rest[78:]
            this_entry = this_entry + rest
            hdr_list_new[count] = this_entry

    # Now we should have the correct list of strings. Since we can't convert a list to a
    # fits header directly, we have to convert it to a string first, separated by "\n".
    hdr_str_new = '\n'.join([str(item) for item in hdr_list_new])

    # And finally we create the new corrected astropy fits header from that string
    hdr_corr = fits.Header.fromstring(hdr_str_new, sep='\n')

    # Return the corrected header
    return hdr_corr

GOES-16/SUVI L1b files can be downloaded here. We download a sample L1b file for illustration:

local_dir = "./"
the_url   = "https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes16/l1b/suvi-l1b-fe171/2021/04/21/"
the_file  = "OR_SUVI-L1b-Fe171_G16_s20211110304328_e20211110304338_c20211110304518.fits.gz"

with open(local_dir + the_file, "wb") as f:
  r = requests.get(the_url + the_file)
  f.write(r.content)

The downloaded file is gzipped, so we need to unzip it first for this to work. Otherwise, if it is an unzipped fits file, simply pass the filename.

split_filename = os.path.splitext(the_file)
if split_filename[1] == '.gz':
    with gzip.open(local_dir + the_file, 'r') as f_in, open(local_dir + split_filename[0], 'wb') as f_out:
        f_out.write(f_in.read())
    hdu = fits.open(local_dir + split_filename[0], do_not_scale_image_data=True)
    corrected_header = fix_suvi_l1b_header(local_dir + split_filename[0])
    os.remove(local_dir + split_filename[0])
elif split_filename[1] == '.fits':
    hdu = fits.open(local_dir + the_file, do_not_scale_image_data=True)
    corrected_header = fix_suvi_l1b_header(local_dir + the_file)

Uncomment the following line to see what happens when we try to access the fits keyword “LUT_NAME” in the original header:

#hdu[0].header['LUT_NAME']

Try accessing “LUT_NAME” to see if it worked:

corrected_header['LUT_NAME']
'SUVI_CalibrationParameters(FM1A_CDRL79RevD_PR_09_06_02)-657795600.0.h5 SUVI_NavigationParameters(FM1A_CDRL79RevD_DO_07_00_00)-582860861.0.xml'

Now we can replace the older header

# replace the header in the HDU with the corrected one
hdu[0].header = corrected_header

Finally a small fix to the second HDU (the data quality, DQF) header

#Fix a few more malformed CONTINUE statements
for card in hdu[1].header._cards:
    bad_continues = re.findall(r"'\s*CONTINUE\s*'",card._image)
    if len(bad_continues)>0:
        for bad_continue in bad_continues:
            card._image = card._image.replace(bad_continue,"'CONTINUE  '")

Check the problematic header field is fixed

hdu[1].header['FLAGMEAN']
'good_quality_qf degraded_due_to_bad_pixel_correction_qf degraded_due_to_bad_column_correction_qf invalid_due_to_missing_L0_data_qf potentially_degraded_due_to_pixel_spike_detected_qf'

Write out the result

# make sure CHECKSUM and DATASUM get recomputed for both headers
for thishdu in [hdu[0],hdu[1]]:
    del thishdu.header['CHECKSUM']
    del thishdu.header['DATASUM']

# write the fits file with the corrected headers
hdu.writeto(local_dir + the_file, overwrite=True, checksum=True)

Total running time of the script: (0 minutes 11.381 seconds)

Gallery generated by Sphinx-Gallery