diff --git a/pyxlma/lmalib/io/cf_netcdf.py b/pyxlma/lmalib/io/cf_netcdf.py index 6ce8d45..7a83347 100644 --- a/pyxlma/lmalib/io/cf_netcdf.py +++ b/pyxlma/lmalib/io/cf_netcdf.py @@ -301,6 +301,10 @@ def new_template_dataset(): 'attrs': {'_FillValue': ' '*32, 'long_name': 'LMA network'}, 'dtype': '|S32', }, + 'station_name': {'dims': ('number_of_stations',),# 'string32'), + 'attrs': {'_FillValue': ' '*32, 'long_name': 'Name of receiving station'}, + 'dtype': '|S32', + }, 'station_latitude': {'dims': ('number_of_stations',), 'attrs': {'_FillValue': np.nan, 'valid_range': [-90.0, 90.0], @@ -323,6 +327,22 @@ def new_template_dataset(): 'positive': 'up'}, 'dtype': 'float32', }, + 'station_delay': {'dims': ('number_of_stations',), + 'attrs': {'_FillValue': np.nan, + 'long_name': "Configured delay time for a signal to travel from the antenna to the receiver", + 'units': 'nanoseconds'}, + 'dtype': 'float32', + }, + 'station_board_revision': {'dims': ('number_of_stations',), + 'attrs': {'_FillValue': 0, + 'long_name': "Station board revision number"}, + 'dtype': 'uint8', + }, + 'station_receive_channels': {'dims': ('number_of_stations',), + 'attrs': {'_FillValue': 0, + 'long_name': "Number of channels on the station's receiver"}, + 'dtype': 'uint8', + }, 'station_event_fraction': {'dims': ('number_of_stations',), 'attrs': {'_FillValue': np.nan, 'long_name': "Fraction of events to which this station contributed", @@ -335,6 +355,11 @@ def new_template_dataset(): 'long_name': "
<P/P_m>
"}, 'dtype': 'float32', }, + 'station_active': {'dims': ('number_of_stations',), + 'attrs': {'_FillValue': np.nan, + 'long_name': "Station active flag"}, + 'dtype': '|S2', + }, }} return __template_dataset @@ -461,7 +486,7 @@ def compare_attributes(ds): """ Compare the attributes of all data variables in ds to the CF spec and print a report of any differences. """ - dst = __template_dataset['data_vars'] + dst = new_template_dataset()['data_vars'] dsd = ds.to_dict()['data_vars'] for k in dsd.keys(): if dst[k]['attrs'] != dsd[k]['attrs']: diff --git a/pyxlma/lmalib/io/read.py b/pyxlma/lmalib/io/read.py index 82d5724..a5f5e7a 100644 --- a/pyxlma/lmalib/io/read.py +++ b/pyxlma/lmalib/io/read.py @@ -3,6 +3,11 @@ import numpy as np import gzip import datetime as dt +from os import path +import collections +import string +import warnings +from pyxlma.lmalib.io.cf_netcdf import new_dataset, new_template_dataset class open_gzip_or_dat: def __init__(self, filename): @@ -36,6 +41,17 @@ def combine_datasets(lma_data): pyxlma.lmalib.io.cf_netcdf.new_dataset or pyxlma.lmalib.io.read.to_dataset """ + def reorder_stations(dataset, index_mapping): + for var in dataset.data_vars: + if 'number_of_stations' in dataset[var].dims: + if var == 'station_code': + new_station_codes = dataset[var].data.copy() + new_station_codes = new_station_codes[index_mapping] + dataset = dataset.assign(station_code=('number_of_stations', new_station_codes)) + else: + reordered_data = dataset[var].isel(number_of_stations=index_mapping) + dataset[var].data = reordered_data.values + return dataset # Get a list of all the global attributes from each dataset attrs = [d.attrs for d in lma_data] # Create a dict of {attr_name: [list of values from each dataset]} @@ -46,52 +62,226 @@ def combine_datasets(lma_data): } final_attrs = {} for k in all_attrs: + if k.startswith('analysis_') and k.endswith('_time'): + continue attr_vals = all_attrs[k] set_of_values = set(attr_vals) - if len(set_of_values) == 1: + if len(set_of_values) == 0: + continue + elif len(set_of_values) == 1: final_attrs[k] = tuple(set_of_values)[0] else: - final_attrs[k] = '; '.join(attr_vals) - # print(final_attrs) - - # Get just the pure-station variables - lma_station_data = xr.concat( - [d.drop_dims(['number_of_events']) for d in lma_data], - dim='number_of_files' - ) - # print(lma_station_data) - - # Get just the pure-event variables - lma_event_data = xr.concat( - [d.drop_dims(['number_of_stations']).drop_vars( - ['network_center_latitude', - 'network_center_longitude', - 'network_center_altitude',] - ) for d in lma_data], - dim='number_of_events' - ) - # print(lma_event_data) - - # Get any varaibles with joint dimensions, - # i.e., ('number_of_events', 'number_of_stations') - event_contributing_stations = xr.concat( - [d['event_contributing_stations'] for d in lma_data], - dim='number_of_events' - ) - # print(event_contributing_stations) - - # Find the mean of the station data, and then add back the event data - ds = xr.merge([lma_station_data.mean(dim='number_of_files'), - lma_event_data], - compat='equals') - # ... and then restore the contributing stations. - # Note the coordinate variables are being used as labels to ensure data remain aligned. 
- ds['event_contributing_stations'] = event_contributing_stations - - # Restore the global attributes - ds.attrs.update(final_attrs) - - return ds + if type(attr_vals[0]) == str: + final_attrs[k] = '; '.join(attr_vals) + elif k == 'min_stations': + final_attrs[k] = min(attr_vals) + elif k == 'max_chi2': + final_attrs[k] = max(attr_vals) + elif k == 'max_chi2_iterations': + final_attrs[k] = min(attr_vals) + else: + warnings.warn(f'Conflicting values for global attribute {k}') + # Get the station data from the first dataset and assign each station a unique index + lma_data[0]['station_code'] = lma_data[0].number_of_stations + lma_data[0]['number_of_stations'] = np.arange(len(lma_data[0].number_of_stations)) + lma_data[0].attrs['config_times'] = [[[lma_data[0].attrs['analysis_start_time'], lma_data[0].attrs['analysis_end_time']]]] + all_data = lma_data[0] + # Get the attributes attached to each variable in the dataset + dv_attrs = {} + new_ds = new_template_dataset() + for var in new_ds['data_vars']: + dv_attrs[var] = new_ds['data_vars'][var]['attrs'] + # Define list of 'properties', things which identify a station and are not expected to change + property_vars = ('station_latitude', 'station_longitude', 'station_altitude', 'station_code', 'station_network', 'station_name', 'station_board_revision', 'station_delay', 'station_receive_channels') + # Define list of variables to be recalculated for each station after the datasets are combined + recalc_vars = ('station_event_fraction', 'station_power_ratio', 'network_center_latitude', 'network_center_longitude', 'network_center_altitude') + # Will be set to True if network_center location needs to be recalculated + recalc_center = False + # Check each subsequent dataset for new stations + for new_file_num in range(1, len(lma_data)): + new_file = lma_data[new_file_num] + if (np.all(new_file.network_center_latitude.data != all_data.network_center_latitude.data) + or np.all(new_file.network_center_longitude.data.item() != all_data.network_center_longitude.data) + or np.all(new_file.network_center_altitude.data != all_data.network_center_altitude.data)): + recalc_center = True + # Demote station code to a data variable and assign an index to each station in the new file + new_file['station_code'] = new_file.number_of_stations + new_file['number_of_stations'] = np.arange(len(new_file.number_of_stations)) + stations_in_file = [] + # Check each station in the new file against all known stations + station_is_new = True + old_ids_to_check_for_missing = all_data.number_of_stations.data.tolist() + for station_num in range(len(new_file.number_of_stations.data)): + station = new_file.isel(number_of_stations=station_num) + old_ids_to_search = collections.deque(range(len(all_data.number_of_stations.data))) + old_ids_to_search.rotate(-1*station_num) # start with the same index because stations USUALLY come in the same order + for old_station_num in old_ids_to_search: + old_station = all_data.isel(number_of_stations=old_station_num) + all_props_match = True + for prop in property_vars: + if station.data_vars[prop].data.item() != old_station.data_vars[prop].data.item(): + all_props_match = False + break + if all_props_match: + station_is_new = False + stations_in_file.append(old_station_num) + old_ids_to_check_for_missing.remove(old_station_num) + break + if station_is_new: + stations_in_file.append(-1) + # Find the indices of any newly-discovered stations + indices_of_new_stations = np.where(np.array(stations_in_file) == -1)[0] + # Add the new stations to the 
known stations + for idx_of_new_station in indices_of_new_stations: + new_station = new_file.isel(number_of_stations=idx_of_new_station) + # The new station is appended to the end of the known stations + new_station_index = len(all_data.number_of_stations) + # Expand all of the previous data to contain the new station. Fill with station properties. + fill_vals_dict = {} + for var in new_station.data_vars: + if 'number_of_stations' in all_data[var].dims: + if var in property_vars: + fill_vals_dict[var] = new_station[var].data.item() + else: + fill_vals_dict[var] = 0 + all_data = all_data.reindex({'number_of_stations': np.arange(new_station_index+1)}, fill_value=fill_vals_dict) + # Update the station index for the new station + stations_in_file[idx_of_new_station] = new_station_index + # Check for any previously-known stations that are no longer in the new file + for missing_station_id in old_ids_to_check_for_missing: + dead_station_data = all_data.isel(number_of_stations=missing_station_id) + # a station has been removed, create a row of nan values for this file to indicate that the station is no longer present + # first create a temporary index for the dead station + temp_station_id = len(new_file.number_of_stations) + fill_vals_dict = {} + for var in dead_station_data.data_vars: + if 'number_of_stations' in all_data[var].dims: + if var in property_vars: + fill_vals_dict[var] = dead_station_data[var].data.item() + else: + fill_vals_dict[var] = 0 + new_file = new_file.reindex({'number_of_stations': np.arange(temp_station_id+1)}, fill_value=fill_vals_dict) + # Update the station index for the dead station + stations_in_file.append(missing_station_id) + # Re-order the station data to match the order of the stations in the new file + # This can happen if lma_analysis decides to change the order of the stations, or if new stations are added or removed + new_file = reorder_stations(new_file, np.argsort(stations_in_file)) + # Concatenate the new file's station information with the previously-known station information + station_to_merge = [d.drop_dims(['number_of_events']) for d in [all_data, new_file]] + lma_station_data = xr.concat(station_to_merge, dim='network_configurations').drop_vars( + ['network_center_latitude', 'network_center_longitude', 'network_center_altitude']) + for var_name in lma_station_data.data_vars: + da = lma_station_data[var_name] + if 'network_configurations' in da.dims: + # black magic from openAI that somehow determines if a data array is identical across the 'network_configurations' variable + if np.all((da == da.isel({'network_configurations': 0})).all('network_configurations')) or var_name in recalc_vars: + # Remove the 'network_configurations' dimension if the data is identical across it + lma_station_data[var_name] = da.isel(network_configurations=0) + if 'network_configurations' in lma_station_data.dims: + unique_netw_configs = np.unique(lma_station_data.station_active.data, return_index=True, axis=0)[1] + lma_station_data = lma_station_data.isel(network_configurations=sorted(unique_netw_configs)) + # Rebuild the event_contributing_stations array + event_contributing_stations = xr.concat( + [d['event_contributing_stations'] for d in [all_data, new_file]], + dim='number_of_events' + ) + # Attach the event_contributing_stations array to the station dataset + lma_station_data['event_contributing_stations'] = event_contributing_stations + + # Combining the events is a simple concatenation. If only the stations had been this easy... 
+ lma_event_data = xr.concat( + [d.drop_dims(['number_of_stations']) for d in [all_data, new_file]], + dim='number_of_events' + ) + # merge all the data from this file with the previously-known data + combined_event_data = xr.merge([lma_station_data, lma_event_data]) + all_data = combined_event_data + # Log continuous intervals of data coverage + if 'network_configurations' in all_data.dims: + this_netw_conf = np.where((all_data.station_active.data == new_file.station_active.data).all(axis=1))[0][0] + else: + this_netw_conf = 0 + if len(all_data.attrs['config_times']) > this_netw_conf: + times_of_this_config = all_data.attrs['config_times'][this_netw_conf] + for gap in times_of_this_config: + if new_file.attrs['analysis_start_time'] == gap[1]: + gap[1] = new_file.attrs['analysis_end_time'] + break + elif new_file.attrs['analysis_end_time'] == gap[0]: + gap[0] = new_file.attrs['analysis_start_time'] + break + else: + times_of_this_config.append([new_file.attrs['analysis_start_time'], new_file.attrs['analysis_end_time']]) + else: + all_data.attrs['config_times'].append([[new_file.attrs['analysis_start_time'], new_file.attrs['analysis_end_time']]]) + all_data.attrs['analysis_start_time'] = min(all_data.attrs['analysis_start_time'], new_file.attrs['analysis_start_time']) + all_data.attrs['analysis_end_time'] = max(all_data.attrs['analysis_end_time'], new_file.attrs['analysis_end_time']) + all_data['network_center_longitude'] = all_data.network_center_longitude.isel(number_of_events=0) + all_data['network_center_latitude'] = all_data.network_center_latitude.isel(number_of_events=0) + all_data['network_center_altitude'] = all_data.network_center_altitude.isel(number_of_events=0) + # Update the global attributes + all_data.attrs.update(final_attrs) + # To reduce complexity and resource usage, if the 'network_configurations' dimension is the same for all variables, then the dimension is unnecessary + if 'network_configurations' in all_data.dims: + # Identify unique network configurations + unique_configs = np.unique(all_data.station_active.data, return_index=True, axis=0)[1] + if len(unique_configs) == 1: + unique_configs = unique_configs[0] + else: + all_data['station_event_fraction'] = 100*all_data.event_contributing_stations.sum(dim='number_of_events')/all_data.number_of_events.shape[0] + all_data['station_power_ratio'] = (all_data.event_contributing_stations * all_data.event_power).sum(dim='number_of_events')/all_data.event_power.sum(dim='number_of_events') + # recalculate variables that depend on the station data + if recalc_center: + all_data['network_center_longitude'] = all_data.station_longitude.mean(dim='number_of_stations') + all_data['network_center_latitude'] = all_data.station_latitude.mean(dim='number_of_stations') + all_data['network_center_altitude'] = all_data.station_altitude.mean(dim='number_of_stations') + # Make sure all station codes are unique + if np.max(np.unique_counts(all_data.station_code.data).counts) > 1: + unique_stat = [] + the_alphabet = string.ascii_letters + for i in range(len(all_data.station_code.data)): + stat = all_data.station_code.data[i] + if stat in unique_stat: + warnings.warn(f'Conflicting station code discovered at index {i}') + stat_str = stat.decode('utf-8') + if stat_str.isupper(): + lowc = stat_str.lower() + if bytes(lowc, 'utf-8') not in unique_stat: + warnings.warn(f'Renaming station {stat_str} to {lowc}') + unique_stat.append(bytes(lowc, 'utf-8')) + else: + for new_letter in the_alphabet: + if bytes(new_letter, 'utf-8') not in unique_stat: + 
warnings.warn(f'Renaming station {stat_str} to {new_letter}') + unique_stat.append(bytes(new_letter, 'utf-8')) + break + else: + warnings.warn(f'Assigning station a blank ID. This station will not be included in files generated by pyxlma.lmalib.io.write.lma_dat_file') + elif stat_str.islower(): + upc = stat_str.upper() + if bytes(upc, 'utf-8') not in unique_stat: + warnings.warn(f'Renaming station {stat_str} to {upc}') + unique_stat.append(bytes(upc, 'utf-8')) + else: + for new_letter in the_alphabet: + if bytes(new_letter, 'utf-8') not in unique_stat: + warnings.warn(f'Renaming station {stat_str} to {new_letter}') + unique_stat.append(bytes(new_letter, 'utf-8')) + break + else: + warnings.warn(f'Assigning station a blank ID. This station will not be included in files generated by pyxlma.lmalib.io.write.lma_dat_file') + else: + unique_stat.append(stat) + all_data.station_code.data = np.array(unique_stat, dtype='S1') + bin_to_dec = 2**np.flip(np.arange(all_data.event_contributing_stations.data.shape[1])) + all_data.event_mask.data = np.sum((bin_to_dec * all_data.event_contributing_stations.data), axis=1) # convert bin to dec + all_data.attrs['station_mask_order'] = np.apply_along_axis(lambda x: ''.join(x), 0, all_data.station_code.astype(str).data).item() + # restore previously cached data var attributes + for var_name in all_data.data_vars: + if var_name in dv_attrs: + all_data[var_name].attrs = dv_attrs[var_name] + all_data.station_active.attrs['_FillValue'] = 255 + return all_data def dataset(filenames, sort_time=True): """ Create an xarray dataset of the type returned by @@ -101,23 +291,30 @@ def dataset(filenames, sort_time=True): filenames = [filenames] lma_data = [] starttime = None + if sort_time: + all_starttimes = [] next_event_id = 0 - for filename in filenames: + for filename in sorted(filenames): lma_file = lmafile(filename) if starttime is None: starttime = lma_file.starttime else: starttime = min(lma_file.starttime, starttime) - # Accounting for empty files - try: - ds = to_dataset(lma_file, event_id_start=next_event_id).set_index( - {'number_of_stations':'station_code', 'number_of_events':'event_id'}) - lma_data.append(ds) - next_event_id += ds.sizes['number_of_events'] - except: - raise + ds = to_dataset(lma_file, event_id_start=next_event_id).set_index( + {'number_of_stations':'station_code', 'number_of_events':'event_id'}) + ds.attrs['analysis_start_time'] = lma_file.starttime + ds.attrs['analysis_end_time'] = lma_file.starttime + dt.timedelta(seconds=lma_file.analyzed_sec) + lma_data.append(ds) + next_event_id += ds.sizes['number_of_events'] + if sort_time: + all_starttimes.append(lma_file.starttime) + if sort_time: + sorting = np.argsort(all_starttimes) + lma_data = [lma_data[i] for i in sorting] ds = combine_datasets(lma_data) - ds = ds.reset_index(('number_of_events', 'number_of_stations')) + if sort_time: + ds = ds.sortby('event_time') + ds = ds.reset_index(('number_of_events')) if 'number_of_events_' in ds.coords: # Older xarray versions appended a trailing underscore. reset_coords then dropped # converted the coordinate variables into regular variables while not touching @@ -125,18 +322,12 @@ def dataset(filenames, sort_time=True): # names are never changed at the reset_index step, so the renaming step modifies # the dimension name, too. 
resetted = ('number_of_events_', 'number_of_stations_') - ds = ds.reset_coords(resetted) - ds = ds.rename({resetted[0]:'event_id', - resetted[1]:'station_code'}) + ds = ds.rename({resetted[0]:'event_id'}) else: # The approach for newer xarray versions requires we explicitly rename only the variables. # The generic "rename" in the block above renames vars, coords, and dims. resetted = ('number_of_events', 'number_of_stations') - ds = ds.reset_coords(resetted) - ds = ds.rename_vars({resetted[0]:'event_id', - resetted[1]:'station_code'}) - if sort_time: - ds = ds.sortby('event_time') + ds = ds.rename_vars({resetted[0]:'event_id'}) return ds, starttime def to_dataset(lma_file, event_id_start=0): @@ -145,7 +336,7 @@ def to_dataset(lma_file, event_id_start=0): returns an xarray dataset of the type returned by pyxlma.lmalib.io.cf_netcdf.new_dataset """ - from pyxlma.lmalib.io.cf_netcdf import new_dataset + lma_data = lma_file.readfile() starttime = lma_file.starttime stations = lma_file.stations @@ -162,6 +353,11 @@ def to_dataset(lma_file, event_id_start=0): 'station_altitude':'Alt', 'station_event_fraction':'sources', 'station_power_ratio':'
<P/P_m>
', + 'station_active':'active', + 'station_name':'Name', + 'station_board_revision':'Board Revision', + 'station_delay':'Delay Time', + 'station_receive_channels':'Receiver Channels', } event_mapping = { 'event_latitude':'lat', @@ -201,7 +397,10 @@ def to_dataset(lma_file, event_id_start=0): ds.attrs['history'] = "LMA source file created "+lma_file.file_created ds.attrs['event_algorithm_name'] = lma_file.analysis_program ds.attrs['event_algorithm_version'] = lma_file.analysis_program_version - + ds.attrs['original_filename'] = path.basename(lma_file.file) + ds.attrs['max_chi2'] = lma_file.maximum_chi2 + ds.attrs['min_stations'] = lma_file.minimum_stations + ds.attrs['max_chi2_iterations'] = lma_file.maximum_chi2_iter # -- Populate the station mask information -- # int, because NetCDF doesn't have booleans station_mask_bools = np.zeros((N_events, N_stations), dtype='int8') @@ -217,6 +416,15 @@ def to_dataset(lma_file, event_id_start=0): station_mask_bools[:, i] = lma_data[col] ds['event_contributing_stations'][:] = station_mask_bools + # -- Convert the station_active flag to a psuedo-boolean -- + station_active_data = np.zeros(N_stations, dtype='uint8') + station_active_data[ds.station_active.data == b'A'] = 1 + station_active_data[ds.station_active.data == b'NA'] = 0 + ds.station_active.data = station_active_data + + # Convert station event count to station event fraction + if N_events != 0: + ds.station_event_fraction.data = 100*ds.station_event_fraction.data/N_events return ds @@ -352,22 +560,21 @@ def __init__(self,filename): with open_gzip_or_dat(self.file) as f: for line_no, line in enumerate(f): if line.startswith(b'Analysis program:'): - analysis_program = line.decode().split(':')[1:] - self.analysis_program = ':'.join(analysis_program)[:-1] + self.analysis_program = line.decode().replace('Analysis program: ','').replace('Analysis program:','').replace('\n', '') if line.startswith(b'Analysis program version:'): - analysis_program_version = line.decode().split(':')[1:] - self.analysis_program_version = ':'.join(analysis_program_version)[:-1] + self.analysis_program_version = line.decode().replace('Analysis program version: ', '').replace('Analysis program version:', '').replace('\n', '') if line.startswith(b'File created:') | line.startswith(b'Analysis finished:'): - file_created = line.decode().split(':')[1:] - self.file_created = ':'.join(file_created)[:-1] + self.file_created = line.decode().replace('File created: ', '').replace('File created:', '').replace('Analysis finished: ', '').replace('Analysis finished:', '').replace('\n', '') if line.startswith(b'Location:'): - self.network_location = ':'.join(line.decode().split(':')[1:])[:-1] + self.network_location = line.decode().replace('Location: ', '').replace('Location:', '').replace('\n', '') if line.startswith(b'Data start time:'): timestring = line.decode().split()[-2:] self.startday = dt.datetime.strptime(timestring[0],'%m/%d/%y') - # Full start time and second, likely unneeded + # Full start time and second self.starttime = dt.datetime.strptime(timestring[0]+timestring[1],'%m/%d/%y%H:%M:%S') # self.startsecond = (starttime-dt.datetime(starttime.year,starttime.month,starttime.day)).seconds + if line.startswith(b'Number of seconds analyzed:'): + self.analyzed_sec = int(line.decode().split(':')[-1]) # Find starting and ending rows for station information if line.startswith(b'Coordinate center'): self.center_lat = float(line.decode().split()[-3]) @@ -376,7 +583,7 @@ def __init__(self,filename): # Number of active stations if 
line.startswith(b'Number of active stations:'): self.active_station_c_line = line_no - self.active_staion_c_count = line.decode().split()[-1] + self.active_staion_c_count = int(line.decode().split()[-1]) # Active stations if line.startswith(b'Active stations:'): self.active_station_s_line = line_no @@ -385,14 +592,12 @@ def __init__(self,filename): self.minimum_stations = int(line.decode().split(':')[1]) if line.startswith(b'Maximum reduced chi-squared:'): self.maximum_chi2 = float(line.decode().split(':')[1]) - if line.startswith(b'Maximum chi-squared iterations:'): + if line.startswith(b'Maximum chi-squared iterations:') or line.startswith(b'Maximum number of chi-squared iterations:'): self.maximum_chi2_iter = int(line.decode().split(':')[1]) if line.startswith(b'Station information:'): self.station_info_start = line_no if line.startswith(b'Station data:'): self.station_data_start = line_no - if line.startswith(b'Metric file:'): - self.station_data_end = line_no # Find mask list order if line.startswith(b'Station mask order:'): self.maskorder = line.decode().split()[-1] @@ -414,13 +619,13 @@ def __init__(self,filename): # Station overview information stations = pd.DataFrame(self.gen_sta_info(), - columns=['ID','Name','Lat','Long','Alt','Delay Time']) + columns=['ID','Name','Lat','Long','Alt','Delay Time','Board Revision','Receiver Channels']) overview = pd.DataFrame(self.gen_sta_data(), columns=['ID','Name','win(us)', 'data_ver', 'rms_error(ns)', - 'sources','percent','
<P/P_m>
','active']) + 'sources','percent','
<P/P_m>
','active']).drop(columns=['Name']) # Drop the station name column that has a redundant station letter code # as part of the name and join on station letter code. - station_combo = stations.set_index('ID').drop(columns=['Name']).join( + station_combo = stations.set_index('ID').join( overview.set_index('ID')) self.stations = station_combo.reset_index(level=station_combo.index.names) @@ -439,7 +644,7 @@ def gen_sta_info(self): parts = line.decode("utf-8").split() name = ' '.join(parts[2:-6]) sta_info, code = parts[0:2] - yield (code, name) + tuple(parts[-6:-2]) + yield (code, name) + tuple(parts[-6:]) def gen_sta_data(self): """ Parse the station data table from the header. Some files do not @@ -488,7 +693,7 @@ def readfile(self): lmad.insert(1,'Datetime',pd.to_timedelta(lmad['time (UT sec of day)'], unit='s')+self.startday) # Parse out which stations contributed into new columns for each station - col_names = self.stations.Name.values + col_names = self.stations.Name.values.copy() self.mask_ints = mask_to_int(lmad["mask"]) for index,items in enumerate(self.maskorder[::-1]): col_names[index] = items+'_'+self.stations.Name.values[index] diff --git a/pyxlma/lmalib/io/write.py b/pyxlma/lmalib/io/write.py new file mode 100644 index 0000000..4599c0c --- /dev/null +++ b/pyxlma/lmalib/io/write.py @@ -0,0 +1,267 @@ +import importlib.metadata +import numpy as np +import datetime as dt +import warnings +import importlib +from functools import reduce +import gzip + +def dataset(dataset, path): + "Write an LMA dataset to a netCDF file" + + if 'flash_event_count' in dataset.data_vars: + if np.all(dataset.flash_event_count == np.iinfo(np.uint32).max): + dataset = dataset.drop_vars(['flash_init_latitude', 'flash_init_longitude', 'flash_init_altitude', 'flash_area', 'flash_volume', 'flash_energy', 'flash_center_latitude', 'flash_center_longitude', 'flash_center_altitude', 'flash_power', 'flash_event_count', 'flash_duration_threshold', 'flash_time_start', 'flash_time_end', 'flash_duration']) + + for var in dataset.data_vars: + if np.all(dataset[var].data == np.nan): + dataset = dataset.drop_vars(var) + for attr in dataset.attrs: + if type(dataset.attrs[attr]) == dt.datetime: + dataset.attrs[attr] = dataset.attrs[attr].strftime('%Y-%m-%d %H:%M:%S.%f') + dataset.attrs.pop('config_times', None) + comp = dict(zlib=True, complevel=5) + encoding = {var: comp for var in dataset.data_vars} + dataset = dataset.chunk('auto') + dataset.to_netcdf(path, encoding=encoding) + + +def count_decimals_in_array(a): + a = np.array(a).astype(str) + # Find the position of the decimal point in each string + decimal_positions = np.char.find(a, '.') + + # Handle the case where there are no decimal points + decimal_positions[decimal_positions == -1] = np.char.str_len(a[decimal_positions == -1]) + + # Calculate the number of characters after the decimal point + decimal_counts = np.char.str_len(a) - decimal_positions - 1 + + return np.max(decimal_counts) + + +def create_station_info_string(dataset): + """ + Create a string representation of the station information table for an LMA dataset. + + Parameters + ---------- + dataset : xarray.Dataset + The LMA dataset to create the station information table for. + + Returns + ------- + str + The string representation of the station information table. 
+ """ + if 'number_of_stations' not in dataset.dims: + raise ValueError('Dataset does not contain station information.') + header_line = 'Station information: ' + info_to_include = [] + if 'station_code' in dataset.data_vars: + header_line += 'id, ' + info_to_include.append('station_code') + else: + raise ValueError('Stations do not have letter codes. This is incompatible with the XLMA dat format, please assign station codes to all stations.') + + if 'station_name' in dataset.data_vars: + header_line += 'name, ' + info_to_include.append('station_name') + else: + raise ValueError('Stations do not have names. This is incompatible with the XLMA dat format, please assign names to all stations.') + + if 'station_latitude' in dataset.data_vars: + header_line += 'lat(d), ' + info_to_include.append('station_latitude') + else: + raise ValueError('Stations do not have latitudes. This is incompatible with the XLMA dat format, please assign latitudes to all stations.') + + if 'station_longitude' in dataset.data_vars: + header_line += 'lon(d), ' + info_to_include.append('station_longitude') + else: + raise ValueError('Stations do not have longitudes. This is incompatible with the XLMA dat format, please assign longitudes to all stations.') + + if 'station_altitude' in dataset.data_vars: + header_line += 'alt(m), ' + info_to_include.append('station_altitude') + else: + raise ValueError('Stations do not have altitudes. This is incompatible with the XLMA dat format, please assign altitudes to all stations.') + + if 'station_delay' in dataset.data_vars: + header_line += 'delay(ns), ' + info_to_include.append('station_delay') + # This is optional + + if 'station_board_revision' in dataset.data_vars: + header_line += 'board_rev, ' + info_to_include.append('station_board_revision') + else: + raise ValueError('Stations do not have board revisions. 
This is incompatible with the XLMA dat format, please assign board revisions to all stations.') + + if 'station_receive_channels' in dataset.data_vars: + header_line += 'rec_ch, ' + info_to_include.append('station_receive_channels') + # This is optional + header_line = header_line[:-2] + stations_strings = [] + for station_num in range(dataset.sizes['number_of_stations']): + this_station = dataset.isel(number_of_stations=station_num) + station_string = 'Sta_info: ' + for var in info_to_include: + station_string += f'{this_station[var].data.astype(str).item()} ' + stations_strings.append(station_string) + return header_line+'\n'+'\n'.join(stations_strings) + + +def create_station_data_string(dataset): + pass + + +def create_event_data_string(dataset): + if 'analysis_start_time' in dataset.attrs: + start_date = dataset.attrs['analysis_start_time'] + start_date = np.array([start_date]).astype('datetime64[D]') + else: + start_date = dataset.event_time.data.min().astype('datetime64[D]') + columns_line = 'Data: ' + format_line = 'Data format: ' + data_to_combine = [] + + day_sec = (dataset.event_time - start_date).data.astype('timedelta64[ns]').astype(np.float64)/1e9 + day_sec_str = day_sec.astype(str) + day_sec_length = np.max(np.char.str_len(day_sec_str)) + day_sec_str = np.char.ljust(day_sec_str, day_sec_length, fillchar='0') + columns_line += 'time (UT sec of day), ' + format_line += f'{day_sec_length}.9f, ' #time is always in nanoseconds + data_to_combine.append(day_sec_str.T) + + lat = dataset.event_latitude.data.astype(str) + lat_length = np.max(np.char.str_len(lat)) + lat = np.char.ljust(lat, lat_length, fillchar='0') + columns_line += 'lat, ' + lat_dec_count = count_decimals_in_array(lat) + format_line += f'{lat_length}.{lat_dec_count}f, ' + data_to_combine.append(lat.T) + + lon = dataset.event_longitude.data.astype(str) + lon_length = np.max(np.char.str_len(lon)) + lon = np.char.ljust(lon, lon_length, fillchar='0') + columns_line += 'lon, ' + lon_dec_count = count_decimals_in_array(lon) + format_line += f'{lon_length}.{lon_dec_count}f, ' + data_to_combine.append(lon.T) + + alt = dataset.event_altitude.data.astype(str) + alt_length = np.max(np.char.str_len(alt)) + alt = np.char.ljust(alt, alt_length, fillchar='0') + columns_line += 'alt(m), ' + alt_dec_count = count_decimals_in_array(alt) + format_line += f'{alt_length}.{alt_dec_count}f, ' + data_to_combine.append(alt.T) + + chi2 = dataset.event_chi2.data.astype(str) + chi2_length = np.max(np.char.str_len(chi2)) + chi2 = np.char.ljust(chi2, chi2_length, fillchar='0') + columns_line += 'reduced chi^2, ' + chi2_dec_count = count_decimals_in_array(chi2) + format_line += f'{chi2_length}.{chi2_dec_count}f, ' + data_to_combine.append(chi2.T) + + power = dataset.event_power.data.astype(str) + power_length = np.max(np.char.str_len(power)) + power = np.char.ljust(power, power_length, fillchar='0') + columns_line += 'P(dBW), ' + power_dec_count = count_decimals_in_array(power) + format_line += f'{power_length}.{power_dec_count}f, ' + data_to_combine.append(power.T) + + masks = np.vectorize(np.base_repr)(dataset.event_mask.data, base=16) + mask_len = np.max(np.char.str_len(masks)) + masks = np.char.rjust(masks, mask_len, fillchar='0') + masks = '0x' + masks + mask_len += 2 + columns_line += 'mask, ' + format_line += f'{mask_len}x, ' + data_to_combine.append(masks.T) + + columns_line = columns_line[:-2] + format_line = format_line[:-2] + N_ev_line = f'Number of events: {dataset.sizes["number_of_events"]}' + data_line = '*** data ***' + data_str = 
'\n'.join(reduce(lambda x, y:np.char.add(np.char.add(x, ' '), y), data_to_combine)) + return columns_line+'\n'+format_line+'\n'+N_ev_line+'\n'+data_line+'\n'+data_str + + + + + +def lma_dat_file(dataset, path, use_gzip=True): + """ + Write an LMA dataset to a legacy .dat(.gz) file for use in IDL XLMA. + + Parameters + ---------- + dataset : xarray.Dataset + The LMA dataset to write. + path : str + The target path to the file to write. + use_gzip : bool + Whether to compress the file using gzip. + """ + + if 'network_configurations' in dataset.dims: + warnings.warn('Attempting to write dataset with multiple network configurations to a single file. This is highly experimental.\n' + 'Caveats:\n-Location names will be appended and separated by semicolon\n- The center of the network is derived from the mean of the locations of the stations') + stations_active = np.sum(dataset.station_active.data, axis=0) + num_active = np.sum(np.clip(stations_active, 0, 1)) + stations_active = ' '.join(np.extract(stations_active, dataset.station_code.data).astype(str).tolist()) + else: + num_active = np.sum(dataset.station_active.data) + stations_active = ' '.join(np.extract(dataset.station_active.data, dataset.station_code.data).astype(str).tolist()) + + center_lon = dataset.network_center_longitude.data.item() + center_lat = dataset.network_center_latitude.data.item() + center_alt = dataset.network_center_altitude.data.item() + + unique_codes, code_counts = np.unique(dataset.station_code.data, return_counts=True) + if np.max(code_counts) > 1: + raise ValueError('Duplicate station codes found in dataset. This is incompatible with the XLMA dat format, please rename conflicting station letters.') + if b'' in unique_codes: + raise ValueError('Empty station codes found in dataset. 
This is incompatible with the XLMA dat format, please assign station codes to all stations.') + + if dataset.attrs["event_algorithm_name"].startswith('pyxlma'): + analysis_program = dataset.attrs["event_algorithm_name"] + else: + analysis_program = 'pyxlma; '+dataset.attrs["event_algorithm_name"] + if dataset.attrs["event_algorithm_version"].startswith('pyxlma'): + analysis_program_version = dataset.attrs["event_algorithm_version"] + else: + analysis_program_version = f'pyxlma-{importlib.metadata.version("pyxlma")}; {dataset.attrs["event_algorithm_version"]}' + + lma_file = (f'pyxlma exported data -- https://github.com/deeplycloudy/xlma-python\n' + f'Analysis program: {analysis_program}\n' + f'Analysis program version: {analysis_program_version}\n' + f'File created: {dt.datetime.utcnow().strftime("%a %b %d %H:%M:%S %Y")}\n' + f'Data start time: {dataset.attrs["analysis_start_time"].strftime("%m/%d/%y %H:%M:%S")}\n' + f'Number of seconds analyzed: {(dataset.attrs["analysis_end_time"] - dataset.attrs["analysis_start_time"]).total_seconds()}\n' + f'Location: {"; ".join(np.unique(dataset.station_network.data).astype(str).tolist())}\n' + f'Coordinate center (lat,lon,alt): {center_lat:.7f}, {center_lon:.7f}, {center_alt:.2f}\n' + f'Coordinate frame: cartesian\n' + f'Number of stations: {dataset.sizes['number_of_stations']}\n' + f'Number of active stations: {num_active}\n' + f'Active stations: {stations_active}\n' + f'Minimum number of stations per solution: {dataset.attrs["min_stations"]}\n' + f'Maximum reduced chi-squared: {dataset.attrs["max_chi2"]}\n' + f'Maximum number of chi-squared iterations: {dataset.attrs["max_chi2_iterations"]}\n' + f'{create_station_info_string(dataset)}\n' + f'Station mask order: {dataset.attrs["station_mask_order"]}\n' + ### STATION DATA TABLE + f'{create_event_data_string(dataset)}' + ) + if use_gzip: + with gzip.open(path, 'wb') as f: + f.write(lma_file.encode('utf-8')) + else: + with open(path, 'w') as f: + f.write(lma_file) \ No newline at end of file diff --git a/tests/test_io.py b/tests/test_io.py index 85e8d74..9277cdb 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -15,7 +15,9 @@ def test_read_mf_dataset(): files_to_read = ['examples/data/'+file for file in files_to_read] dataset, start_date = lma_read.dataset(files_to_read) assert start_date == dt(2023, 12, 24, 0, 57, 1) - assert dataset == xr.open_dataset('tests/truth/lma_netcdf/lma.nc') + truth = xr.open_dataset('tests/truth/lma_netcdf/lma.nc') + for var in truth.data_vars: + compare_dataarrays(dataset, truth, var) def test_read_onefile_dataset():
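Usage sketch for the read/write workflow introduced above. The data file paths are hypothetical placeholders; the calls use pyxlma.lmalib.io.read.dataset together with the new pyxlma.lmalib.io.write.dataset and pyxlma.lmalib.io.write.lma_dat_file added in this diff:

from pyxlma.lmalib.io import read as lma_read
from pyxlma.lmalib.io import write as lma_write

# Hypothetical LMA source files covering consecutive analysis windows.
files = ['examples/data/lma_file_1.dat.gz',
         'examples/data/lma_file_2.dat.gz']

# Read and combine the files; combine_datasets reconciles the station lists
# (new, missing, or reordered stations) across the individual files.
ds, start_time = lma_read.dataset(files, sort_time=True)

# Write the combined dataset to a compressed netCDF file.
lma_write.dataset(ds, 'combined_lma.nc')

# Or export a legacy .dat.gz file readable by IDL XLMA.
lma_write.lma_dat_file(ds, 'combined_lma.dat.gz', use_gzip=True)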