From db50fe5990139235392d4ff595d65f20fbba17b8 Mon Sep 17 00:00:00 2001
From: Gary Pavlis
Date: Sat, 5 Apr 2025 05:25:15 -0400
Subject: [PATCH 1/7] Add usarray processing templates

---
 .../USArrayProcessExample/CNRDeconEngine.pf   |   34 +
 notebooks/USArrayProcessExample/CNRRFDecon.py |  199 +++
 .../CreateMasterDatabase.ipynb                | 1235 +++++++++++++++++
 .../Preprocess2seis_2014.ipynb                |  348 +++++
 .../Preprocess2seis_template.ipynb            |  346 +++++
 .../Preprocess2ts_2014.ipynb                  |  427 ++++++
 .../Preprocess2ts_template.ipynb              |  392 ++++++
 notebooks/USArrayProcessExample/Sidebar.ipynb |  150 ++
 .../WorkflowREADME.ipynb                      |  168 +++
 .../index_mseed_template.ipynb                |  137 ++
 10 files changed, 3436 insertions(+)
 create mode 100644 notebooks/USArrayProcessExample/CNRDeconEngine.pf
 create mode 100644 notebooks/USArrayProcessExample/CNRRFDecon.py
 create mode 100644 notebooks/USArrayProcessExample/CreateMasterDatabase.ipynb
 create mode 100644 notebooks/USArrayProcessExample/Preprocess2seis_2014.ipynb
 create mode 100644 notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb
 create mode 100644 notebooks/USArrayProcessExample/Preprocess2ts_2014.ipynb
 create mode 100644 notebooks/USArrayProcessExample/Preprocess2ts_template.ipynb
 create mode 100644 notebooks/USArrayProcessExample/Sidebar.ipynb
 create mode 100644 notebooks/USArrayProcessExample/WorkflowREADME.ipynb
 create mode 100644 notebooks/USArrayProcessExample/index_mseed_template.ipynb

diff --git a/notebooks/USArrayProcessExample/CNRDeconEngine.pf b/notebooks/USArrayProcessExample/CNRDeconEngine.pf
new file mode 100644
index 0000000..9fbc68b
--- /dev/null
+++ b/notebooks/USArrayProcessExample/CNRDeconEngine.pf
@@ -0,0 +1,34 @@
+# pf for testing new CNRDeconEngine implementation
+# values are not necessarily reasonable.
+#
+# these are required by FFTDeconOperator
+#
+target_sample_interval 0.05
+operator_nfft 4096
+deconvolution_data_window_start -5.0
+deconvolution_data_window_end 100.0
+#
+# parameters for CNRDeconEngine
+#
+algorithm colored_noise_damping
+damping_factor 0.1
+noise_floor 0.01
+snr_regularization_floor 2.0
+snr_data_bandwidth_floor 1.5
+noise_window_start -200.0
+noise_window_end -5.0
+time_bandwidth_product 2.5
+number_tapers 4
+
+#
+# these are required to generate the ShapingWavelet object
+# that is an attribute of the operator
+#
+shaping_wavelet_dt 0.05
+shaping_wavelet_type butterworth
+npoles_lo 3
+npoles_hi 3
+# these are only initial values - they are adjusted dynamically
+# in this algorithm
+f3db_lo 0.02
+f3db_hi 1.5
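A minimal sketch of how this parameter file is consumed, useful when experimenting with tunables like damping_factor without editing the file. It assumes AntelopePf retains the Metadata-style put interface of MsPASS; the 0.5 value is purely illustrative, not a recommendation:

    from mspasspy.ccore.utility import AntelopePf
    from mspasspy.ccore.algorithms.deconvolution import CNRDeconEngine

    pf = AntelopePf("CNRDeconEngine.pf")
    # override a single tunable before constructing the operator
    # (assumes AntelopePf inherits Metadata's put method)
    pf.put("damping_factor", 0.5)
    engine = CNRDeconEngine(pf)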
diff --git a/notebooks/USArrayProcessExample/CNRRFDecon.py b/notebooks/USArrayProcessExample/CNRRFDecon.py
new file mode 100644
index 0000000..303c78c
--- /dev/null
+++ b/notebooks/USArrayProcessExample/CNRRFDecon.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Run script for CNRDecon after preprocessing is completed.
+
+Created on Fri Feb 28 08:31:33 2025
+
+@author: pavlis
+"""
+from mspasspy.ccore.utility import AntelopePf
+from mspasspy.ccore.algorithms.deconvolution import CNRDeconEngine
+from mspasspy.algorithms.CNRDecon import CNRRFDecon
+from mspasspy.ccore.algorithms.basic import TimeWindow
+from mspasspy.ccore.seismic import SeismogramEnsemble,TimeSeries,Seismogram
+from mspasspy.ccore.algorithms.amplitudes import MADAmplitude
+from mspasspy.algorithms.window import WindowData
+from mspasspy.algorithms.basic import ExtractComponent
+from mspasspy.util.seismic import number_live
+from mspasspy.util.Undertaker import Undertaker
+import numpy as np
+import dask.bag as dbg
+import time
+
+
+# change for different calendar year
+year = 2014
+dbname = "usarray{}".format(year)
+tsdir="./wf_TimeSeries/{}".format(year)
+dbmaster="usarray48"
+import mspasspy.client as msc
+mspass_client = msc.Client(database_name=dbname)
+# waveform data indexed to here
+db = mspass_client.get_database()
+# master database with source and receiver metadata
+dbmd = mspass_client.get_database(dbmaster)
+
+
+def remove_incident(d,ao,nw_end=-3.0,P_component=2):
+    """
+    Remove the incident wavefield from Seismogram d by subtracting the
+    actual output wavelet, ao, scaled by its dot product with each
+    component.  Note the scale is correct only if ao is unit normalized.
+    nw_end is the (relative) end time of the noise window and P_component
+    is the index of the component holding the direct P signal.  Posts
+    component and horizontal snr estimates to d with the key 'snr_RF'.
+    """
+    if d.dead():
+        return d
+    stime = max(d.t0,ao.t0)
+    etime = min(d.endtime(),ao.endtime())
+    x_ao = WindowData(ao,stime,etime)
+    s_amp = np.zeros(3)
+    n_amp = np.zeros(3)
+    snr = np.zeros(3)
+    for i in range(3):
+        x = ExtractComponent(d,i)
+        x_n = WindowData(x,d.t0,nw_end)
+        n_amp[i] = MADAmplitude(x_n)
+        x_w = WindowData(x,stime,etime)
+        amp = np.dot(x_ao.data,x_w.data)
+        s_amp[i] = amp
+        if n_amp[i]>0.0:
+            snr[i] = s_amp[i]/n_amp[i]
+        else:
+            if s_amp[i]>0.0:
+                snr[i]=9999999.9
+            else:
+                # signal and noise amplitudes are both zero so snr is
+                # undefined - use a neutral value of 1
+                snr[i]=1.0
+        scaled_ao = TimeSeries(x_ao)
+        # Note -amp used so when we add scaled it is subtracting incident
+        scaled_ao *= -amp
+        # operator += handles irregular start and end time
+        # a taper may be advised but in this context is probably not necessary
+        x += scaled_ao
+        d.data[i,:] = x.data
+
+    # post snr metrics before exiting
+    snrdoc=dict()
+    # MongoDB may not handle numpy types correctly so convert to a
+    # plain python list of floats
+    snrlist=list()
+    namplist=list()
+    for i in range(3):
+        snrlist.append(float(snr[i]))
+        namplist.append(float(n_amp[i]))
+    snrdoc['component_snr']=snrlist
+    snrdoc['component_noise']=namplist
+    # vector sum of incident horizontal snr is a more useful metric than
+    # the component values
+    sig_H = 0.0
+    noise_H = 0.0
+    for i in range(3):
+        if i != P_component:
+            sig_H += s_amp[i]*s_amp[i]
+            noise_H += n_amp[i]*n_amp[i]
+    sig_H = np.sqrt(sig_H)
+    noise_H = np.sqrt(noise_H)
+    if noise_H < np.finfo(float).eps:
+        if sig_H < np.finfo(float).eps:
+            snr_H=1.0
+        else:
+            snr_H = sig_H/np.finfo(float).eps
+    else:
+        snr_H = sig_H/noise_H
+    snrdoc['snr_H'] = snr_H
+    d['snr_RF'] = snrdoc
+    return d
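+# Toy illustration of the projection logic in remove_incident.  The arrays
+# w and x below are hypothetical stand-ins for the actual output wavelet and
+# one data component; this is a sketch, not part of the production workflow.
+# It shows why the raw np.dot scale factor requires a unit normalized wavelet.
+if False:   # flip to True to run the illustration
+    w = np.array([0.0, 1.0, 0.5, 0.0])
+    w /= np.linalg.norm(w)                  # unit normalize the wavelet
+    x = 2.0*w + np.array([0.0, 0.1, -0.1, 0.0])
+    amp = np.dot(w, x)                      # projection amplitude
+    x_clean = x - amp*w                     # analog of the scaled_ao subtraction
+    print(np.dot(w, x_clean))               # ~0: wavelet component removed
+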
+#from mspasspy.graphics import SeismicPlotter
+#import matplotlib.pyplot as plt
+def process(sid,db,dbmd,engine,signal_window,noise_window,data_tag=None):
+    query = {'source_id' : sid}
+    ntotal = db.wf_Seismogram.count_documents(query)
+    query['Parrival.median_snr'] = {'$gt' : 2.0}
+    query['Parrival.snr_filtered_envelope_peak'] = {'$gt' : 3.0}
+    query['Parrival.bandwidth'] = {'$gt' : 8.0}
+    if data_tag:
+        query['data_tag'] = data_tag
+    cursor=db.wf_Seismogram.find(query)
+    ens = db.read_data(cursor,collection="wf_Seismogram")
+    print('source_id=',sid,' Processing ',len(ens.member),' of ',ntotal)
+    decon_ens = SeismogramEnsemble(len(ens.member))
+    for d in ens.member:
+        if d.live:
+            Ptime=d['Ptime']
+            d.ator(Ptime)
+            decon_output = CNRRFDecon(d,
+                                      engine,
+                                      signal_window=signal_window,
+                                      noise_window=noise_window,
+                                      bandwidth_subdocument_key="Parrival",
+                                      return_wavelet=True,
+                                      )
+            rf0 = decon_output[0]
+            if rf0.dead():
+                # it is a bit inefficient to instantiate an instance of
+                # an Undertaker for each error but CNRRFDecon exceptions
+                # are not common unless the engine is badly configured.
+                stedronsky = Undertaker(dbmd)
+                stedronsky.bury(rf0)
+            else:
+                ao = decon_output[1]
+                io = decon_output[2]
+                rf = remove_incident(rf0, ao)
+                # temp for debug with spyder - no save
+                #if rf:
+                    #plotter=SeismicPlotter(normalize=True,style='wt')
+                    #plotter.plot(rf)
+                    #plt.show()
+                #continue
+                dir="wf_RF"
+                s=rf['dfile']
+                dfile = "RF"+s
+                rf0=dbmd.save_data(rf0,
+                                   collection='wf_Seismogram',
+                                   return_data=False,
+                                   storage_mode='file',
+                                   dir=dir,
+                                   dfile=dfile,
+                                   data_tag='CNRRFDecon_data_raw')
+                rf=dbmd.save_data(rf,
+                                  collection='wf_Seismogram',
+                                  return_data=True,
+                                  storage_mode='file',
+                                  dir=dir,
+                                  dfile=dfile,
+                                  data_tag='CNRRFDecon_data')
+
+                if rf.live:
+                    wfid = rf['_id']
+                    ao['parent_wfid'] = wfid
+                    io['parent_wfid'] = wfid
+                    dfile = "ao_" + s
+                    dbmd.save_data(ao,collection='wf_TimeSeries',storage_mode='file',dir=dir,dfile=dfile,data_tag='CNRRFDecon_ao')
+                    dfile = "io_" + s
+                    dbmd.save_data(io,collection='wf_TimeSeries',
+                                   storage_mode='file',
+                                   dir=dir,dfile=dfile,
+                                   data_tag='CNRRFDecon_io')
+                    decon_ens.member.append(rf)
+                else:
+                    print("WARNING: ensemble member was marked dead - has to be an abortion")
+    if number_live(decon_ens)>0:
+        decon_ens.set_live()
+    return decon_ens
+
+pf=AntelopePf('CNRDeconEngine.pf')
+engine = CNRDeconEngine(pf)
+signal_window=TimeWindow(-10.0,100.0)
+noise_window=TimeWindow(-200.0,-5.0)
+
+sidlist=db.wf_Seismogram.distinct('source_id')
+t0=time.time()
+nprocessed=0
+for sid in sidlist:
+    print("Working on source_id=",sid)
+    rfens = process(sid,db,dbmd,engine,signal_window,noise_window)
+    nprocessed+=1
+#mydata = dbg.from_sequence(sidlist)
+#mydata = mydata.map(process,db,engine,signal_window,noise_window)
+#mydata = mydata.map(terminator)
+#mydata.compute()
+t=time.time()
+print("Elapsed time=",t-t0)
\ No newline at end of file
diff --git a/notebooks/USArrayProcessExample/CreateMasterDatabase.ipynb b/notebooks/USArrayProcessExample/CreateMasterDatabase.ipynb
new file mode 100644
index 0000000..4a6bc26
--- /dev/null
+++ b/notebooks/USArrayProcessExample/CreateMasterDatabase.ipynb
@@ -0,0 +1,1235 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6f8a5a81-899c-471e-ad8d-da997d4d4341",
+   "metadata": {},
+   "source": [
+    "# Download Master Metadata\n",
+    "This notebook creates a master database with all source and receiver metadata loaded into the standard MsPASS collections called \"source\", \"channel\", and \"site\". The collections are large because they span the entire time range of operation of the TA in the lower 48 states. To reduce the memory footprint, yearly processing scripts should subset these collections to only the documents spanning the data time range using the query argument of the normalization matcher classes (see the sketch following this cell).\n",
+    "\n",
+    "Earlier versions of the usarray workflow downloaded source and receiver metadata in year blocks. I realized, however, that that approach created a mess at the end of preprocessing, when all the yearly databases had to be merged into a single master for input to pwmig or related processing of the full data set. With this model the final merge uses the yearly databases as input and writes all outputs to the master database created by this notebook."
+   ]
+  },
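The subsetting described above happens when a matcher is constructed. A minimal sketch, assuming the cached matcher classes in mspasspy.db.normalize accept a MongoDB query dict through their query argument; MiniseedMatcher and the starttime/endtime attribute names here are assumptions to verify against the installed MsPASS release:

    from obspy import UTCDateTime
    from mspasspy.db.normalize import MiniseedMatcher
    # load only channel documents whose operating epoch overlaps calendar year 2014
    ys = UTCDateTime("2014-01-01T00:00:00.0").timestamp
    ye = UTCDateTime("2015-01-01T00:00:00.0").timestamp
    query = {"starttime": {"$lt": ye}, "endtime": {"$gt": ys}}
    channel_matcher = MiniseedMatcher(db, collection="channel", query=query)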
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ae189c51-2fc5-4ca9-a1a5-e9db2b0655f2", + "metadata": {}, + "outputs": [], + "source": [ + "dbname = \"usarray48\"\n", + "mdts = \"2005-01-01T00:00:00.0\"\n", + "mdte = \"2016-01-01T00:00:00.0\"\n", + "# obspy requires data converted to their UTCDateTime object \n", + "from obspy import UTCDateTime\n", + "starttime=UTCDateTime(mdts)\n", + "endtime=UTCDateTime(mdte)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f6cb2afd-31e7-4414-956d-7172e12cf03a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import mspasspy.client as msc\n", + "mspass_client = msc.Client(database_name=dbname)\n", + "db = mspass_client.get_database()" + ] + }, + { + "cell_type": "markdown", + "id": "932f2393-2cde-4423-9e84-62a6452c12a3", + "metadata": {}, + "source": [ + "## Source metadata\n", + "This procedure will download way more sources than what was used in the original download but that is preferable to dropping a lot of data. There are two complications:\n", + "1. The large size of usarray requires a large range for the core shadow edge. The alternative would be the more complicated procedure used in the original download where we used a center point. That is worse, however, as it drops stations that ran continuously during the entire array operation.\n", + "2. I drop the magnitude threshold here from the original download size which drastically increases the source collection size. Hopefully there will be few if any overlaps and those that are can be resolved." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "101cd78b-3917-4d12-b439-53feb9bd3900", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from obspy.clients.fdsn import Client\n", + "client=Client(\"IRIS\")\n", + "\n", + "# lower 48 extreme range size is about 48 degrees - round up to 50\n", + "# point below is geographic center of the lower 48. Use range of 105 maximum \n", + "# from that point with a padding factor or 25 degrees. i.e. distance range \n", + "# up to 105+25=130 degrees. Minimum is set to 15 degrees but workflow will need to \n", + "# do more to limit near distances to workable range for P receiver functions.\n", + "# 15 should work to allow use of eastern us stations with california earthquake \n", + "# although that is likely a small number\n", + "lat0=39.8\n", + "lon0=-98.6\n", + "minmag=4.5\n", + "minradius=15.0\n", + "maxradius=130.0\n", + "\n", + "\n", + "cat=client.get_events(starttime=starttime,\n", + " endtime=endtime,\n", + " latitude=lat0,\n", + " longitude=lon0,\n", + " minradius=minradius,\n", + " maxradius=maxradius,\n", + " minmagnitude=minmag)\n", + "# this is a weird incantation suggested by obspy to print a summeary of all the events\n", + "#print(cat.__str__(print_all=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4d5b081f-e1ab-4b7a-8dd1-86d6f3784342", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "number of event entries saved in source collection= 79207\n" + ] + } + ], + "source": [ + "n=db.save_catalog(cat)\n", + "print('number of event entries saved in source collection=',n)" + ] + }, + { + "cell_type": "markdown", + "id": "6455d225-a042-4719-bfaa-8fcde68a9180", + "metadata": {}, + "source": [ + "## Receiver Metadata\n", + "This is pretty much the same as the way we have done this process in tutorials and in the prototype I developed earlier downloading these data by year. 
+  {
+   "cell_type": "markdown",
+   "id": "6455d225-a042-4719-bfaa-8fcde68a9180",
+   "metadata": {},
+   "source": [
+    "## Receiver Metadata\n",
+    "This is essentially the same procedure we have used in the tutorials and in the prototype I developed earlier that downloaded these data by year. The only difference here is that the time range is longer."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "c060b236-725b-4e1f-b507-c8801db79b4f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chan=\"BH?\"\n",
+    "# these define a generous box around the lower 48 states. It may not \n",
+    "# exactly match the original download script as it is likely somewhat larger than \n",
+    "# the download \n",
+    "minlongitude= -140.0\n",
+    "maxlongitude= -60\n",
+    "minlatitude= 22.0\n",
+    "maxlatitude= 52.0\n",
+    "inv=client.get_stations(starttime=starttime,\n",
+    "                        endtime=endtime,\n",
+    "                        minlongitude=minlongitude,\n",
+    "                        maxlongitude=maxlongitude,\n",
+    "                        minlatitude=minlatitude,\n",
+    "                        maxlatitude=maxlatitude,\n",
+    "                        format='xml',channel=chan,level='response')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ed366c2d-0354-4ce4-8b28-6cf6e48457e9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[... several hundred similar '(Warning): station section position is not consistent with loc code position' messages, one per affected station in networks BK, CC, CI, CN, G, IU, LD, and N4, omitted here; in each case the loc code section coordinates override the station section values ...]\n",
34.451698 -87.045197 0.18\n", + "N4 : X51A : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.5658 -84.8574 0.214\n", + "loc code section coordinates: 34.5658 -84.857399 0.214\n", + "N4 : X58A : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.5548 -79.3388 0.045\n", + "loc code section coordinates: 34.554798 -79.338799 0.045\n", + "N4 : Y45B : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.8656 -89.5431 0.101\n", + "loc code section coordinates: 33.865601 -89.543098 0.101\n", + "N4 : Y49A : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.8577 -86.4119 0.362\n", + "loc code section coordinates: 33.8577 -86.411903 0.362\n", + "N4 : Y52A : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.864 -84.0626 0.286\n", + "loc code section coordinates: 33.863998 -84.062599 0.286\n", + "N4 : Y57A : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.017 -80.3915 0.056\n", + "loc code section coordinates: 34.016998 -80.391502 0.056\n", + "N4 : Y58A : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.9057 -79.6665 0.019\n", + "loc code section coordinates: 33.905701 -79.666496 0.019\n", + "N4 : Y60A : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.0046 -78.2163 0.007\n", + "loc code section coordinates: 34.004601 -78.216301 0.007\n", + "N4 : Z35B : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.3309 -97.253 0.234\n", + "loc code section coordinates: 33.330898 -97.252998 0.234\n", + "N4 : Z38B : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.2599 -94.9852 0.115\n", + "loc code section coordinates: 33.259899 -94.985199 0.115\n", + "N4 : Z47B : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.1989 -88.0696 0.064\n", + "loc code section coordinates: 33.198898 -88.069603 0.064\n", + "NE : VT1 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 44.3206 -72.7513 0.14241\n", + "loc code section coordinates: 44.3206 -72.7513 0.149\n", + "NE : YLE : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 41.3165 -72.9208 
0.041\n", + "loc code section coordinates: 41.3165 -72.9208 0.005\n", + "NM : UTMT : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 36.3421 -88.8654 0.12\n", + "loc code section coordinates: 36.3497 -88.8636 0.11\n", + "PI : EP01 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.073299 -106.919296 1.43\n", + "loc code section coordinates: 34.074001 -106.918999 1.43\n", + "PI : EP02 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.073299 -106.919296 1.43\n", + "loc code section coordinates: 34.074001 -106.918999 1.43\n", + "PI : EP03 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.073299 -106.919296 1.43\n", + "loc code section coordinates: 34.074001 -106.918999 1.43\n", + "PI : EP04 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.073299 -106.919296 1.43\n", + "loc code section coordinates: 34.074001 -106.918999 1.43\n", + "PI : NP05 : 05 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.074001 -106.918999 1.43\n", + "loc code section coordinates: 34.0737 -106.918999 1.43\n", + "PI : NP05 : 30 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.074001 -106.918999 1.43\n", + "loc code section coordinates: 34.0737 -106.918999 1.43\n", + "PI : NP05 : 31 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.074001 -106.918999 1.43\n", + "loc code section coordinates: 34.0737 -106.918999 1.43\n", + "PI : NP05 : 32 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.074001 -106.918999 1.43\n", + "loc code section coordinates: 34.0737 -106.918999 1.43\n", + "PI : WP01 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.073299 -106.919296 1.43\n", + "loc code section coordinates: 34.074001 -106.918999 1.43\n", + "PI : WP03 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.073299 -106.919296 1.43\n", + "loc code section coordinates: 34.074001 -106.918999 1.43\n", + "PI : WP04 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 34.073299 -106.919296 1.43\n", + "loc code section coordinates: 34.074001 -106.918999 1.43\n", + "PN : PPBLN : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code 
section overrides station section\n", + "Station section coordinates: 39.198002 -86.547997 0.235\n", + "loc code section coordinates: 39.1987 -86.549797 0.2316\n", + "PN : PPFAY : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 41.673199 -84.324097 0.237\n", + "loc code section coordinates: 41.678001 -84.324997 0.237\n", + "PN : PPMAC : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 44.936699 -93.168701 0.2886\n", + "loc code section coordinates: 44.934212 -93.831818 0.2886\n", + "PN : PPNVW : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 39.518002 -87.166 -0.999\n", + "loc code section coordinates: 39.518002 -87.166 0.0\n", + "PN : PPSPR : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 37.328201 -102.624901 1.33\n", + "loc code section coordinates: 37.41 -102.623001 1.33\n", + "PN : PPUHS : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 45.916698 -119.334999 0.098\n", + "loc code section coordinates: 45.916 -119.334999 0.098\n", + "RB : BAHB : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 28.944 -113.561996 0.035\n", + "loc code section coordinates: 28.944 -113.561996 0.02\n", + "RB : GUAY : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 27.89866 -110.87272 0.05\n", + "loc code section coordinates: 27.89866 -110.87272 0.0\n", + "RB : PPXB : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 31.335 -113.632 0.01\n", + "loc code section coordinates: 31.335 -113.632 0.0\n", + "US : ECSD : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 43.7337 -96.6141 0.478\n", + "loc code section coordinates: 43.7337 -96.6141 0.43289999999999995\n", + "US : ECSD : 00 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 43.7337 -96.6141 0.478\n", + "loc code section coordinates: 43.7337 -96.6141 0.43289999999999995\n", + "US : ECSD : 10 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 43.7337 -96.6141 0.478\n", + "loc code section coordinates: 43.7337 -96.6141 0.3506\n", + "US : ECSD : HR (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 43.7337 -96.6141 0.478\n", + "loc code section coordinates: 43.7337 -96.6141 0.3506\n", + "US : GOGA : (Warning): station 
section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.41476 -83.47327 0.15\n", + "loc code section coordinates: 33.41476 -83.47327 0.1165\n", + "US : GOGA : 00 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.41476 -83.47327 0.15\n", + "loc code section coordinates: 33.41476 -83.47327 0.1165\n", + "US : GOGA : 10 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.41476 -83.47327 0.15\n", + "loc code section coordinates: 33.41476 -83.47327 0.0174\n", + "US : GOGA : HR (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 33.41476 -83.47327 0.15\n", + "loc code section coordinates: 33.41476 -83.47327 0.0174\n", + "US : KSU1 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 39.1009 -96.6094 0.317\n", + "loc code section coordinates: 39.1009 -96.6094 0.2865\n", + "US : KSU1 : 00 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 39.1009 -96.6094 0.317\n", + "loc code section coordinates: 39.1009 -96.6094 0.2963\n", + "US : KSU1 : HR (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 39.1009 -96.6094 0.317\n", + "loc code section coordinates: 39.1009 -96.6094 0.21180000000000002\n", + "US : RSSD : 00 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 44.120399 -104.036003 2.06\n", + "loc code section coordinates: 44.120419 -104.036186 1.9917\n", + "UU : SRU : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 39.11083 -110.52383 1.804\n", + "loc code section coordinates: 39.110828 -110.523827 1.804\n", + "UU : SRU : 01 (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 39.11083 -110.52383 1.804\n", + "loc code section coordinates: 39.110828 -110.523827 1.804\n", + "UW : BLOW : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 44.6837 -122.1863 0.6561\n", + "loc code section coordinates: 44.6837 -122.1862 0.6573\n", + "UW : DDRF : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 46.4911 -119.0595 0.238\n", + "loc code section coordinates: 46.4911 -119.0595 0.2381\n", + "UW : GNW : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 47.56413 -122.82498 0.22\n", + "loc code 
section coordinates: 47.564129 -122.824982 0.22\n", + "UW : HOOD : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 45.322262 -121.650932 1.5288\n", + "loc code section coordinates: 45.322262 -121.650932 1.512\n", + "UW : LTY : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 47.25451 -120.66636 0.807\n", + "loc code section coordinates: 47.254509 -120.666359 0.807\n", + "UW : SP2 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 47.55629 -122.249229 0.0222\n", + "loc code section coordinates: 47.55629 -122.249229 0.03\n", + "UW : TAKO : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 43.74313 -124.08337 0.012\n", + "loc code section coordinates: 43.74334 -124.082474 0.046\n", + "UW : TOLO : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 44.62161 -123.92305 0.012\n", + "loc code section coordinates: 44.621609 -123.92305 0.012\n", + "UW : TOLT : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 47.6947 -121.6895 0.541\n", + "loc code section coordinates: 47.694698 -121.689499 0.541\n", + "UW : YACT : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 45.9325 -122.4193 0.214\n", + "loc code section coordinates: 45.9325 -122.4193 0.2112\n", + "XA : YAC1 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 44.297501 -124.088402 0.32\n", + "loc code section coordinates: 44.29752 -124.088417 0.32\n", + "XD : MP06 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 45.831379 -122.195908 0.796\n", + "loc code section coordinates: 45.83138 -122.19591 2.61\n", + "XU : BS01 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 47.9608 -122.934799 0.5341\n", + "loc code section coordinates: 47.9608 -122.934799 0.0\n", + "XU : BS02 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 47.958698 -122.930496 0.6057\n", + "loc code section coordinates: 47.958599 -122.930603 0.0\n", + "XU : BS03 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station section\n", + "Station section coordinates: 47.9548 -122.932999 0.579\n", + "loc code section coordinates: 47.9548 -122.932999 0.0\n", + "XU : BS04 : (Warning): station section position is not consistent with loc code position\n", + "Data in loc code section overrides station 
section\n",
+ "Station section coordinates: 47.954899 -122.927299 0.5741\n",
+ "loc code section coordinates: 47.9548 -122.927299 0.0\n",
+ "XU : PL01 : (Warning): station section position is not consistent with loc code position\n",
+ "Data in loc code section overrides station section\n",
+ "Station section coordinates: 47.457401 -123.157501 0.44160000000000005\n",
+ "loc code section coordinates: 47.4575 -123.157501 0.0\n",
+ "XU : PL02 : (Warning): station section position is not consistent with loc code position\n",
+ "Data in loc code section overrides station section\n",
+ "Station section coordinates: 47.451801 -123.157997 0.45389999999999997\n",
+ "loc code section coordinates: 47.4519 -123.157898 0.0\n",
+ "XU : PL03 : (Warning): station section position is not consistent with loc code position\n",
+ "Data in loc code section overrides station section\n",
+ "Station section coordinates: 47.454601 -123.150101 0.3419\n",
+ "loc code section coordinates: 47.454601 -123.150002 0.0\n",
+ "XU : PL04 : (Warning): station section position is not consistent with loc code position\n",
+ "Data in loc code section overrides station section\n",
+ "Station section coordinates: 47.4496 -123.148399 0.36360000000000003\n",
+ "loc code section coordinates: 47.449501 -123.148399 0.0\n",
+ "XU : PL05 : (Warning): station section position is not consistent with loc code position\n",
+ "Data in loc code section overrides station section\n",
+ "Station section coordinates: 47.457699 -123.144302 1.2512\n",
+ "loc code section coordinates: 47.457699 -123.144203 0.0\n",
+ "Database.save_inventory processing summary:\n",
+ "Number of site records processed= 4827\n",
+ "number of site records saved= 4827\n",
+ "number of channel records processed= 104150\n",
+ "number of channel records saved= 85449\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(4827, 85449, 4827, 104150)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "db.save_inventory(inv)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f511f9ac-d3ad-422f-a735-f00396677bcd",
+ "metadata": {},
+ "source": [
+ "These indexes should speed up the most common query patterns on the site and channel collections: geospatial matching on the location attribute, net:sta:chan lookups, and starttime range queries.\n",
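+ "\n",
+ "For example, the 2dsphere index created below makes geospatial queries like the following efficient (a sketch; the point coordinates are hypothetical and any GeoJSON [lon, lat] pair will do):\n",
+ "```python\n",
+ "doc = db.site.find_one(\n",
+ "    {'location': {'$near': {'$geometry':\n",
+ "        {'type': 'Point', 'coordinates': [-96.6, 43.7]}}}})\n",
+ "```"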
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "eaffc861-bb5b-474f-98e2-9de93ffb9db9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'net_1_sta_1_chan_1'"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pymongo\n",
+ "db.site.create_index({'location' : '2dsphere'})\n",
+ "db.channel.create_index({'location' : '2dsphere'})\n",
+ "db.channel.create_index([(\"net\",pymongo.ASCENDING),(\"sta\",pymongo.ASCENDING),(\"chan\",pymongo.ASCENDING)])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "6fc971a4-919f-46f6-811f-914bc149c256",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'starttime_1'"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "db.channel.create_index(\"starttime\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8335fbe-24f4-4d6f-b5f4-7b6591acbf35",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.15"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/USArrayProcessExample/Preprocess2seis_2014.ipynb b/notebooks/USArrayProcessExample/Preprocess2seis_2014.ipynb
new file mode 100644
index 0000000..c1830cc
--- /dev/null
+++ b/notebooks/USArrayProcessExample/Preprocess2seis_2014.ipynb
@@ -0,0 +1,348 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "9170ade2-9d78-4f36-8d12-76bd0419242f",
+ "metadata": {},
+ "source": [
+ "# Extended USArray data preprocessing stage 3\n",
+ "This notebook does the third and final stage of preprocessing of the extended USArray data set. It reads common source gathers assumed to have been created previously in wf_TimeSeries and applies the following processing steps:\n",
+ "1. Runs bundle_seed_data to transform the data to a SeismogramEnsemble \n",
+ "2. Applies the free surface transformation to all ensemble members\n",
+ "3. Runs broadband_snr_QC on all members. \n",
+ "\n",
+ "Step 3 tends to kill a lot of data, but that is a key point: the cemetery contains the bodies."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "e810137a-ae9e-44ff-bcc8-d31c286f133a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# set to year of data being processed\n",
+ "year = 2014\n",
+ "dbname = \"usarray{}\".format(year)\n",
+ "wfdir = \"./wf_Seismogram/{}\".format(year)\n",
+ "dbmaster=\"usarray48\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "54c091f6-7d4d-4bdb-973d-9fc7f19ffb8f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mspasspy.client as msc\n",
+ "mspass_client = msc.Client(database_name=dbname)\n",
+ "db = mspass_client.get_database()\n",
+ "dbmd = mspass_client.get_database(dbmaster)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "38371123-fa1f-457e-ad1c-bcc12e8914c8",
+ "metadata": {},
+ "source": [
+ "First do imports and define special functions used for this workflow.\n",
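+ "\n",
+ "A worked example of the slowness-vector geometry used inside apply_FST below may help; the numbers are hypothetical (rayp in s/radian, as returned by obspy's taup calculator, and a back azimuth in degrees):\n",
+ "```python\n",
+ "import math\n",
+ "rayp = 440.0                     # hypothetical teleseismic P ray parameter (s/rad)\n",
+ "seaz = 30.0                      # hypothetical station-to-event back azimuth (deg)\n",
+ "Re = 6378.1\n",
+ "umag = rayp / Re                 # apparent slowness at the surface, ~0.069 s/km\n",
+ "az = math.radians(seaz + 180.0)  # propagation azimuth in radians\n",
+ "ux, uy = umag * math.sin(az), umag * math.cos(az)\n",
+ "```"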
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "536bce0c-cc54-4f42-965b-7f05597be17f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from mspasspy.algorithms.window import WindowData\n",
+ "from mspasspy.ccore.algorithms.basic import TimeWindow\n",
+ "from mspasspy.ccore.utility import ErrorSeverity\n",
+ "from mspasspy.db.normalize import normalize,ObjectIdMatcher\n",
+ "from mspasspy.algorithms.MTPowerSpectrumEngine import MTPowerSpectrumEngine\n",
+ "from mspasspy.algorithms.bundle import bundle_seed_data\n",
+ "from mspasspy.ccore.seismic import SlownessVector\n",
+ "from mspasspy.algorithms.basic import free_surface_transformation,rotate_to_standard\n",
+ "from mspasspy.algorithms.snr import broadband_snr_QC\n",
+ "from mspasspy.util.seismic import number_live\n",
+ "import dask.bag as dbg\n",
+ "import time\n",
+ "import math\n",
+ "\n",
+ "\n",
+ "\n",
+ "def apply_FST(d,rayp_key=\"rayp_P\",seaz_key='seaz',vp0=6.0,vs0=3.5):\n",
+ "    \"\"\"\n",
+ "    Apply free surface transformation operator of Kennett (1983) to an input `Seismogram` \n",
+ "    object. Assumes ray parameter and azimuth data are stored as Metadata in the \n",
+ "    input datum. If the ray parameter or azimuth key is not defined an error \n",
+ "    message will be posted and the datum will be killed before returning. \n",
+ "    :param d: datum to process\n",
+ "    :type d: Seismogram\n",
+ "    :param rayp_key: key used to extract the ray parameter used to compute the \n",
+ "    free surface transformation operator. Note the function assumes the ray parameter is\n",
+ "    spherical coordinate form: R sin(theta)/V. Default is \"rayp_P\".\n",
+ "    :param seaz_key: key to use to extract station to event azimuth. Default is \"seaz\".\n",
+ "    :param vp0: surface P wave velocity (km/s) to use for free surface transformation \n",
+ "    :param vs0: surface S wave velocity (km/s) to use for free surface transformation.\n",
+ "    \"\"\"\n",
+ "    if d.is_defined(rayp_key) and d.is_defined(seaz_key):\n",
+ "        rayp = d[rayp_key]\n",
+ "        seaz = d[seaz_key]\n",
+ "        # Some basic seismology here. rayp is the spherical earth ray parameter\n",
+ "        # R sin(theta)/v. Free surface transformation needs apparent velocity \n",
+ "        # at Earth's surface which is sin(theta)/v when R=Re. Hence the following\n",
+ "        # simple conversion to get apparent slowness at surface sin(theta)/v\n",
+ "        Re=6378.1\n",
+ "        umag = rayp/Re # magnitude of slowness vector\n",
+ "        # seaz is back azimuth - slowness vector points in direction of propagation\n",
+ "        # which is 180 degrees away from back azimuth\n",
+ "        # note math.sin and math.cos require radians so convert the angle in degrees\n",
+ "        az = math.radians(seaz + 180.0)\n",
+ "        # slowness vector components in local coordinates\n",
+ "        ux = umag * math.sin(az)\n",
+ "        uy = umag * math.cos(az)\n",
+ "        # FST implementation requires this special class called a SlownessVector\n",
+ "        u = SlownessVector(ux,uy)\n",
+ "        d = free_surface_transformation(d,u,vp0,vs0)\n",
+ "    else:\n",
+ "        d.kill()\n",
+ "        message = \"one of the required attributes rayp_P or seaz was not defined for this datum\"\n",
+ "        d.elog.log_error(\"apply_FST\",message,ErrorSeverity.Invalid)\n",
+ "    \n",
+ "    return d\n",
+ "\n",
+ "\n",
+ "\n",
+ "def set_file_path(e,dir=wfdir):\n",
+ "    \"\"\"\n",
+ "    This function is used to set dir and dfile for this workflow for each \n",
+ "    ensemble. Note these are set in the ensemble's metadata container \n",
+ "    not the members. 
We don't set them for members as Database.save_data \n",
+ "    only uses kwarg dir and dfile with the metadata as a fallback.\n",
+ "    \"\"\"\n",
+ "    e['dir']=dir\n",
+ "    dfile = \"dfile_undefined\"\n",
+ "    for d in e.member:\n",
+ "        if d.live and 'dfile' in d:\n",
+ "            dfile=d['dfile']\n",
+ "            break\n",
+ "    e[\"dfile\"] = dfile\n",
+ "    return e"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e307e480-0923-4e95-b209-e28689061e71",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# this function was copied from Preprocess2ts - it should probably be moved to a local .py file\n",
+ "from obspy import UTCDateTime\n",
+ "def dbmdquery(year,padsize=86400.0):\n",
+ "    \"\"\"\n",
+ "    Constructs a MongoDB query dictionary to use as a query argument for normalization matcher classes.\n",
+ "    (All standard BasicMatcher children have a query argument in the constructor for this purpose.)\n",
+ "    The query is a range spanning the specified calendar year. The interval is extended by padsize \n",
+ "    seconds. (default is one day = 86400.0)\n",
+ "    \"\"\"\n",
+ "    # uses obspy's UTCDateTime class to create time range in epoch time using the \n",
+ "    # calendar strings for convenience\n",
+ "    tstr = \"{}-01-01T00:00:00.0\".format(year)\n",
+ "    st = UTCDateTime(tstr)\n",
+ "    # note UTCDateTime.timestamp is a property, not a method\n",
+ "    starttime = st.timestamp - padsize\n",
+ "    tstr = \"{}-01-01T00:00:00.0\".format(year+1)\n",
+ "    et = UTCDateTime(tstr)\n",
+ "    endtime = et.timestamp + padsize\n",
+ "    # not sure the and is required but better safe than sorry\n",
+ "    query = { \"$and\" :\n",
+ "                [\n",
+ "                  {\"starttime\" : {\"$lte\" : endtime}},\n",
+ "                  {\"endtime\" : {\"$gte\" : starttime}}\n",
+ "                ]\n",
+ "            }\n",
+ "    return query"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19edb463-0a68-4451-9709-d5985515da1e",
+ "metadata": {},
+ "source": [
+ "These are the higher-level functions used as map operators."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c61ecb40-6e9a-40cf-b7a9-192ced9db52e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_tsensembles(source_id,db,data_tag=None):\n",
+ "    \"\"\"\n",
+ "    Reader for this script. Could be done with read_distributed_data and a query generator \n",
+ "    to create a list of python dictionaries as input to read_distributed_data. 
This does the \n",
+ "    same thing more or less and is a bit clearer.\n",
+ "    \n",
+ "    \"\"\"\n",
+ "    print(\"Debug: entered read_tsensembles\")\n",
+ "    query = {\"source_id\" : source_id}\n",
+ "    if data_tag:\n",
+ "        query[\"data_tag\"] = data_tag\n",
+ "    n = db.wf_TimeSeries.count_documents(query)\n",
+ "    print(\"Found \",n,\" documents for ensemble with source_id=\",source_id)\n",
+ "    cursor = db.wf_TimeSeries.find(query)\n",
+ "    ens = db.read_data(cursor,collection=\"wf_TimeSeries\")\n",
+ "    print(\"Number of members in this ensemble=\",len(ens.member))\n",
+ "    return ens\n",
+ "\n",
+ "def normalize_ensembles(ens,matcher):\n",
+ "    \"\"\"\n",
+ "    Workaround for a bug in the normalize function where the decorators won't work.\n",
+ "    This function should be removed when that bug is resolved.\n",
+ "    \"\"\"\n",
+ "    if ens.live:\n",
+ "        for i in range(len(ens.member)):\n",
+ "            ens.member[i] = normalize(ens.member[i],matcher)\n",
+ "    return ens\n",
+ "    \n",
+ "def process2seis(ens,\n",
+ "                 swin,\n",
+ "                 nwin,\n",
+ "                 signal_engine,\n",
+ "                 noise_engine,\n",
+ "                ):\n",
+ "    \"\"\"\n",
+ "    Master processing function for this notebook: bundles the input\n",
+ "    TimeSeriesEnsemble into a SeismogramEnsemble, then applies apply_FST\n",
+ "    and broadband_snr_QC to each member.\n",
+ "    \"\"\"\n",
+ "    print(\"Processing TimeSeriesEnsemble with \",len(ens.member),\" members\")\n",
+ "    ens3c = bundle_seed_data(ens)\n",
+ "    del ens\n",
+ "    N = len(ens3c.member)\n",
+ "    for i in range(N):\n",
+ "        ens3c.member[i] = apply_FST(ens3c.member[i])\n",
+ "        ens3c.member[i] = broadband_snr_QC(ens3c.member[i],\n",
+ "                                           component=2,\n",
+ "                                           signal_window=swin,\n",
+ "                                           noise_window=nwin,\n",
+ "                                           use_measured_arrival_time=True,\n",
+ "                                           measured_arrival_time_key=\"Ptime\",\n",
+ "                                           noise_spectrum_engine=noise_engine,\n",
+ "                                           signal_spectrum_engine=signal_engine,\n",
+ "                                          )\n",
+ "    return ens3c\n",
+ "    "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2e6b737-53c6-4f00-bb91-e8527a122570",
+ "metadata": {},
+ "source": [
+ "This is the driver script, comparable to a Fortran main program.\n",
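+ "\n",
+ "The overall pattern, sketched here using the names from the cell below, is a dask bag pipeline: one bag element per source_id, with the read, normalize, process, and save steps mapped over the bag, and nothing executing until compute() is called:\n",
+ "```python\n",
+ "bag = dbg.from_sequence(srcidlist)   # one element per source_id\n",
+ "bag = bag.map(read_tsensembles, db)  # load each common source gather\n",
+ "bag = bag.map(normalize_ensembles, chan_matcher)\n",
+ "bag = bag.map(process2seis, swin, nwin, signal_engine, noise_engine)\n",
+ "out = bag.compute()                  # triggers the parallel run\n",
+ "```"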
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "227cfea1-3103-4a36-9405-e6f837829bad",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t0 = time.time()\n",
+ "chanquery=dbmdquery(year)\n",
+ "# channel metadata lives in the master database so use dbmd here\n",
+ "chan_matcher = ObjectIdMatcher(dbmd,\n",
+ "                               query=chanquery,\n",
+ "                               collection=\"channel\",\n",
+ "                               attributes_to_load=[\"_id\",\"lat\",\"lon\",\"elev\",\"hang\",\"vang\"],\n",
+ "                               )\n",
+ "\n",
+ "# on the IU system each node has 2*64=128 cores; npartitions should be set\n",
+ "# comparable to the number of dask workers used for the run\n",
+ "npartitions=100\n",
+ "# seismogram output directory\n",
+ "dir_seismogram=\"wf_Seismogram/{}\".format(year)\n",
+ "swin = TimeWindow(-5.0,100.0)\n",
+ "nwin = TimeWindow(-195.0,-5.0)\n",
+ "# spectrum estimation engines for broadband_snr_QC\n",
+ "dt = 0.05\n",
+ "nsamp_noise = int((nwin.end-nwin.start)/dt) + 1\n",
+ "nsamp_sig = int((swin.end-swin.start)/dt) + 1\n",
+ "signal_engine=MTPowerSpectrumEngine(nsamp_sig,5.0,8,2*nsamp_sig,dt)\n",
+ "noise_engine=MTPowerSpectrumEngine(nsamp_noise,5.0,10,2*nsamp_noise,dt)\n",
+ "srcidlist_ms=db.wf_TimeSeries.distinct('source_id')\n",
+ "\n",
+ "srcidlist_finished = db.wf_Seismogram.distinct('source_id')\n",
+ "if len(srcidlist_finished)>0:\n",
+ "    srcidlist=[]\n",
+ "    for sid in srcidlist_ms:\n",
+ "        if sid not in srcidlist_finished:\n",
+ "            srcidlist.append(sid)\n",
+ "else:\n",
+ "    srcidlist = srcidlist_ms\n",
+ "\n",
+ "\n",
+ "print(\"Number of distinct sources in wf_TimeSeries= \",len(srcidlist_ms))\n",
+ "print(\"Number to process this run=\",len(srcidlist))\n",
+ "# reduce size for testing \n",
+ "#sidtmp=[]\n",
+ "#for i in range(4):\n",
+ "#    sidtmp.append(srcidlist[i])\n",
+ "#srcidlist=sidtmp\n",
+ "#print(\"Debug process list: \",srcidlist)\n",
+ "mydata = dbg.from_sequence(srcidlist)\n",
+ "mydata = mydata.map(read_tsensembles,db)\n",
+ "# note default behavior for normalize is to normalize all members\n",
+ "#mydata = mydata.map(normalize,chan_matcher,handles_ensembles=False)\n",
+ "#workaround for above until bug is fixed in normalize\n",
+ "mydata = mydata.map(normalize_ensembles,chan_matcher)\n",
+ "mydata = mydata.map(process2seis,\n",
+ "                    swin,\n",
+ "                    nwin,\n",
+ "                    signal_engine,\n",
+ "                    noise_engine,\n",
+ "                    )\n",
+ "mydata = mydata.map(set_file_path)\n",
+ "mydata = mydata.map(db.save_data,\n",
+ "                    collection=\"wf_Seismogram\",\n",
+ "                    storage_mode=\"file\",\n",
+ "                    dir=wfdir,\n",
+ "                    data_tag=\"FST_and_QCed\",\n",
+ "                    )\n",
+ "out=mydata.compute()\n",
+ "print(out)\n",
+ "t = time.time()\n",
+ "print(\"Run time = \",t-t0,\" seconds\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b312080b-6560-486e-acc7-7798a64a9ac6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb b/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb
new file mode 100644
index 0000000..1d12ede
--- /dev/null
+++ b/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb
@@ -0,0 +1,346 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "9170ade2-9d78-4f36-8d12-76bd0419242f",
+ 
"metadata": {}, + "source": [ + "# Extended USArray data preprocessing stage 3\n", + "This notebook does the final stage of preprocessing of the extended USArray data set. It reads common source gathers (defined by common source_id value) it assumes were created in wf_TimeSeries and does the following processing steps:\n", + "1. Runs bundle_seed_data to transform the data to a SeismogramEnsemble \n", + "2. Applies the free surface transformation to all ensemble members\n", + "3. Runs broadband_snr_QC on all members. \n", + "\n", + "Step 3 tends to kill a lot of data but that is a key point. The cemetery contains the mummified bodies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e810137a-ae9e-44ff-bcc8-d31c286f133a", + "metadata": {}, + "outputs": [], + "source": [ + "# set to year of data being processed\n", + "year = 2014\n", + "dbname = \"usarray{}\".format(year)\n", + "wfdir = \"./wf_Seismogram/{}\".format(year)\n", + "dbmaster=\"usarray48\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54c091f6-7d4d-4bdb-973d-9fc7f19ffb8f", + "metadata": {}, + "outputs": [], + "source": [ + "import mspasspy.client as msc\n", + "mspass_client = msc.Client(database_name=dbname)\n", + "db = mspass_client.get_database()\n", + "dbmd = mspass_client.get_database(dbmaster)" + ] + }, + { + "cell_type": "markdown", + "id": "38371123-fa1f-457e-ad1c-bcc12e8914c8", + "metadata": {}, + "source": [ + "First do imports and define special functions used for this workflow." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "536bce0c-cc54-4f42-965b-7f05597be17f", + "metadata": {}, + "outputs": [], + "source": [ + "from mspasspy.algorithms.window import WindowData\n", + "from mspasspy.ccore.algorithms.basic import TimeWindow\n", + "from mspasspy.ccore.utility import ErrorSeverity\n", + "from mspasspy.db.normalize import normalize,ObjectIdMatcher\n", + "from mspasspy.algorithms.MTPowerSpectrumEngine import MTPowerSpectrumEngine\n", + "from mspasspy.algorithms.bundle import bundle_seed_data\n", + "from mspasspy.ccore.seismic import SlownessVector\n", + "from mspasspy.algorithms.basic import free_surface_transformation,rotate_to_standard\n", + "from mspasspy.algorithms.snr import broadband_snr_QC\n", + "from mspasspy.util.seismic import number_live\n", + "import dask.bag as dbg\n", + "import time\n", + "import math\n", + "\n", + "\n", + "\n", + "def apply_FST(d,rayp_key=\"rayp_P\",seaz_key='seaz',vp0=6.0,vs0=3.5):\n", + " \"\"\"\n", + " Apply free surface transformation operator of Kennett (1983) to an input `Seismogram` \n", + " object. Assumes ray parameter and azimuth data are stored as Metadata in the \n", + " input datum. If the ray parameter or azimuth key are not defined an error \n", + " message will be posted and the datum will be killed before returning. \n", + " :param d: datum to process\n", + " :type d: Seismogram\n", + " :param rayp_key: key to use to extract ray parameter to use to compute the \n", + " free surface transformation operator. Note function assumes the ray parameter is\n", + " spherical coordinate form: R sin(theta)/V. Default is \"rayp_P\".\n", + " :param seaz_key: key to use to extract station to event azimuth. 
Default is \"seaz\".\n",
+ "    :param vp0: surface P wave velocity (km/s) to use for free surface transformation \n",
+ "    :param vs0: surface S wave velocity (km/s) to use for free surface transformation.\n",
+ "    \"\"\"\n",
+ "    if d.is_defined(rayp_key) and d.is_defined(seaz_key):\n",
+ "        rayp = d[rayp_key]\n",
+ "        seaz = d[seaz_key]\n",
+ "        # Some basic seismology here. rayp is the spherical earth ray parameter\n",
+ "        # R sin(theta)/v. Free surface transformation needs apparent velocity \n",
+ "        # at Earth's surface which is sin(theta)/v when R=Re. Hence the following\n",
+ "        # simple conversion to get apparent slowness at surface sin(theta)/v\n",
+ "        Re=6378.1\n",
+ "        umag = rayp/Re # magnitude of slowness vector\n",
+ "        # seaz is back azimuth - slowness vector points in direction of propagation\n",
+ "        # which is 180 degrees away from back azimuth\n",
+ "        # note math.sin and math.cos require radians so convert the angle in degrees\n",
+ "        az = math.radians(seaz + 180.0)\n",
+ "        # slowness vector components in local coordinates\n",
+ "        ux = umag * math.sin(az)\n",
+ "        uy = umag * math.cos(az)\n",
+ "        # FST implementation requires this special class called a SlownessVector\n",
+ "        u = SlownessVector(ux,uy)\n",
+ "        d = free_surface_transformation(d,u,vp0,vs0)\n",
+ "    else:\n",
+ "        d.kill()\n",
+ "        message = \"one of the required attributes rayp_P or seaz was not defined for this datum\"\n",
+ "        d.elog.log_error(\"apply_FST\",message,ErrorSeverity.Invalid)\n",
+ "    \n",
+ "    return d\n",
+ "\n",
+ "\n",
+ "\n",
+ "def set_file_path(e,dir=wfdir):\n",
+ "    \"\"\"\n",
+ "    This function is used to set dir and dfile for this workflow for each \n",
+ "    ensemble. Note these are set in the ensemble's metadata container \n",
+ "    not the members. We don't set them for members as Database.save_data \n",
+ "    only uses kwarg dir and dfile with the metadata as a fallback.\n",
+ "    \"\"\"\n",
+ "    e['dir']=dir\n",
+ "    dfile = \"dfile_undefined\"\n",
+ "    for d in e.member:\n",
+ "        if d.live and 'dfile' in d:\n",
+ "            dfile=d['dfile']\n",
+ "            break\n",
+ "    e[\"dfile\"] = dfile\n",
+ "    return e"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e307e480-0923-4e95-b209-e28689061e71",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# this function was copied from Preprocess2ts - it should probably be moved to a local .py file\n",
+ "from obspy import UTCDateTime\n",
+ "def dbmdquery(year,padsize=86400.0):\n",
+ "    \"\"\"\n",
+ "    Constructs a MongoDB query dictionary to use as a query argument for \n",
+ "    normalization matcher classes site and channel. A different query is needed for source later.\n",
+ "    (All standard BasicMatcher children have a query argument in the constructor for this purpose.)\n",
+ "    The query is a range spanning the specified calendar year. The interval is extended by padsize \n",
+ "    seconds. 
(default is one day = 86400.0)\n",
+ "    \"\"\"\n",
+ "    # uses obspy's UTCDateTime class to create time range in epoch time using the \n",
+ "    # calendar strings for convenience\n",
+ "    tstr = \"{}-01-01T00:00:00.0\".format(year)\n",
+ "    st = UTCDateTime(tstr)\n",
+ "    # note UTCDateTime.timestamp is a property, not a method\n",
+ "    starttime = st.timestamp - padsize\n",
+ "    tstr = \"{}-01-01T00:00:00.0\".format(year+1)\n",
+ "    et = UTCDateTime(tstr)\n",
+ "    endtime = et.timestamp + padsize\n",
+ "    # not sure the and is required but better safe than sorry\n",
+ "    query = { \"$and\" :\n",
+ "                [\n",
+ "                  {\"starttime\" : {\"$lte\" : endtime}},\n",
+ "                  {\"endtime\" : {\"$gte\" : starttime}}\n",
+ "                ]\n",
+ "            }\n",
+ "    return query"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19edb463-0a68-4451-9709-d5985515da1e",
+ "metadata": {},
+ "source": [
+ "These are the higher-level functions used as map operators."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c61ecb40-6e9a-40cf-b7a9-192ced9db52e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_tsensembles(source_id,db,data_tag=None):\n",
+ "    \"\"\"\n",
+ "    Reader for this script. Could be done with read_distributed_data and a query generator \n",
+ "    to create a list of python dictionaries as input to read_distributed_data. This does the \n",
+ "    same thing more or less and is a bit clearer.\n",
+ "    \n",
+ "    \"\"\"\n",
+ "    print(\"Debug: entered read_tsensembles\")\n",
+ "    query = {\"source_id\" : source_id}\n",
+ "    if data_tag:\n",
+ "        query[\"data_tag\"] = data_tag\n",
+ "    n = db.wf_TimeSeries.count_documents(query)\n",
+ "    print(\"Found \",n,\" documents for ensemble with source_id=\",source_id)\n",
+ "    cursor = db.wf_TimeSeries.find(query)\n",
+ "    ens = db.read_data(cursor,collection=\"wf_TimeSeries\")\n",
+ "    print(\"Number of members in this ensemble=\",len(ens.member))\n",
+ "    return ens\n",
+ "\n",
+ "    \n",
+ "def process2seis(ens,\n",
+ "                 swin,\n",
+ "                 nwin,\n",
+ "                 signal_engine,\n",
+ "                 noise_engine,\n",
+ "                ):\n",
+ "    \"\"\"\n",
+ "    This is the master processing function for this notebook. It assumes an input ens \n",
+ "    that is a common source gather with the required Metadata set in the earlier run of the stage 2 \n",
+ "    (Preprocess2ts) notebook. In this workflow each ens is read in parallel, with this function doing \n",
+ "    the processing. The output is a SeismogramEnsemble run through broadband_snr_QC to set \n",
+ "    snr metrics. It also normally kills a large fraction of the data. \n",
+ "\n",
+ "    The approach using a loop over the ensemble was designed to reduce the memory footprint as \n",
+ "    each member is processed and replaces its parent in the ensemble that is returned. \n",
+ "    \"\"\"\n",
+ "    print(\"Processing TimeSeriesEnsemble with \",len(ens.member),\" members\")\n",
+ "    ens3c = bundle_seed_data(ens)\n",
+ "    del ens\n",
+ "    N = len(ens3c.member)\n",
+ "    for i in range(N):\n",
+ "        ens3c.member[i] = apply_FST(ens3c.member[i])\n",
+ "        ens3c.member[i] = broadband_snr_QC(ens3c.member[i],\n",
+ "                                           component=2,\n",
+ "                                           signal_window=swin,\n",
+ "                                           noise_window=nwin,\n",
+ "                                           use_measured_arrival_time=True,\n",
+ "                                           measured_arrival_time_key=\"Ptime\",\n",
+ "                                           noise_spectrum_engine=noise_engine,\n",
+ "                                           signal_spectrum_engine=signal_engine,\n",
+ "                                          )\n",
+ "    return ens3c\n",
+ "    "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2e6b737-53c6-4f00-bb91-e8527a122570",
+ "metadata": {},
+ "source": [
+ "This is the driver script, comparable to a Fortran main program.\n",
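+ "\n",
+ "The first line of the cell below builds the channel time-range query. As a concrete illustration (a sketch; the epoch values are simply what obspy's UTCDateTime produces for these dates), dbmdquery(2014) returns:\n",
+ "```python\n",
+ "{'$and': [{'starttime': {'$lte': 1420156800.0}},\n",
+ "          {'endtime': {'$gte': 1388448000.0}}]}\n",
+ "```"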
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "227cfea1-3103-4a36-9405-e6f837829bad",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t0 = time.time()\n",
+ "chanquery=dbmdquery(year)\n",
+ "chan_matcher = ObjectIdMatcher(dbmd,\n",
+ "                               query=chanquery,\n",
+ "                               collection=\"channel\",\n",
+ "                               attributes_to_load=[\"_id\",\"lat\",\"lon\",\"elev\",\"hang\",\"vang\"],\n",
+ "                               )\n",
+ "\n",
+ "# on the IU system each node has 2*64=128 cores; npartitions should be set\n",
+ "# comparable to the number of dask workers used for the run\n",
+ "npartitions=100\n",
+ "swin = TimeWindow(-5.0,100.0)\n",
+ "nwin = TimeWindow(-195.0,-5.0)\n",
+ "# spectrum estimation engines for broadband_snr_QC\n",
+ "dt = 0.05\n",
+ "nsamp_noise = int((nwin.end-nwin.start)/dt) + 1\n",
+ "nsamp_sig = int((swin.end-swin.start)/dt) + 1\n",
+ "signal_engine=MTPowerSpectrumEngine(nsamp_sig,5.0,8,2*nsamp_sig,dt)\n",
+ "noise_engine=MTPowerSpectrumEngine(nsamp_noise,5.0,10,2*nsamp_noise,dt)\n",
+ "srcidlist_ms=db.wf_TimeSeries.distinct('source_id')\n",
+ "\n",
+ "srcidlist_finished = db.wf_Seismogram.distinct('source_id')\n",
+ "if len(srcidlist_finished)>0:\n",
+ "    srcidlist=[]\n",
+ "    for sid in srcidlist_ms:\n",
+ "        if sid not in srcidlist_finished:\n",
+ "            srcidlist.append(sid)\n",
+ "else:\n",
+ "    srcidlist = srcidlist_ms\n",
+ "\n",
+ "\n",
+ "print(\"Number of distinct sources in wf_TimeSeries= \",len(srcidlist_ms))\n",
+ "print(\"Number to process this run=\",len(srcidlist))\n",
+ "# reduce size for testing \n",
+ "#sidtmp=[]\n",
+ "#for i in range(4):\n",
+ "#    sidtmp.append(srcidlist[i])\n",
+ "#srcidlist=sidtmp\n",
+ "#print(\"Debug process list: \",srcidlist)\n",
+ "mydata = dbg.from_sequence(srcidlist)\n",
+ "mydata = mydata.map(read_tsensembles,db)\n",
+ "# note default behavior for normalize is to normalize all members\n",
+ "mydata = mydata.map(normalize,chan_matcher)\n",
+ "mydata = mydata.map(process2seis,\n",
+ "                    swin,\n",
+ "                    nwin,\n",
+ "                    signal_engine,\n",
+ "                    noise_engine,\n",
+ "                    )\n",
+ "mydata = mydata.map(set_file_path)\n",
+ "mydata = mydata.map(db.save_data,\n",
+ "                    collection=\"wf_Seismogram\",\n",
+ "                    storage_mode=\"file\",\n",
+ "                    dir=wfdir,\n",
+ "                    data_tag=\"FST_and_QCed\",\n",
+ "                    )\n",
+ "out=mydata.compute()\n",
+ "print(out)\n",
+ "t = time.time()\n",
+ "print(\"Run time = \",t-t0,\" seconds\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b312080b-6560-486e-acc7-7798a64a9ac6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.15"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/USArrayProcessExample/Preprocess2ts_2014.ipynb b/notebooks/USArrayProcessExample/Preprocess2ts_2014.ipynb
new file mode 100644
index 0000000..806205f
--- /dev/null
+++ b/notebooks/USArrayProcessExample/Preprocess2ts_2014.ipynb
@@ -0,0 +1,427 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "554edf59-2c25-4210-8197-d4706709fd38",
+ "metadata": {},
+ "source": [
+ "# Extended USArray data preprocessing stage 2\n",
+ "This notebook does the 
first stage of actual preprocessing of the extended USArray data set. It should be run after the notebook currently called index_mseed_*.ipynb, where the * is normally a year. It must be run before the related notebook called Preprocess2seis_*.ipynb. After experimenting with several variants this approach seems to be the best solution for performance with a workable memory footprint. It does a fairly long string of operations all contained in the function atomic_ts_processor. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "bd9da46d-fa80-4c4a-b500-7af04be13de8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# change for different calendar year\n",
+ "year = 2014\n",
+ "dbname = \"usarray{}\".format(year)\n",
+ "tsdir=\"./wf_TimeSeries/{}\".format(year)\n",
+ "dbmaster=\"usarray48\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "54c091f6-7d4d-4bdb-973d-9fc7f19ffb8f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mspasspy.client as msc\n",
+ "mspass_client = msc.Client(database_name=dbname)\n",
+ "# waveform data indexed to here\n",
+ "db = mspass_client.get_database()\n",
+ "# master database with source and receiver metadata\n",
+ "dbmd = mspass_client.get_database(dbmaster)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "38371123-fa1f-457e-ad1c-bcc12e8914c8",
+ "metadata": {},
+ "source": [
+ "First do imports and define special functions used for this workflow."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "536bce0c-cc54-4f42-965b-7f05597be17f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from mspasspy.algorithms.window import WindowData\n",
+ "from mspasspy.algorithms.signals import detrend\n",
+ "from mspasspy.ccore.algorithms.basic import TimeWindow\n",
+ "from mspasspy.ccore.utility import ErrorSeverity\n",
+ "from mspasspy.algorithms.resample import (ScipyResampler,\n",
+ "                                          ScipyDecimator,\n",
+ "                                          resample,\n",
+ "                                          )\n",
+ "from mspasspy.db.normalize import ObjectIdMatcher\n",
+ "from mspasspy.algorithms.calib import ApplyCalibEngine\n",
+ "# these two imports are needed by apply_FST defined below\n",
+ "from mspasspy.ccore.seismic import SlownessVector\n",
+ "from mspasspy.algorithms.basic import free_surface_transformation\n",
+ "from mspasspy.util.seismic import number_live\n",
+ "from obspy import UTCDateTime\n",
+ "from obspy.geodetics import gps2dist_azimuth,kilometers2degrees\n",
+ "from obspy.taup import TauPyModel\n",
+ "from dask.distributed import as_completed\n",
+ "import time\n",
+ "import math\n",
+ "\n",
+ "\n",
+ "\n",
+ "def set_PStime(d,Ptimekey=\"Ptime\",Stimekey=\"Stime\",model=None):\n",
+ "    \"\"\"\n",
+ "    Function to calculate P and S wave arrival times and set them \n",
+ "    as the header (Metadata) fields defined by Ptimekey and Stimekey.\n",
+ "    Tries to handle some complexities of what the travel time calculator \n",
+ "    returns when one or both of P and S cannot be calculated. That is \n",
+ "    the norm in or at the edge of the core shadow. \n",
+ "    \n",
+ "    :param d: input TimeSeries datum. Assumes datum's Metadata \n",
+ "    contains stock source and channel attributes. \n",
+ "    :param Ptimekey: key used to define the header attribute that \n",
+ "    will contain the computed P time. Default \"Ptime\".\n",
+ "    :param Stimekey: key used to define the header attribute that \n",
+ "    will contain the computed S time. Default \"Stime\".\n",
+ "    :param model: instance of obspy TauPyModel travel time engine. \n",
+ "    Default is None. That mode is slow as a new engine will be\n",
+ "    constructed on each call to the function. Normal use should \n",
+ "    pass an instance for greater efficiency. 
\n", + " \"\"\"\n", + " if d.live:\n", + " if model is None:\n", + " model = TauPyModel(model=\"iasp91\") \n", + " # extract required source attributes\n", + " srclat=d[\"source_lat\"]\n", + " srclon=d[\"source_lon\"]\n", + " srcz=d[\"source_depth\"]\n", + " srct=d[\"source_time\"] \n", + " # extract required channel attributes\n", + " stalat=d[\"channel_lat\"]\n", + " stalon=d[\"channel_lon\"]\n", + " staelev=d[\"channel_elev\"]\n", + " # set up and run travel time calculator\n", + " georesult=gps2dist_azimuth(srclat,srclon,stalat,stalon)\n", + " # obspy's function we just called returns distance in m in element 0 of a tuple\n", + " # their travel time calculator it is degrees so we need this conversion\n", + " dist=kilometers2degrees(georesult[0]/1000.0)\n", + " arrivals=model.get_travel_times(source_depth_in_km=srcz,\n", + " distance_in_degree=dist,\n", + " phase_list=['P','S'])\n", + " # always post this for as it is not cheap to compute\n", + " # WARNING: don't use common abbrevation delta - collides with data dt\n", + " d['epicentral_distance']=dist\n", + " # these are CSS3.0 shorthands s - station e - event\n", + " esaz = georesult[1]\n", + " seaz = georesult[2]\n", + " # css3.0 names esax = event to source azimuth; seaz = source to event azimuth\n", + " d['esaz']=esaz\n", + " d['seaz']=seaz\n", + " # get_travel_times returns an empty list if a P time cannot be \n", + " # calculated. We trap that condition and kill the output \n", + " # with an error message\n", + " if len(arrivals)==2:\n", + " Ptime=srct+arrivals[0].time\n", + " rayp = arrivals[0].ray_param\n", + " Stime=srct+arrivals[1].time\n", + " rayp_S = arrivals[1].ray_param\n", + " d.put(Ptimekey,Ptime)\n", + " d.put(Stimekey,Stime)\n", + " # These keys are not passed as arguments but could be - a choice\n", + " # Ray parameter is needed for free surface transformation operator\n", + " # note tau p calculator in obspy returns p=R sin(theta)/V_0\n", + " d.put(\"rayp_P\",rayp)\n", + " d.put(\"rayp_S\",rayp_S)\n", + " elif len(arrivals)==1:\n", + " if arrivals[0].name == 'P':\n", + " Ptime=srct+arrivals[0].time\n", + " rayp = arrivals[0].ray_param\n", + " d.put(Ptimekey,Ptime)\n", + " d.put(\"rayp_P\",rayp)\n", + " else:\n", + " # Not sure we can assume name is S\n", + " if arrivals[0].name == 'S':\n", + " Stime=srct+arrivals[0].time\n", + " rayp_S = arrivals[0].ray_param\n", + " d.put(Stimekey,Stime)\n", + " d.put(\"rayp_S\",rayp_S)\n", + " else:\n", + " message = \"Unexpected single phase name returned by taup calculator\\n\"\n", + " message += \"Expected phase name S but got \" + arrivals[0].name\n", + " d.elog.log_error(\"set_PStime\",\n", + " message,\n", + " ErrorSeverity.Invalid)\n", + " d.kill()\n", + " else:\n", + " # in this context this only happens if no P or S could be calculated\n", + " # That shouldn't ever happen but we need this safety in he event it does\n", + " message = \"Travel time calculator failed completely\\n\"\n", + " message += \"Could not calculator P or S phase time\"\n", + " d.elog.log_error(\"set_PStime\",\n", + " message,\n", + " ErrorSeverity.Invalid)\n", + " d.kill()\n", + " return d\n", + "\n", + "\n", + "def apply_FST(d,rayp_key=\"rayp_P\",seaz_key='seaz',vp0=6.0,vs0=3.5):\n", + " \"\"\"\n", + " Apply free surface transformation operator of Kennett (1983) to an input `Seismogram` \n", + " object. Assumes ray parameter and azimuth data are stored as Metadata in the \n", + " input datum. 
If the ray parameter or azimuth keys are not defined an error \n",
+ "    message will be posted and the datum will be killed before returning. \n",
+ "    :param d: datum to process\n",
+ "    :type d: Seismogram\n",
+ "    :param rayp_key: key to use to extract the ray parameter used to compute the \n",
+ "      free surface transformation operator. Note the function assumes the ray parameter is\n",
+ "      in spherical coordinate form: R sin(theta)/V. Default is \"rayp_P\".\n",
+ "    :param seaz_key: key to use to extract station to event azimuth. Default is \"seaz\".\n",
+ "    :param vp0: surface P wave velocity (km/s) to use for free surface transformation \n",
+ "    :param vs0: surface S wave velocity (km/s) to use for free surface transformation.\n",
+ "    \"\"\"\n",
+ "    if d.is_defined(rayp_key) and d.is_defined(seaz_key):\n",
+ "        rayp = d[rayp_key]\n",
+ "        seaz = d[seaz_key]\n",
+ "        # Some basic seismology here. rayp is the spherical earth ray parameter\n",
+ "        # R sin(theta)/v. Free surface transformation needs apparent velocity \n",
+ "        # at Earth's surface which is sin(theta)/v when R=Re. Hence the following\n",
+ "        # simple conversion to get apparent slowness at surface sin(theta)/v\n",
+ "        Re=6378.1\n",
+ "        umag = rayp/Re # magnitude of slowness vector\n",
+ "        # seaz is back azimuth - the slowness vector points in the direction of propagation\n",
+ "        # which is 180 degrees away from back azimuth. math trig functions require radians.\n",
+ "        az = math.radians(seaz + 180.0)\n",
+ "        # slowness vector components in local coordinates\n",
+ "        ux = umag * math.sin(az)\n",
+ "        uy = umag * math.cos(az)\n",
+ "        # FST implementation requires this special class called a SlownessVector\n",
+ "        u = SlownessVector(ux,uy)\n",
+ "        d = free_surface_transformation(d,u,vp0,vs0)\n",
+ "    else:\n",
+ "        d.kill()\n",
+ "        message = \"one of the required attributes rayp_P or seaz was not defined for this datum\"\n",
+ "        d.elog.log_error(\"apply_FST\",message,ErrorSeverity.Invalid)\n",
+ "    \n",
+ "    return d\n",
+ "\n",
+ "\n",
+ "\n",
+ "def set_file_path(e,dir=tsdir):\n",
+ "    \"\"\"\n",
+ "    This function is used to set dir and dfile for each ensemble in this \n",
+ "    workflow. Note these are set in the ensemble's metadata container, \n",
+ "    not the members. We don't set them for members as Database.save_data \n",
+ "    only uses the kwargs dir and dfile with the metadata as a fallback.\n",
+ "    \"\"\"\n",
+ "    e['dir']=dir\n",
+ "    dfile = \"dfile_undefined\"\n",
+ "    for d in e.member:\n",
+ "        if d.live and 'dfile' in d:\n",
+ "            dfile=d['dfile']\n",
+ "            # strip the file extension (e.g. .mseed); safe if there is none\n",
+ "            dfile=dfile.split(\".\")[0]\n",
+ "            break\n",
+ "    e[\"dfile\"] = dfile\n",
+ "    return e\n",
+ "\n",
+ "def dbmdquery(year,padsize=86400.0):\n",
+ "    \"\"\"\n",
+ "    Constructs a MongoDB query dictionary to use as a query argument for normalization matcher classes.\n",
+ "    (All standard BasicMatcher children have a query argument in the constructor for this purpose.)\n",
+ "    The query is a range spanning the specified calendar year. The interval is extended by padsize \n",
+ "    seconds. 
(default is one day = 86400.0)\n",
+ "    \"\"\"\n",
+ "    # uses obspy's UTCDateTime class to create the time range in epoch time using \n",
+ "    # calendar strings for convenience\n",
+ "    tstr = \"{}-01-01T00:00:00.0\".format(year)\n",
+ "    st = UTCDateTime(tstr)\n",
+ "    starttime = st.timestamp - padsize\n",
+ "    tstr = \"{}-01-01T00:00:00.0\".format(year+1)\n",
+ "    et = UTCDateTime(tstr)\n",
+ "    endtime = et.timestamp + padsize\n",
+ "    # the explicit $and may not be required but better safe than sorry\n",
+ "    query = { \"$and\" :\n",
+ "             [\n",
+ "                 {\"starttime\" : {\"$lte\" : endtime}},\n",
+ "                 {\"endtime\" : {\"$gte\" : starttime}}\n",
+ "             ]\n",
+ "           }\n",
+ "    return query\n",
+ "\n",
+ "    "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19edb463-0a68-4451-9709-d5985515da1e",
+ "metadata": {},
+ "source": [
+ "This function is the one used in the map operator run on each ensemble. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "c61ecb40-6e9a-40cf-b7a9-192ced9db52e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def atomic_ts_processor(d,decimator,resampler,ttmodel,win,calib_engine):\n",
+ "    \"\"\"\n",
+ "    This function chains all the processing functions applied to an input TimeSeries (d). It is called in this \n",
+ "    workflow in a map operator applied to ensemble members. \n",
+ "    \"\"\"\n",
+ "    d = detrend(d,type=\"constant\")\n",
+ "    d = resample(d,decimator,resampler)\n",
+ "    d = set_PStime(d,model=ttmodel)\n",
+ "    if d.live and \"Ptime\" in d:\n",
+ "        ptime = d[\"Ptime\"]\n",
+ "        d.ator(ptime)\n",
+ "        d = WindowData(d,win.start,win.end)\n",
+ "        d.rtoa()\n",
+ "        d = calib_engine.apply_calib(d)\n",
+ "    return d\n",
+ "\n",
+ "    "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2e6b737-53c6-4f00-bb91-e8527a122570",
+ "metadata": {},
+ "source": [
+ "This is the driver section, comparable to a fortran main. The algorithm here makes sense only for large ensembles like the extended USArray data set. We process the data in blocks that are the common source gathers. That was found to reduce the memory footprint of the processing with a modest overhead cost from submitting each ensemble to the cluster as a separate job.
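 One detail worth a note: the number_live function imported above is not otherwise used here, but it makes a convenient guard against handing save_data an empty ensemble if every member of a gather were killed. A minimal sketch of such a guard, placed just before the save in the loop below:\n\n```python\nif number_live(ens) == 0:\n    print('no live members for source_id', sid, '- skipping save')\n    continue\n```\n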
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "227cfea1-3103-4a36-9405-e6f837829bad", + "metadata": {}, + "outputs": [], + "source": [ + "from mspasspy.ccore.seismic import TimeSeriesEnsemble\n", + "from mspasspy.io.distributed import read_distributed_data\n", + "ttmodel = TauPyModel(model=\"iasp91\")\n", + "t0 = time.time()\n", + "# important to note these matchers are loaded from dbmd\n", + "# bulk_normalize above will set the matching ids\n", + "timequery = dbmdquery(year)\n", + "chan_matcher = ObjectIdMatcher(dbmd,\n", + " query=timequery,\n", + " collection=\"channel\",\n", + " attributes_to_load=[\"_id\",\"lat\",\"lon\",\"elev\",\"hang\",\"vang\"],\n", + " )\n", + "tstr1 = \"{}-01-01T00:00:00.0\".format(year)\n", + "tstr2 = \"{}-01-01T00:00:00.0\".format(year+1)\n", + "st1=UTCDateTime(tstr1)\n", + "st2=UTCDateTime(tstr2)\n", + "srcquery = {\"time\" : {\"$gte\" : st1.timestamp, \"$lt\" : st2.timestamp}}\n", + "source_matcher = ObjectIdMatcher(dbmd,\n", + " query=srcquery,\n", + " collection=\"source\",\n", + " attributes_to_load=[\"_id\",\"lat\",\"lon\",\"depth\",\"time\"],\n", + " load_if_defined=[\"magnitude\"],\n", + " )\n", + "target_sample_rate=20.0\n", + "resampler=ScipyResampler(target_sample_rate)\n", + "decimator=ScipyDecimator(target_sample_rate)\n", + "calib_engine = ApplyCalibEngine(dbmd)\n", + "tswin = TimeWindow(-200.0,500.0)\n", + "srcidlist_ms=db.wf_miniseed.distinct('source_id')\n", + "\n", + "srcidlist_finished = db.wf_TimeSeries.distinct('source_id')\n", + "if len(srcidlist_finished)>0:\n", + " srcidlist=[]\n", + " for sid in srcidlist_ms:\n", + " if sid not in srcidlist_finished:\n", + " srcidlist.append(sid)\n", + "else:\n", + " srcidlist = srcidlist_ms\n", + "\n", + "\n", + "print(\"Number of distinct sources in wf_miniseed= \",len(srcidlist_ms))\n", + "print(\"Number to process this run=\",len(srcidlist))\n", + "# reduce size for testing - fails submitting all at once\n", + "#sidtmp=[]\n", + "#for i in range(10):\n", + "# sidtmp.append(srcidlist[i])\n", + "#srcidlist=sidtmp\n", + "\n", + "for sid in srcidlist:\n", + " print(\"working on \",sid)\n", + " query = {\"source_id\" : sid}\n", + " mydata = read_distributed_data(db,\n", + " query=query,\n", + " collection=\"wf_miniseed\",\n", + " normalize=[chan_matcher,source_matcher],\n", + " npartitions=60,\n", + " )\n", + " mydata=mydata.map(atomic_ts_processor,\n", + " decimator,\n", + " resampler,\n", + " ttmodel,\n", + " tswin,\n", + " calib_engine)\n", + " dlist = mydata.compute()\n", + " # package into ensemble for a faster write \n", + " ens=TimeSeriesEnsemble(len(dlist))\n", + " for d in dlist:\n", + " ens.member.append(d)\n", + " ens.set_live()\n", + " ens = set_file_path(ens)\n", + " ens = db.save_data(ens,\n", + " return_data=True,\n", + " collection=\"wf_TimeSeries\",\n", + " storage_mode=\"file\",\n", + " dir=tsdir,\n", + " data_tag=\"preprocessed_map\",\n", + " )\n", + " del ens\n", + " \n", + " \n", + "t = time.time()\n", + "print(\"Run time = \",t-t0,\" seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0968e936-1359-490d-8ed8-80f92381567a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78c2b5f8-b206-4bd7-841b-3258dac4bd0c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/USArrayProcessExample/Preprocess2ts_template.ipynb b/notebooks/USArrayProcessExample/Preprocess2ts_template.ipynb new file mode 100644 index 0000000..af83163 --- /dev/null +++ b/notebooks/USArrayProcessExample/Preprocess2ts_template.ipynb @@ -0,0 +1,392 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "554edf59-2c25-4210-8197-d4706709fd38", + "metadata": {}, + "source": [ + "# Extended USArray data preprocessing stage 2\n", + "This notebook does the first stage of actual preprocessing of the extended USArray data set. It should be run after the notebook curently called index_mseed_*.ipynb where the * is normally a year. It must be run before a related notebook called Preproces2seis_*.ipynb. After experimenting with several variants this approach seems to be the best solution for performance with a workable memory footprint. It does a fairly long string of operations all contained in the function atomic_ts_processor. \n", + "\n", + "Notice the approach here is processing the data as ensembles grouped by source_id. The parallel section submits data by ensemble the cluster rather than doing the entire data et in one massive map operator. That was done because currently dask will not handle bags of that size and we alway seem to abort on a memory fault. Another reason the ensemble approach is a good one is it allows this job to be rerun if there are problems. Note how the query that looks for source_id values in the output wf_TimeSeries collection and runs only ids that are not already present. In the rare case that a job would abort with only part of an ensemble saved, that could drop data but the only way that should happen is if the write hit a file limit. In that case, we would know as we couldn't continue until the file limit (e.g. quota) was resolved.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd9da46d-fa80-4c4a-b500-7af04be13de8", + "metadata": {}, + "outputs": [], + "source": [ + "# change for different calendar year\n", + "year = 2014\n", + "dbname = \"usarray{}\".format(year)\n", + "tsdir=\"./wf_TimeSeries/{}\".format(year)\n", + "dbmaster=\"usarray48\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54c091f6-7d4d-4bdb-973d-9fc7f19ffb8f", + "metadata": {}, + "outputs": [], + "source": [ + "import mspasspy.client as msc\n", + "mspass_client = msc.Client(database_name=dbname)\n", + "# waveform data indexed to here\n", + "db = mspass_client.get_database()\n", + "# master database with source and receiver metadata\n", + "dbmd = mspass_client.get_database(dbmaster)\n" + ] + }, + { + "cell_type": "markdown", + "id": "38371123-fa1f-457e-ad1c-bcc12e8914c8", + "metadata": {}, + "source": [ + "First do imports and define special functions used for this workflow." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "536bce0c-cc54-4f42-965b-7f05597be17f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from mspasspy.algorithms.window import WindowData\n",
+ "from mspasspy.algorithms.signals import detrend\n",
+ "from mspasspy.ccore.algorithms.basic import TimeWindow\n",
+ "from mspasspy.ccore.utility import ErrorSeverity\n",
+ "from mspasspy.algorithms.resample import (ScipyResampler,\n",
+ "                                          ScipyDecimator,\n",
+ "                                          resample,\n",
+ "                                          )\n",
+ "from mspasspy.db.normalize import ObjectIdMatcher\n",
+ "from mspasspy.algorithms.calib import ApplyCalibEngine\n",
+ "from mspasspy.util.seismic import number_live\n",
+ "from obspy import UTCDateTime\n",
+ "from obspy.geodetics import gps2dist_azimuth,kilometers2degrees\n",
+ "from obspy.taup import TauPyModel\n",
+ "from dask.distributed import as_completed\n",
+ "import time\n",
+ "import math\n",
+ "\n",
+ "\n",
+ "\n",
+ "def set_PStime(d,Ptimekey=\"Ptime\",Stimekey=\"Stime\",model=None):\n",
+ "    \"\"\"\n",
+ "    Function to calculate P and S wave arrival times and set them \n",
+ "    as the header (Metadata) fields defined by Ptimekey and Stimekey.\n",
+ "    Tries to handle some complexities of what the travel time calculator \n",
+ "    returns when one or both of P and S cannot be calculated. That is \n",
+ "    the norm in or at the edge of the core shadow. \n",
+ "    \n",
+ "    :param d: input TimeSeries datum. Assumes the datum's Metadata \n",
+ "      contains stock source and channel attributes. \n",
+ "    :param Ptimekey: key used to define the header attribute that \n",
+ "      will contain the computed P time. Default \"Ptime\".\n",
+ "    :param Stimekey: comparable key for the computed S time. Default \"Stime\".\n",
+ "    :param model: instance of obspy TauPyModel travel time engine. \n",
+ "      Default is None. That mode is slow as a new engine will be\n",
+ "      constructed on each call to the function. Normal use should \n",
+ "      pass an instance for greater efficiency. \n",
+ "    \"\"\"\n",
+ "    if d.live:\n",
+ "        if model is None:\n",
+ "            model = TauPyModel(model=\"iasp91\") \n",
+ "        # extract required source attributes\n",
+ "        srclat=d[\"source_lat\"]\n",
+ "        srclon=d[\"source_lon\"]\n",
+ "        srcz=d[\"source_depth\"]\n",
+ "        srct=d[\"source_time\"] \n",
+ "        # extract required channel attributes\n",
+ "        stalat=d[\"channel_lat\"]\n",
+ "        stalon=d[\"channel_lon\"]\n",
+ "        staelev=d[\"channel_elev\"]\n",
+ "        # set up and run travel time calculator\n",
+ "        georesult=gps2dist_azimuth(srclat,srclon,stalat,stalon)\n",
+ "        # obspy's function we just called returns distance in m in element 0 of a tuple\n",
+ "        # their travel time calculator wants degrees so we need this conversion\n",
+ "        dist=kilometers2degrees(georesult[0]/1000.0)\n",
+ "        arrivals=model.get_travel_times(source_depth_in_km=srcz,\n",
+ "                                        distance_in_degree=dist,\n",
+ "                                        phase_list=['P','S'])\n",
+ "        # always post this as it is not cheap to compute\n",
+ "        # WARNING: don't use common abbreviation delta - collides with data dt\n",
+ "        d['epicentral_distance']=dist\n",
+ "        # these are CSS3.0 shorthands s - station e - event\n",
+ "        esaz = georesult[1]\n",
+ "        seaz = georesult[2]\n",
+ "        # css3.0 names: esaz = event to station azimuth; seaz = station to event azimuth\n",
+ "        d['esaz']=esaz\n",
+ "        d['seaz']=seaz\n",
+ "        # get_travel_times returns an empty list if a P time cannot be \n",
+ "        # calculated. 
We trap that condition and kill the output \n",
+ "        # with an error message\n",
+ "        if len(arrivals)==2:\n",
+ "            Ptime=srct+arrivals[0].time\n",
+ "            rayp = arrivals[0].ray_param\n",
+ "            Stime=srct+arrivals[1].time\n",
+ "            rayp_S = arrivals[1].ray_param\n",
+ "            d.put(Ptimekey,Ptime)\n",
+ "            d.put(Stimekey,Stime)\n",
+ "            # These keys are not passed as arguments but could be - a choice\n",
+ "            # Ray parameter is needed for free surface transformation operator\n",
+ "            # note tau p calculator in obspy returns p=R sin(theta)/V_0\n",
+ "            d.put(\"rayp_P\",rayp)\n",
+ "            d.put(\"rayp_S\",rayp_S)\n",
+ "        elif len(arrivals)==1:\n",
+ "            if arrivals[0].name == 'P':\n",
+ "                Ptime=srct+arrivals[0].time\n",
+ "                rayp = arrivals[0].ray_param\n",
+ "                d.put(Ptimekey,Ptime)\n",
+ "                d.put(\"rayp_P\",rayp)\n",
+ "            else:\n",
+ "                # Not sure we can assume name is S\n",
+ "                if arrivals[0].name == 'S':\n",
+ "                    Stime=srct+arrivals[0].time\n",
+ "                    rayp_S = arrivals[0].ray_param\n",
+ "                    d.put(Stimekey,Stime)\n",
+ "                    d.put(\"rayp_S\",rayp_S)\n",
+ "                else:\n",
+ "                    message = \"Unexpected single phase name returned by taup calculator\\n\"\n",
+ "                    message += \"Expected phase name S but got \" + arrivals[0].name\n",
+ "                    d.elog.log_error(\"set_PStime\",\n",
+ "                                     message,\n",
+ "                                     ErrorSeverity.Invalid)\n",
+ "                    d.kill()\n",
+ "        else:\n",
+ "            # in this context this only happens if no P or S could be calculated\n",
+ "            # That shouldn't ever happen but we need this safety in the event it does\n",
+ "            message = \"Travel time calculator failed completely\\n\"\n",
+ "            message += \"Could not calculate P or S phase time\"\n",
+ "            d.elog.log_error(\"set_PStime\",\n",
+ "                             message,\n",
+ "                             ErrorSeverity.Invalid)\n",
+ "            d.kill()\n",
+ "    return d\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "def set_file_path(e,dir=tsdir):\n",
+ "    \"\"\"\n",
+ "    This function is used to set dir and dfile for each ensemble in this \n",
+ "    workflow. Note these are set in the ensemble's metadata container, \n",
+ "    not the members. We don't set them for members as Database.save_data \n",
+ "    only uses the kwargs dir and dfile with the metadata as a fallback.\n",
+ "    \"\"\"\n",
+ "    e['dir']=dir\n",
+ "    dfile = \"dfile_undefined\"\n",
+ "    for d in e.member:\n",
+ "        if d.live and 'dfile' in d:\n",
+ "            dfile=d['dfile']\n",
+ "            # strip the file extension (e.g. .mseed); safe if there is none\n",
+ "            dfile=dfile.split(\".\")[0]\n",
+ "            break\n",
+ "    e[\"dfile\"] = dfile\n",
+ "    return e\n",
+ "\n",
+ "def dbmdquery(year,padsize=86400.0):\n",
+ "    \"\"\"\n",
+ "    Constructs a MongoDB query dictionary to use as a query argument for normalization matcher classes.\n",
+ "    (All standard BasicMatcher children have a query argument in the constructor for this purpose.)\n",
+ "    The query is a range spanning the specified calendar year. The interval is extended by padsize \n",
+ "    seconds. 
(default is one day = 86400.0)\n",
+ "\n",
+ "    Note this query is appropriate for channel, not site.\n",
+ "    \"\"\"\n",
+ "    # uses obspy's UTCDateTime class to create the time range in epoch time using \n",
+ "    # calendar strings for convenience\n",
+ "    tstr = \"{}-01-01T00:00:00.0\".format(year)\n",
+ "    st = UTCDateTime(tstr)\n",
+ "    starttime = st.timestamp - padsize\n",
+ "    tstr = \"{}-01-01T00:00:00.0\".format(year+1)\n",
+ "    et = UTCDateTime(tstr)\n",
+ "    endtime = et.timestamp + padsize\n",
+ "    # the explicit $and may not be required but better safe than sorry\n",
+ "    query = { \"$and\" :\n",
+ "             [\n",
+ "                 {\"starttime\" : {\"$lte\" : endtime}},\n",
+ "                 {\"endtime\" : {\"$gte\" : starttime}}\n",
+ "             ]\n",
+ "           }\n",
+ "    return query\n",
+ "\n",
+ "    "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19edb463-0a68-4451-9709-d5985515da1e",
+ "metadata": {},
+ "source": [
+ "This function is the one used in the map operator run on each ensemble. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c61ecb40-6e9a-40cf-b7a9-192ced9db52e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def atomic_ts_processor(d,decimator,resampler,ttmodel,win,calib_engine):\n",
+ "    \"\"\"\n",
+ "    This function chains all the processing functions applied to an input TimeSeries (d). It is called in this \n",
+ "    workflow in a map operator applied to ensemble members. \n",
+ "    \"\"\"\n",
+ "    d = detrend(d,type=\"constant\")\n",
+ "    d = resample(d,decimator,resampler)\n",
+ "    d = set_PStime(d,model=ttmodel)\n",
+ "    if d.live and \"Ptime\" in d:\n",
+ "        ptime = d[\"Ptime\"]\n",
+ "        d.ator(ptime)\n",
+ "        d = WindowData(d,win.start,win.end)\n",
+ "        d.rtoa()\n",
+ "        d = calib_engine.apply_calib(d)\n",
+ "    return d\n",
+ "\n",
+ "    "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2e6b737-53c6-4f00-bb91-e8527a122570",
+ "metadata": {},
+ "source": [
+ "This is the driver section, comparable to a fortran main. The algorithm here makes sense only for large ensembles like the extended USArray data set. We process the data in blocks that are the common source gathers. That was found to reduce the memory footprint of the processing with a modest overhead cost from submitting each ensemble to the cluster as a separate job.
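 Because the loop below skips source_id values already present in wf_TimeSeries, a quick way to gauge the restart state before launching is a pair of distinct queries (a sketch using the db handle defined above):\n\n```python\nn_ms = len(db.wf_miniseed.distinct('source_id'))\nn_done = len(db.wf_TimeSeries.distinct('source_id'))\nprint(n_done, 'of', n_ms, 'source gathers already processed')\n```\n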
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "227cfea1-3103-4a36-9405-e6f837829bad", + "metadata": {}, + "outputs": [], + "source": [ + "from mspasspy.ccore.seismic import TimeSeriesEnsemble\n", + "from mspasspy.io.distributed import read_distributed_data\n", + "ttmodel = TauPyModel(model=\"iasp91\")\n", + "t0 = time.time()\n", + "# important to note these matchers are loaded from dbmd\n", + "# bulk_normalize above will set the matching ids\n", + "timequery = dbmdquery(year)\n", + "chan_matcher = ObjectIdMatcher(dbmd,\n", + " query=timequery,\n", + " collection=\"channel\",\n", + " attributes_to_load=[\"_id\",\"lat\",\"lon\",\"elev\",\"hang\",\"vang\"],\n", + " )\n", + "tstr1 = \"{}-01-01T00:00:00.0\".format(year)\n", + "tstr2 = \"{}-01-01T00:00:00.0\".format(year+1)\n", + "st1=UTCDateTime(tstr1)\n", + "st2=UTCDateTime(tstr2)\n", + "srcquery = {\"time\" : {\"$gte\" : st1.timestamp, \"$lt\" : st2.timestamp}}\n", + "source_matcher = ObjectIdMatcher(dbmd,\n", + " query=srcquery,\n", + " collection=\"source\",\n", + " attributes_to_load=[\"_id\",\"lat\",\"lon\",\"depth\",\"time\"],\n", + " load_if_defined=[\"magnitude\"],\n", + " )\n", + "target_sample_rate=20.0\n", + "resampler=ScipyResampler(target_sample_rate)\n", + "decimator=ScipyDecimator(target_sample_rate)\n", + "calib_engine = ApplyCalibEngine(dbmd)\n", + "tswin = TimeWindow(-200.0,500.0)\n", + "srcidlist_ms=db.wf_miniseed.distinct('source_id')\n", + "\n", + "srcidlist_finished = db.wf_TimeSeries.distinct('source_id')\n", + "if len(srcidlist_finished)>0:\n", + " srcidlist=[]\n", + " for sid in srcidlist_ms:\n", + " if sid not in srcidlist_finished:\n", + " srcidlist.append(sid)\n", + "else:\n", + " srcidlist = srcidlist_ms\n", + "\n", + "\n", + "print(\"Number of distinct sources in wf_miniseed= \",len(srcidlist_ms))\n", + "print(\"Number to process this run=\",len(srcidlist))\n", + "# reduce size for testing - fails submitting all at once\n", + "#sidtmp=[]\n", + "#for i in range(10):\n", + "# sidtmp.append(srcidlist[i])\n", + "#srcidlist=sidtmp\n", + "\n", + "for sid in srcidlist:\n", + " print(\"working on \",sid)\n", + " query = {\"source_id\" : sid}\n", + " mydata = read_distributed_data(db,\n", + " query=query,\n", + " collection=\"wf_miniseed\",\n", + " normalize=[chan_matcher,source_matcher],\n", + " npartitions=60,\n", + " )\n", + " mydata=mydata.map(atomic_ts_processor,\n", + " decimator,\n", + " resampler,\n", + " ttmodel,\n", + " tswin,\n", + " calib_engine)\n", + " dlist = mydata.compute()\n", + " # package into ensemble for a faster write \n", + " ens=TimeSeriesEnsemble(len(dlist))\n", + " for d in dlist:\n", + " ens.member.append(d)\n", + " ens.set_live()\n", + " ens = set_file_path(ens)\n", + " ens = db.save_data(ens,\n", + " return_data=True,\n", + " collection=\"wf_TimeSeries\",\n", + " storage_mode=\"file\",\n", + " dir=tsdir,\n", + " data_tag=\"preprocessed_map\",\n", + " )\n", + " del ens\n", + " \n", + " \n", + "t = time.time()\n", + "print(\"Run time = \",t-t0,\" seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0968e936-1359-490d-8ed8-80f92381567a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78c2b5f8-b206-4bd7-841b-3258dac4bd0c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/USArrayProcessExample/Sidebar.ipynb b/notebooks/USArrayProcessExample/Sidebar.ipynb new file mode 100644 index 0000000..6c820ac --- /dev/null +++ b/notebooks/USArrayProcessExample/Sidebar.ipynb @@ -0,0 +1,150 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "442acc98-4ad4-49ec-a7d7-de6c3f3dfb55", + "metadata": {}, + "source": [ + "# Sidebar Notebook\n", + "Something I have found universally useful in working with large data sets like this is to create a scatch notebook like this one. The purpose is to run one up validations while working on the full data set. I include this notebook in this mspass_tutorial directory to illustrate the use, but anyone reading this should recognize a notebook like this should be treated as a scratch workspace that connects to MongoDB and allows you to explore the state of your data. \n", + "\n", + "The first step is always the stock MsPASS incantation to connect to MongoDB. The dataase name in the call `dbclient.get_database()` is, of course, a variable. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c40d53d-6cf0-49b4-8847-30bdeb5ee4ba", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from mspasspy.db.client import DBClient\n", + "dbclient=DBClient()\n", + "db = dbclient.get_database(\"usarray2014\")" + ] + }, + { + "cell_type": "markdown", + "id": "a183e08d-ea54-43e5-ac15-9f5f45b391ee", + "metadata": {}, + "source": [ + "Here is a common initial, fast test. Peek at the first record of wf_miniseed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37daff06-5b98-446a-9a6f-c0d00f854e67", + "metadata": {}, + "outputs": [], + "source": [ + "from mspasspy.util.seismic import print_metadata\n", + "doc=db.wf_miniseed.find_one()\n", + "print_metadata(doc)" + ] + }, + { + "cell_type": "markdown", + "id": "803f52ce-cc23-4a61-aae7-e01e43788a68", + "metadata": {}, + "source": [ + "This is a typical example of a validation block. Checks how well normalization worked on this database." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5ecd333-8aed-43aa-a8ce-d21fec26be04", + "metadata": {}, + "outputs": [], + "source": [ + "n = db.wf_miniseed.count_documents({})\n", + "print(\"Total number of documents in wf_miniseed=\",n)\n", + "query={\"channel_id\" : {\"$exists\" : True}}\n", + "n1 = db.wf_miniseed.count_documents(query)\n", + "print(\"Number of documents with channel_id defined=\",n1)" + ] + }, + { + "cell_type": "markdown", + "id": "971ceb51-3998-4878-8761-efd905edac1b", + "metadata": {}, + "source": [ + "This command is often useful if something went wrong during indexing and wf_miniseed is incomplete and needs to rebuilt. It also illustrate that when I create one of these notebooks I ONLY run it interactively and the cells are not necessarily all executed or run in sequence. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b3dcc36-38e1-4c16-a556-5aa09921d197", + "metadata": {}, + "outputs": [], + "source": [ + "db.drop_collection(\"wf_miniseed\")" + ] + }, + { + "cell_type": "markdown", + "id": "45c21aa9-1d2e-49c3-abe6-3cdb00a1e041", + "metadata": {}, + "source": [ + "This command is often useful to check the status of the database." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c968ec7e-7081-4b82-9621-fd0f2457690c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cursor=db.list_collections()\n",
+ "for doc in cursor:\n",
+ "    print(doc['name'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d9e0c0f1-7a93-4b57-a532-a9f77bdb9af5",
+ "metadata": {},
+ "source": [
+ "This incantation is useful to clear out everything except wf_miniseed. Use it if something went wrong in one of the \"Preprocess*\" scripts and you need to start over from only the wf_miniseed content."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "58786a53-80b3-4d29-bf2e-88b459aa4584",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cleanlist=[\"abortions\",\"cemetery\",\"history_object\",\"wf_Seismogram\",\"wf_TimeSeries\",\"history_global\",\"fs.chunks\",\"fs.files\",\"elog\"]\n",
+ "for c in cleanlist:\n",
+ "    db.drop_collection(c)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.15"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/USArrayProcessExample/WorkflowREADME.ipynb b/notebooks/USArrayProcessExample/WorkflowREADME.ipynb
new file mode 100644
index 0000000..553f2b9
--- /dev/null
+++ b/notebooks/USArrayProcessExample/WorkflowREADME.ipynb
@@ -0,0 +1,168 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "35d48c4f-67fc-4743-99e8-a5f0794ac105",
+ "metadata": {},
+ "source": [
+ "# Extended USArray Processing Workflow\n",
+ "This notebook has no python code. It documents how the extended usarray data set was assembled and how to run the notebooks in this directory to process that data. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4a6596ac-dff8-4b25-9898-2b7bcd043234",
+ "metadata": {},
+ "source": [
+ "## Creation of data set\n",
+ "The data set involved was actually assembled several years ago. It used a sequence of python scripts to drive obspy's mass download machinery to assemble the waveforms. An example is found in the subdirectory below the current one called \"download_scripts\". Specifics can be found there but there are several points to note here:\n",
+ "1. The data were assembled in one-year chunks.\n",
+ "2. The data driving the assembly is a list of teleseismic earthquakes. That list is obtained by running the python function `get_catalog` in \"rfteledownload.py\", which uses a point defined by the file \"year_centers.txt\" and finds all events with magnitude > 4.9 at distances from 25 to 95 degrees from that point.\n",
+ "3. The waveform data and station data were downloaded using obspy driven by the function called `download_events`, also found in \"rfteledownload.py\". Note the station search window is hard coded inside that function with an ObsPy `RectangularDomain` class. See the function for the region.\n",
+ "\n",
+ "That procedure was marginally feasible with web services a few years ago. It took approximately 6 months, with various starts and stops, to assemble these data. A better metric: with no problems, each year took around a week to assemble. The additional delays came from figuring out how to deal with the disconnect between what I got and what I needed. 
I'm not sure I preserved all the details, but here are some things I know I had to deal with:\n",
+ "1. The original waveforms were downloaded as millions of single channel files. That would not have worked at all on an HPC file system, but I did this assembly on an old Mac with a conventional unix file system. I concatenated files in the event directories the download produced into event files with names like \"event1.mseed\", \"event2.mseed\", ... The miniseed event files were then saved on an archival storage system at Indiana University.\n",
+ "2. The download created images of the xml data downloaded. The event data was in single files and the station data was placed into another monster set of files in year directories. I archived that data, but later learned it was best to simply ignore it and refresh the same data with web services. See below.\n",
+ "\n",
+ "I must note that I did my best, but I have no way to know if these data are complete. Several years required multiple runs to get all the data because the long-running script died for one reason or another. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d0a6545-fbd0-4323-9222-1d8d2535b52c",
+ "metadata": {},
+ "source": [
+ "## Step 1: Create master Metadata database\n",
+ "Earlier attempts to create a working database for the site, channel, and source collections went through two variations I abandoned in favor of what is done here:\n",
+ "1. I tried using the xml files obspy had downloaded when I first started this years ago. That was a disaster because of duplicates and the absurd number of files.\n",
+ "2. Until recently I used a procedure similar to that used in various tutorials from MsPASS short courses. That is, I would download event and station metadata matching the time span of the section of data being processed.\n",
+ "\n",
+ "I realized the second approach was problematic when it came to assembling the full data set from 2005-2015. I could have merged the pieces, but when I hit a similar problem at the end of the sequence, merging all the receiver functions into a common database, I realized it made more sense to make a master database for the full project and use the yearlies as more of a scratch database that would not necessarily even be retained. \n",
+ "\n",
+ "With that background, step one for processing the entire data set is to run the notebook called \"CreateMasterDatabase.ipynb\". I ran that notebook on the cluster as an interactive job using the jupyter lab interface. The master database then contains the output of running that notebook. I should have recorded the elapsed time but it was at least an hour. Running that notebook creates three collections spanning Jan 1, 2005 to Dec 31, 2015: source, site, and channel. The source collection is much larger than what I downloaded as the lower magnitude limit was 4.5 versus 4.9 in the download. I did that to make sure we had all the event data. It makes the source collection about 4 times bigger than the original. Still tiny, however, compared to the total number of waveforms, of which there are around 50 million."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "95adc03b-cc8a-4d20-841f-8d81532713cf",
+ "metadata": {},
+ "source": [
+ "## Step 2: Preprocessing\n",
+ "### Overview\n",
+ "Preprocessing is defined here as taking the raw miniseed data downloaded earlier and producing a set of Seismogram objects that can be used as input for P to S impulse response estimates. 
The processing forks at that point depending upon how one wants to produce the impulse response estimates. The workflow preserved here on GitHub finalizes the processing using the new \"CNRDecon\" function to produce a form of \"receiver functions\" where the deconvolution is performed on each Seismogram object without any information from any other data. I am developing an array deconvolution workflow that may eventually appear in the mspass_tutorials repository but that was not finished when this notebook was written. \n",
+ "\n",
+ "The top-level structure of this processing reflects the fact that it was designed to be run in one-year chunks. There were two reasons for that choice. First, it is a natural, simple organization. Second, the size of the raw data from one year of the extended USArray set of stations is about right to chew on with current generation hardware - around 0.5 to 1 TB per year depending on the year (2011 is about twice as big as other years because of the huge number of aftershocks of the Tohoku earthquake in Japan in early 2011). For that reason the rest of this main section describes what needs to be run for each year block."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32b5b441-8ed9-46d3-854c-a7bd6e880983",
+ "metadata": {},
+ "source": [
+ "### Year step 1: Retrieve miniseed data from archive\n",
+ "A copy of the miniseed files that define this data set is archived at Indiana University and TACC. I believe the two copies are identical. 2005 and 2006 will require some special attention and are not discussed in this document I'm posting to GitHub. They were my original prototypes and I handled them differently. It would only confuse this tutorial to discuss that different handling. For now, step 1 is to retrieve the event files from the mass store at TACC or the IU system. The next phase *requires* the miniseed files to be placed in \"year\" directories rooted at \"./wf\" where \".\" is your selected work directory. e.g. if you are working on the data from 2012 the miniseed files should be placed in the directory \"./wf/2012\". Similarly, 2013 data should be placed in \"./wf/2013\". The next step would need to be modified if you change that file organization. Note the archive file names are fixed and have names like \"event1.mseed\", \"event2.mseed\", ..., \"eventN.mseed\" where N is the number of events for that year. \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ff78dd05-e541-43a0-b7f6-4bbb76734b0a",
+ "metadata": {},
+ "source": [
+ "### Year step 2: Build wf_miniseed\n",
+ "The first serious processing is done by making a copy of the notebook called \"index_mseed_template.ipynb\" that will become your run file. I recommend you just change \"template\" to the year you are processing. e.g. if you are working on 2012 I would run this in the shell:\n",
+ "```\n",
+ "cp index_mseed_template.ipynb index_mseed_2012.ipynb\n",
+ "```\n",
+ "Then edit one line in the first code cell of the copy you created. The template currently has this:\n",
+ "```\n",
+ "# change for different calendar year\n",
+ "year = 2014\n",
+ "```\n",
+ "As the comment suggests, change the year as appropriate - 2012 for the example immediately above. \n",
+ "\n",
+ "Run that script on your cluster. This job does not benefit much from a large number of cores and is probably best run on a single node. 
The reason is that over half the processor time is spent running the `bulk_normalize` function to create the id cross references for the source, site, and channel collections. That is not parallelized in this notebook and takes quite some time to run. Allow 8 hours in a batch job to be sure, but most years will run in less than 2 hours. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4f22bacd-93cd-4a2e-be12-f1b5c8050654",
+ "metadata": {},
+ "source": [
+ "### Year step 3: TimeSeries processing\n",
+ "The first notebook that does anything more than bookkeeping is the one you run next. The master is called \"Preprocess2ts_template.ipynb\". Like before, for the year being processed you should:\n",
+ "1. Copy the master file changing \"template\" to the relevant year.\n",
+ "2. Edit the file changing \"year = 2014\" to the relevant year.\n",
+ "\n",
+ "If the previous step completed successfully you should be able to run this notebook on the cluster. A few points about this part of the processing:\n",
+ "1. With this set of waveform files as structured here this is a large-memory job. The reason is that if you look at the notebook you will see the processing is a loop over common source gathers and each gather is processed in a parallel construct. The processed gather is merged in a serial loop after collecting all the data from the cluster (what happens when the bag compute method is called). An advantage of that approach is it speeds output with the binary ensemble writer, but at the cost of being a large-memory algorithm. At IU I had to configure the job so that each worker had 25 GB of memory. On the IU system that meant I had to request 25 GB per MPI process, set the cpus-per-task to fit in the system's physical memory, and set the number of workers to match. On IU's bigred200 I set slurm up as follows:\n",
+ "```\n",
+ "#SBATCH --mem-per-cpu=25G\n",
+ "#SBATCH --cpus-per-task=8\n",
+ "```\n",
+ "and launched workers with this incantation:\n",
+ "```\n",
+ "export MSPASS_WORKER_ARG=\"--nworkers 8 --nthreads 1 --memory-limit=25G\"\n",
+ "```\n",
+ "2. Note the notebook writes event files in a new directory called \"./wf_TimeSeries\" with a year directory parallel to that in wf. e.g. if you are working on 2012 data the input will be miniseed files in wf/2012 and the processed output will appear in the directory \"wf_TimeSeries/2012\". Those files, however, are plain binary fwrite output of all the ensemble sample data. They are like an old fortran unformatted write and are useless without the database documents created in the wf_TimeSeries collection. The documents in wf_TimeSeries are the Metadata describing the sample data stored in the binary files. The names are the same as the parent miniseed files but we strip the \".mseed\" so the files have shorter names like \"event1\", \"event2\", etc.\n",
+ "3. This notebook takes the longest time to run of the series. On IU's bigred200 I requested 12 hours for a run, which was close for most years. I would double that for 2011 as that year had almost twice as many events due to the Tohoku earthquake as noted above."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ee3f4d21-c715-4f48-b960-0c73be0b1f51",
+ "metadata": {},
+ "source": [
+ "### Year step 4: Create edited Seismogram data\n",
+ "The template for this step is the notebook called \"Preprocess2seis_template.ipynb\". As with the previous year templates the top-level steps are to copy the file to one for the year, change the year in that file, and run it on the cluster.
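 Following the same pattern as the index_mseed step, for example for the 2013 data:\n```\ncp Preprocess2seis_template.ipynb Preprocess2seis_2013.ipynb\n```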
\n", + "\n", + "This workflow is the most compute bound job of the series and will likely scale best if run on many nodes. The main bottleneck is running the function called `broadband_snr_QC`. That function does a lot of computation to compute a long list of metrics measuring the quality of a signal using a set of different measures of signal-to-noise ratio. The job is not at all IO bound from what I can tell because it reads and write ensembles in the binary format, which is the fastest IO method currently available with MsPASS and the computational load of `broadband_snr_QC` is not trivial. \n", + "\n", + "This workflow creates a \"wf_Seismgram\" collection in the year database much like the previous step creates a \"wf_TimeSeries\" collection for that year It also writes binary files similar to those in wf_TimeSeries but containing the Seismgoram objects grouped in source files. They are direct descendents but not necessarily in the same order. e.g. the sample data in file \"./wf_TimeSeries/2012/event4\" becomes the data stored in \"./wf_Seismogram/2012/event4\" but rearranged to match Seismogram objects and modified by this workflow. \n", + "\n", + "This notebook scales well with multiple processors. For instance, the 2013 data took less than 2 hours to run with 3 nodes at IU. " + ] + }, + { + "cell_type": "markdown", + "id": "7b60c994-17e1-4bdb-9714-26b9a93ef378", + "metadata": {}, + "source": [ + "### Year Step 5: Receiver Function Estimation\n", + "The final step is to run a python script called \"CNRRFDecon.py\". As with other components you need to change the `year = xxxx` to the appropriate value. A difference at the momement is this part of the workflow is a pure python script and not in a notebook. Otherwise, you should recognize the same block of code at the top of the python script. \n", + "\n", + "A couple relevant details about running this script:\n", + "1. The file \"CNRDeconEngine.pf\" is required to run this script. It is an \"pf-file\" that defines a fairly extensive set of parameters that are required for the algorithm to run.\n", + "2. This script is serial and is a waste if run on more than a single node. The reason is we have a serialization issue with `CNRDeconEngine` that we have been unable to resolve. Yearly results, in my experience, take of the order of one hour to run so the serial algorithm is feasible. If submitted batch I would suggest requesting a 2 hour block to assure completion. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0b5a611-ac75-40d8-8ef4-e16a45315d5f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/USArrayProcessExample/index_mseed_template.ipynb b/notebooks/USArrayProcessExample/index_mseed_template.ipynb new file mode 100644 index 0000000..f56c75d --- /dev/null +++ b/notebooks/USArrayProcessExample/index_mseed_template.ipynb @@ -0,0 +1,137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cf17dfec-3239-4336-9d2f-5d4a1a93a442", + "metadata": {}, + "source": [ + "# Stage 1: Build wf_miniseed \n", + "This notebook is the first step for processing the extended usarray data set in year segments. It runs the indexing program to build a set of index documents in the wf_miniseed collection. It then runs bulk_normalize to create the channel_id and site_id cross references in the wf_miniseed documents. That is essential for the second stage of the processing.\n", + "\n", + "This is a separate notebook because prototypes demonstrated:\n", + "1. There are too many potential issues with miniseed data that can cause problems that is is useful to checkpoint the job at the end of the notebook to verify things are ok. That is particularly true of normalizations and potential miniseed data problems.\n", + "2. This notebook is note efficient to runw with a larger number of workers like the subsequent notebooks. Running in with only 8 workers or so with a small memory requirement can be it through faster than waiting for a larger job requiring more resources, which is the case for the notebooks run after this one. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "712c361f-185a-49b9-a548-4cf55cdca4ff", + "metadata": {}, + "outputs": [], + "source": [ + "# change for different calendar year\n", + "year = 2014\n", + "dbname = \"usarray{}\".format(year)\n", + "dbmaster=\"usarray48\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "506ac77c-d382-42b5-a3c8-795e400864ec", + "metadata": {}, + "outputs": [], + "source": [ + "import mspasspy.client as msc\n", + "mspass_client = msc.Client(database_name=dbname)\n", + "# waveform data indexed to here\n", + "db = mspass_client.get_database()\n", + "# master database with source and receiver metadata\n", + "dbmd = mspass_client.get_database(dbmaster)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "744ff640-a240-49d3-a5e2-ba53c75f8bb7", + "metadata": {}, + "outputs": [], + "source": [ + "# This builds a file list to drive index processing\n", + "import os\n", + "import fnmatch\n", + "import dask.bag as dbg\n", + "topdirectory=\"./wf\"\n", + "# assume year was defined at the top and data have the structure of wf/year/*.mseed\n", + "dir=\"{}/{}\".format(topdirectory,year)\n", + "filelist=fnmatch.filter(os.listdir(dir),'*.mseed')\n", + "tstart=time.time()\n", + "data = dbg.from_sequence(filelist)\n", + "data = data.map(db.index_mseed_file,dir)\n", + "data=data.compute()\n", + "tend=time.time()\n", + "print(\"Elapsed time to run index_mseed_file=\",tend-tstart)" + ] + }, + { + "cell_type": "markdown", + "id": "eff8fabf-11e6-45d6-8d8a-0b3e4b9e5218", + "metadata": {}, + "source": [ + "Normalization of the mseed records is complicated by the fact we are using two databases here. The current normalize_mseed will not work because it assumes one db holds wf_miniseed and the channel-site collections. For that reason I use bulk_normalize which is more generic. That, of course, is why the first part of this block creates the MiniseedMatcher and OriginTimeMatcher instance. \n", + "\n", + "Note the memory footprint of this job could be reduced by using the query parameter for the constructors for MiniseedMatcher and OriginTimeMatcher, but since this section is a serial job and the objects aren't that huge anyway I don't bother. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4988418-28ab-4ea5-9190-9e307e31d3c5", + "metadata": {}, + "outputs": [], + "source": [ + "from mspasspy.db.normalize import MiniseedMatcher,OriginTimeMatcher,bulk_normalize\n", + "# prepend_collection_name defaults to True but best to be clear that is essential here\n", + "chan_matcher = MiniseedMatcher(dbmd,\n", + " collection=\"channel\",\n", + " prepend_collection_name=True,\n", + " )\n", + "site_matcher = MiniseedMatcher(dbmd,\n", + " collection=\"site\",\n", + " attributes_to_load=[\"_id\",\"lat\",\"lon\",\"elev\",\"starttime\",\"endtime\"],\n", + " prepend_collection_name=True,\n", + " )\n", + "source_matcher = OriginTimeMatcher(dbmd,\n", + " t0offset=0.0,\n", + " tolerance=100.0,\n", + " attributes_to_load=['_id','time'])\n", + "bno=bulk_normalize(db,wf_col=\"wf_miniseed\",matcher_list=[chan_matcher,site_matcher,source_matcher])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "448140e0-9beb-462c-a424-b991d625b2bc", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Number of channel_id values set=\",bno[1])\n", + "print(\"Number of site_id values set=\",bno[2])\n", + "print(\"Number of source_id values set=\",bno[3])\n", + "print(\"Number of documents processed=\",bno[0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 920d047e8c6b5ee46ac7350ab37d49a6c4afe083 Mon Sep 17 00:00:00 2001 From: Gary Pavlis Date: Sat, 5 Apr 2025 05:26:02 -0400 Subject: [PATCH 2/7] Add download scripts for reference --- .../download_scripts/download2007.py | 26 ++++++ .../download_scripts/download2008.py | 19 ++++ .../download_scripts/download2009.py | 19 ++++ .../download_scripts/download2009_run2.py | 19 ++++ .../download_scripts/download2010.py | 19 ++++ .../download_scripts/download2011.py | 19 ++++ .../download_scripts/download2011_run2.py | 21 +++++ .../find_duplicate_net_sta.py | 88 +++++++++++++++++++ .../print_net_sta_chan_loc.py | 44 ++++++++++ .../download_scripts/rfteledownload.py | 87 ++++++++++++++++++ .../download_scripts/year_centers.txt | 11 +++ 11 files changed, 372 insertions(+) create mode 100644 notebooks/USArrayProcessExample/download_scripts/download2007.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/download2008.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/download2009.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/download2009_run2.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/download2010.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/download2011.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/download2011_run2.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/find_duplicate_net_sta.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/print_net_sta_chan_loc.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/rfteledownload.py create mode 100644 notebooks/USArrayProcessExample/download_scripts/year_centers.txt diff --git 
a/notebooks/USArrayProcessExample/download_scripts/download2007.py b/notebooks/USArrayProcessExample/download_scripts/download2007.py
new file mode 100644
index 0000000..a3660a9
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/download2007.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Apr 10 09:09:14 2020
+
+@author: pavlis
+"""
+import time
+from rfteledownload import read_centers
+from rfteledownload import get_catalog
+from rfteledownload import download_events
+
+centers=read_centers()
+yr=2007
+t0=time.time()
+print("Running get_catalog")
+cat=get_catalog(centers,year=yr)
+fname=("events_%d.xml" % yr)
+cat.write(fname,format='QUAKEML')
+t1=time.time()
+print("Elapsed time to fetch catalog=",t1-t0)
+print("Running download_events")
+t1=time.time()
+download_events(cat,yeartag=str(yr))
+t2=time.time()
+print("Elapsed time to download events=",t2-t1)
diff --git a/notebooks/USArrayProcessExample/download_scripts/download2008.py b/notebooks/USArrayProcessExample/download_scripts/download2008.py
new file mode 100644
index 0000000..903c260
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/download2008.py
@@ -0,0 +1,19 @@
+import time
+from rfteledownload import read_centers
+from rfteledownload import get_catalog
+from rfteledownload import download_events
+
+centers=read_centers()
+yr=2008
+t0=time.time()
+print("Running get_catalog")
+cat=get_catalog(centers,year=yr)
+fname=("events_%d.xml" % yr)
+cat.write(fname,format='QUAKEML')
+t1=time.time()
+print("Elapsed time to fetch catalog=",t1-t0)
+print("Running download_events")
+t1=time.time()
+download_events(cat,yeartag=str(yr))
+t2=time.time()
+print("Elapsed time to download events=",t2-t1)
diff --git a/notebooks/USArrayProcessExample/download_scripts/download2009.py b/notebooks/USArrayProcessExample/download_scripts/download2009.py
new file mode 100644
index 0000000..1213046
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/download2009.py
@@ -0,0 +1,19 @@
+import time
+from rfteledownload import read_centers
+from rfteledownload import get_catalog
+from rfteledownload import download_events
+
+centers=read_centers()
+yr=2009
+t0=time.time()
+print("Running get_catalog")
+cat=get_catalog(centers,year=yr)
+fname=("events_%d.xml" % yr)
+cat.write(fname,format='QUAKEML')
+t1=time.time()
+print("Elapsed time to fetch catalog=",t1-t0)
+print("Running download_events")
+t1=time.time()
+download_events(cat,yeartag=str(yr))
+t2=time.time()
+print("Elapsed time to download events=",t2-t1)
diff --git a/notebooks/USArrayProcessExample/download_scripts/download2009_run2.py b/notebooks/USArrayProcessExample/download_scripts/download2009_run2.py
new file mode 100644
index 0000000..8c4200b
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/download2009_run2.py
@@ -0,0 +1,19 @@
+import time
+from rfteledownload import read_centers
+from rfteledownload import get_catalog
+from rfteledownload import download_events
+
+centers=read_centers()
+yr=2009
+t0=time.time()
+print("Running get_catalog")
+cat=get_catalog(centers,year=yr)
+fname=("events_%d.xml" % yr)
+cat.write(fname,format='QUAKEML')
+t1=time.time()
+print("Elapsed time to fetch catalog=",t1-t0)
+print("Running download_events")
+t1=time.time()
+download_events(cat,yeartag=str(yr),firstid=544)
+t2=time.time()
+print("Elapsed time to download events=",t2-t1)
diff --git a/notebooks/USArrayProcessExample/download_scripts/download2010.py 
b/notebooks/USArrayProcessExample/download_scripts/download2010.py
new file mode 100644
index 0000000..ba4d9de
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/download2010.py
@@ -0,0 +1,19 @@
+import time
+from rfteledownload import read_centers
+from rfteledownload import get_catalog
+from rfteledownload import download_events
+
+centers=read_centers()
+yr=2010
+t0=time.time()
+print("Running get_catalog")
+cat=get_catalog(centers,year=yr)
+fname=("events_%d.xml" % yr)
+cat.write(fname,format='QUAKEML')
+t1=time.time()
+print("Elapsed time to fetch catalog=",t1-t0)
+print("Running download_events")
+t1=time.time()
+download_events(cat,yeartag=str(yr))
+t2=time.time()
+print("Elapsed time to download events=",t2-t1)
diff --git a/notebooks/USArrayProcessExample/download_scripts/download2011.py b/notebooks/USArrayProcessExample/download_scripts/download2011.py
new file mode 100644
index 0000000..3e1e8b7
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/download2011.py
@@ -0,0 +1,19 @@
+import time
+from rfteledownload import read_centers
+from rfteledownload import get_catalog
+from rfteledownload import download_events
+
+centers=read_centers()
+yr=2011
+t0=time.time()
+print("Running get_catalog")
+cat=get_catalog(centers,year=yr)
+fname=("events_%d.xml" % yr)
+cat.write(fname,format='QUAKEML')
+t1=time.time()
+print("Elapsed time to fetch catalog=",t1-t0)
+print("Running download_events")
+t1=time.time()
+download_events(cat,yeartag=str(yr))
+t2=time.time()
+print("Elapsed time to download events=",t2-t1)
diff --git a/notebooks/USArrayProcessExample/download_scripts/download2011_run2.py b/notebooks/USArrayProcessExample/download_scripts/download2011_run2.py
new file mode 100644
index 0000000..ace63b7
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/download2011_run2.py
@@ -0,0 +1,21 @@
+import time
+from rfteledownload import read_centers
+from rfteledownload import get_catalog
+from rfteledownload import download_events
+from obspy import read_events
+
+centers=read_centers()
+yr=2011
+t0=time.time()
+#print("Running get_catalog")
+#cat=get_catalog(centers,year=yr)
+fname=("events_%d.xml" % yr)
+#cat.write(fname,format='QUAKEML')
+#t1=time.time()
+#print("Elapsed time to fetch catalog=",t1-t0)
+cat=read_events(fname,format='QUAKEML')
+print("Running download_events from id 904")
+t1=time.time()
+download_events(cat,yeartag=str(yr),firstid=904)
+t2=time.time()
+print("Elapsed time to download events=",t2-t1)
diff --git a/notebooks/USArrayProcessExample/download_scripts/find_duplicate_net_sta.py b/notebooks/USArrayProcessExample/download_scripts/find_duplicate_net_sta.py
new file mode 100644
index 0000000..596bced
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/find_duplicate_net_sta.py
@@ -0,0 +1,88 @@
+import sys
+from obspy import Inventory
+from obspy import read_inventory
+# Needed below to sort by sta as the second element of the tuples
+def GetKeyFromTuple(item):
+    return item[1]
+def stalist2tuples(stalist):
+    result=[]
+    for x in stalist:
+        # This ridiculous double call to split is needed because a typical
+        # element of stalist looks like this:
+        # AZ.BZN (Buzz Northerns Place, Anza, CA, USA)
+        z=x.split('.')
+        net=z[0]
+        zz=z[1].split(' ')
+        sta=zz[0]
+        # Although slightly irrational for this program I keep the order
+        # net sta because that order is locked in my brain and those of most
+        # seismologists
+        lnetsta=(net,sta)
+        result.append(tuple(lnetsta))
+    return result
+
+stadir=''
+args=sys.argv
diff --git a/notebooks/USArrayProcessExample/download_scripts/find_duplicate_net_sta.py b/notebooks/USArrayProcessExample/download_scripts/find_duplicate_net_sta.py
new file mode 100644
index 0000000..596bced
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/find_duplicate_net_sta.py
@@ -0,0 +1,88 @@
+import sys
+from obspy import Inventory
+from obspy import read_inventory
+# Needed below to sort by sta as the second element of the tuples
+def GetKeyFromTuple(item):
+    return item[1]
+def stalist2tuples(stalist):
+    result=[]
+    for x in stalist:
+        # This ridiculous double call to split is needed because a typical
+        # element of stalist looks like this:
+        #   AZ.BZN (Buzz Northerns Place, Anza, CA, USA)
+        z=x.split('.')
+        net=z[0]
+        zz=z[1].split(' ')
+        sta=zz[0]
+        # Although slightly irrational for this program I keep the order
+        # net sta because that order is locked in my brain and those of
+        # most seismologists
+        lnetsta=(net,sta)
+        result.append(tuple(lnetsta))
+    return result
+
+stadir=''
+args=sys.argv
+if(len(args)!=2):
+    print('Usage Error:\npython find_duplicate_net_sta stadir')
+    exit(2)
+try:
+    # This assumes the structure created by bulk_download where stations are
+    # put in files in a directory with one station per file - why the * wildcard
+    # is needed
+    xmldir=args[1]
+    xmlfiles=xmldir+"/"+"*.xml"
+    inv=read_inventory(xmlfiles,format='STATIONXML')
+    # This returns an absurd structure in my opinion.  For this we need only
+    # the stations data
+    x=inv.get_contents()
+    stations=x['stations']
+    # The list in stations has names in the form net.sta
+    # This function converts these to a list of tuples in the form (net,sta)
+    # These can be sorted on the station key using GetKeyFromTuple
+    stup=stalist2tuples(stations)
+    stupsorted=sorted(stup,key=GetKeyFromTuple)
+    laststa=''
+    lastnet=''
+    last_n=0
+    print('net sta values read from directory=',xmldir)
+    # this is not a memory efficient algorithm, but it is simpler than the
+    # line by line duplicate tests I've always found easy to mess up.
+    # This is pythonic:  push nets to a list keyed by sta for
+    # any duplicate station
+    dups=dict()
+    duplist=list()
+    n=0
+    ndups=0
+    for x in stupsorted:
+        net=x[0]
+        sta=x[1]
+        if(n==0):
+            laststa=sta
+            lastnet=net
+            n=1
+            continue
+        #print(n,net,sta)
+        if(sta==laststa):
+            if(ndups==0):
+                # a new list is required here; clearing and reusing the
+                # old one would corrupt entries already stored in dups
+                duplist=[lastnet,net]
+                dups[sta]=duplist
+            else:
+                y=dups[sta]
+                y.append(net)
+                dups[sta]=y
+            ndups+=1
+        else:
+            ndups=0
+        n+=1
+        laststa=sta
+        lastnet=net
+    print('Duplicates:')
+    for sta in dups:
+        x=dups[sta]
+        for net in x:
+            print(net,sta)
+except RuntimeError as err:
+    print(err)
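The sorted-scan bookkeeping in the script above is easy to get wrong; the shared-list pitfall fixed there is one example. A hedged alternative that does the same grouping with less state, using only the standard library:

# Alternative duplicate search: group nets by sta with a defaultdict,
# then keep stations claimed by more than one network. Input is the
# same list of (net, sta) tuples that stalist2tuples returns.
from collections import defaultdict

def find_duplicates(netsta_tuples):
    nets_by_sta = defaultdict(list)
    for net, sta in netsta_tuples:
        nets_by_sta[sta].append(net)
    return {sta: nets for sta, nets in nets_by_sta.items() if len(nets) > 1}

# example with the kind of content the script parses:
print(find_duplicates([("AZ", "BZN"), ("TA", "BZN"), ("TA", "109C")]))
# {'BZN': ['AZ', 'TA']}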
diff --git a/notebooks/USArrayProcessExample/download_scripts/print_net_sta_chan_loc.py b/notebooks/USArrayProcessExample/download_scripts/print_net_sta_chan_loc.py
new file mode 100644
index 0000000..6a10084
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/print_net_sta_chan_loc.py
@@ -0,0 +1,44 @@
+import sys
+from obspy import Inventory
+from obspy import read_inventory
+def chanlist2tuples(chanlist):
+    result=[]
+    for x in chanlist:
+        z=x.split('.')
+        net=z[0]
+        sta=z[1]
+        loc=z[2]
+        chan=z[3]
+        # Although slightly irrational for this program I make the order
+        # of output net, sta, chan, loc because that order is locked in my brain
+        # as an old antelope user
+        lnetsta=(net,sta,chan,loc)
+        result.append(tuple(lnetsta))
+    return result
+
+stadir=''
+args=sys.argv
+if(len(args)!=2):
+    print('Usage Error:\npython print_net_sta_chan_loc.py stadir')
+    exit(2)
+try:
+    # This assumes the structure created by bulk_download where stations are
+    # put in files in a directory with one station per file - why the * wildcard
+    # is needed
+    xmldir=args[1]
+    xmlfiles=xmldir+"/"+"*.xml"
+    inv=read_inventory(xmlfiles,format='STATIONXML')
+    # This returns an absurd structure in my opinion.  For this we need only
+    # the channels data
+    x=inv.get_contents()
+    channels=x['channels']
+    # The list in channels has names in the form net.sta.loc.chan
+    # This function converts these to a list of tuples in the form (net,sta,chan,loc)
+    chantuples=chanlist2tuples(channels)
+    for x in chantuples:
+        if(len(x[3])==0):
+            print(x[0],x[1],x[2],'NULL')
+        else:
+            print(x[0],x[1],x[2],x[3])
+except Exception as err:
+    print('read_inventory processing threw an exception:',err)
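Both scripts parse the flat summary returned by Inventory.get_contents(). A minimal sketch of the shape they rely on, assuming a directory of StationXML files as created by the downloaders (the path here is hypothetical):

from obspy import read_inventory

inv = read_inventory("site_2010.dir/*.xml", format="STATIONXML")
contents = inv.get_contents()  # dict with 'networks', 'stations', 'channels'
# 'stations' entries look like "AZ.BZN (station description)"
print(contents["stations"][0])
# 'channels' entries look like "AZ.BZN..BHZ", i.e. net.sta.loc.chan
# with an empty loc code allowed
print(contents["channels"][0])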
diff --git a/notebooks/USArrayProcessExample/download_scripts/rfteledownload.py b/notebooks/USArrayProcessExample/download_scripts/rfteledownload.py
new file mode 100644
index 0000000..ef6bf66
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/rfteledownload.py
@@ -0,0 +1,87 @@
+import time
+from obspy import UTCDateTime
+from obspy.clients.fdsn.mass_downloader import RectangularDomain, \
+    Restrictions, MassDownloader
+from obspy.clients.fdsn import Client
+
+
+def read_centers(file='year_centers.txt'):
+    """
+    Read a text file of array centers with three numbers per
+    line:  year of coverage, longitude, latitude.
+    Returns a dict of lon,lat pairs (list) indexed by integer
+    year.
+    """
+    centers={}
+    # A more elegant method is to use with, but that is not necessary
+    # for this private program.
+    fh=open(file,mode='r')
+    for line in fh:
+        words=line.split()
+        yr=int(words[0])
+        lon=float(words[1])
+        lat=float(words[2])
+        centers[yr]=[lon,lat]
+    return centers
+
+def get_catalog(centers, year=2005, minmag=4.9):
+    """
+    Fetch a year of events for usarray using the coordinates of the
+    center indexed by year and an RF distance range of 25 to
+    95 degrees from the array center.  The range is slightly larger
+    than normal due to the large array aperture of usarray.
+    We may get some extras, but part of this exercise is
+    filtering problem data.
+
+    :param centers:  dict of array centers keyed by year
+    :param year:  year to use to load events
+    :param minmag:  lower limit on magnitude (no upper limit)
+
+    :rtype:  obspy catalog object
+    """
+    client=Client("IRIS")
+    tstart=UTCDateTime(year,1,1,0,0)
+    tend=UTCDateTime(year+1,1,1,0,0)
+    coords=centers[year]
+    lon0=coords[0]
+    lat0=coords[1]
+    cat=client.get_events(starttime=tstart,endtime=tend,
+                          latitude=lat0,longitude=lon0,
+                          minradius=25.0,maxradius=95.0,
+                          minmagnitude=minmag)
+    return cat
+
+def download_events(cat,yeartag="2005",firstid=1):
+    """
+    Download events to the lower 48 with MassDownloader using the
+    events in the obspy Catalog object cat.  Parameters for
+    station definitions are hard-wired.
+    """
+    # note wf_storage is set for each event inside the loop below
+    site_storage="site_"+yeartag+".dir"
+    mdl = MassDownloader()
+    domain = RectangularDomain(minlatitude=20.0,maxlatitude=54.0,
+                               minlongitude=-135, maxlongitude=-55)
+    count_evid=1
+    for event in cat:
+        if(count_evid>=firstid):
+            t0=time.time()
+            o=event.preferred_origin()
+            # The time attribute is already a UTCDateTime object
+            origin_time=o.time
+            restrictions = Restrictions (
+                starttime=origin_time,
+                endtime=origin_time + 3600.0,
+                reject_channels_with_gaps=True,
+                minimum_length=0.95,
+                minimum_interstation_distance_in_m=100.0,
+                channel_priorities=["BH[ZNE12]"],
+                location_priorities=["","00","10","01"])
+
+            wf_storage=("%s/event%d" % (yeartag,count_evid))
+            mdl.download(domain,restrictions,
+                         mseed_storage=wf_storage,
+                         stationxml_storage=site_storage)
+            dt=time.time()-t0
+            print("Event ",count_evid," download time (s)=",dt)
+        count_evid += 1
diff --git a/notebooks/USArrayProcessExample/download_scripts/year_centers.txt b/notebooks/USArrayProcessExample/download_scripts/year_centers.txt
new file mode 100644
index 0000000..eabbd72
--- /dev/null
+++ b/notebooks/USArrayProcessExample/download_scripts/year_centers.txt
@@ -0,0 +1,11 @@
+2005 -122.0 41.0
+2006 -119.0 41.0
+2007 -114.0 41.0
+2008 -111.0 41.0
+2009 -105.0 41.0
+2010 -100.0 41.0
+2011 -95.0 41.0
+2012 -90.0 40.0
+2013 -82.0 39.0
+2014 -82.0 39.0
+2015 -76.0 40.0
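Because get_catalog deliberately uses a 25 to 95 degree range measured from the array center, individual station-event pairs can fall outside the usable RF distance range. A hedged sketch of the kind of post-download check that could flag them (the thresholds and coordinates are illustrative assumptions; only locations2degrees is standard obspy):

from obspy.geodetics import locations2degrees

def in_rf_range(sta_lat, sta_lon, ev_lat, ev_lon, dmin=30.0, dmax=90.0):
    # epicentral distance in degrees for this station-event pair
    delta = locations2degrees(sta_lat, sta_lon, ev_lat, ev_lon)
    return dmin <= delta <= dmax

# station near the 2010 array center, hypothetical South American event
print(in_rf_range(41.0, -100.0, -20.0, -70.0))  # True (roughly 67 degrees)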
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23a01638-541b-4f72-aebf-0ffc41f87579", + "metadata": {}, + "outputs": [], + "source": [ + "# change for different calendar year\n", + "year = 2014\n", + "dbname = \"usarray{}\".format(year)\n", + "tsdir=\"./wf_TimeSeries/{}\".format(year)\n", + "dbmaster=\"usarray48\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41264258-6651-433d-9b7c-4f95faae86c2", + "metadata": {}, + "outputs": [], + "source": [ + "import mspasspy.client as msc\n", + "mspass_client = msc.Client(database_name=dbname)\n", + "# waveform data indexed to here\n", + "db = mspass_client.get_database()\n", + "# master database with source and receiver metadata\n", + "dbmd = mspass_client.get_database(dbmaster)\n" + ] + }, + { + "cell_type": "markdown", + "id": "e021cb5e-5114-46e1-b267-6d19320ea00c", + "metadata": {}, + "source": [ + "# Creating Master Database\n", + "Record any issues in running CreateMasterDatabase.ipynb. If nothing else record the timing data and when the work was done." + ] + }, + { + "cell_type": "markdown", + "id": "4406dbb1-5c3e-4c3e-acc8-22667132013e", + "metadata": {}, + "source": [ + "# Yearly Processing\n", + "Record your notes below for each year. Insert additional boxes if necessary. " + ] + }, + { + "cell_type": "markdown", + "id": "64a3d32a-fd30-48db-851b-6d515c6e4e53", + "metadata": {}, + "source": [ + "## 2005" + ] + }, + { + "cell_type": "markdown", + "id": "e0ede379-d12f-4edf-99de-b780631f87ef", + "metadata": {}, + "source": [ + "## 2006" + ] + }, + { + "cell_type": "markdown", + "id": "78f7cc81-a400-47ff-9a07-41b6b128ab27", + "metadata": {}, + "source": [ + "## 2006" + ] + }, + { + "cell_type": "markdown", + "id": "9b33121e-5355-496e-9c7d-ec1f88266d7c", + "metadata": {}, + "source": [ + "## 2007" + ] + }, + { + "cell_type": "markdown", + "id": "73e46171-9bad-45a8-993b-2b3cdeda8062", + "metadata": {}, + "source": [ + "## 2008" + ] + }, + { + "cell_type": "markdown", + "id": "69b17c83-5185-406e-b6bb-2cf0b9c24086", + "metadata": {}, + "source": [ + "## 2009" + ] + }, + { + "cell_type": "markdown", + "id": "26c5283e-af7a-4907-ae39-374b185eed7a", + "metadata": {}, + "source": [ + "## 2010" + ] + }, + { + "cell_type": "markdown", + "id": "dd9d345b-6a27-42a5-908f-e5bd54642034", + "metadata": {}, + "source": [ + "## 2011" + ] + }, + { + "cell_type": "markdown", + "id": "bcdc0c03-284e-4a46-8b2d-32e73de03d66", + "metadata": {}, + "source": [ + "## 2012" + ] + }, + { + "cell_type": "markdown", + "id": "c9f4b1d6-1650-438a-bb23-26d49e7eb07b", + "metadata": {}, + "source": [ + "## 2013" + ] + }, + { + "cell_type": "markdown", + "id": "216ad965-746f-4973-9da9-2a3bfbc97fda", + "metadata": {}, + "source": [ + "## 2014" + ] + }, + { + "cell_type": "markdown", + "id": "40b83ac7-0dae-4cb0-8ce8-623e3b2eaf91", + "metadata": {}, + "source": [ + "## 2015" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 2a13fe4d9b8c9c16a4d5d359c7a269c6b884730b Mon Sep 17 00:00:00 2001 From: Gary Pavlis Date: Sun, 6 Apr 2025 17:15:53 -0400 Subject: [PATCH 4/7] Add starting comments on yearly sections --- .../ProcessingNotes_template.ipynb | 19 
++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/notebooks/USArrayProcessExample/ProcessingNotes_template.ipynb b/notebooks/USArrayProcessExample/ProcessingNotes_template.ipynb index f472604..b895b68 100644 --- a/notebooks/USArrayProcessExample/ProcessingNotes_template.ipynb +++ b/notebooks/USArrayProcessExample/ProcessingNotes_template.ipynb @@ -63,7 +63,8 @@ "id": "64a3d32a-fd30-48db-851b-6d515c6e4e53", "metadata": {}, "source": [ - "## 2005" + "## 2005\n", + "I have an archive copy of data I downloaded for this year that is inconsistent with the later data. I do not think a copy of the data exist on Frontera. Rather than use the questionable data I have I think the best approach for these data is to download them again from Earthscope. This is a realitively small piece of the data as Earthscope launched that year and there were only a handful of TA stations running until late in the year. I do not, however, know how many other broadband stations will be retrieved but it still will be smaller than any other year." ] }, { @@ -71,7 +72,8 @@ "id": "e0ede379-d12f-4edf-99de-b780631f87ef", "metadata": {}, "source": [ - "## 2006" + "## 2006 \n", + "This year of data is like 2005. The only difference is the size will be much larger the TA was expanded a lot during this second year of the TA construction." ] }, { @@ -127,7 +129,8 @@ "id": "bcdc0c03-284e-4a46-8b2d-32e73de03d66", "metadata": {}, "source": [ - "## 2012" + "## 2012\n", + "The data from this year was used to develop the initial prototypes for this workflow. There are around 4.5 M miniseed waveforms from around 1000 events. A number of bugs were worked out on these data, but reprocessing should go smoothly." ] }, { @@ -135,7 +138,8 @@ "id": "c9f4b1d6-1650-438a-bb23-26d49e7eb07b", "metadata": {}, "source": [ - "## 2013" + "## 2013\n", + "The data from this year was used for second order prototyping after the 2012 data were processed. The data 2013 are similar to 2012 in size but with only just under 900 events." ] }, { @@ -143,7 +147,12 @@ "id": "216ad965-746f-4973-9da9-2a3bfbc97fda", "metadata": {}, "source": [ - "## 2014" + "## 2014\n", + "Our download of the data from 2014 has some serious issues that have not been resolved. The data set downloaed earlier has 1052 event files, which is comparable to 2012 and 2013. However, 2012 and 2013 have around 4 M waveform segments created with index_mseed_files while the data we have archived had around 45 M segments. That created a huge slowdown in all database operations and I had problems getting bulk_normalize work at all. I found pretty clear evidence these data are seriously messed up with the data fragmented for some mysterious reason. Evidence for this came from two simple algorithms:\n", + "1. Counting the number of waveforms for each station. Some had reasonable numbers around 4000 but there were also many with an order of magnitude more indexed segments (i.e. 50-60 thousand). That is either a gap problem or the data in the files being scrambled by some unknown problem in the download.\n", + "2. I ran OriginTimeMatcher and counted matches with wf_miniseed documents. Only about 10% were hits which is consistent with 1.\n", + "\n", + "To process the 2014 data we will need to rerun the download procedure to see if the problem persists. 
" ] }, { From 0ff56c0a4563df6b83a4e3685bfc3f3d5021f308 Mon Sep 17 00:00:00 2001 From: Gary Pavlis Date: Mon, 7 Apr 2025 05:52:59 -0400 Subject: [PATCH 5/7] Update section on 2014 data after reviewing log files of a failure --- .../USArrayProcessExample/ProcessingNotes_template.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/notebooks/USArrayProcessExample/ProcessingNotes_template.ipynb b/notebooks/USArrayProcessExample/ProcessingNotes_template.ipynb index b895b68..9c5dbb1 100644 --- a/notebooks/USArrayProcessExample/ProcessingNotes_template.ipynb +++ b/notebooks/USArrayProcessExample/ProcessingNotes_template.ipynb @@ -148,7 +148,9 @@ "metadata": {}, "source": [ "## 2014\n", - "Our download of the data from 2014 has some serious issues that have not been resolved. The data set downloaed earlier has 1052 event files, which is comparable to 2012 and 2013. However, 2012 and 2013 have around 4 M waveform segments created with index_mseed_files while the data we have archived had around 45 M segments. That created a huge slowdown in all database operations and I had problems getting bulk_normalize work at all. I found pretty clear evidence these data are seriously messed up with the data fragmented for some mysterious reason. Evidence for this came from two simple algorithms:\n", + "Our download of the data from 2014 has some serious issues that have not been resolved. The data set downloaed earlier has 1052 event files, which is comparable to 2012 and 2013. However, 2012 and 2013 have around 4 M waveform segments created with index_mseed_files while the data we have archived had around 45 M segments. That created a huge slowdown in all database operations and I had problems getting bulk_normalize work at all. Inspection of the MongoDB log file shows only that the error was preceded by a \"Slow Query\" warning. The exception thrown shows a timeout error when bulk_normalize was running count_documents for the full wf_miniseed collection. I (glp) know from interactive work with this database that all operations of wf_miniseed were very slow so I am nearly certain this problem was created by a bloated wf_miniseed collection. It also illustrates why working with these data in yearly chunks is prudent. In any case, there is a solution that I haven't implemented. In line normalize calls are not subject to this problem because the only database operations they do is to load the data for matching in normalize.\n", + "\n", + "I found pretty clear evidence these data are seriously messed up with the data fragmented for some mysterious reason. Evidence for this came from two simple algorithms:\n", "1. Counting the number of waveforms for each station. Some had reasonable numbers around 4000 but there were also many with an order of magnitude more indexed segments (i.e. 50-60 thousand). That is either a gap problem or the data in the files being scrambled by some unknown problem in the download.\n", "2. I ran OriginTimeMatcher and counted matches with wf_miniseed documents. 
From 7aea171b124ea77b66ede5e3b930e452d8a26a5d Mon Sep 17 00:00:00 2001
From: Gary Pavlis
Date: Fri, 11 Apr 2025 09:21:22 -0400
Subject: [PATCH 6/7] Add missing import of UTCDateTime

---
 notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb b/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb
index 1d12ede..83aad5c 100644
--- a/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb
+++ b/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb
@@ -66,6 +66,7 @@
     "from mspasspy.algorithms.basic import free_surface_transformation,rotate_to_standard\n",
     "from mspasspy.algorithms.snr import broadband_snr_QC\n",
     "from mspasspy.util.seismic import number_live\n",
+    "from obspy import UTCDateTime\n",
     "import dask.bag as dbg\n",
     "import time\n",
     "import math\n",

From 427c2ba0f717a955b91dd5358a49d25584c50e95 Mon Sep 17 00:00:00 2001
From: Gary Pavlis
Date: Sat, 12 Apr 2025 15:00:29 -0400
Subject: [PATCH 7/7] Fix bug in running free surface transformation and disable time filter of station metadata

---
 .../Preprocess2seis_template.ipynb | 59 +++++++++++++------
 1 file changed, 42 insertions(+), 17 deletions(-)

diff --git a/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb b/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb
index 83aad5c..4c81735 100644
--- a/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb
+++ b/notebooks/USArrayProcessExample/Preprocess2seis_template.ipynb
@@ -5,18 +5,18 @@
    "id": "9170ade2-9d78-4f36-8d12-76bd0419242f",
    "metadata": {},
    "source": [
-    "# Extended USArray data preprocessing stage 3\n",
-    "This notebook does the final stage of preprocessing of the extended USArray data set. It reads common source gathers (defined by common source_id value) it assumes were created in wf_TimeSeries and does the following processing steps:\n",
+    "# Extended USArray data preprocessing stage 2\n",
+    "This notebook does the second stage of preprocessing of the extended USArray data set. It reads common source gathers assumed to have been created previously in wf_TimeSeries and does the following processing steps:\n",
     "1. Runs bundle_seed_data to transform the data to a SeismogramEnsemble \n",
     "2. Applies the free surface transformation to all ensemble members\n",
     "3. Runs broadband_snr_QC on all members. \n",
     "\n",
-    "Step 3 tends to kill a lot of data but that is a key point. The cemetery contains the mummified bodies."
+    "Step 3 tends to kill a lot of data but that is a key point. The cemetery contains the bodies."
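A compressed sketch of the three steps listed above, using the functions this notebook imports (the bundle_seed_data import path, window handling, and velocity values are assumptions; the real implementation is the process2seis function later in this notebook):

from mspasspy.algorithms.bundle import bundle_seed_data
from mspasspy.algorithms.basic import free_surface_transformation
from mspasspy.algorithms.snr import broadband_snr_QC

def to_seis_sketch(tsens, u, vp0=5.0, vs0=3.5):
    # 1. bundle miniseed channels into three-component Seismograms
    seisens = bundle_seed_data(tsens)
    for i in range(len(seisens.member)):
        d = seisens.member[i]
        if d.live:
            # 2. free surface transformation with slowness vector u
            d = free_surface_transformation(d, uvec=u, vp0=vp0, vs0=vs0)
            # 3. snr QC posts metrics and kills members below threshold
            d = broadband_snr_QC(d)
            seisens.member[i] = d
    return seisens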
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "e810137a-ae9e-44ff-bcc8-d31c286f133a", "metadata": {}, "outputs": [], @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "54c091f6-7d4d-4bdb-973d-9fc7f19ffb8f", "metadata": {}, "outputs": [], @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "536bce0c-cc54-4f42-965b-7f05597be17f", "metadata": {}, "outputs": [], @@ -66,7 +66,6 @@ "from mspasspy.algorithms.basic import free_surface_transformation,rotate_to_standard\n", "from mspasspy.algorithms.snr import broadband_snr_QC\n", "from mspasspy.util.seismic import number_live\n", - "from obspy import UTCDateTime\n", "import dask.bag as dbg\n", "import time\n", "import math\n", @@ -105,7 +104,7 @@ " uy = umag * math.cos(az)\n", " # FST implementation requires this special class called a Slowness Vector\n", " u = SlownessVector(ux,uy)\n", - " d = free_surface_transformation(d,u,vp0,vs0)\n", + " d = free_surface_transformation(d,uvec=u,vp0=vp0,vs0=vs0)\n", " else:\n", " d.kill()\n", " message = \"one of required attributes rayp_P or seaz were not defined for this datum\"\n", @@ -134,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "e307e480-0923-4e95-b209-e28689061e71", "metadata": {}, "outputs": [], @@ -142,8 +141,7 @@ "# this function was copied from Preprocess2ts - it maybe should be a local .py file\n", "def dbmdquery(year,padsize=86400.0):\n", " \"\"\"\n", - " Constructs a MongoDB query dictionary to use as a query argument for \n", - " normalization matcher classes site and channel. A different query is needed for source later.\n", + " Constructs a MongoDB query dictionary to use as a query argument for normalization matcher classes.\n", " (All standard BasicMatcher children have a query argument in the constructor for this purpose.)\n", " The query is a range spanning specified calendar year. The interval is extended by padsize \n", " seconds. 
(default is one day = 86400.0)\n", @@ -176,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "c61ecb40-6e9a-40cf-b7a9-192ced9db52e", "metadata": {}, "outputs": [], @@ -199,6 +197,15 @@ " print(\"Number of members in this ensemble=\",len(ens.member))\n", " return ens\n", "\n", + "def normalize_ensembles(ens,matcher):\n", + " \"\"\"\n", + " Workaround for bug in normalize function where decorators won't work.\n", + " This function should be removed when that bug is resolved.\n", + " \"\"\"\n", + " if ens.live:\n", + " for i in range(len(ens.member)):\n", + " ens.member[i] = normalize(ens.member[i],matcher)\n", + " return ens\n", " \n", "def process2seis(ens,\n", " swin,\n", @@ -248,12 +255,30 @@ "execution_count": null, "id": "227cfea1-3103-4a36-9405-e6f837829bad", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of distinct sources in wf_TimeSeries= 1003\n", + "Number to process this run= 1003\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.10/site-packages/distributed/client.py:3169: UserWarning: Sending large graph of size 27.38 MiB.\n", + "This may cause some slowdown.\n", + "Consider scattering data ahead of time and using futures.\n", + " warnings.warn(\n" + ] + } + ], "source": [ "t0 = time.time()\n", - "chanquery=dbmdquery(year)\n", + "#chanquery=dbmdquery(year)\n", "chan_matcher = ObjectIdMatcher(dbmd,\n", - " query=chanquery,\n", " collection=\"channel\",\n", " attributes_to_load=[\"_id\",\"lat\",\"lon\",\"elev\",\"hang\",\"vang\"],\n", " )\n", @@ -294,7 +319,7 @@ "# note default behavior for normalize is to normalize all members\n", "#mydata = mydata.map(normalize,chan_matcher,handles_ensembles=False)\n", "#workaround for above until bug is fixed in normalize\n", - "mydata = mydata.map(normalize,chan_matcher)\n", + "mydata = mydata.map(normalize_ensembles,chan_matcher)\n", "mydata = mydata.map(process2seis,\n", " swin,\n", " nwin,\n", @@ -339,7 +364,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.10.14" } }, "nbformat": 4,