/* this compiles a dataset with data from all stations, rather than just those on current CMS... it may add more data to time series for some segments than using current cms stations alone... BUT BUT BUT limit data to CMS (current stations) table before analyzing station_level data */ proc sql; create table info3 as select x.*, y.segment_id,y.usgs_gauge_id from wrim.raw as x left join crp_misc.stations_sept2015 as y on x.station_id = y.station_id; quit; proc sql; create table info3a as select x.*, y.seg_name from info3 as x left join crp_misc.segment_names as y on compress(x.segment_id) = compress(y.segid); quit; data info4 (drop = RFA_Sample_Set_ID__Tag_id Start_Date Start_time Start_Depth composite_category comment ); length watershed $60; set info3a; if station_id = . then delete; seg_grp=substr(segment_id,1,4); station=put(station_id, 5.0); If seg_grp= '0901' then watershed= 'Cedar Bayou '; If seg_grp= '0902' then watershed= 'Cedar Bayou Above Tidal '; If seg_grp= '1001' then watershed= 'San Jacinto River Tidal '; If seg_grp= '1002' then watershed= 'Lake Houston '; If seg_grp= '1003' then watershed= 'East Fork San Jacinto River'; If seg_grp= '1004' then watershed= 'West Fork San Jacinto River'; If seg_grp= '1005' then watershed= 'Houston Ship Channel/San Jacinto River Tidal '; If seg_grp= '1006' then watershed= 'Houston Ship Channel'; If seg_grp= '1007' then watershed= 'Houston Ship Channel/Buffalo Bayou Tidal '; If seg_grp= '1008' then watershed= 'Spring Creek '; If seg_grp= '1009' then watershed= 'Cypress Creek '; If seg_grp= '1010' then watershed= 'Caney Creek '; If seg_grp= '1011' then watershed= 'Peach Creek '; If seg_grp= '1012' then watershed= 'Lake Conroe '; If seg_grp= '1013' then watershed= 'Buffalo Bayou Tidal '; If seg_grp= '1014' then watershed= 'Buffalo Bayou Above Tidal '; If seg_grp= '1015' then watershed= 'Lake Creek '; If seg_grp= '1016' then watershed= 'Greens Bayou Above Tidal '; If seg_grp= '1017' then watershed= 'Whiteoak Bayou Above Tidal '; If seg_grp= '1101' then watershed= 'Clear Creek Tidal '; If seg_grp= '1102' then watershed= 'Clear Creek Above Tidal'; If seg_grp= '1103' then watershed= 'Dickinson Bayou Tidal '; If seg_grp= '1104' then watershed= 'Dickinson Bayou Above Tidal '; If seg_grp= '1105' then watershed= 'Bastrop Bayou Tidal '; If seg_grp= '1107' then watershed= 'Chocolate Bayou Tidal '; If seg_grp= '1108' then watershed= 'Chocolate Bayou Above Tidal'; If seg_grp= '1109' then watershed= 'Oyster Creek Tidal '; If seg_grp= '1110 ' then watershed= 'Oyster Creek Above Tidal'; If seg_grp= '1111' then watershed= 'Old Brazos River Channel Tidal '; If seg_grp= '1113' then watershed= 'Armand Bayou Tidal '; If seg_grp= '1301' then watershed= 'San Bernard River Tidal '; If seg_grp= '1302' then watershed= 'San Bernard River Above Tidal '; If seg_grp= '2421' then watershed= 'Upper Galveston Bay'; if seg_grp='2422' then watershed='Trinity Bay'; If seg_grp= '2423' then watershed= 'East Bay '; If seg_grp= '2424' then watershed= 'West Bay '; If seg_grp= '2425' then watershed= 'Clear Lake '; If seg_grp= '2426' then watershed= 'Tabbs Bay '; If seg_grp= '2427' then watershed= 'San Jacinto Bay'; If seg_grp= '2428' then watershed= 'Black Duck Bay '; If seg_grp= '2429' then watershed= 'Scott Bay '; If seg_grp= '2430' then watershed= 'Burnett Bay '; If seg_grp= '2431' then watershed= 'Moses Lake '; If seg_grp= '2432' then watershed= 'Chocolate Bay' ; If seg_grp= '2433' then watershed= 'Bastrop Bay '; If seg_grp= '2434' then watershed= 'Christmas Bay '; If seg_grp= '2435' then watershed= 'Drum Bay '; If seg_grp= '2436' then watershed= 'Barbours Cut'; If seg_grp= '2437' then watershed= 'Texas City Ship Channel'; If seg_grp= '2439' then watershed= 'Lower Galveston Bay '; if seg_grp='2438' then watershed='Bayport Ship Channel'; If seg_grp= '2501' then watershed= 'Gulf of Mexico'; If seg_grp= '2433' then watershed= 'Bastrop Bay / Oyster Lake '; If seg_grp= '2422' then watershed= 'Trinity Bay '; if seg_grp='0702' then watershed= 'Intracoastal Waterway Tidal'; if seg_grp='0801' then watershed = 'Trinity River Tidal'; if seg_grp='0802' then watershed='Trinity River Below Lake Livingston'; if seg_grp='0803' then watershed= 'Lake Livingston'; if seg_grp='1201' then watershed='Brazos River Tidal'; if seg_grp ='1202' then watershed='Brazos River Below Navsota Tidal'; if seg_grp='1209' then watershed = 'Navasota River Below Lake Limestone' ; if seg_grp ='1245' then watershed='Upper Oyster Creek'; if seg_grp='1304' then watershed= 'Caney Creek Tidal'; if seg_grp='1305' then watershed='Caney Creek Above Tidal'; if seg_grp='1401' then watershed= 'Colorado River Tidal'; if seg_grp='1402' then watershed= 'Colorado River Below La Grange'; if seg_grp='1501' then watershed= 'Tres Palacios Creek Tidal'; if seg_grp='1502' then watershed= 'Tres Palacios Creek Above Tidal'; if seg_grp='1604' then watershed= 'Lavaca River'; if seg_grp='2441' then watershed= 'East Matagorda Bay'; if seg_grp='2442' then watershed= 'Cedar Lakes'; if seg_grp='2451' then watershed= 'West Matagorda Bay'; if seg_grp='2452' then watershed= 'Tres Palacios Bay / Turtle Bay'; if seg_grp='2456' then watershed= 'Carancahua Bay'; if seg_grp = '2501' then watersehed = 'Gulf of Mexico'; run; data info6; set info4; if strip(watershed) = '' then delete; segment_name = propcase (seg_name); run; Data dat_1a /*additional data management */; length parameter $ 25 Units $ 20 ; set info6; If end_depth>0.4 then delete; If segment_ID in ( '2461' '2462' '2463' '2471' '2471A' '2472' '2473' '2481' '2482' '2483' '2483A' '2484' '2485' '2485A' '2485B' '2485D' '2491' '2492' '2492A' '2493' '2494' '2494A' ) then delete; if parameter_code = '72053' then parameter= 'Days Since Last Rain'; if parameter_code in (620 630 593 618 631) then Parameter= 'Nitrate-N'; if parameter_code = 625 then parameter='Total Kjeldahl Nitrogen'; if parameter_code in (608 610) then Parameter= 'Ammonia-N'; if parameter_code in (671 70507) then Parameter= 'Orthophosphate-P'; if parameter_code in (940 941) then Parameter= 'Chloride'; if parameter_code in (31699 31700 31648 ) then Parameter= 'E. Coli'; if parameter_code in (31701 31649) then Parameter= 'Enterococci'; if parameter_code in (70953 32211) then Parameter= 'Chlorophyll a' ; if parameter_code in (665) then Parameter= 'Total Phosphorus'; if parameter_code in (10) then Parameter= 'Temperature'; if parameter_code in (680) then Parameter= 'Total Organic Carbon'; if parameter_code in (945) then Parameter= 'Sulfate'; if parameter_code in (530) then Parameter= 'Total Suspended Solids'; if parameter_code in (900 46570 82394) then Parameter= 'Total Hardness'; if parameter_code in (70300 70294 47004 70301) then Parameter= 'Total Dissolved Solids'; if parameter_code=480 then Parameter= 'Salinity'; if parameter_code=535 then Parameter= 'Volatile Suspended Solids'; if parameter_code=410 then Parameter= 'Alkalinity'; if parameter_code in (94 95) then Parameter='Specific Conductance'; If parameter_code=300 then Parameter='Dissolved Oxygen'; if parameter_code=1351 then Parameter='Flow Severity'; if parameter_code in (61, 74069) then Parameter='Instantaneous Flow'; if parameter_code=400 then Parameter='pH'; if parameter_code=78 then Parameter='Secchi Transparency'; /* create units for graph labels*/ Units='mg/L'; if parameter_code in (31616 79835 31613 31625 31699 31700 31648 31701 31649) then Units= 'MPN/100 mL'; if parameter_code in (61 74069) then Units='CFS'; if parameter_code in (94 95) then Units= 'Microsiemens / cm'; If parameter_code=10 then Units='Degrees C'; if parameter_code in (70953 32211) then Units='Micrograms / L'; if parameter_code in (78) then Units='Meters'; if parameter_code in (61 74069) then Units2='CFS'; if parameter_code=400 then Units='Standard Units'; if parameter_code in (915 916 925 927 ) then Units=' '; run; data dat_1ab; set dat_1a; where monitoring_type in ('RT' 'XR') and length (parameter) > 0 ; run; proc sort data=dat_1ab nodupkey dupout = dupes_dat1ab ; by station_id end_date end_depth parameter collecting_entity ; run; /* assign values for censored data ("<") ; calculate logs ; */ data bsr2016.raw_data_all; set dat_1ab ; value_adj=value; If Greater_Than_Less_Than EQ '<' then do; value_adj = value/2; end; log_reg = log(value_adj); if strip(parameter) in ('Nitrate-N' 'Total Kjeldahl Nitrogen' 'Orthophosphate-P' 'Ammonia-N' 'Secchi Transparency' 'Total Phosphorus' ) then do; log_reg = log(value_adj+1); end; If parameter = 'pH' then pH_note2= ' 6.5 - 9.0 ' ; if parameter='pH' and seg_grp in ('1003' '1010' '1011' '1015') then do; pH_note2= ' 6.0 - 9.0 '; end; /* delete crazy censored values*/ If Parameter in ('Fecal Coliform' 'E. Coli' 'Enterococci' ) and Greater_Than_Less_Than EQ '<' and value>99 then delete; If Parameter in ('Nitrate-N' ) and Greater_Than_Less_Than EQ '<' and value>0.5 then delete; If Parameter in ('Orthophosphate-P' ) and Greater_Than_Less_Than EQ '<' and value>0.5 then delete; If Parameter in ('Ammonia-N' ) and Greater_Than_Less_Than EQ '<' and value>1 then delete; if seg_grp in ('2411' '2412' '2441') then delete; run; /* delete duplicates */ proc sort data=bsr2016.raw_data_all nodupkey dupout = dupes_raw_all; by station_id end_date end_depth parameter Collecting_Entity; run; /* data from 1999 - present*/ data dat1; set bsr2016.raw_data_all ; where end_date GE '01JUN2000'd; run; data dat2; set dat1; where substr(segment_id,1,2) in ('09' '13' '10' '11' '24' '25') and monitoring_type = 'RT' ; if collecting_entity = 'HP' then delete; if end_date GE '01Jun2015'd then delete; run; data dat2a; set dat2; /* value_adj2 and log_reg2 are original value_adj for censored, 1/2 of value; compare later to value_adj that are based on highest LOQ ... */ value_adj2 = value_adj; log_reg2 = log_reg; if parameter = 'Ammonia-N' then do; if value < 0.1 then do; value_adj = 0.05; log_reg = log(1.05); end; end; if parameter = 'Chlorophyll-a' then do; if value < 3 then do; value_adj = 1.5; log_reg = log(1.5); end; end; /* LOQ from 2003 to 2007 for HH was 0.2; this is a problem but I see no other solution than below */ if parameter = 'Nitrate-N' then do; if greater_than_less_than = '<' then do; value_adj = 0.1; log_reg = log(1.1); end; if value LE 0.2 then do; value_adj = 0.1; log_reg = log(1.1); end; end; if parameter = 'Total Phosphorus' then do; if greater_than_less_than = '<' then do; value_adj = 0.03; log_reg = log(1.03); end; if value LE 0.06 then do; value_adj = 0.03; log_reg = log(1.03); end; end; end; if parameter = 'Total Suspended Solids' then do; if greater_than_less_than = '<' then do; value_adj = 2; log_reg = log(2); end; if value < 4 then do; value_adj = 2; log_reg = log(2); end; end; if collecting_entity = 'LL' and segment_id = '0902' then delete; run; run; proc datasets library = work; delete dat1; run; data dat3; set dat2a; run; proc sql; create table dat3a as select *, count(parameter) as results from dat3 group by station_id, end_date, end_depth, end_time, collecting_entity; quit; proc datasets library = work; delete dat2 dat2a dat3; run; proc sort data = dat3a ; by station_id end_date collecting_entity parameter descending results; run; proc sort data = dat3a nodupkey dupout = dupes_dat3a; by station_id end_date parameter ; run; proc sql; create table flow as select station_id, end_date, collecting_entity, value_adj from dat3a where parameter_code = '00061'; quit; proc sql; create table dat5 as select x.*, y.value_adj as flow_adp from dat3a as x left join flow as y on x.station_id = y.station_id and x.end_date = y.end_date and x.collecting_entity = y.collecting_entity; quit; proc sql; create table rain as select station_id, end_date, collecting_entity, value from dat3a where parameter_code = '72053'; quit; proc sql; create table dat6 as select x.* , y.value as rain from dat5 as x left join rain as y on x.station_id = y.station_id and x.end_date = y.end_date and x.collecting_entity = y.collecting_entity; quit; data dat7; length rain2 $3 wet_dry $8; set dat6; rain2 = put(rain,3.); wet_dry = 'Dry'; if rain2 in (0 1 2) then do; wet_dry = 'Wet'; end; if rain = . then do; wet_dry='Unknown'; end; run; proc datasets library = work; delete dat3a dat5 dat6 ; run; proc sql; create table dat8 as select x.*, y.flow as flow_usgs from dat7 as x left join crp_misc.flow_15year_stations as y on x.end_date = y.datetime and x.station_id = y.station_id ; quit; data dat8; set dat8; flow_comp = coalesce (flow_usgs, flow_adp); run; proc datasets library = work; delete dat7 ; run; proc sql; create table dat9 as select x.*, y.P_group, y.P_order from dat8 as x left join crp_misc.parameter_grp as y on x.parameter = y.parameter; quit; proc datasets library = work; delete dat7 dat8; run; proc sql; create table dat10 as select x.*, y.ALU, y.au_id from dat9 as x left join wrim.station_information as y on x.station_id = y.station_id; quit; data wqs; set crp_misc.wqs; if parameter = 'Dissolved Oxygen (single sample minimum)' then do; parameter = 'Dissolved Oxygen'; end; run; proc sql; create table dat11 as select x.*, y.seg_type from dat10 as x left join wqs as y on x.station_id = y.station_id; quit; proc sql; create table bsr2016.data_all as select x.*, y.standard, y.standard_text , y.type from dat11 as x left join wqs as y on x.station_id = y.station_id and x.parameter = y.parameter; quit; proc sql; create table test as select x.*, y.au_id as au2 from bsr2016.data_all as x left join crp_misc.au_station_master as y on x.station_id = y.station_id; quit; data bsr2016.data_all (drop = au2); set test; if strip(au_id) = '' then do; au_id = au2; end; run; proc sort data = bsr2016.data_all; by end_date end_time end_depth usgs_gauge_id station_id parameter_code collecting_entity segment_id ; run; proc sort data = bsr2016.data_all nodupkey dupout = dupes; by end_date end_time usgs_gauge_id station_id parameter_code collecting_entity segment_id ; run; /* limits to stations in CMS for transposed data analysis */ proc sql; create table bsr2016.data as select y.* from wrim.station_information as x left join bsr2016.data_all as y on x.station_id = y.station_id; quit; proc datasets library = work; delete dat8 dat9 dat10 dat11 ; run; proc sort data = bsr2016.data; by station_id end_date end_time end_depth segment_id seg_grp watershed ph_note2 collecting_entity flow_comp flow_usgs wet_dry ; run; /* create footnote that allows graph to be run for all 4 scenarios of standard - none, one, two (ph) */ data bsr2016.data_all; length footnote1 $125; set bsr2016.data_all; ph1 = compress(ph_note2); ph2 =substr(ph1,1,3); ph3 = substr(ph1,5,3); pst1 = 1*ph2; pst2 = 1*ph3; if pst1 > 0 then do; footnote1 = 'If present, red line represents '||strip(type)||': '||strip(ph_note2)||' Standard Units'; end; if standard > 0 then do; footnote1 = 'If present, red line represents '||strip(type)||': '||strip (standard_text); end; std1 = standard; std2 = . ; if parameter = 'pH' then do; std1 = pst1; std2 = pst2; end; run; /* dataset with blank records (date only ) removed for transposition */ data trans1 ; set bsr2016.data; if end_date = . then delete; run; proc sort data = trans1; by station_id end_date segment_id seg_grp watershed segment_name collecting_entity flow_comp flow_usgs wet_dry seg_type; run; proc transpose data = trans1 out = trans_raw; var value_adj; id parameter; by station_id end_date segment_id seg_grp watershed segment_name collecting_entity flow_comp wet_dry seg_type; run; data trans_raw; set trans_raw; TN =Total_Kjeldahl_Nitrogen + Nitrate_N; run; data trans2; set trans1; where parameter in ('Alkalinity' 'Ammonia-N' 'Chloride' 'Chlorophyll a' 'E. Coli' 'Enterococci' 'Flow Severity' 'Instantaneous Flow' 'Nitrate-N' 'Salinity' 'Sulfate' 'Total Kjeldahl Nitrogen' 'Total Organic Carbon' 'Total Phosphorus' 'Total Suspended Solids' ); run; proc sort data = trans2; by station_id end_date segment_id seg_grp watershed collecting_entity flow_comp wet_dry ; run; proc transpose data = trans2 out = trans_log; var log_reg ; id parameter; by station_id end_date segment_id seg_grp watershed collecting_entity flow_comp wet_dry ; run; data trans_log3 (drop = collecting_entity flow_comp _name_ flow_severity watershed wet_dry seg_grp segment_id rename = ( Alkalinity = alk_log salinity = sal_log Ammonia_N = NH3_log Chloride = Cl_log Chlorophyll_a= Chlor_log E__Coli = EC_log Enterococci = ent_log Instantaneous_Flow = flow_log Nitrate_N= NO3_log Sulfate= SO4_log Total_Kjeldahl_Nitrogen = TKN_log Total_Organic_Carbon= TOC_log Total_Phosphorus= TPhos_log Total_Suspended_Solids = TSS_log)); set trans_log ; TN_log = Total_Kjeldahl_Nitrogen + Nitrate_N ; log10flow = log10(flow_comp); run; /* join raw and log transformed transposed data */ proc sql; create table bsr2016.data_transposed as select x.*, y.* from trans_raw as x left join trans_log3 as y on x.station_id = y.station_id and x.end_date = y.end_date; quit; data bsr2016.dat0; set bsr2016.data_all; where value_adj > 0; run; /* limit data to one event per month per parameter at each station */ data bsr2016.dat00; set bsr2016.dat0; mon = month(end_date); yr = year(end_date); if strip(ph_note2) NE '' then do; standard_text = ph_note2; end; run; proc sort data = bsr2016.dat00 nodupkey; by station_id mon yr parameter descending flow_comp flow_usgs ; run; /* don't run this again - already got the rankings and made subs...*/ /* proc sql; create table data_sum1 as select station_id, segment_id, year(end_date) as yr, parameter, value_adj, count(value_adj) as total_results, count(flow_usgs) as usgs_results from bsr2016.dat00 group by station_id, parameter; quit; proc sql; create table data_sum2 as select *, count(value_adj) as total_year from data_sum1 group station_id, parameter, yr; quit; data data_sum3; set data_sum2; result_yr = total_year/total_results; run; data data_sum3; set data_sum3; if total_year < 3 then delete; run; proc sql; create table data_sum4 as select *, mean(value_adj) as station_avg, count(distinct yr) as data_yrs from data_sum3 group by station_id, parameter; quit; proc sql; create table data_sum5 as select *, mean(value_adj) as seg_avg from data_sum4 group by segment_id, parameter; quit; data data_sum6; set data_sum5; mean_dev = abs(station_avg-seg_avg); dev_pct = 100* mean_dev/seg_avg; run; proc sort data = data_sum6 nodupkey; by station_id segment_id parameter yr; run; data data_sum7; set data_sum6; if total_year = . then delete; run; proc sort data = data_sum7; by segment_id parameter mean_dev total_results descending usgs_results ; run; proc sort data = data_sum7 nodupkey out = data_sum8; by segment_id parameter mean_dev total_results usgs_results station_id ; run; proc sql; create table data_sum_maxspan as select *, max(data_yrs) as max_span from data_sum7 group by segment_id; quit; data data_sum_maxspan; set data_sum_maxspan; where parameter in ('E. Coli' 'Enterococci'); run; data station_rank; set data_sum_maxspan; retain rank; by segment_id parameter; if first.segment_id or first.parameter then rank=1; else rank=rank+1; run; ods html close; ods html; proc sql; create table bsr2016.station_rank as select x.*, y.data_yrs from station_rank as x left join data_sum7 as y on x.station_id = y.station_id and x.parameter = y.parameter; quit; */ /* substitute stations for three that were selected by rank algorithm but are not the best choices - perhaps... */ data station_rank ; set bsr2016.station_rank ; if station_id = '16891' then do; rank = 1.5; end; if station_id = '11300' then do;rank = 1.5; end; if station_id = '11387' then do; rank = 1.5; end; if station_id = '11264' then do; rank = 1; end; if station_id = '11283' then do;rank = 1; end; if station_id = '11389' then do;rank = 1; end; run; proc sql; create table GOM as select distinct station_id, au_id from crp_misc.au_station_master where au_id in ('2501_02', '2501_03'); quit; proc sql; create table gom1 as select distinct y.station_id, y.parameter, y.segment_id, count(value_adj) as data, 1 as rank from gom as x left join bsr2016.data_all as y on x.station_id = y.station_id group by x.station_id, parameter ; quit; data bsr2016.station_rank; set station_rank gom1; run; proc sort data = bsr2016.station_rank nodupkey; by station_id parameter ; run; proc sql; create table rank1 as select distinct segment_id, station_id, count(distinct parameter) as param_rank from bsr2016.station_rank where rank = 1 group by station_id order by segment_id; quit; proc sql; create table rank2 as select *, max(param_rank) as maxparams from rank1 group by segment_id; quit; proc sql; create table rank3 as select * from rank2 where param_rank = maxparams; quit; proc sql ; create table test as select x.segment_id, y.station_id from bsr2016.usgs_segments as x left join rank3 as y on x.station_id = y.station_id; quit; data bsr2016.segment_flow_plots; set test; where strip(station_id) NE ''; run; proc sql; create table rank4 as select x.*, y.parameter, year(y.end_date) as yr, count (distinct year(y.end_date)) as yrs from rank3 as x left join bsr2016.dat00 as y on x.station_id = y.station_id group by x.station_id, y.parameter; quit; proc sort data = rank4 nodupkey; by segment_id station_id parameter yr; run; proc sql; create table seg_param_yr as select distinct segment_id, year(end_date) as yr, parameter from bsr2016.data; quit; proc sql; create table segpar1 as select x.*, y.station_id as refstation from seg_param_yr as x left join rank4 as y on x.segment_id = y.segment_id and x.parameter = y.parameter and x.yr = y.yr; quit; proc sort data = segpar1 nodupkey; by segment_id refstation parameter yr; run; proc sql; create table data_needed as select segment_id, parameter, yr from segpar1 where strip(refstation) = ''; quit; proc sql; create table data_needed_refstation as select x.*, y.refstation as station_id as refstation from data_needed as x left join segpar1 as y on x.segment_id = y.segment_id and x.parameter = y.parameter; quit; proc sort data = data_needed_refstation nodupkey; by segment_id parameter yr refstation; quit; data datneed; set data_needed_refstation; where strip(refstation) = ''; run; /* calculate nearest stations to refstation */ proc sql ;create table dist1 as select x.segment_id, x.station_id as refstation, y.station_Id from rank4 as x left join crp_misc.stations_sept2015 as y on x.segment_id = y.segment_id; quit; proc sort data = dist1 nodupkey; by segment_id refstation station_id; run; proc sql; create table dist2 as select x.*, y.latitude_num as reflat, y.longitude_num as reflong from dist1 as x left join crp_misc.stations_sept2015 as y on x.refstation = y.station_id; quit; proc sql; create table dist3 as select x.*, y.latitude_num as lat ,y.longitude_num as long from dist2 as x left join crp_misc.stations_sept2015 as y on x.station_id = y.station_id; quit; data dist4; set dist3; distance=geodist(reflat,reflong,lat,long,'m'); run; data dist5 (drop = refstation) ; set dist4; if refstation = station_id then delete; run; proc sql; create table dist6 as select *, min(distance) as mindist from dist5 group by segment_id; run; proc sql; create table datneed2 as select x.*, y.station_id, y.distance from datneed as x left join dist6 as y on x.segment_id = y.segment_id; quit; proc sql; create table datneed3 as select x.distance, y.* from datneed2 as x left join bsr2016.dat00 as y on x.station_Id = y.station_id and x.yr = year(y.end_date); quit; data datneed3; set datneed3; order = 2; yr = year(end_date) ; mon = month(end_date) ;run; proc sort data = datneed3 ;by segment_id parameter distance station_id mon yr ; run; proc sort data = datneed3 nodupkey ;by segment_id parameter mon yr; run; /* add all data to table of main stations with long data runs */ proc sql; create table data1 as select y.* from rank4 as x left join bsr2016.dat00 as y on x.station_id = y.station_id and x.parameter = y.parameter; quit; proc sort data = data1 nodupkey; by segment_id parameter end_date; run; data data1; set data1; order = 1; run; data data_subset; set data1 datneed3; run; proc sql; create table results as select segment_id , parameter, count(distinct yr) as yrs, count(value_adj) as results from data_subset group by segment_id, parameter; quit; proc sql; create table data_subset1 as select x.*, y.yrs from data_subset as x left join results as y on x.segment_id = y.segment_id and x.parameter = y.parameter; quit; data data_subset1 (drop = results); set data_subset1; if substr(stream_flow_type,1,1) in ('p' 'r') and substr(parameter,1,3) = 'Ent' then delete; proc sql; create table data_subset2 as select *, count(value_adj) as results from data_subset1 group by segment_id, parameter; quit; proc sql; create table bsr2016.trend_minimal as select * from data_subset2 where results LT 30 or yrs <7; quit; data data_subset3; set data_subset2; where results GE 30 and yrs > 6 ; run; proc sort data = data_subset3 ; by segment_id parameter order mon yr ; run; proc sort data = data_subset3 nodupkey ; by segment_id parameter mon yr ; run; proc sort data = data_subset3; by segment_id parameter end_date; run; data bsr2016.data_subset; set data_subset3; if parameter = 'Chlorophyll a' and greater_than_less_than = '<' and value = 10 then delete; log10flow = log10(flow_comp); if strip(parameter) = '' then delete; if strip(segment_id) = '' then delete; run; /* transposed subset of data for analysis */ data trans1 ; set bsr2016.data_subset; if end_date = . then delete; run; proc sort data = trans1; by station_id end_date segment_id seg_grp watershed segment_name collecting_entity flow_comp flow_usgs wet_dry ; run; proc transpose data = trans1 out = trans_raw; var value_adj; id parameter; by station_id end_date segment_id seg_grp watershed segment_name collecting_entity flow_comp wet_dry ; run; data trans_raw; set trans_raw; TN =Total_Kjeldahl_Nitrogen + Nitrate_N; run; data trans2; set trans1; where parameter in ('Alkalinity' 'Ammonia-N' 'Chloride' 'Chlorophyll a' 'E. Coli' 'Enterococci' 'Flow Severity' 'Instantaneous Flow' 'Nitrate-N' 'Salinity' 'Sulfate' 'Total Kjeldahl Nitrogen' 'Total Organic Carbon' 'Total Phosphorus' 'Total Suspended Solids' ); run; proc sort data = trans2; by station_id end_date segment_id seg_grp watershed collecting_entity flow_comp wet_dry ; run; proc transpose data = trans2 out = trans_log; var log_reg ; id parameter; by station_id end_date segment_id seg_grp watershed collecting_entity flow_comp wet_dry ; run; data trans_log3 (drop = collecting_entity flow_comp _name_ flow_severity watershed wet_dry seg_grp segment_id rename = ( Alkalinity = alk_log salinity = sal_log Ammonia_N = NH3_log Chloride = Cl_log Chlorophyll_a= Chlor_log E__Coli = EC_log Enterococci = ent_log Instantaneous_Flow = flow_log Nitrate_N= NO3_log Sulfate= SO4_log Total_Kjeldahl_Nitrogen = TKN_log Total_Organic_Carbon= TOC_log Total_Phosphorus= TPhos_log Total_Suspended_Solids = TSS_log)); set trans_log ; TN_log = Total_Kjeldahl_Nitrogen + Nitrate_N ; log10flow = log10(flow_comp); run; /* join raw and log transformed transposed data */ proc sql; create table bsr2016.data_subset_transposed as select x.*, y.* from trans_raw as x left join trans_log3 as y on x.station_id = y.station_id and x.end_date = y.end_date; quit;