program rapid_Qobs_from_HydroDesktop
implicit none

!PURPOSE
!This program prepares two of the files necessary for the optimization of the
!river routing parameters in RAPID (k and x).  The first file contains a list 
!of reach IDs where daily stream flow observations are available, the second 
!file contains the observations in m3/s for all these reaches and for many days.
!Here we focus on river reaches from NHDPlus, and stream flow downloaded from
!USGS using HydroDesktop.
!NOTES
!Following the RAPID data model, only those stations that have daily data
!available every day of a given time period are kept.
!Also, only one station can be used for a given river reach, so a selection
!must be made in case of multiple stations available for one given river reach:
!the station with the highest USGS gage code is the one that is kept.
!Finally, those stations located on river reaches with no known
!direction of flow are not kept since RAPID cannot use them.
!INPUTS
!A file informing on stream gages available in NHDPlus and including:
!     -The USGS gage code
!     -The NHDPlus reach ID 
!     -The flow direction
!A file summarizing the NWIS data downloaded using HydroDesktop and including:
!     -The USGS gage code
!     -The number of data points downloaded
!A file containing all stream flow data downloaded using HydroDesktop
!     -Each column corresponds to a gage - the same order as the previous file
!     -Each row corresponds to a date, in chronological order
!     -Each data point is in cfs
!OUTPUTS
!A file containing a list of where gages are located and including:
!     -The NHDPlus reach ID, one per line, sorted in increasing order
!A file containing stream flow data
!     -Each column corresponds to a gage - the same order as the previous file
!     -Each row corresponds to a date, in chronological order
!     -Each data point is in m3/s
!Text written in stdout giving some information about available gages.
!AUTHOR 
!Cedric H. David, 2013


!*******************************************************************************
!Declaration of variables
!*******************************************************************************
!Naming used throughout: IS_ = integer scalar (a size or a count), JS_ = loop 
!index, IV_ = integer vector, ZS_ = real scalar, ZV_ = real vector, 
!YV_ = character vector.

!-------------------------------------------------------------------------------
!Table of NHDPlus gages (read on unit 10)
!-------------------------------------------------------------------------------
character(len=100) :: SOURCE_FEA_COMID_FLOWDIR_file
!Name of the file
integer :: IS_ArcGIS
!Number of lines in the file
integer :: JS_ArcGIS
!Index over the lines of the file
character(len=8), allocatable :: YV_SOURCE_FEA1(:)
!USGS gage code on each line
integer, allocatable :: IV_reach_id(:)
!NHDPlus reach ID on each line
character(len=14), allocatable :: YV_FLOWDIR(:)
!NHDPlus flow direction on each line

!-------------------------------------------------------------------------------
!Summary of the data downloaded using HydroDesktop (read on unit 20)
!-------------------------------------------------------------------------------
character(len=100) :: SOURCE_FEA_DataCount_file
!Name of the file
integer :: IS_HydroDesktop
!Number of lines in the file
integer :: JS_HydroDesktop
!Index over the lines of the file
character(len=8), allocatable :: YV_SOURCE_FEA2(:)
!USGS gage code on each line
integer, allocatable :: IV_count(:)
!Number of data values downloaded using HydroDesktop for each USGS gage 

!-------------------------------------------------------------------------------
!Stream flow downloaded using HydroDesktop (read on unit 30)
!-------------------------------------------------------------------------------
character(len=100) :: Flow_cfs_file
!Name of the file
integer :: IS_time
!Number of days in period of interest
integer :: JS_time
!Index over the days
real, allocatable :: ZV_read(:)
!Observations read for one day, one value per line of the HydroDesktop summary 

!-------------------------------------------------------------------------------
!Counter for the statistics printed in stdout
!-------------------------------------------------------------------------------
integer :: IS_gage
!Number to be used interchangeably to compute many types of number of gages

!-------------------------------------------------------------------------------
!Gages with full daily record and located on known flow direction
!-------------------------------------------------------------------------------
integer :: IS_gagefull
!Number of such gages
integer :: JS_gagefull
integer :: JS_gagefull2
!Indices over such gages
integer, allocatable :: IV_gagefull_id(:)
!NHDPlus reach ID of each such gage, a reach appears once per gage it hosts
integer, allocatable :: IV_dup(:)
!Number of other full gages that have the same NHDPlus reach ID
integer :: IS_dup
!Total number of river reaches that are host to more than one station
real :: ZS_dup
!Real-valued counterpart of IS_dup
integer :: IS_sta
!Corresponding number of USGS stations
integer :: IS_rem
!Total number of river reaches to be removed to go from gagefull to gagetot
real :: ZS_rem
!Real-valued counterpart of IS_rem

!-------------------------------------------------------------------------------
!List of reaches that have observations (written on unit 40)
!-------------------------------------------------------------------------------
character(len=100) :: gage_id_file
!Name of the file
integer :: IS_gagetot
!Number of gages with full daily record, on known flow dir, independent reaches 
integer :: JS_gagetot
!Index over these gages
integer, allocatable :: IV_gagetot_id(:)
!NHDPlus reach IDs where gages are available
character(len=8) :: temp_SOURCE_FEA
!Temporary string holding the USGS gage code currently retained for a reach 
!while the stations located on that reach are compared

!-------------------------------------------------------------------------------
!Observations for the retained reaches (written on unit 50)
!-------------------------------------------------------------------------------
character(len=100) :: Qobs_file
!Name of the file
integer :: IS_count
!Number of data values of the station currently being reported in stdout
character(len=13) :: fmt_char  
!Format built at run time to write one line of Qobs_file
integer, allocatable :: IV_index(:)
!Allows associating each given JS_gagetot with its corresponding JS_HydroDesktop
integer, allocatable :: IV_choice(:)
!Number of gages to choose from on a given JS_gagetot that have full record and
!known flow direction


!*******************************************************************************
!Current variables
!*******************************************************************************
!The sizes and file names are set directly in the source.  Exactly one set of 
!values is active (the last one below); the sets used previously are kept as 
!comments so that they can be restored by hand.

!Set named San_Guad_2004_2007 in its output files
!IS_time=1461
!
!IS_ArcGIS=773
!SOURCE_FEA_COMID_FLOWDIR_file='./ArcGIS_SOURCE_FEA_COMID_FLOWDIR_Reg12.csv'
!
!IS_HydroDesktop=58
!SOURCE_FEA_DataCount_file='./HydroDesktop_SOURCE_FEA_DataCount_San_Guad.csv'
!Flow_cfs_file='./HydroDesktop_Flow_cfs_San_Guad.csv'
!
!gage_id_file='./Fortran_gage_id_San_Guad_2004_2007_full.csv'
!Qobs_file='./Fortran_Qobs_San_Guad_2004_2007_full.txt'

!Set named Reg12_2000_2007 in its output files
!IS_time=2922
!
!IS_ArcGIS=773
!SOURCE_FEA_COMID_FLOWDIR_file='./ArcGIS_SOURCE_FEA_COMID_FLOWDIR_Reg12.csv'
!
!IS_HydroDesktop=397
!SOURCE_FEA_DataCount_file='./HydroDesktop_SOURCE_FEA_DataCount_Reg12.csv'
!Flow_cfs_file='./HydroDesktop_Flow_cfs_Reg12.csv'
!
!gage_id_file='./Fortran_gage_id_Reg12_2000_2007_full.csv'
!Qobs_file='./Fortran_Qobs_Reg12_2000_2007_full.txt'

!Set named Reg07_2004_2004 in its output files
!IS_time=366
!
!IS_ArcGIS=1277
!SOURCE_FEA_COMID_FLOWDIR_file='./ArcGIS_SOURCE_FEA_COMID_FLOWDIR_Reg07.csv'
!
!IS_HydroDesktop=492
!SOURCE_FEA_DataCount_file='./HydroDesktop_SOURCE_FEA_DataCount_Reg07.csv'
!Flow_cfs_file='./HydroDesktop_Flow_cfs_Reg07.csv'
!
!gage_id_file='./Fortran_gage_id_Reg07_2004_2004_full.csv'
!Qobs_file='./Fortran_Qobs_Reg07_2004_2004_full.txt'

!-------------------------------------------------------------------------------
!Active set, named Reg18_2000_2007 in its output files
!-------------------------------------------------------------------------------
!Sizes: lines in the NHDPlus gage table, lines in the HydroDesktop summary, 
!and days in the period of interest
IS_ArcGIS=2311
IS_HydroDesktop=907
IS_time=2922

!Input files
SOURCE_FEA_COMID_FLOWDIR_file='./ArcGIS_SOURCE_FEA_COMID_FLOWDIR_Reg18.csv'
SOURCE_FEA_DataCount_file='./HydroDesktop_SOURCE_FEA_DataCount_Reg18.csv'
Flow_cfs_file='./HydroDesktop_Flow_cfs_Reg18.csv'

!Output files
gage_id_file='./Fortran_gage_id_Reg18_2000_2007_full.csv'
Qobs_file='./Fortran_Qobs_Reg18_2000_2007_full.txt'


!*******************************************************************************
!Allocate sizes
!*******************************************************************************
!One entry per line of the NHDPlus gage table
allocate(YV_SOURCE_FEA1(IS_ArcGIS),                                            &
         IV_reach_id(IS_ArcGIS),                                               &
         YV_FLOWDIR(IS_ArcGIS))

!One entry per line of the HydroDesktop summary; ZV_read has the same size 
!because it holds one value per station for a single day
allocate(YV_SOURCE_FEA2(IS_HydroDesktop),                                      &
         IV_count(IS_HydroDesktop),                                            &
         ZV_read(IS_HydroDesktop))


!*******************************************************************************
!Read files
!*******************************************************************************
!Each line is read with its own list-directed read statement, so that whatever 
!follows the requested values on a line is skipped.

!NHDPlus gage table: gage code, reach ID and flow direction
open(unit=10,file=SOURCE_FEA_COMID_FLOWDIR_file,status='old')
do JS_ArcGIS=1,IS_ArcGIS
     read(unit=10,fmt=*) YV_SOURCE_FEA1(JS_ArcGIS), IV_reach_id(JS_ArcGIS),    &
                         YV_FLOWDIR(JS_ArcGIS)
end do
close(unit=10)
!print *, YV_SOURCE_FEA1(IS_ArcGIS), IV_reach_id(IS_ArcGIS),                    &
!         YV_FLOWDIR(IS_ArcGIS)

!HydroDesktop summary: gage code and number of values downloaded
open(unit=20,file=SOURCE_FEA_DataCount_file,status='old')
do JS_HydroDesktop=1,IS_HydroDesktop
     read(unit=20,fmt=*) YV_SOURCE_FEA2(JS_HydroDesktop),                      &
                         IV_count(JS_HydroDesktop)
end do
close(unit=20)
!print *, YV_SOURCE_FEA2(IS_HydroDesktop), IV_count(IS_HydroDesktop)

!The flow file is only opened and closed here, nothing is read from it yet.  
!With status='old' the open fails if the file does not exist.
open(unit=30,file=Flow_cfs_file,status='old')
close(unit=30)


!*******************************************************************************
!Compute some statistics
!*******************************************************************************
!Each number printed below is a count obtained with the count() intrinsic.  
!Counts that involve both files are accumulated one HydroDesktop station at a 
!time, so a gage code present on several lines of the NHDPlus table is counted 
!once per line.
print *, '----------'
print *, 'Out of the', IS_ArcGIS,                                              &
         'stations that exist in the NHDPlus representation of NWIS'
!-------------------------------------------------------------------------------
!Lines of the NHDPlus table whose flow direction is 'With'
IS_gage=count(YV_FLOWDIR=='With')
print *, '- Only    ', IS_gage, 'are on reaches with known flow direction'
!-------------------------------------------------------------------------------
print *, '----------'
print *, 'Out of the', IS_HydroDesktop,                                        &
         'stations that have daily values downloaded with HydroDesktop'
!-------------------------------------------------------------------------------
!Stations with exactly one value per day of the period
IS_gage=count(IV_count==IS_time)
print *, '- Only    ', IS_gage, 'have a full data record'
!-------------------------------------------------------------------------------

!-------------------------------------------------------------------------------
!Stations whose gage code is found in the NHDPlus table
IS_gage=0
do JS_HydroDesktop=1,IS_HydroDesktop
     IS_gage=IS_gage+count(YV_SOURCE_FEA1==YV_SOURCE_FEA2(JS_HydroDesktop))
end do
print *, '- Only    ', IS_gage, 'exist in NHDPlus'
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
!Same as above, restricted to stations with a full record
IS_gage=0
do JS_HydroDesktop=1,IS_HydroDesktop
     if (IV_count(JS_HydroDesktop)/=IS_time) cycle
     IS_gage=IS_gage+count(YV_SOURCE_FEA1==YV_SOURCE_FEA2(JS_HydroDesktop))
end do
print *, '- Only    ', IS_gage, 'exist in NHDPlus and have a full data record'
!-------------------------------------------------------------------------------

!-------------------------------------------------------------------------------
!Same as above, further restricted to flow direction 'With'.  This count is 
!kept in IS_gagefull because it is used to size IV_gagefull_id.
IS_gagefull=0
do JS_HydroDesktop=1,IS_HydroDesktop
     if (IV_count(JS_HydroDesktop)/=IS_time) cycle
     IS_gagefull=IS_gagefull+count(YV_FLOWDIR=='With' .and.                    &
                             YV_SOURCE_FEA1==YV_SOURCE_FEA2(JS_HydroDesktop))
end do
print *, '- Only    ', IS_gagefull,                                            &
         'exist in NHDPlus and have a full data record and are on reaches ',   &
         'with known flow direction'
!-------------------------------------------------------------------------------
print *, '----------'


!*******************************************************************************
!Allocate and populate IV_gagefull_id
!*******************************************************************************
!IV_gagefull_id receives the reach ID of every (NHDPlus line, HydroDesktop 
!station) pair that shares a gage code, has a full record and has flow 
!direction 'With'.  Entries are stored in the order of the NHDPlus table, and 
!a reach appears as many times as it has qualifying stations.
allocate(IV_gagefull_id(IS_gagefull))
IV_gagefull_id=0

JS_gagefull=0
do JS_ArcGIS=1,IS_ArcGIS
     !Lines without the 'With' flow direction can never qualify
     if (YV_FLOWDIR(JS_ArcGIS)/='With') cycle

     do JS_HydroDesktop=1,IS_HydroDesktop
          if (IV_count(JS_HydroDesktop)/=IS_time) cycle
          if (YV_SOURCE_FEA2(JS_HydroDesktop)/=YV_SOURCE_FEA1(JS_ArcGIS)) cycle

          JS_gagefull=JS_gagefull+1
          IV_gagefull_id(JS_gagefull)=IV_reach_id(JS_ArcGIS)
     end do
end do
!print *, IV_gagefull_id


!*******************************************************************************
!Checking for duplicates in IV_gagefull_id
!*******************************************************************************
!Computes, from IV_gagefull_id:
!     -IV_dup      for each entry, the number of other entries on the same reach
!     -IS_sta      the number of entries that share their reach with another one
!     -IS_dup      the number of distinct reaches that host more than one entry
!     -IS_rem      the number of entries to drop to keep one entry per reach
!     -IS_gagetot  the number of distinct reaches, i.e. IS_gagefull-IS_rem
!All counts are obtained with integer arithmetic only, so that the result does 
!not depend on rounding a sum of real fractions.
print *, 'Out of the', IS_gagefull,                                            &
         'stations that exist in NHDPlus and have a full data record and are ',&
         'on reaches with known flow direction' 

allocate(IV_dup(IS_gagefull))
IV_dup=0
do JS_gagefull=1,IS_gagefull
     !The entry always matches itself, hence the -1
     IV_dup(JS_gagefull)=count(IV_gagefull_id==IV_gagefull_id(JS_gagefull))-1
end do
!print * , IV_dup

IS_sta=count(IV_dup/=0)

IS_dup=0
do JS_gagefull=1,IS_gagefull
     if (IV_dup(JS_gagefull)==0) cycle
     !A duplicated reach is counted only at its first occurrence.  For 
     !JS_gagefull=1 the section below is empty and any() returns .false.
     if (any(IV_gagefull_id(1:JS_gagefull-1)==IV_gagefull_id(JS_gagefull))) cycle
     IS_dup=IS_dup+1
end do

!A reach hosting k entries contributes k to IS_sta and 1 to IS_dup, and k-1 of 
!its entries have to be removed
IS_rem=IS_sta-IS_dup
IS_gagetot=IS_gagefull-IS_rem

!The real-valued counterparts are kept consistent with the integer counts
ZS_dup=real(IS_dup)
ZS_rem=real(IS_rem)

print *, '-         ', IS_dup, 'total number of river reaches that are host ', &
         'to more than one station'
print *, '-         ', IS_rem, 'total number of river reaches to be removed ', &
         'from list'
print *, '- i.e.    ', IS_sta, 'stations involved in selection from duplicates'
print *, '----------'
print *, 'Out of the', IS_HydroDesktop, 'stations that have daily values ',    &
         'downloaded with HydroDesktop'
print *, '- Only    ', IS_gagetot, 'exist in NHDPlus and have a full data ',   &
         'record and are on independent reaches with known flow direction' 
                                           


!*******************************************************************************
!Allocate and populate IV_gagetot_id
!*******************************************************************************
!IV_gagetot_id receives the distinct reach IDs found in IV_gagefull_id, sorted 
!in increasing order: the first entry is the smallest ID, and every following 
!entry is the smallest ID that is strictly greater than the previous entry.  
!IS_gagetot is expected to be the number of distinct IDs in IV_gagefull_id, so 
!that such an ID always exists.
allocate(IV_gagetot_id(IS_gagetot))
IV_gagetot_id=0

!Nothing to populate when no gage qualifies; this guard avoids assigning to 
!element 1 of a zero-sized array
if (IS_gagetot>0) then
     IV_gagetot_id(1)=minval(IV_gagefull_id)
     do JS_gagetot=2,IS_gagetot
          IV_gagetot_id(JS_gagetot)=minval(IV_gagefull_id,                     &
                          mask=IV_gagefull_id>IV_gagetot_id(JS_gagetot-1))
     end do
end if
!gagetot_id_file is now sorted in increasing order
!print *, IV_gagetot_id


!*******************************************************************************
!Populate gage_id_file
!*******************************************************************************
!One reach ID per line, each in a field of 12 characters.  The whole array is 
!given to a single write statement: the format holds one item only, so a new 
!line is started for every element.  The write is skipped when there is no 
!reach, so that no empty line is produced.
open(unit=40,file=gage_id_file)
if (IS_gagetot>0) write(unit=40,fmt='(i12)') IV_gagetot_id
close(unit=40)


!*******************************************************************************
!Finds gage corresponding to reach, picks downstream-most in case of duplicates
!*******************************************************************************
!For every retained reach:
!     -IV_choice counts the stations located on the reach that have a full 
!      record
!     -IV_index is the line of the HydroDesktop summary (hence the column of 
!      the flow file) of the station that is kept
!When several stations qualify, the one with the greatest gage code is kept; 
!a greater code is taken to mean further downstream.
allocate(IV_index(IS_gagetot))
allocate(IV_choice(IS_gagetot))
IV_index=0
IV_choice=0

do JS_gagetot=1,IS_gagetot
     !Blank string: any gage code compares greater than or equal to it
     temp_SOURCE_FEA=''

     do JS_ArcGIS=1,IS_ArcGIS
          !Only the NHDPlus lines located on the current reach are of interest
          if (IV_reach_id(JS_ArcGIS)/=IV_gagetot_id(JS_gagetot)) cycle

          !Looking for the same station, with a full record, in HydroDesktop
          do JS_HydroDesktop=1,IS_HydroDesktop
               if (IV_count(JS_HydroDesktop)/=IS_time) cycle
               if (YV_SOURCE_FEA2(JS_HydroDesktop)/=YV_SOURCE_FEA1(JS_ArcGIS)) &
                    cycle

               IV_choice(JS_gagetot)=IV_choice(JS_gagetot)+1

               !This station replaces the one retained so far unless its code 
               !is smaller
               if (YV_SOURCE_FEA1(JS_ArcGIS)>=temp_SOURCE_FEA) then
                    temp_SOURCE_FEA=YV_SOURCE_FEA1(JS_ArcGIS)
                    IV_index(JS_gagetot)=JS_HydroDesktop
               end if
          end do
     end do
     !print *,temp_SOURCE_FEA
end do


!Reports in stdout the reaches for which a choice had to be made.  For every 
!reach whose number of candidate stations differs from one, each station of the
!reach that has a full record is listed along with the station that was picked.
do JS_gagetot=1,IS_gagetot
     if (IV_choice(JS_gagetot)==1) cycle

     do JS_ArcGIS=1,IS_ArcGIS
          if (IV_reach_id(JS_ArcGIS)/=IV_gagetot_id(JS_gagetot)) cycle

          !Number of values downloaded for this station; remains zero if the 
          !station is absent from HydroDesktop, and the last matching line 
          !wins if the station is listed several times
          IS_count=0
          do JS_HydroDesktop=1,IS_HydroDesktop
               if (YV_SOURCE_FEA2(JS_HydroDesktop)==YV_SOURCE_FEA1(JS_ArcGIS)) &
                    IS_count=IV_count(JS_HydroDesktop)
          end do

          !Stations without a full record were never candidates
          if (IS_count/=IS_time) cycle

          print '(a11,i9,a20,a8,a2,i5,a12,a10,a8)',                            &
                '- river ID:', IV_gagetot_id(JS_gagetot),                      &
                ' could use station: ', YV_SOURCE_FEA1(JS_ArcGIS),             &
                ' (', IS_count, ' datapoints)',                                &
                ', picked: ', YV_SOURCE_FEA2(IV_index(JS_gagetot))
     end do
end do


!*******************************************************************************
!Creates and populates Qobs_file
!*******************************************************************************
!For every day, one line of Flow_cfs_file is read (one value per HydroDesktop 
!station, in cfs) and one line of Qobs_file is written (one value per retained 
!reach, in m3/s, in the order of IV_gagetot_id).

!Stops with a message instead of failing on an invalid format or on an 
!out-of-bounds column when there is nothing valid to write
if (IS_gagetot<1) then
     print *, 'ERROR: no gage was retained, cannot write ', trim(Qobs_file)
     stop 1
end if
if (any(IV_index<1)) then
     print *, 'ERROR: at least one retained reach has no matching column in ', &
              trim(Flow_cfs_file)
     stop 1
end if

!The format is '(' + repeat count + 'f14.6)'.  The 13 characters of fmt_char 
!leave 6 digits for the repeat count, i.e. up to 999999 values per line.  
!Blanks left in front of the count are not significant in a format.
write(fmt_char,'(a1,i6,a6)') '(',IS_gagetot,'f14.6)'
!print *, fmt_char

open(30,file=Flow_cfs_file,status='old')
open(50,file=Qobs_file)
do JS_time=1,IS_time
     read(30,*) ZV_read
     !1 ft = 0.3048 m, hence 1 cfs = 0.3048**3 m3/s.  Only the columns that 
     !are written are converted.
     write(50, fmt_char ) (0.3048*0.3048*0.3048)*ZV_read(IV_index)
end do
close(50)
close(30)


!*******************************************************************************
!End of program
!*******************************************************************************
!Tells the user that all files have been written
write(*,*) 'Done'
end program rapid_Qobs_from_HydroDesktop
