Merge pull request #299 from USEPA/function_names_eh

Function names eh
USEPA · Jul 5, 2023 · 5e7185b · 5e7185b
2 parents e89d0a7 + 7b9745e
commit 5e7185b
Show file tree

Hide file tree

Showing 72 changed files with 906 additions and 983 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,47 +1,46 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
-export(AboveNationalWQXUpperThreshold)
-export(AggregatedContinuousData)
-export(BelowNationalWQXLowerThreshold)
-export(GetActivityTypeRef)
-export(GetDetCondRef)
-export(GetDetLimitRef)
-export(GetMeasureUnitRef)
-export(GetWQXCharValRef)
-export(HarmonizationRefTable)
-export(HarmonizeData)
-export(InvalidCoordinates)
-export(InvalidFraction)
-export(InvalidMethod)
-export(InvalidResultUnit)
-export(InvalidSpeciation)
-export(JoinWQPProfiles)
-export(PotentialDuplicateRowID)
-export(QAPPDocAvailable)
-export(QAPPapproved)
-export(QualityControlActivity)
-export(TADABigdataRetrieval)
-export(TADAOverviewMap)
-export(TADAReadWQPWebServices)
 export(TADA_AutoClean)
+export(TADA_BigDataRetrieval)
+export(TADA_Boxplot)
 export(TADA_ConvertDepthUnits)
 export(TADA_ConvertResultUnits)
 export(TADA_ConvertSpecialChars)
+export(TADA_CreateComparableID)
+export(TADA_DataRetrieval)
+export(TADA_FieldCounts)
+export(TADA_FieldValuesPie)
+export(TADA_FieldValuesTable)
+export(TADA_FindContinuousData)
+export(TADA_FindNearbySites)
 export(TADA_FindPotentialDuplicates)
-export(TADA_NearbySites)
+export(TADA_FindQualityControlData)
+export(TADA_FlagCoordinates)
+export(TADA_FlagFraction)
+export(TADA_FlagMethod)
+export(TADA_FlagResultAboveThreshold)
+export(TADA_FlagResultBelowThreshold)
+export(TADA_FlagResultUnit)
+export(TADA_FlagSpeciation)
+export(TADA_GetActivityTypeRef)
+export(TADA_GetDetCondRef)
+export(TADA_GetDetLimitRef)
+export(TADA_GetMeasureUnitRef)
+export(TADA_GetSynonymRef)
+export(TADA_GetTemplate)
+export(TADA_GetWQXCharValRef)
+export(TADA_HarmonizeSynonyms)
+export(TADA_Histogram)
+export(TADA_IDCensoredData)
+export(TADA_JoinWQPProfiles)
 export(TADA_OrderCols)
-export(TADA_boxplot)
-export(TADA_hist)
-export(TADA_stats)
-export(TADA_summarizeColumn)
-export(TADAdataRetrieval)
-export(createComparableId)
-export(fieldCounts)
-export(fieldValuesPie)
-export(fieldValuesTable)
-export(getTADATemplate)
-export(idCensoredData)
-export(simpleCensoredMethods)
-export(summarizeCensoredData)
+export(TADA_OverviewMap)
+export(TADA_QAPPDocAvailable)
+export(TADA_QAPPapproved)
+export(TADA_ReadWQPWebServices)
+export(TADA_SimpleCensoredMethods)
+export(TADA_Stats)
+export(TADA_SummarizeCensoredData)
+export(TADA_SummarizeColumn)
 importFrom(magrittr,"%>%")
diff --git a/R/CensoredDataSuite.R b/R/CensoredDataSuite.R
@@ -24,14 +24,14 @@
 #' @export
 #' 
 
-idCensoredData <- function(.data){
+TADA_IDCensoredData <- function(.data){
   # check .data has all of the required columns
   expected_cols <- c(
     "ResultDetectionConditionText",
     "DetectionQuantitationLimitTypeName",
     "TADA.ResultMeasureValueDataTypes.Flag"
   )
-  checkColumns(.data, expected_cols)
+  TADA_CheckColumns(.data, expected_cols)
 
   ## First step: identify censored data
   cens = .data%>%dplyr::filter(TADA.ResultMeasureValueDataTypes.Flag=="Result Value/Unit Copied from Detection Limit")
@@ -41,7 +41,7 @@ idCensoredData <- function(.data){
 
   if(dim(cens)[1]>0){
     ## Bring in det cond reference table
-    cond.ref = GetDetCondRef()%>%dplyr::rename(ResultDetectionConditionText = Name)%>%dplyr::select(ResultDetectionConditionText, TADA.Detection_Type)
+    cond.ref = TADA_GetDetCondRef()%>%dplyr::rename(ResultDetectionConditionText = Name)%>%dplyr::select(ResultDetectionConditionText, TADA.Detection_Type)
 
     ## Join to censored data
     cens = dplyr::left_join(cens, cond.ref, by = "ResultDetectionConditionText")
@@ -61,7 +61,7 @@ idCensoredData <- function(.data){
     }
 
     ## Bring in det limit type reference table
-    limtype.ref = GetDetLimitRef()%>%dplyr::rename(DetectionQuantitationLimitTypeName = Name)%>%dplyr::select(DetectionQuantitationLimitTypeName, TADA.Limit_Type)
+    limtype.ref = TADA_GetDetLimitRef()%>%dplyr::rename(DetectionQuantitationLimitTypeName = Name)%>%dplyr::select(DetectionQuantitationLimitTypeName, TADA.Limit_Type)
 
     ## Join to censored data
     cens = dplyr::left_join(cens, limtype.ref, by = "DetectionQuantitationLimitTypeName")
@@ -130,22 +130,22 @@ idCensoredData <- function(.data){
 #' # and in instances where the measurement is non-detect, set the result value
 #' # to half of the detection limit value. For over-detect measurements, retain
 #' # the detection limit value as the result value as-is. 
-#' Nutrients_Utah_CensoredFlag = simpleCensoredMethods(Nutrients_Utah, nd_method = "multiplier", nd_multiplier = 0.5, od_method = "as-is", od_multiplier = "null")
+#' Nutrients_Utah_CensoredFlag = TADA_SimpleCensoredMethods(Nutrients_Utah, nd_method = "multiplier", nd_multiplier = 0.5, od_method = "as-is", od_multiplier = "null")
 #' 
 #' # Check for agreement between detection condition and detection limit type, and in instances where the measurement is non-detect, set the result value to a random value between 0 and the detection limit value. For over-detect measurements, retain the detection limit value as the result value as-is. 
-#' Nutrients_Utah_CensoredFlag = simpleCensoredMethods(Nutrients_Utah, nd_method = "randombelowlimit", nd_multiplier = "null", od_method = "as-is", od_multiplier = "null")
+#' Nutrients_Utah_CensoredFlag = TADA_SimpleCensoredMethods(Nutrients_Utah, nd_method = "randombelowlimit", nd_multiplier = "null", od_method = "as-is", od_multiplier = "null")
 #' 
 
 
 
-simpleCensoredMethods <- function(.data, nd_method = "multiplier", nd_multiplier = 0.5, od_method = "as-is", od_multiplier = "null"){
+TADA_SimpleCensoredMethods <- function(.data, nd_method = "multiplier", nd_multiplier = 0.5, od_method = "as-is", od_multiplier = "null"){
   # check .data has all of the required columns
   expected_cols <- c(
     "ResultDetectionConditionText",
     "DetectionQuantitationLimitTypeName",
     "TADA.ResultMeasureValueDataTypes.Flag"
   )
-  checkColumns(.data, expected_cols)
+  TADA_CheckColumns(.data, expected_cols)
 
   # check that multiplier is provided if method = "multiplier"
   if(nd_method == "multiplier"&nd_multiplier == "null"){
@@ -155,9 +155,9 @@ simpleCensoredMethods <- function(.data, nd_method = "multiplier", nd_multiplier
     stop("Please provide a multiplier for the upper detection limit handling method of 'multiplier'")
   }
 
-  # If user has not previously run idCensoredData function, run it here to get required columns 
+  # If the user has not already run TADA_IDCensoredData(), run it here to add the required columns
   if(!"TADA.CensoredData.Flag"%in%names(.data)){
-    cens.data = idCensoredData(.data)
+    cens.data = TADA_IDCensoredData(.data)
   }else{
     cens.data = .data
   }
@@ -206,72 +206,3 @@ simpleCensoredMethods <- function(.data, nd_method = "multiplier", nd_multiplier
   }
   return(.data)
 }
-
-#' Summarize Censored Data
-#' 
-#' This function creates a summary table of the percentage of non-detects by 
-#' specified ID columns. It can be used to determine the best method for handling 
-#' censored data estimation methods that depend upon the distribution of the dataset.
-#' 
-#' @param .data A TADA dataframe
-#' @param spec_cols A vector of column names to be used as aggregating variables when summarizing censored data information.
-#' @return A summary dataframe yielding measurement ncounts, censored data ncounts, 
-#' and percent of dataset that is censored, aggregated by user-defined grouping 
-#' variables. Also produces a column "TADA.Censored.Note" that identifies 
-#' when there is sufficient non-censored data to estimate censored data using statistical
-#' methods including Maximum Likelihood Estimation, Robust ROS and Kaplan Meier.
-#' The decision tree used to identify applicable statistical analyses is based 
-#' on the Baseline Assessment of Left-Censored Environmental Data Using R Tech Note.
-#' More info can be found here: https://www.epa.gov/sites/default/files/2016-05/documents/tech_notes_10_jun2014_r.pdf
-#' 
-#' 
-#' @export
-#' 
-#' @examples
-#' # Load example dataset:
-#' data(TADAProfileCleanTP)
-#' # TADAProfileCleanTP dataframe is clean, harmonized, and filtered
-#' # down to one Comparable Data Identifier
-#' 
-#' # Create summarizeCensoredData table:
-#' TADAProfileCleanTP_summarizeCensoredData <- summarizeCensoredData(TADAProfileCleanTP)
-#' 
-
-summarizeCensoredData <- function(.data, spec_cols = c("TADA.CharacteristicName","TADA.ResultMeasure.MeasureUnitCode","TADA.ResultSampleFractionText","TADA.MethodSpecificationName")){
-
-  if(any(is.na(.data$TADA.ResultMeasureValue))){
-    warning("Dataset contains data missing both a result value and a detection limit. Suggest removing or handling. See TADA Harmonization vignette for an example.")
-  }
-
-  if(!"TADA.CensoredData.Flag"%in%names(.data)){
-    cens = idCensoredData(.data)
-  }else{
-    cens = .data
-  }
-
-  sum_low = cens%>%dplyr::group_by_at(spec_cols)%>%
-    dplyr::filter(TADA.CensoredData.Flag%in%c("Non-Detect", "Uncensored"))%>%
-    dplyr::summarise(Measurement_Count = length(unique(ResultIdentifier)), Censored_Count = length(TADA.CensoredData.Flag[TADA.CensoredData.Flag=="Non-Detect"]), Percent_Censored = length(TADA.CensoredData.Flag[TADA.CensoredData.Flag=="Non-Detect"])/length(TADA.CensoredData.Flag)*100, Censoring_Levels = length(unique(TADA.ResultMeasureValue[TADA.CensoredData.Flag=="Non-Detect"])))%>%
-    dplyr::filter(Censored_Count>0)%>%
-    dplyr::mutate("TADA.CensoredData.Flag" = "Non-Detect")
-
-  sum_hi = cens%>%dplyr::group_by_at(spec_cols)%>%
-    dplyr::filter(TADA.CensoredData.Flag%in%c("Over-Detect", "Uncensored"))%>%
-    dplyr::summarise(Measurement_Count = length(unique(ResultIdentifier)), Censored_Count = length(TADA.CensoredData.Flag[TADA.CensoredData.Flag=="Over-Detect"]), Percent_Censored = length(TADA.CensoredData.Flag[TADA.CensoredData.Flag=="Over-Detect"])/length(TADA.CensoredData.Flag)*100, Censoring_Levels = length(unique(TADA.ResultMeasureValue[TADA.CensoredData.Flag=="Over-Detect"])))%>%
-    dplyr::filter(Censored_Count>0)%>%
-    dplyr::mutate("TADA.CensoredData.Flag" = "Over-Detect")
-
-  sum_all = plyr::rbind.fill(sum_low, sum_hi)
-
-  sum_all = sum_all%>%dplyr::mutate(TADA.Censored.Note = dplyr::case_when(
-    Percent_Censored>80 ~ as.character("Percent censored too high for estimation methods"), # greater than 80, cannot estimate
-    Percent_Censored<50&Censoring_Levels>1 ~ as.character("Kaplan-Meier"), # less than 50% censored, and multiple censoring levels (no minimum n)
-    Percent_Censored<50 ~ as.character("Robust Regression Order Statistics"), # less than 50% censored and one censoring level (no minimum n?)
-    Measurement_Count>=50 ~ as.character("Maximum Likelihood Estimation"), # 50%-80% censored, 50 or more measurements
-    Measurement_Count<50 ~ as.character("Robust Regression Order Statistics"), # 50%-80% censored, less than 50 measures
-    ))
-  if(dim(sum_all)[1]==0){
-    print("No censored data to summarize. Returning empty data frame.")
-  }
-  return(sum_all)
-}