9 R source code in `ToxPlot` package

Main R functions written for this analysis were compiled as an R package ToxPlot, and version 0.1.0 is available on CRAN.
For the most up-to-date version please go to the github link.
Below is the source code of ToxPlot ver.0.1.0 used in this analysis.
#' normalize per plate
#'
#' normalize raw readings as percent of median vehicle control wells
#'
#' @param dt data.frame contains essential columns with the raw data.
#' @param nctrl the name (spid) of the vehicle/solvent control used for calculation
#'
#' @return data.frame with normalized value columns. 'nval_mean' column is the normalized value
#' calculated using the mean of vehicle control wells, 'nval_median' column is the normalized value
#' calculated using the median of vehicle control wells.
#' @import dplyr
#'
#' @examples
#' ## normalize demo data
#' demo_mc_norm <- normalize_per_plate(demo_mc, nctrl = "DMSO")
#'
#' @export

normalize_per_plate <- function(dt, nctrl = "DMSO") {

  # claim variables for passing R CMD Check
  assay <- spid <- apid <- rval <- NULL


  output <- data.frame()
  #iterate through each assay, calculate separately
  for (as in unique(dt$assay)) {
    df_t <- normalize_single_assay(dplyr::filter(dt, assay == as), nctrl)
    output <- dplyr::bind_rows(output, df_t)
  }

  return(output)

}

#' normalize per plate (from single assay data)
#'
#' normalize raw readings as percent of median/mean vehicle control wells, per assay, per plate.
#' This function is called by normalize_per_plate, should not be called directly by user.
#' #'
#' @param dt data.frame contains essential columns with the raw data.
#' @param nctrl the name (spid) of the vehicle/solvent control used for calculation
#'
#' @return data.frame with normalized value columns. 'nval_mean' column is the normalized value
#' calculated using the mean of vehicle control wells, 'nval_median' column is the normalized value
#' calculated using the median of vehicle control wells.
#' @import dplyr
#
normalize_single_assay <- function(dt, nctrl) {

  # claim variables for passing R CMD Check
  assay <- spid <- apid <- rval <- NULL


  #calculate the mean and median value for negative control
  ctrl_avg <- dt %>%
    dplyr::filter(spid == nctrl) %>%
    dplyr::group_by(apid) %>%
    dplyr::summarize(mean_DMSO = mean(rval, na.rm = TRUE),
                     median_DMSO = stats::median(rval, na.rm = TRUE))

  #iterate through the data table to calculate normalized percent activity
  #normalization was done using both mean and median separately.

  temp <- data.frame()
  for (id in ctrl_avg$apid){
    med <- dplyr::filter(ctrl_avg, apid == id)$median_DMSO
    avg <- dplyr::filter(ctrl_avg, apid == id)$mean_DMSO
    t <- dplyr::filter(dt, apid == id) %>%
      mutate(nval_mean = 100 * rval/avg,
             nval_median = 100 * rval/med)
    temp <- bind_rows(temp, t)
  }
  return(temp)
}

#' Quality-control metrics calculation
#'
#' Calculate QC metrics, includin Z' score, CV of DMSO negative control, per assay plate.
#'
#' @param d data.frame contains essential columns with the raw data.
#' @param resp response type, specify either 'nval_median' or 'nval_mean' for QC calculation
#' @param assay_info assay_info list, contains names of primary and cytotox assay, names must match
#' what are provided in the raw data, under the column 'assay'.
#'
#' @examples
#' ## calculate QC measures from demo data
#' assay_info <- list(prim_assay = "Primary",toxi_assay = "Cytotox")
#' demo_mc_norm <- normalize_per_plate(demo_mc, nctrl = "DMSO")
#' qc <- qc_per_plate(demo_mc_norm, assay_info)
#'
#' @return three dataframe each representing negative control stats, positive control stats and QC metrics (CV_DMSO, Z' score, SSMD) for each assay plate
#' @export
# QC measures are calculated per 96-well palte. 'apid' plus 'assay' column can serve as the ID to distinguish each plate.

qc_per_plate <- function(d, assay_info, resp = "nval_median") {

  # claim variables for passing R CMD Check
  assay <- apid <- wllt <- conc <- CV_DMSO <- SSMD <- Z_prime <- logc <- mean_DMSO <- mean_positive <- NULL
  sd_positive <- sd_DMSO <- unique_id <- NULL



  # choose which normalized value to use (median based or mean based)
  if (resp == "nval_mean") {
    d$resp <- d$nval_mean
  } else if (resp == "nval_median") {
    d$resp <- d$nval_median
  } else {
    stop("specify either nval_median or nval_mean for QC calculation")
  }

  # create unique ID for each plate (apid + assay)
  # (can also distinguish between primary and cytotox assays)
  # group by the unique ID.
  d <- d %>% #dplyr::mutate(uid = paste(apid, assay)) %>%
      # dplyr::group_by(pid, assay, repi)
      dplyr::group_by(apid, assay)

  # calculate vehicle control (e.g., DMSO etc.) qc stats
  # use welltype = n to identify vehicle control
  n_ctrl_sum <- d %>%
    dplyr::filter(wllt == "n") %>%
    dplyr::summarize(count_DMSO = n(),                     #count of DMSO control wells on each plate
                     count_DMSO_NA = sum(is.na(resp)),     #count of DMSO control wells with missing values
                     mean_DMSO = mean(resp, na.rm = TRUE),
                     sd_DMSO = stats::sd(resp, na.rm = TRUE),
                     CV_DMSO = 100 * stats::sd(resp, na.rm = TRUE) / mean(resp, na.rm = TRUE)
                     # median_DMSO= median(resp, na.rm=TRUE),
                     # mad_DMSO= mad(resp, constant = 1, na.rm=TRUE),
                     # bmad_DMSO= mad(resp, constant = 1.4826, na.rm=TRUE),
                     # three_bmad = 3*bmad_DMSO
                     )


  # calculate positive control stats
  pctrl_prim <-  d %>%
    dplyr::filter(assay == assay_info$prim_assay,
                  wllt == "pr") %>%
    dplyr::filter(conc == max(conc)) %>%
    dplyr::summarize(sd_positive = stats::sd(resp, na.rm = TRUE),
                     mean_positive= mean(resp, na.rm = TRUE))

  pctrl_toxi <-  d %>%
    dplyr::filter(assay == assay_info$toxi_assay,
                  wllt == "pc") %>%
    dplyr::filter(conc == max(conc)) %>%
    dplyr::summarize(sd_positive = stats::sd(resp, na.rm = TRUE),
                     mean_positive = mean(resp, na.rm = TRUE))

  p_ctrl_sum <- bind_rows(pctrl_prim, pctrl_toxi)
  remove(pctrl_toxi, pctrl_prim)


  # calculate Z', SSMD

  qc <- dplyr::left_join(p_ctrl_sum, n_ctrl_sum,
                        by=c("apid"="apid", "assay"="assay")) %>%
       #replace NaN with 0 in sd_positive
       dplyr::mutate(sd_positive = (ifelse(is.na(sd_positive), 0, sd_positive))) %>%
       dplyr::mutate(Z_prime = 1 - 3*(sd_positive + sd_DMSO) / (abs(mean_positive - mean_DMSO)),
                       SSMD = (abs(mean_positive - mean_DMSO) / sqrt(sd_positive^2 + sd_DMSO^2))) %>%
       dplyr::mutate(unique_id = paste(apid, assay, sep = "_")) %>%
       dplyr::select(unique_id, apid, assay, CV_DMSO, Z_prime, SSMD)



  list(neg_ctrl_sum = n_ctrl_sum,
       pos_ctrl_sum = p_ctrl_sum,
       qc = qc)
}

#' hill mode in ToxCast tcpl package
#'
#' @param p a vector containing the Hill model parameters: top, log AC50, hill coefficient
#' @param x a vector of log concentrations
#' @return calculated y value based on the x and model parameters
#'
hill_model <- function(p, x) {
  y <- p[1] / (1 + 10 ^ ((p[2] - x) * p[3]))
  return(y)
}

# ##========inversed hill model (using 100- the Y value)=======##
# hill_model_inverse <- function(p, x) {
#   ###   p:     a numeric vector of length 4 containg the starting values for
#   ###          the hill model, in order: top, log AC50, hill
#   ###          coefficient
#   ###   x: a numeric vector containing the log concentration values
#
#   y <- 100 - (p[1] / (1 + 10 ^ ((p[2] - x) * p[3])))
#   return(y)
# }

#' calculate absolute EC_anything based on tcpl hill model
#'
#' @param p a vector containing the Hill model parameters: top, log AC50, hill coefficient
#' @param y the y value
#' @return calculated x value

log_abs_ec <- function(p, y) {
  ## y value is the absolute response value
  ##p[1] is top parameter
  ##p[2] is ga, logac50
  ##p[3] is gw, hillslope
  suppressWarnings(x <- p[2] - log10((p[1] - y) / y) / p[3])
  if (is.nan(x)) {x <- NA}
  return(x)
}


#' function to calculate Area Under the Curve (AUC) of the hill model
#'
#' @param p a vector containing the Hill model parameters: top, log AC50, hill coefficient
#' @param lower lower boundary of x for integration
#' @param upper upper boundary of x for integration
#' @return calculated area under the curve (AUC) value
#'
auc_hill_tcpl <- function(p, lower, upper) {
  #define the hill model function
  hill <- function(x) {
    p[1] / (1 + 10 ^ ((p[2] - x) * p[3]))
  }
  a <- stats::integrate(hill, lower = lower, upper = upper)
  return(a[[1]])
}




#' fit dose-resopnse curve using tcpl hill model
#'
#' Curve fitting using the tcplFit function in `tcpl` package.
#' Chemicals are modelled based on spid.
#' If you want to model the same chemical (e.g. positive controls),
#' then assign different spid to this chemical so the function can separate them out.
#' Absolute IC20 and absolute IC50 are calculated as well.
#'
#' @param df input data contain normalized assay readings
#' @param assay_info predefined names for primary and cytotoxicity assays,
#' use NULL if either one of the assay does not need to be modeled.
#' @param prim_cutoff significance cutoff for primary assay (eg. 3sigma or 3bMAD)
#' @param toxi_cutoff significance cutoff for cytotoxicity assay (eg. 3sigma or 3bMAD)
#'
#' @return A list object containing modeling results, the corresponding data for each chemical.
#'
#' @examples
#' ## fit curve with default significant threshold 20
#'
#' demo_md <- fit_curve_tcpl(mc_norm, assay_info =
#' list(prim_assay = "Primary", toxi_assay = "Cytotox"))
#'
#' ## start from raw data
#' # define assay
#' assay_info <- list(prim_assay = "Primary",toxi_assay = "Cytotox")
#' # data normalization
#' demo_mc_norm <- normalize_per_plate(demo_mc, nctrl = "DMSO")
#' # filter out test chemicals only
#' demo_mc_norm <- dplyr::filter(demo_mc_norm, wllt == "t")
#' # fit curve with default 20% threshold
#' demo_md <- fit_curve_tcpl(demo_mc_norm, assay_info)
#'
#' ## fit curve with specified significance threshold
#' demo_md <- fit_curve_tcpl(demo_mc_norm, assay_info, prim_cutoff = 25, toxi_cutoff = 25)
#'
#'
#' @import dplyr
#' @export

fit_curve_tcpl <- function(df, assay_info, prim_cutoff = 20, toxi_cutoff = 20) {

  # claim variables for passing R CMD Check
  conc <- spid <- assay <- apid <- NULL

  st_time <- Sys.time()
  r_list <- list()
  c_list <- list()
  bmad_prim <- prim_cutoff/3
  bmad_toxi <- toxi_cutoff/3
  # get log concentration
  df <- df %>% dplyr::mutate(logc = log10(conc))
  spid_list <- unique(df$spid)
  if (is.null(assay_info$prim) & is.null(assay_info$toxi_assay)) {
    stop("assay_info cannot be NULL for both primary and cytotoxicity assay")
  }
  n <- 1
  prim_md <- toxi_md <- model_list <- list()
  cat("Processing", length(unique(df$spid)), "samples(spid)....\n")
  # process by each spid
  for (id in unique(df$spid)) {
    #d <- df %>% dplyr::filter(spid == i)
    cat(id, "||")
    # model raiu data
    # check if toxi assay data is available, if not, skip modeling
    if (is.null(assay_info$prim_assay)) {
      prim_md <- NA
      prim_dt <- NA
    } else {

      prim_dt <- df %>% dplyr::filter(spid == id, assay == assay_info$prim_assay)
      m <- tcpl::tcplFit(logc = prim_dt$logc, resp = 100 - prim_dt$nval_median, bmad_prim)
      absIC50 <- log_abs_ec(c(m$hill_tp, m$hill_ga, m$hill_gw), 50)
      absIC20 <- log_abs_ec(c(m$hill_tp, m$hill_ga, m$hill_gw), 20)
      m[["absIC20"]] <- absIC20
      m[["absIC50"]] <- absIC50
      m[["apid"]] <- prim_dt[[1,1]]
      m[["assay"]] <- assay_info$prim_assay
      m[["spid"]] <- id
      #prim_md <- dplyr::bind_rows(prim_md, m)
      # print(m)
      prim_md <- data.frame(m) %>% dplyr::select(apid, assay, spid, everything())
    }

    # model cytotox data
    # check if toxi assay data is available, if not, skip modeling
    if (is.null(assay_info$toxi_assay)) {
      toxi_md <- NA
      toxi_dt <- NA
    } else {
      toxi_dt <- df %>% dplyr::filter(spid == id, assay==assay_info$toxi_assay)
      m <- tcpl::tcplFit(logc = toxi_dt$logc, resp = 100 - toxi_dt$nval_median, bmad_toxi)
      absIC50 <- log_abs_ec(c(m$hill_tp, m$hill_ga, m$hill_gw), 50)
      absIC20 <- log_abs_ec(c(m$hill_tp, m$hill_ga, m$hill_gw), 20)
      m[["absIC20"]] <- absIC20
      m[["absIC50"]] <- absIC50
      m[["apid"]] <- toxi_dt[[1,1]]
      m[["assay"]] <- assay_info$toxi_assay
      m[["spid"]] <- id
      #toxi_md <- dplyr::bind_rows(toxi_md, m)

      toxi_md <- data.frame(m) %>% dplyr::select(apid, assay, spid, everything())

    }

    #build final model list
    model_list[[n]] <- list(spid = id,
                            model_prim = prim_md,
                            model_toxi = toxi_md,
                            data_prim = prim_dt,
                            data_toxi = toxi_dt,
                            cutoff_prim = prim_cutoff,
                            cutoff_toxi = toxi_cutoff,
                            assay_info = assay_info)

    n <- n + 1

  }

  time <- difftime(Sys.time(), st_time) %>% round(1)
  cat("\nCurve Fitting Completed!\nCalculation time:", paste(unclass(time), units(time)), "\n\n")


  return(model_list)
}



#' function to calculate ranking score, TAA, med_diff, EC values based on tcpl hill model
#'
#' calculate ranking score, TAA, med_diff, absolute EC values, AC50, based on the hill model in tcpl package
#'
#' @param tcpl_models the list object returned by 'fit_curve_tcpl' function
#' @param spid_chnm_table a reference table with 'spid' and the corresponding chemical name 'chnm' column,
#' and the CAS number 'casn' column.
#' @param med_taa the median TAA value from reference chemical, if not supplied, then ranking score won't be calculated.
#' @param med_med_diff the median Median-Difference from reference chemical, if not supplied, then ranking score won't be calculated.
#' @return a dataframe containing ranking metrics for each chemical (spid)
#'
#' @examples
#' ## start with normalized data
#' demo_md <- fit_curve_tcpl(mc_norm, assay_info =
#' list(prim_assay = "Primary", toxi_assay = "Cytotox"))
#' demo_rank <- rank_tcpl(demo_md)
#'
#'
#' ## start from raw data
#' # define assay
#' assay_info <- list(prim_assay = "Primary",toxi_assay = "Cytotox")
#' # data normalization
#' demo_mc_norm <- normalize_per_plate(demo_mc, nctrl = "DMSO")
#' # filter out test chemicals only
#' demo_mc_norm <- dplyr::filter(demo_mc_norm, wllt == "t")
#' # fit curve with default 20% threshold
#' demo_md <- fit_curve_tcpl(demo_mc_norm, assay_info)
#' # calculate TAA and Med_diff only
#' demo_rank <- rank_tcpl(demo_md, med_taa = NULL, med_med_diff = NULL)
#'
#' ## calculate ranking score with specified median TAA and meidan Med_Difference
#' demo_rank <- rank_tcpl(demo_md, med_taa = 150, med_med_diff = 92)
#'
#' @export
#'
rank_tcpl <- function(tcpl_models, spid_chnm_table = NULL, med_taa = NULL, med_med_diff = NULL) {

  # claim variables for passing R CMD Check
  taa_norm <- med_diff_norm <- ranking_score <- spid <- logc <- nval_median <- NULL

  df <- data.frame()
  ##iterate through each spid
  for (i in seq_along(tcpl_models)) {
    #get chemical sample id 'spid'
    .spid <- tcpl_models[[i]]$spid
    # print(i)
    # print(.spid)
    #get chnm
    if (!is.null(spid_chnm_table)) {
      chnm <- dplyr::filter(spid_chnm_table, spid == .spid)$chnm
      casn <- dplyr::filter(spid_chnm_table, spid == .spid)$casn
    } else {
      chnm <- NA
      casn <- NA
    }

    assay_info <- tcpl_models[[i]]$assay_info
    if (is.null(assay_info$toxi_assay)) {
      stop("No toxi_assay, ranking score is not calculable!")
    }
    #extract modelling results
    m_toxi <- tcpl_models[[i]]$model_toxi
    m_prim <- tcpl_models[[i]]$model_prim

    #cutoff
    cutoff_prim <- tcpl_models[[i]]$cutoff_prim
    cutoff_toxi <- tcpl_models[[i]]$cutoff_toxi

    #
    #calculate med_diff at max(logc)
    max_prim <- tcpl_models[[i]]$data_prim %>%
      dplyr::filter(logc == max(logc)) %>%
      dplyr::summarize(median = stats::median(nval_median))
    max_toxi <- tcpl_models[[i]]$data_toxi %>%
      dplyr::filter(logc == max(logc)) %>%
      dplyr::summarize(median = stats::median(nval_median))
    med_diff <- max_toxi$median - max_prim$median

    ##get auc, logEC_3bmadprim, for prim
    if (!is.na(m_prim$hill) & m_prim$hill == 1) {
      para_prim <- c(m_prim$hill_tp, m_prim$hill_ga, m_prim$hill_gw)
      #get auc
      lr <- log_abs_ec(para_prim, cutoff_prim)
      aa_prim <-
        auc_hill_tcpl(para_prim, lower = lr, upper = m_prim$logc_max) - cutoff_prim * (m_prim$logc_max - lr)
      #get AC50
      AC50_prim <- m_prim$hill_ga
      #get absEC
      if (hill_model(para_prim,m_prim$logc_max) > 50) {
        absEC50_prim <- log_abs_ec(para_prim, 50)
        absEC80_prim <- log_abs_ec(para_prim, 20)
      } else if (hill_model(para_prim,m_prim$logc_max) > 20) {
        absEC50_prim <- NA
        absEC80_prim <- log_abs_ec(para_prim, 20)
      } else {
        absEC80_prim <- NA
        absEC50_prim <- NA
      }
    } else {
      aa_prim <- NA
      lr <- NA
      absEC80_prim <- NA
      absEC50_prim <- NA
      AC50_prim <- NA
    }

    ##get auc, logEC_3bmadprim, for toxitox
    if (!is.na(m_toxi$hill) & m_toxi$hill == 1) {
      para_toxi <- c(m_toxi$hill_tp, m_toxi$hill_ga, m_toxi$hill_gw)
      #get AC50
      AC50_toxi <- m_toxi$hill_ga
      #get aa_toxi
      if (hill_model(para_toxi,m_prim$logc_max) > cutoff_prim) {
        lc <- log_abs_ec(para_toxi, cutoff_prim)
        aa_toxi <-
          auc_hill_tcpl(para_toxi,lower = lc, upper = m_prim$logc_max) - cutoff_prim * (m_prim$logc_max - lc)
      } else {
        aa_toxi <- NA
        lc <- -3
      }

      #calculate absEC
      if (hill_model(para_toxi,m_prim$logc_max) > 50) {
        absEC50_toxi <- log_abs_ec(para_toxi, 50)
        absEC80_toxi <- log_abs_ec(para_toxi, 20)
      } else if (hill_model(para_toxi,m_prim$logc_max) > 20) {
        absEC50_toxi <- NA
        absEC80_toxi <- log_abs_ec(para_toxi, 20)
      } else {
        absEC80_toxi <- NA
        absEC50_toxi <- NA
      }
      #get cytotox limit (absEC_cutoff_toxi)
      absEC_ct_toxi <- log_abs_ec(para_toxi, cutoff_toxi)
    } else {
      aa_toxi <- NA
      lc <- -3
      absEC80_toxi <- NA
      absEC50_toxi <- NA
      AC50_toxi <- NA
      absEC_ct_toxi <- NA
    }

    ##calculate taa
    if (is.na(aa_prim)) {
      taa <- NA
    } else if (is.na(aa_toxi)) {
      taa <- aa_prim
    } else {
      taa <- aa_prim - aa_toxi
    }

    ##calculate selectivity based logEC value at 3bmad of prim assay.
    if (is.na(lr)) {
      selectivity_3bmad <- NA
    } else {
      selectivity_3bmad <- lc - lr
    }

    ##calculate selectivity based on logAC50 (original method)
    if (is.na(m_prim$hill_ga)) {
      selectivity_AC50 <- NA
    } else if (is.na(m_toxi$hill_ga)) {
      selectivity_AC50 <- -3 - m_prim$hill_ga
    } else {
      selectivity_AC50 <- m_toxi$hill_ga - m_prim$hill_ga
    }

    ##gather taa and selectivity into one table
    t <- data.frame(
      index = i,
      spid = .spid,
      chnm = chnm,
      casn = casn,
      # aa_toxi = aa_toxi,
      # aa_prim = aa_prim,
      taa = taa,
      # sel_3bMAD = selectivity_3bmad,
      # sel_AC50 = selectivity_AC50,
      med_diff = med_diff,
      AC50_toxi = AC50_toxi,
      AC50_prim = AC50_prim,
      absEC80_toxi = absEC80_toxi,
      absEC50_toxi = absEC50_toxi,
      absEC80_prim = absEC80_prim,
      absEC50_prim = absEC50_prim,
      cyto_lim = absEC_ct_toxi
    )

    df <- base::rbind(df, t)

  }

  #calculate ranking_score
  #this done by adding 0-100 rescaled TAA value an med_diff value
  # df <- df %>%
  #   dplyr::mutate(taa_rescale = rescale_0_100(taa),
  #                 med_diff_rescale = rescale_0_100(med_diff),
  #                 ranking_score = taa_rescale + med_diff_rescale ) %>%
  #   dplyr::arrange(desc(ranking_score))
  if (is.null(med_taa)&is.null(med_med_diff)) {
    df <- df %>%
      dplyr::mutate(ranking_score = NA)
  } else {
    df <- df %>%
      dplyr::mutate(taa_norm = taa/med_taa*100,
                    med_diff_norm = med_diff/med_med_diff*100,
                    ranking_score = taa_norm + med_diff_norm ) %>%
      dplyr::arrange(desc(ranking_score))

  }



  return(df)
}




#' function to summarize curve fitting results
#'
#' @param tcpl_models the list object returned by 'fit_curve_tcpl' function
#' @param spid_chnm_table a reference table with 'spid' and the corresponding chemical name 'chnm' column,
#' and the CAS number 'casn' column.
#' @return a data.frame contains summarized metrics for each chemical (spid)
#'
#' @examples
#' ## supply models as the essential argument. spid_chnm_table is optional.
#' demo_md <- fit_curve_tcpl(mc_norm, assay_info =
#' list(prim_assay = "Primary", toxi_assay = "Cytotox"))
#' demo_sum <- summary_tcpl(demo_md)
#'
#'
#' ## start from raw data
#' # define assay
#' assay_info <- list(prim_assay = "Primary",toxi_assay = "Cytotox")
#' # data normalization
#' demo_mc_norm <- normalize_per_plate(demo_mc, nctrl = "DMSO")
#' # filter out test chemicals only
#' demo_mc_norm <- dplyr::filter(demo_mc_norm, wllt == "t")
#' # fit curve with default 20% threshold
#' demo_md <- fit_curve_tcpl(demo_mc_norm, assay_info)
#' # obtain summary table
#' demo_sum <- summary_tcpl(demo_md)
#'
#' @export
#'
summary_tcpl <- function(tcpl_models, spid_chnm_table = NULL) {

  # claim variables for passing R CMD Check
  spid <- NULL


  df <- data.frame()
  ##iterate through each spid
  for (i in seq_along(tcpl_models)) {
    #get chemical sample id 'spid'
    .spid <- tcpl_models[[i]]$spid
    # print(i)
    # print(.spid)
    #get chnm
    if (!is.null(spid_chnm_table)) {
      chnm <- dplyr::filter(spid_chnm_table, spid == .spid)$chnm
      casn <- dplyr::filter(spid_chnm_table, spid == .spid)$chnm
    } else {
      chnm <- NA
    }

    #extract modelling results
    m_toxi <- tcpl_models[[i]]$model_toxi
    m_prim <- tcpl_models[[i]]$model_prim

    #cutoff
    cutoff_prim <- tcpl_models[[i]]$cutoff_prim
    cutoff_toxi <- tcpl_models[[i]]$cutoff_toxi

    #get assay_info
    assay_info <- tcpl_models[[i]]$assay_info

    ##get metrics for prim
    if (is.null(assay_info$prim_assay)) {
      absEC80_prim <- NA
      absEC50_prim <- NA
      AC50_prim <- NA
    } else {
      if (!is.na(m_prim$hill) & m_prim$hill == 1) {
        para_prim <- c(m_prim$hill_tp, m_prim$hill_ga, m_prim$hill_gw)
        #get AC50
        AC50_prim <- m_prim$hill_ga
        #get absEC
        if (hill_model(para_prim, m_prim$logc_max) > 50) {
          absEC50_prim <- log_abs_ec(para_prim, 50)
          absEC80_prim <- log_abs_ec(para_prim, 20)
        } else if (hill_model(para_prim,m_prim$logc_max) > 20) {
          absEC50_prim <- NA
          absEC80_prim <- log_abs_ec(para_prim, 20)
        } else {
          absEC80_prim <- NA
          absEC50_prim <- NA
        }
      } else {
        absEC80_prim <- NA
        absEC50_prim <- NA
        AC50_prim <- NA
      }
    }

    ##get metrics for toxi

    if (is.null(assay_info$toxi_assay)) {
      absEC80_toxi <- NA
      absEC50_toxi <- NA
      AC50_toxi <- NA
    } else {
      if (!is.na(m_toxi$hill) & m_toxi$hill == 1) {
        para_toxi <- c(m_toxi$hill_tp, m_toxi$hill_ga, m_toxi$hill_gw)
        #get AC50
        AC50_toxi <- m_toxi$hill_ga
        #calculate absEC
        if (hill_model(para_toxi,m_toxi$logc_max) > 50) {
          absEC50_toxi <- log_abs_ec(para_toxi, 50)
          absEC80_toxi <- log_abs_ec(para_toxi, 20)
        } else if (hill_model(para_toxi,m_toxi$logc_max) > 20) {
          absEC50_toxi <- NA
          absEC80_toxi <- log_abs_ec(para_toxi, 20)
        } else {
          absEC80_toxi <- NA
          absEC50_toxi <- NA
        }
      } else {
        absEC80_toxi <- NA
        absEC50_toxi <- NA
        AC50_toxi <- NA
      }

    }

    ##gather taa and selectivity into one table
    t <- data.frame(
      index = i,
      spid = .spid,
      chnm = chnm,
      AC50_toxi = AC50_toxi,
      AC50_prim = AC50_prim,
      absEC80_toxi = absEC80_toxi,
      absEC50_toxi = absEC50_toxi,
      absEC80_prim = absEC80_prim,
      absEC50_prim = absEC50_prim
    )

    df <- base::rbind(df, t)

  }


  return(df)
}




#' Plot dose-resonse curves based on the tcpl hill model
#'
#' Produce the plot for the dose-response curves and data points for both primary and toxicity assay.
#' The direction of the data and dose-resonse curves are presented as the original data, rather than
#' the uptrend direction required by the 'tcpl' function. Plots are sorted by the ranking_score.
#'
#' @param tcpl_models the list object created by 'fit_curve_tcpl' function
#' @param rank_table the data.frame output from 'rank_tcpl' function
#' @param spid_chnm_table the spid, chnm, casn info table
#' @param notation value can be TRUE or FALSE, determine whehter to show potency metrics on the plot
#' @param cunit  the unit of concentration, on default is "M" (molar).
#'
#' @return list of ggplot2 objects, each corresponding to one spid.
#' @import ggthemes ggplot2
#'
#' @examples
#' ## produce plots without notations
#' demo_md <- fit_curve_tcpl(mc_norm, assay_info =
#' list(prim_assay = "Primary", toxi_assay = "Cytotox"))
#' plots <- plot_tcpl(demo_md)
#'
#'
#' ## start from raw data
#' # define assay
#' assay_info <- list(prim_assay = "Primary",toxi_assay = "Cytotox")
#' # data normalization
#' demo_mc_norm <- normalize_per_plate(demo_mc, nctrl = "DMSO")
#' # filter out test chemicals only
#' demo_mc_norm <- dplyr::filter(demo_mc_norm, wllt == "t")
#' # fit curve with default 20% threshold
#' demo_md <- fit_curve_tcpl(demo_mc_norm, assay_info)
#' # calculate TAA and Med_diff only
#' demo_rank <- rank_tcpl(demo_md, med_taa = NULL, med_med_diff = NULL)
#' #produce plots with notations
#' demo_plots <- plot_tcpl(demo_md, demo_rank, notation = TRUE)
#'
#' ##produce plots with notations, with changed concentration unit displayed on the plot
#' demo_plots <- plot_tcpl(demo_md, demo_rank, notation = TRUE, cunit = "uM")
#'
#' @export
#'
#'
plot_tcpl <-
  function(tcpl_models, rank_table=NULL, spid_chnm_table = NULL, notation = FALSE, cunit = "M") {

    # claim variables for passing R CMD Check
    spid <- nval_median <- logc <- resp <- pred <- assay <- NULL

    #initiate empty output plot list
    plot_list <- list()
    #reorder if rank is available
    if (!is.null(rank_table)) {
      tcpl_models <- tcpl_models[rank_table$index]
    }

    #loop through tcpl_models's unique spid.
    #note tcpl_models is the level 4 output from tcpl package, including all the modelling results
    for (i in seq_along(tcpl_models)) {
      #get chemical sample id and chemical name
      .spid <- tcpl_models[[i]]$spid
      #print(.spid)
      if (!is.null(spid_chnm_table)) {
        chnm <- dplyr::filter(spid_chnm_table, spid == .spid)$chnm
        casn <- dplyr::filter(spid_chnm_table, spid == .spid)$casn
      } else {
        chnm <- NA
        casn <- NA
      }

      #cutoff
      cutoff_prim <- tcpl_models[[i]]$cutoff_prim
      cutoff_toxi <- tcpl_models[[i]]$cutoff_toxi
      #assay_info
      assay_info <- tcpl_models[[i]]$assay_info

      #get normalized resonse value (cytotox and raiu together)
      #and use 100 minus the response value (inverse the plot)
      d <- dplyr::bind_rows(tcpl_models[[i]]$data_prim, tcpl_models[[i]]$data_toxi) %>%
        dplyr::mutate(resp = nval_median) #%>%
      #mutate(assay = ifelse(aeid == 1, assay_info$toxi_assay, assay_info$prim_assay))

      #determine left and right x boundary of the plot
      if ( round(max(d$logc)) < max(d$logc) ) {
        rb <- round(max(d$logc)) + 1
      } else { rb <- round(max(d$logc))}

      lb <- round(min(d$logc))-1

      #initiate basic plot with data points.
      g <- ggplot(d, aes(x = logc, y = resp))

      if (!is.null(spid_chnm_table)){
        g <- g + labs(
          title = paste(i, ". SPID: ", .spid, "\nNAME: ", chnm, "\nCAS NO: ", casn, sep = ""),
          x = paste("Concentration (log", cunit, ")", sep = ""),
          y = "% Control Activity"
          )
      } else {
        g <- g + labs(
          title = paste(i, ". SPID:" , .spid),
          x = paste("Concentration (log", cunit, ")", sep = ""),
          y = "% Control Activity"
        )
      }

        # labs(
        #   title = paste(i, "\nSPID: " , .spid, "\nNAME: ", chnm, "\nCAS NO: ", casn, sep = ""),
        #   x = "Concentration (logM)",
        #   y = "% Control Activity"
        # )

      #extract modelling results
      m_toxi <- tcpl_models[[i]]$model_toxi
      m_prim <- tcpl_models[[i]]$model_prim

      #create 100 concentrations
      s <- expand.grid(logc = seq(lb, rb, length = 130))

      #test and plot cytotox model
      if (!is.na(m_toxi$hill) & m_toxi$hill == 1) {
        para_cyto <- c(m_toxi$hill_tp, m_toxi$hill_ga, m_toxi$hill_gw)
        p1 <- 100 - hill_model(para_cyto, s)
        p1 <- dplyr::bind_cols(s, data.frame(p1))
        names(p1) <- c("logc", "pred")
        g <- g + geom_line(
          data = p1,
          aes(x = logc, y = pred),
          size = 2,
          alpha = 0.9,
          color = "#e02929"
        )
        #plot the vertical line at 3bmad cutoff
        #g <- g+ geom_vline(xintercept = log_abs_ec(para_cyto, 3*m_toxi$bmad) )
      }

      #test and plot raiu model
      if (!is.na(m_prim$hill) & m_prim$hill == 1) {
        para_raiu <- c(m_prim$hill_tp, m_prim$hill_ga, m_prim$hill_gw)

        p2 <- 100 - hill_model(para_raiu, s)
        p2 <- bind_cols(s, data.frame(p2))
        names(p2) <- c("logc", "pred")
        g <- g + geom_line(
          data = p2,
          aes(x = logc, y = pred),
          size = 2,
          alpha = 0.9,
          color = "#377eb8"
        )
      }

      #draw 3bmad cutoff line for cytotox and raiu respectively
      g <- g +
        geom_hline(
          yintercept = 100 - cutoff_toxi,
          alpha = 0.5,
          size = 0.5,
          linetype = "dashed",
          color = "#e02929"
        )

      g <- g +
        geom_hline(
          yintercept = 100 - cutoff_prim,
          alpha = 0.5,
          size = 0.5,
          linetype = "dashed",
          color = "#377eb8"
        )

      #plot data points
      #aesthetics fixes
      g <- g +
        geom_point(
          aes(color = assay),
          shape = 21,
          alpha = 0.9,
          size = 3
        ) +
        coord_fixed(
          ylim = c(0, 125),
          #xlim = c(-10, -4),
          ratio = 4 / 120   #used to be 2/70, when x axis was from -9 to -4.
        ) +
        scale_y_continuous(breaks = seq(
          from = 0,
          to = 120,
          by = 20
        )) +
        scale_x_continuous(breaks = seq(
          from = lb,
          to = rb,
          by = 1
        )) +
        theme_few() +
        theme(legend.title = element_blank()) +
        scale_color_manual(values=c("#e02929", "#377eb8"))+
        theme(plot.title=element_text(hjust=0.5))

      ##adding annotations
      if (!is.null(rank_table) & notation == TRUE) {
        #Get ec and ranking info
        ds <- rank_table %>% dplyr::filter(spid==.spid)
        ds <- round_df(ds, digits=2)

        #annotate with text info
        if (is.na(ds$ranking_score)) {
          line1 <- ""} else {
          line1 <- paste("Ranking_Score:", ds$ranking_score)
        }

        line2 <- paste("TAA:", ds$taa)
        line3 <- paste("Med_Diff:", ds$med_diff)
        line4 <- paste(assay_info$prim_assay, "_AC50: ", ds$AC50_prim, sep = "")
        line5 <- paste(assay_info$prim_assay, "_absEC50: ", ds$absEC50_prim, sep = "")
        #line6 <- paste(assay_info$prim_assay, "_absEC80: ", ds$absEC80_prim, sep = "")
        g <- g +
          annotate("text", x= lb + 0.8, y = 30, alpha = 0.8, hjust=0, label=line1) +
          annotate("text", x= lb + 0.8, y = 25, alpha = 0.8, hjust=0, label= line2) +
          annotate("text", x= lb + 0.8, y = 20, alpha = 0.8, hjust=0, label= line3) +
          annotate("text", x= lb + 0.8, y = 15, alpha = 0.8, hjust=0, label= line4) +
          annotate("text", x= lb + 0.8, y = 10, alpha = 0.8, hjust=0, label= line5)
          #annotate("text", x= lb + 0.8, y = 5, alpha = 0.8, hjust=0, label= line6)

      }

      #collect all plots into a list
      plot_list[[i]] <- g

    }

    return(plot_list)

  }



#' Plot dose-resonse curves with minimal text annotation
#' This funciton plots dose-response curve with minimal text annotation,
#' no x and y axis label, 0 borders. Useful when need to present several plots
#' together.
#' @param tcpl_models the list object created by 'fit_curve_tcpl' function
#' @param rank_table the data.frame output from 'rank_tcpl' function
#' @param spid_chnm_table the spid, chnm, casn info table
#' @param notation value can be TRUE or FALSE, determine whehter to show potency metrics on the plot
#' @param cunit  the unit of concentration, on default is "M" (molar).
#'
#' @return list of ggplot2 objects, each corresponding to one spid.
#' @import ggthemes ggplot2
#'
#' @examples
#' ## produce plots without notations
#' demo_md <- fit_curve_tcpl(mc_norm, assay_info =
#' list(prim_assay = "Primary", toxi_assay = "Cytotox"))
#' plots_minimal <- plot_tcpl_minimal(demo_md)
#'
#' ## start from raw data
#' # define assay
#' assay_info <- list(prim_assay = "Primary",toxi_assay = "Cytotox")
#' # data normalization
#' demo_mc_norm <- normalize_per_plate(demo_mc, nctrl = "DMSO")
#' # filter out test chemicals only
#' demo_mc_norm <- dplyr::filter(demo_mc_norm, wllt == "t")
#' # fit curve with default 20% threshold
#' demo_md <- fit_curve_tcpl(demo_mc_norm, assay_info)
#' # calculate TAA and Med_diff only
#' demo_rank <- rank_tcpl(demo_md, med_taa = NULL, med_med_diff = NULL)
#' #produce plots with notations
#' demo_plots <- plot_tcpl_minimal(demo_md, demo_rank, notation = TRUE)
#'
#' ##produce plots with notations, with changed concentration unit displayed on the plot
#' demo_plots <- plot_tcpl_minimal(demo_md, demo_rank, notation = TRUE, cunit = "uM")
#'
#'
#'
#'
#' @export
#'
#'
plot_tcpl_minimal <-
  function(tcpl_models, rank_table = NULL, spid_chnm_table = NULL, notation = FALSE, cunit = "M") {

    # claim variables for passing R CMD Check
    spid <- nval_median <- logc <- resp <- pred <- assay <- NULL

    #initiate empty output plot list
    plot_list <- list()

    #reorder if rank is available
    if (!is.null(rank_table)) {
      tcpl_models <- tcpl_models[rank_table$index]
    }

    #loop through tcpl_models's unique spid.
    #note tcpl_models is the level 4 output from tcpl package, including all the modelling results
    for (i in seq_along(tcpl_models)) {
      #get chemical sample id and chemical name
      .spid <- tcpl_models[[i]]$spid
      #print(.spid)
      if (!is.null(spid_chnm_table)) {
        chnm <- dplyr::filter(spid_chnm_table, spid == .spid)$chnm
        casn <- dplyr::filter(spid_chnm_table, spid == .spid)$casn
      } else {
        chnm <- NA
        casn <- NA
      }

      #cutoff
      cutoff_prim <- tcpl_models[[i]]$cutoff_prim
      cutoff_toxi <- tcpl_models[[i]]$cutoff_toxi
      #assay_info
      assay_info <- tcpl_models[[i]]$assay_info

      #get normalized resonse value (cytotox and raiu together)
      #and use 100 minus the response value (inverse the plot)
      d <- dplyr::bind_rows(tcpl_models[[i]]$data_prim, tcpl_models[[i]]$data_toxi) %>%
        dplyr::mutate(resp = nval_median) #%>%
      #mutate(assay = ifelse(aeid == 1, assay_info$toxi_assay, assay_info$prim_assay))

      #determine left and right x boundary of the plot
      if ( round(max(d$logc)) < max(d$logc) ) {
        rb <- round(max(d$logc)) + 1
      } else { rb <- round(max(d$logc))}

      lb <- round(min(d$logc))-1

      #initiate basic plot with data points.
      g <- ggplot(d, aes(x = logc, y = resp))

      if (!is.null(spid_chnm_table)){
        g <- g + labs(
          title = paste(chnm, sep = ""),
          x = paste("Concentration (log", cunit, ")", sep = ""),
          y = "% Control Activity"
        )
      } else {
        g <- g + labs(
          title = paste(i, ". SPID:" , .spid),
          x = paste("Concentration (log", cunit, ")", sep = ""),
          y = "% Control Activity"
        )
      }

      # labs(
      #   title = paste(i, "\nSPID: " , .spid, "\nNAME: ", chnm, "\nCAS NO: ", casn, sep = ""),
      #   x = "Concentration (logM)",
      #   y = "% Control Activity"
      # )

      #extract modelling results
      m_toxi <- tcpl_models[[i]]$model_toxi
      m_prim <- tcpl_models[[i]]$model_prim

      #create 100 concentrations
      s <- expand.grid(logc = seq(lb, rb, length = 130))

      #test and plot cytotox model
      if (!is.na(m_toxi$hill) & m_toxi$hill == 1) {
        para_cyto <- c(m_toxi$hill_tp, m_toxi$hill_ga, m_toxi$hill_gw)
        p1 <- 100 - hill_model(para_cyto, s)
        p1 <- dplyr::bind_cols(s, data.frame(p1))
        names(p1) <- c("logc", "pred")
        g <- g + geom_line(
          data = p1,
          aes(x = logc, y = pred),
          size = 2,
          alpha = 0.9,
          color = "#e02929"
        )
        #plot the vertical line at 3bmad cutoff
        #g <- g+ geom_vline(xintercept = log_abs_ec(para_cyto, 3*m_toxi$bmad) )
      }

      #test and plot raiu model
      if (!is.na(m_prim$hill) & m_prim$hill == 1) {
        para_raiu <- c(m_prim$hill_tp, m_prim$hill_ga, m_prim$hill_gw)

        p2 <- 100 - hill_model(para_raiu, s)
        p2 <- bind_cols(s, data.frame(p2))
        names(p2) <- c("logc", "pred")
        g <- g + geom_line(
          data = p2,
          aes(x = logc, y = pred),
          size = 2,
          alpha = 0.9,
          color = "#377eb8"
        )
      }

      #draw 3bmad cutoff line for cytotox and raiu respectively
      g <- g +
        geom_hline(
          yintercept = 100 - cutoff_toxi,
          alpha = 0.5,
          size = 0.5,
          linetype = "dashed",
          color = "#e02929"
        )

      g <- g +
        geom_hline(
          yintercept = 100 - cutoff_prim,
          alpha = 0.5,
          size = 0.5,
          linetype = "dashed",
          color = "#377eb8"
        )

      #plot data points
      #aesthetics fixes
      g <- g +
        geom_point(
          aes(color = assay),
          shape = 21,
          alpha = 0.9,
          size = 3
        ) +
        coord_fixed(
          ylim = c(0, 125),
          xlim = c(-9, -4),
          ratio = 2 / 70   #used to be 2/70, when x axis was from -9 to -4.
        ) +
        scale_y_continuous(breaks = seq(
          from = 0,
          to = 120,
          by = 20
        )) +
        scale_x_continuous(breaks = seq(
          from = lb,
          to = rb,
          by = 1
        )) +
        theme_few() +
        theme(axis.title.x = element_blank()) +
        theme(axis.title.y = element_blank()) +
        theme(legend.title = element_blank(),
              legend.position = "none") +
        theme(legend.margin=unit(0, "null")) +
        scale_color_manual(values=c("#e02929", "#377eb8"))+
        theme(plot.title=element_text(hjust=0.5, size = 16))

      ##adding annotations
      if (!is.null(rank_table) & notation == TRUE) {
        #Get ec and ranking info
        ds <- rank_table %>% dplyr::filter(spid==.spid)
        ds <- round_df(ds, digits=2)

        #annotate with text info
        line1 <- paste("Ranking_Score:", ds$ranking_score)
        # line2 <- paste("TAA:", ds$taa)
        # line3 <- paste("Med_Diff:", ds$med_diff)
        line4 <- paste("AC50: ", ds$AC50_prim, sep = "")
        line5 <- paste("absEC50: ", ds$absEC50_prim, sep = "")
        # line6 <- paste(assay_info$prim_assay, "_absEC80: ", ds$absEC80_prim, sep = "")
        g <- g +
          annotate("text", x= lb + 0.8, y = 34, alpha = 0.8, hjust=0, label=line1) +
          # annotate("text", x= lb + 0.8, y = 25, alpha = 0.8, hjust=0, label= line2) +
          # annotate("text", x= lb + 0.8, y = 20, alpha = 0.8, hjust=0, label= line3) +
          annotate("text", x= lb + 0.8, y = 22, alpha = 0.8, hjust=0, label= line4) +
          annotate("text", x= lb + 0.8, y = 10, alpha = 0.8, hjust=0, label= line5)
          # annotate("text", x= lb + 0.8, y = 5, alpha = 0.8, hjust=0, label= line6)

      }

      #collect all plots into a list
      plot_list[[i]] <- g

    }

    return(plot_list)

  }


#' save plots in pdf
#'
#' save ggplot2 plots generated in a list to a pdf file
#'
#' @param plot_list  the r list object contains all ggplot2 objects
#' @param filename  the output file name, including the file directory
#'
#' @examples
#' ## start from raw data
#' # define assay
#' assay_info <- list(prim_assay = "Primary",toxi_assay = "Cytotox")
#' # data normalization
#' demo_mc_norm <- normalize_per_plate(demo_mc, nctrl = "DMSO")
#' # filter out test chemicals only
#' demo_mc_norm <- dplyr::filter(demo_mc_norm, wllt == "t")
#' # fit curve with default 20% threshold
#' demo_md <- fit_curve_tcpl(demo_mc_norm, assay_info)
#' # calculate TAA and Med_diff only
#' demo_rank <- rank_tcpl(demo_md, med_taa = NULL, med_med_diff = NULL)
#' #produce plots with notations
#' demo_plots <- plot_tcpl_minimal(demo_md, demo_rank, notation = TRUE)
#'
#' ## save all the plots as pdf
#' # save_plot_pdf(demo_plots, ".\output plots\all_plots.pdf")
#'
#' ## save the 1st plot as pdf
#' # save_plot_pdf(demo_plots[1], ".\output plots\plot1.pdf")
#'
#'
#'
#' @export
#'

save_plot_pdf <- function(plot_list, filename) {
  # claim variables for passing R CMD Check
  assay <- spid <- apid <- rval <- NULL

  grDevices::pdf(filename)
  cat("Preparing file...\n")
  invisible(lapply(plot_list, print))
  grDevices::dev.off()
  cat("Finished!")
}



#' round digits of numbers
#'
#' round numbers in a datafram to specified digits
#'
#' @param df the dataframe input
#' @param digits  the specified number of digits
#' @return a dataframe
#'
#'
round_df <- function(df, digits) {
  nums <- vapply(df, is.numeric, FUN.VALUE = logical(1))
  df[,nums] <- round(df[,nums], digits = digits)
  df
}
9 R source code in ToxPlot package

9 R source code in `ToxPlot` package