LY Palbo IC50 analysis of pan cancer cell lines (Gong Buchanan 2017)

2024/04/07

The data is from this paper:

read data

LY_data <- read_csv(file.path(file_dir, "Buchanan_AbsIC50Data_Abema.csv"))
## New names:
## Rows: 560 Columns: 31
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (16): Cell Line, Cancer Type, DCAF, KHSV cyclin (k-cyclin), CCND 3'UTR l... dbl
## (12): Abs IC50 (µM) Abemaciclib, StdErr(Abs IC50), CCND1 cn, CCND2 cn, C... lgl
## (3): ...29, ...30, ...31
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
Palbo_data <- read_csv(file.path(file_dir, "Buchanan_AbsIC50Data_Palbo.csv"))
## New names:
## Rows: 492 Columns: 24
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (13): Cell Line, Cancer Type, DCAF, KHSV cyclin (k-cyclin), CCND1 3UTR l... dbl
## (10): Abs IC50 (µM) Palbociclib, CCND1 cn, CCND2 cn, CCND3 cn, CCNE1 cn,... lgl
## (1): ...23
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...23`
## • `` -> `...24`

summary

LY_data %>% dim
## [1] 560  31
Palbo_data %>% dim
## [1] 492  24
LY_data %>% DT::datatable()
Palbo_data %>% DT::datatable()

tidy up

LY_data <- LY_data %>% dplyr::rename("cell_line" = "Cell Line",
                                     "cancer_type" = "Cancer Type",
                                     "IC50" = "Abs IC50 (µM) Abemaciclib")


Palbo_data <- Palbo_data %>% dplyr::rename("cell_line" = "Cell Line",
                                           "cancer_type" = "Cancer Type",
                                           "IC50" = "Abs IC50 (µM) Palbociclib")
LY_data$RB1_status = ifelse(is.na(LY_data$"RB1 mut"), "WT", "MUT")
Palbo_data$RB1_status = ifelse(is.na(Palbo_data$"RB1 mut"), "WT", "MUT")
combined_data <- left_join(LY_data %>% dplyr::select("cell_line", "cancer_type", "IC50", "RB1_status"), 
                           Palbo_data %>% dplyr::select("cell_line", "cancer_type", "IC50", "RB1_status"), 
                           by = "cell_line")

colnames(combined_data) = combined_data %>% 
  colnames %>% 
  gsub("\\.x", "_LY", .) %>%
  gsub("\\.y", "_Palbo", .)

combined_data$cancer_type_Palbo %>% is.na %>% table
## .
## FALSE  TRUE 
##   492    68
# Theres less IC50 values for Palbo than LY. these are changed to 0. was finding an error when plotting in ggplot, so decided to make it 0.... myabe there is a better way?
combined_data$IC50_Palbo[is.na(combined_data$IC50_Palbo)] = 0

overlap of cell lines treated with LY and Palbo

list_i <- list(LY_data = LY_data$cell_line, 
               Palbo_data = Palbo_data$cell_line
)
     
LY_Palbo_overlap <- calculate.overlap(x = list_i)

For most cell lines, the IC50 value is avaialble for both LY and Palbo.

ggvenn(
  list_i, 
  fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF"),
  stroke_size = 0.5, set_name_size = 7, text_size = 7
  )

cancer type summary

cancer_type_df <- combined_data$cancer_type_LY %>% table %>% sort(decreasing = TRUE) %>% data.frame
colnames(cancer_type_df) <- c("cancer_type", "frequency")

cancer_type_df %>% ggplot(aes(x = cancer_type, y = frequency, fill = cancer_type)) + 
  geom_bar(stat = "identity") +
  geom_text(aes(label=frequency), position=position_dodge(width=0.9), vjust=-0.25, size = 2) +
  theme_bw() + 
  theme(legend.position = "none", legend.title = element_blank() ,axis.text.x = element_text(angle = 45, hjust = 1))

IC50 summary

cellline_i <- c(combined_data %>% dplyr::arrange(IC50_LY) %>% .$cell_line %>% head(n=40), 
                combined_data %>% dplyr::arrange(IC50_Palbo) %>% .$cell_line %>% head(n=40)) %>% unique

combined_data <- combined_data %>% unite(plot_name, cell_line, cancer_type_LY, remove = FALSE)
combined_data$plot_name <- combined_data$plot_name %>% gsub(" ", "_", .)

combined_data %>% ggplot(aes(x = -log10(IC50_LY), y = -log10(IC50_Palbo), col = cancer_type_LY)) + 
  geom_point() +
  geom_text_repel(data = combined_data %>% filter(cell_line %in% cellline_i), aes(label = plot_name), size = 2.5) + 
  theme_bw() + 
  theme(legend.position = "bottom", legend.title = element_blank())

RB mutatant status by cancer type

RB_mutant_df <- combined_data %>% 
  dplyr::group_by(cancer_type_LY, RB1_status_LY) %>% 
  summarise(count = n())
## `summarise()` has grouped output by 'cancer_type_LY'. You can override using
## the `.groups` argument.
RB_mutant_df$cancer_type_LY <- factor(RB_mutant_df$cancer_type_LY, levels = cancer_order)
#RB_mutant_df %>% head
RB_mutant_df$count %>% max()

RB_mutant_df %>% 
  ggplot(aes(x = RB1_status_LY, y = count)) + 
  geom_bar(stat= "identity") + 
  ylim(c(0, 65)) +
  facet_wrap( . ~ cancer_type_LY) +
  geom_text(aes(label=count), position=position_dodge(width=0.9), vjust=-0.25, size = 2) +
  theme_bw()
RB_mutant_df %>% 
  ggplot(aes(x = cancer_type_LY, y = count, fill = RB1_status_LY)) + 
  #geom_bar(stat=stat_count) +
  geom_bar(stat='identity') + 
  #ylim(c(0, 65)) +
  #facet_wrap( . ~ cancer_type_LY) +
  #geom_text(aes(label=count), position=position_dodge(width=0.9), vjust=-0.25, size = 2) +
  theme_bw() +
  theme(legend.position = "none", axis.title.x = element_blank() ,axis.text.x = element_text(angle = 45, hjust = 1))

which cancer type responds the best

combined_data %>% 
  ggplot(aes(y = IC50_LY,  x = fct_reorder(cancer_type_LY, IC50_LY, .desc	
 = TRUE))) + 
  geom_boxplot() +
  theme_bw() + 
  theme(legend.position = "none", axis.title.x = element_blank() ,axis.text.x = element_text(angle = 45, hjust = 1))
combined_data %>% 
  ggplot(aes(y = IC50_Palbo,  x = fct_reorder(cancer_type_LY, IC50_Palbo, .desc	
 = TRUE))) + 
  geom_boxplot() +
  theme_bw() + 
  theme(legend.position = "none", axis.title.x = element_blank() ,axis.text.x = element_text(angle = 45, hjust = 1))

plot IC50 according to cancer type

combined_data$RB1_status_LY

RB1_col = c("WT" = "Dodgerblue3", "MUT" = "orange")

plot_IC50 <- function(type){
  temp_df <- combined_data %>% filter(cancer_type_LY == type)

  max_x = temp_df$IC50_LY %>% max %>% round(digits = 1) + 1
  max_y = temp_df$IC50_Palbo %>%  max(na.rm= TRUE) %>% round(digits = 1) +1

  temp_df %>% 
    ggplot(aes(x = IC50_LY, y = IC50_Palbo, col = RB1_status_LY)) + 
    geom_point() +
    geom_text_repel(data = temp_df, aes(label = plot_name), size = 2.5) + 
    scale_color_manual(values  = RB1_col) +
    ggtitle(type) +
    labs(x = "IC50 LY (uM)", y = "IC50 Palbo (uM)") +
    ylim(max_y, 0) +
    xlim(max_x, 0) +
    theme_bw() + 
    theme(legend.position = "top", legend.title = element_blank(), plot.title = element_text(hjust = 0.5, size=25))
  }
plot_IC50("breast cancer")
plot_IC50("lung NSCLC")
plot_IC50("melanoma")
plot_IC50("prostate cancer")
plot_IC50("pancreatic cancer")
plot_IC50("colon cancer")
plot_IC50("ovarian cancer")
plot_IC50("liver cancer")
plot_IC50("ovarian cancer")