# Get the top gene sets for each comparison: for every table in hypergeo_df,
# keep rows with p.adjust < 0.05 and take the first 15 (in the table's existing
# row order), then pool the results and drop duplicates.
# The ID column is collected (rather than Description) because the selected
# gene sets are later looked up with match(top_genesets, ID) and used as the
# factor levels of ID; when ID and Description are identical this changes nothing.
top_genesets <- lapply(hypergeo_df, function(x) {
  x %>%
    filter(p.adjust < 0.05) %>%
    .$ID %>%
    head(n = 15)
}) %>%
  unlist() %>%
  unique()
# Build one long data frame with the selected gene sets for every comparison.
# For each table, keep the rows whose ID is in top_genesets (in top_genesets
# order; gene sets absent from a table yield NA from match()) and tag them with
# the name of the comparison.
# seq_along() is used instead of 1:length() so an empty hypergeo_df does not
# iterate over c(1, 0); the pieces are bound once instead of growing the data
# frame with rbind() on every iteration.
hypergeo_plot <- lapply(seq_along(hypergeo_df), function(i) {
  hypergeo_df[[i]] %>%
    dplyr::slice(match(top_genesets, ID)) %>%
    mutate(group = names(hypergeo_df)[i])
}) %>%
  dplyr::bind_rows()

# Factor levels follow the order of top_genesets.
hypergeo_plot$ID <- factor(hypergeo_plot$ID, levels = top_genesets)
# Factor levels follow the order in which the comparisons appear
# (not alphabetical order).
hypergeo_plot$group <- factor(
  hypergeo_plot$group,
  levels = unique(hypergeo_plot$group)
)

# Quick look at the raw GeneRatio values (printed when run at top level).
head(hypergeo_plot$GeneRatio)

# GeneRatio is a character vector that looks like "82/2838". Divide the first
# element by the second to get a numeric value. vapply() guarantees a numeric
# vector even when there are zero rows.
hypergeo_plot$GeneRatio_number <- hypergeo_plot$GeneRatio %>%
  str_split(pattern = "/") %>%
  vapply(function(x) {
    x <- as.numeric(x)
    x[[1]] / x[[2]]
  }, numeric(1))

# Any p.adjust value higher than 0.05 is removed from the plot by setting it to
# NA. GeneRatio_number must be masked first, while p.adjust still holds the
# original values.
# NOTE(review): the selection above uses p.adjust < 0.05 and this mask uses
# > 0.05, so a value of exactly 0.05 is neither selected nor masked.
hypergeo_plot$GeneRatio_number[hypergeo_plot$p.adjust > 0.05] <- NA
hypergeo_plot$p.adjust[hypergeo_plot$p.adjust > 0.05] <- NA