看我~带土~神威

1、从UCSC下载数据然后读取,用data.table::fread 最好啦

 #读取数据
data <- data.table::fread("data/tcga_RSEM_gene_tpm.gz")
geneid <- data.table::fread("data/gencode.v23.annotation.gtf.gz")
clin <- data.table::fread("data/Survival_SupplementalTable_S1_20171025_xena_sp")
jiba <-data.table::fread("data/probeMap_gencode.v23.annotation.gene.probemap")

2、加载包

library(dplyr)
library(tibble)
library(tidyr)

3、修剪一下探针信息表

jiba <- jiba[,1:2]
colnames(data)[1] <- 'id'
test <- data1[1:10,1:10]

4、合并探针信息

data1 <- data %>%
  inner_join(jiba,by="id") %>%
  ## 去掉多余信息
  select(-id) %>%  
  ## 重新排列
  select(gene,everything()) #%>%

5、去重

data1 <- data1 %>%
  mutate(rowMean =rowMeans(.[,-1])) %>%
  ## 把表达量的平均值按从大到小排序
  arrange(desc(rowMean)) %>%
  ## 去重,symbol留下第一个
  distinct(gene,.keep_all = T) %>%
  ## 反向选择去除rowMean这一列
  select(-rowMean) %>%
  column_to_rownames("gene")

6、转制

data1 <- as.data.frame(t(data1))

7、修剪临床信息表

metadata <- data.frame(clin[,1])
metadata <- metadata %>%
  mutate(group=sample) %>%
  separate(group,sep = "-",into = c("q1","q2","q3","subtype"))%>%
  select("subtype")
clin2 <- cbind(clin,metadata)
clin2 <- select(clin2,1, 3, 35)
clin2 <- filter(clin2,subtype=="01")

7、与临床信息合并

colnames(clin2)[2] <- 'type'

data1 <- data1 %>%
  ## 行名转列名,因为只有变成数据框的列,才可以用inner_join
  rownames_to_column("sample") %>%
  ## 合并探针的信息
  inner_join(clin2,by="sample")

8、定义函数

###批量计算各个癌症中的相关性系数和p值
pancor <- function(gene1,gene2,data){
  data1 <- split(data,data$type)
  do.call(rbind,lapply(data1, function(x){
    dd  <- cor.test(as.numeric(x[,gene1]),as.numeric(x[,gene2]),type="pearson")
    data.frame(type=x$type[1],cor=dd$estimate,p.value=dd$p.value )
  }))
}

9、画图

plotdf <- pancor("HIF1A","P4HA2",data1)
library(ggplot2)
options(scipen = 2)
ggplot(plotdf,aes(-log10(p.value),cor))+
  ## 画点
  geom_point(size=6,fill="purple",shape = 21,colour="black",stroke = 2)+
  ## 改变y轴
  scale_y_continuous(expand = c(0,0),limits = c(-1,1),breaks = seq(-1,1,0.2))+
  ## 改变x轴
  scale_x_log10(expand = c(0,0),limits = c(0.1, 1000),breaks = c(0.1,10,1000))+
  ## 加横线
  geom_hline(yintercept = 0,size=1.5)+
  ## 加竖线
  geom_vline(xintercept = -log10(0.5),size=1.5)+
  ## 坐标轴
  labs(x=bquote(-log[10]~italic("P")),y="Pearson correlation (r)")+
  theme(axis.title=element_text(size=20),
        axis.text = element_text(face = "bold",size = 16),
        axis.ticks.length=unit(.4, "cm"),
        axis.ticks = element_line(colour = "black", size = 1),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1.5),
        plot.margin = margin(1, 1, 1, 1, "cm"))