
Network inference and analysis of CLL data
Frédéric Bertrand and Myriam Maumy-Bertrand
Université de Strasbourg et CNRS, IRMA, labex IRMIA — frederic.bertrand@lecnam.net
2025-09-15
Source:vignettes/ExampleCLL.Rmd
ExampleCLL.Rmd
Data preparation
Retrieve the full CLL dataset.
# Attach Patterns. library() stops with an error when the package is missing,
# whereas require() only returns FALSE and lets later calls fail obscurely.
library(Patterns)
# Download the CLL data to a temporary file, load it, then delete the copy.
CLLfile <- "https://github.com/fbertran/Patterns/raw/master/add_data/CLL.RData"
# mode = "wb": an .RData file is binary and must not go through a text-mode
# transfer (which would corrupt it on Windows).
download.file(
  CLLfile,
  destfile = file.path(tempdir(), "downloadData.RData"),
  method = "libcurl",
  mode = "wb"
)
# The loaded file is expected to define the CLL object used below.
load(file.path(tempdir(), "downloadData.RData"))
unlink(file.path(tempdir(), "downloadData.RData"))
# Preview the first ten rows and first five columns.
CLL[1:10, 1:5]
Split the CLL
dataset into four subsets: healthy and aggressive subjects,
each in unstimulated and stimulated conditions.
# Within each run of 8 consecutive columns, positions 1-4 go to the "US"
# subset and positions 5-8 to the "S" subset.
# Healthy block: 48 columns starting at column 3 of CLL.
hea_US <- CLL[, 2 + which(((1:48) %% 8) %in% 1:4)]
hea_S <- CLL[, 2 + which(!(((1:48) %% 8) %in% 1:4))]
# Aggressive block: 40 columns starting at column 99 of CLL.
agg_US <- CLL[, 98 + which(((1:40) %% 8) %in% 1:4)]
agg_S <- CLL[, 98 + which(!(((1:40) %% 8) %in% 1:4))]

# Wrap each subset as an omics_array. The first two columns of CLL supply the
# probe names and gene IDs.
# NOTE(review): the 2nd and 3rd positional arguments look like the four
# measurement times and the number of subjects (48 / 8 = 6, 40 / 8 = 5) --
# confirm against the as.omics_array documentation.
m_hea_US <- as.omics_array(
  hea_US, c(60, 90, 210, 390), 6,
  name = CLL[, 1], gene_ID = CLL[, 2]
)
m_hea_S <- as.omics_array(
  hea_S, c(60, 90, 210, 390), 6,
  name = CLL[, 1], gene_ID = CLL[, 2]
)
m_agg_US <- as.omics_array(
  agg_US, c(60, 90, 210, 390), 5,
  name = CLL[, 1], gene_ID = CLL[, 2]
)
m_agg_S <- as.omics_array(
  agg_S, c(60, 90, 210, 390), 5,
  name = CLL[, 1], gene_ID = CLL[, 2]
)
Focus on EGR1: run the code to get the graph of the expression values (pasted together for all the subjects) for all the probesets tagged as EGR1.
Select genes according to their profiles.
# Four selections comparing the aggressive unstimulated and stimulated arrays
# in "condition&time" mode. Only the last element of the contrast list changes
# between calls: c(1, 1), c(2, 2), c(3, 3), c(4, 4).
# NOTE(review): that element presumably picks the time point compared in each
# condition, and the third argument (-1 or 50) presumably controls how many
# genes are kept -- confirm against the geneSelection documentation.
selection1 <- geneSelection(
  list(m_agg_US, m_agg_S),
  list("condition&time", c(1, 2), c(1, 1)),
  -1,
  alpha = 0.1
)
selection2 <- geneSelection(
  list(m_agg_US, m_agg_S),
  list("condition&time", c(1, 2), c(2, 2)),
  -1,
  alpha = 0.1
)
# The last two selections use a stricter alpha and 50 as third argument.
selection3 <- geneSelection(
  list(m_agg_US, m_agg_S),
  list("condition&time", c(1, 2), c(3, 3)),
  50,
  alpha = 0.005
)
selection4 <- geneSelection(
  list(m_agg_US, m_agg_S),
  list("condition&time", c(1, 2), c(4, 4)),
  50,
  alpha = 0.005
)
Merge the four selections into a single one.
# Combine the four per-contrast selections into one object and summarise it.
selection <- Patterns::unionOmics(
  list(selection1, selection2, selection3, selection4)
)
summary(selection)
Number of genes in the merged selection.
# Number of entries in the gene_ID slot of the merged selection.
length(slot(selection, "gene_ID"))
Translate the probesets’ names for the selection.
# Attach biomaRt. library() errors immediately if the package is missing,
# whereas require() would only return FALSE.
library(biomaRt)
# Example probeset identifiers; not used by the query below.
affyids <- c("202763_at", "209310_s_at", "207500_at")
# Connect to the Ensembl mart for the human gene dataset.
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
# Query annotation for every probeset of CLL (first column) whose name is in
# the merged selection, filtering on the affy_hg_u133_plus_2 identifier.
infos <- getBM(
  attributes = c(
    "affy_hg_u133_plus_2", "ensembl_gene_id", "hgnc_symbol",
    "chromosome_name", "start_position", "end_position", "band"
  ),
  filters = "affy_hg_u133_plus_2",
  values = CLL[CLL[, 1] %in% selection@name, 1],
  mart = ensembl,
  uniqueRows = TRUE,
  checkFilters = TRUE
)
Network inference
Add grouping information according to the pre-merge selection membership to perform network inference.
# Start with one NA per selected probeset, named by probeset.
slot(selection, "group") <- rep(NA, times = length(slot(selection, "name")))
names(selection@group) <- slot(selection, "name")
# Assign group labels from 4 down to 1: a probeset present in several
# pre-merge selections ends up with the lowest group number, because later
# assignments overwrite earlier ones.
selection@group[is.element(selection@name, selection4@name)] <- 4
selection@group[is.element(selection@name, selection3@name)] <- 3
selection@group[is.element(selection@name, selection2@name)] <- 2
selection@group[is.element(selection@name, selection1@name)] <- 1
plot(selection)
Check the length of the group slot of the selection object.
# Length of the group slot filled in above.
length(slot(selection, "group"))
Perform a lasso-based inference of the network, then print the structure of the
network
object.
# Infer the network from the grouped selection with the "LASSO2" fitting
# function; Finit and Fshape are built by the Cascade helpers for (4, 4).
network <- inference(
  selection,
  fitfun = "LASSO2",
  Finit = CascadeFinit(4, 4),
  Fshape = CascadeFshape(4, 4)
)
# Display the internal structure of the inferred object.
str(network)
Plot the inferred F matrix.
# Plot the F slot of the inferred network.
plotF(slot(network, "F"), choice = "F")
Save results.
Focus on transcription factors.
Retrieve human transcription factors from HumanTFDB, extracted from AnimalTFDB 3.0: a comprehensive resource for annotation and prediction of animal transcription factors. Hui Hu, Ya-Ru Miao, Long-Hao Jia, Qing-Yang Yu, Qiong Zhang and An-Yuan Guo. Nucl. Acids Res. (2018).
# Read the tab-delimited human transcription factor table from AnimalTFDB.
doc <- read.delim(
  "http://bioinfo.life.hust.edu.cn/static/AnimalTFDB3/download/Homo_sapiens_TF",
  encoding = "UTF-8",
  header = TRUE
)
# Keep the Symbol column as a character vector, sorted in increasing order
# (na.last = TRUE keeps any NA at the end rather than dropping it).
TF <- as.character(doc[, "Symbol"])
TF <- sort(TF, na.last = TRUE)
The TF
object holds the sorted list of human transcription
factor gene symbols. We retrieve those that are in the selection
object.
# Keep the annotation rows whose probeset belongs to the merged selection.
infos_selection <- infos[
  infos[["affy_hg_u133_plus_2"]] %in% slot(selection, "name"),
]
# Row positions (within infos_selection) whose HGNC symbol is a known
# transcription factor.
tfs <- which(is.element(infos_selection[, "hgnc_symbol"], TF))
Some plots of the TF
found in the selection.