Página anterior« Indice del Curso
#trimming
for file in raw_data/*168*; do fastp -i $file -l 140 -q 30 -o trimmed/${file##*/}; done
for file in raw_data/*rep*; do fastp -i $file -l 70 -q 30 -o trimmed/${file##*/}; done
##mapping
STAR --runMode genomeGenerate --genomeDir index --genomeFastaFiles index/KY030782.fna --genomeSAindexNbases 7
for file in trimmed/*.fastq.gz; do STAR --genomeDir index --readFilesCommand gunzip -c --readFilesIn $file --outFileNamePrefix Star_out/${file##*/} --outSAMtype BAM Unsorted SortedByCoordinate --runThreadN 24; done
#assambly
for file in *.sortedByCoord.out.bam; do stringtie -p 16 -G ../index/KY030782.gtf -o ${file%Aligned.sortedByCoord.out.bam}.gtf -l ${file%Aligned.sortedByCoord.out.bam} $file; done
cat *.gtf > merge.txt
stringtie --merge -p 24 -G ../index/KY030782.gtf -o stringtie_merged.gtf merge.txt
for file in *.sortedByCoord.out.bam; do stringtie $file -e -B -p 24 -G stringtie_merged.gtf -o ballgown/${file%Aligned.sortedByCoord.out.bam}/${file%Aligned.sortedByCoord.out.bam}.gtf; done
cd ballgown
../../prepDE.py3
##input matriz de conteo
countData <- as.matrix(read.csv('gene_count_matrix.csv',row.names = 'gene_id'))
colData <- read.csv('phenodata.csv', row.names = 1)
all(rownames(colData) %in% colnames(countData))
countData <- countData[, rownames(colData)]
all(rownames(colData)==colnames(countData))
##generando colores por experimento (condicion)
col.condition <- c('noPep_5min'='green', 'pep_5min'='green', 'nopep_10min'= 'orange', 'pep_10min'= 'orange', 'nopep_20min'='red', 'pep_20min'='red')
colData$color <- col.condition[as.vector(colData$condition)]
##removiendo conteos nulos
prop.null <- apply(countData, 2, function(x) 100*mean(x==0))
barplot(prop.null,
horiz=TRUE, cex.names=1, las=1,
col=colData$color, xlab='% de conteos nulos')
countData <- countData[rowSums(countData) > 0,]
##Analisis de expresión diferencial diferencial
library(DESeq2)
dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, design = ~condition)
dds <- DESeq(dds)
#removiendo genes con muy bajo conteo
dds <- dds[rowSums(counts(dds)) > 0,]
##normalizando
dds.norm <- estimateSizeFactors(dds)
##varianza con respecto a la media
##calculando media y varianza
norm.counts <- counts(dds.norm, normalized=TRUE)
mean.counts <- rowMeans(norm.counts)
variance.counts <- apply(norm.counts, 1, var)
#generar resultados
dds.disp <- estimateDispersions(dds.norm)
alpha <- 0.05
lfcTH <- 1
wald.test <- nbinomWaldTest(dds.disp)
res1 <- results(wald.test, contrast = c('condition','nopep_10min', 'pep_10min'), pAdjustMethod="BH")
res2 <- results(wald.test, contrast = c('condition','nopep_20min', 'pep_20min'), pAdjustMethod="BH")
summary(res1)
##exportar resultados
write.csv(res1, file = 'nopep_vs_pep_10min.csv')
write.csv(res2, file = 'nopep_vs_pep_20min.csv')
#PCA
plotPCA(rlog)
rld <- rlog(wald.test, blind = FALSE)
View(rld)
#PCA
plotPCA(rlog)
##VolcanoPlot
res1Volcano <- results(wald.test, contrast = c('condition','nopep_10min', 'pep_10min'))
keyvals1 <- rep('black', nrow(res1Volcano))
names(keyvals1) <- rep('Mid', nrow(res1Volcano))
keyvals1[which(res1Volcano$log2FoldChange > 1)] <- 'forestgreen'
names(keyvals1)[which(res1Volcano$log2FoldChange >1 )] <- 'high'
keyvals1[which(res1Volcano$log2FoldChange < -1)] <- 'red'
names(keyvals1)[which(res1Volcano$log2FoldChange < -1)] <- 'low'
library(EnhancedVolcano)
EnhancedVolcano(res1Volcano,
lab = rownames(res1Volcano),
x = 'log2FoldChange',
y = 'pvalue',
selectLab = rownames(res1Volcano)[which(names(keyvals1) %in% c('high', 'low'))],
xlim = c(-6,6),
xlab = bquote(~Log[2]~'FC'),
pCutoff = 0.05,
FCcutoff = 1.0,
cutoffLineCol = 'blue',
cutoffLineType = 'dashed',
pointSize = 3.5,
labSize = 4.5,
shape = c(42,42,42,19),
col = c('grey9','grey9','grey9','grey9'),
colCustom = keyvals1,
colAlpha = 1,
legendPosition = 'top',
legendLabSize = 15,
legendIconSize = 5.0,
border = 'partial',
borderWidth = 1.5,
borderColour = 'black')
#heatmap
rld <- rlog(wald.test, blind = FALSE)
head(assay(rld), 3)
sampleDists <- dist(t(assay(rld)))
library("pheatmap")
library("RColorBrewer")
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- paste( rld$condition, sep = " - " )
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists,
col = colors)
library("genefilter")
topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE))
mat <- assay(rld)[ topVarGenes, ]
mat <- mat - rowMeans(mat)
anno <- as.data.frame(colData(rld))
rownames(anno) <- colnames(countData)
library("pheatmap")
#probablemente bote error, pero con la correcion anterior no deberia
pheatmap(mat, annotation_col = anno)
#diagrama de venn
library(limma)
venn_data <- data.frame(p10min = res1$log2FoldChange >= 1 | res1$log2FoldChange <= -1,
p20min = res2$log2FoldChange >= 1 | res2$log2FoldChange <= -1)
vennDiagram(venn_data)
#PCA
plotPCA(rld)