Description
tximport
imports transcript-level estimates from various external software and optionally summarizes abundances, counts, and transcript lengths to the gene-level (default) or outputs transcript-level matrices (see txOut
argument)
https://rdrr.io/bioc/tximport/man/tximport.html
Supported software (i.e. the type
argument):
With type = "none"
, one can specify the columns containing required information
Scaling
The countsFromAbundance
argument has 4 options, depending on whether and how to generate estimated counts from abundance estimates
no - no scaling (default)
scaledTPM - scaled up to library size
lengthScaledTPM - scaled using the average transcript length over samples and then the library size
dtuScaledTPM - scaled using the median transcript length among isoforms of a gene, and then the library size. dtuScaledTPM is designed for differential transcript usage (DTU) analysis in combination with txOut=TRUE
, and it requires specifying a tx2gene
data.frame
Examples
In /home/USER/SSAPs
we execute R
Load a TxDb object and create the tx2gene
data.frame from the TxDb object; this is required for gene-level summarization
Copy library (GenomicFeatures)
txdb.filename <- "/home/USER/db/refanno/gencode.v33.annotation.sqlite"
txdb <- loadDb( txdb.filename )
k <- keys( txdb, keytype = "TXNAME" )
tx2gene <- select( txdb, k, "GENEID" , "TXNAME" )
Create a named vector files
pointing to the quantification files
Copy files <- file.path ( "salmon" , list.files ( "salmon" ), "quant.sf" )
names (files) <- list.files ( "salmon" )
Copy > files
ERR2675454 ERR2675455
"salmon/ERR2675454/quant.sf" "salmon/ERR2675455/quant.sf"
ERR2675458 ERR2675459
"salmon/ERR2675458/quant.sf" "salmon/ERR2675459/quant.sf"
ERR2675460 ERR2675461
"salmon/ERR2675460/quant.sf" "salmon/ERR2675461/quant.sf"
ERR2675464 ERR2675465
"salmon/ERR2675464/quant.sf" "salmon/ERR2675465/quant.sf"
ERR2675468 ERR2675469
"salmon/ERR2675468/quant.sf" "salmon/ERR2675469/quant.sf"
ERR2675472 ERR2675473
"salmon/ERR2675472/quant.sf" "salmon/ERR2675473/quant.sf"
ERR2675476 ERR2675477
"salmon/ERR2675476/quant.sf" "salmon/ERR2675477/quant.sf"
ERR2675478 ERR2675479
"salmon/ERR2675478/quant.sf" "salmon/ERR2675479/quant.sf"
ERR2675480 ERR2675481
"salmon/ERR2675480/quant.sf" "salmon/ERR2675481/quant.sf"
ERR2675484 ERR2675485
"salmon/ERR2675484/quant.sf" "salmon/ERR2675485/quant.sf"
Import the quantification files using tximport()
. By specifying txOut = TRUE
, one can obtain transcript-level counts. By default, the function outputs gene-level summarization
Copy library (tximport)
# requires tx2gene for gene-level summarization
txi.salmon.g <- tximport( files, type = "salmon" , tx2gene = tx2gene )
txi.salmon.t <- tximport( files, type = "salmon" , txOut = TRUE )
Copy # Gene-level
> head (txi.salmon.g $ counts)[, 1 : 10 ]
ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENSG00000000003 .15 1478.000 2160.627 1002.000 1889.194 1097.359
ENSG00000000005 .6 44.000 26.000 19.000 49.000 22.000
ENSG00000000419 .12 1070.685 917.091 1341.018 634.318 786.430
ENSG00000000457 .14 502.911 487.025 400.202 479.016 434.075
ENSG00000000460 .17 223.088 163.000 192.798 79.000 94.000
ENSG00000000938 .13 59.000 32.000 62.000 38.000 53.000
ERR2675461 ERR2675464 ERR2675465 ERR2675468 ERR2675469
ENSG00000000003 .15 1118.000 1097.442 1595.800 1251.481 1705.223
ENSG00000000005 .6 39.000 32.000 31.000 44.000 32.000
ENSG00000000419 .12 790.000 971.212 743.000 662.631 790.212
ENSG00000000457 .14 357.535 336.014 245.154 381.113 542.008
ENSG00000000460 .17 131.464 166.102 79.846 106.143 116.147
ENSG00000000938 .13 34.000 24.000 23.000 27.000 21.000
Copy # Transcript-level
> head (txi.salmon.t $ counts)[, 1 : 10 ]
ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENST00000456328 .2 0.000 0.000 0.000 0.0 0.000
ENST00000450305 .2 0.000 0.000 0.000 0.0 0.000
ENST00000488147 .1 134.186 148.079 124.806 295.6 213.842
ENST00000619216 .1 0.000 0.000 0.000 0.0 0.000
ENST00000473358 .1 0.000 0.000 0.000 0.0 0.000
ENST00000469289 .1 0.000 1.000 0.000 0.0 0.000
ERR2675461 ERR2675464 ERR2675465 ERR2675468 ERR2675469
ENST00000456328 .2 5.202 0.000 0.000 0.00 0.00
ENST00000450305 .2 0.000 0.000 0.000 0.00 0.00
ENST00000488147 .1 98.824 351.035 196.086 182.09 205.51
ENST00000619216 .1 0.000 0.000 0.000 0.00 0.00
ENST00000473358 .1 0.000 0.000 0.000 0.00 0.00
ENST00000469289 .1 0.000 0.000 0.000 0.00 0.00
Copy # Performs scaling for DTU analysis
txi.salmon.s <- tximport( files, type = "salmon" ,
txOut = TRUE , tx2gene = tx2gene, countsFromAbundance = "dtuScaledTPM" )
Copy # Transcript-level, dtuScaledTPM
> head (txi.salmon.s $ counts)[, 1 : 10 ]
ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENST00000456328 .2 0.0000 0.0000 0.0000 0.0000 0.0000
ENST00000450305 .2 0.0000 0.0000 0.0000 0.0000 0.0000
ENST00000488147 .1 226.5014 252.2381 204.1243 373.8044 284.3047
ENST00000619216 .1 0.0000 0.0000 0.0000 0.0000 0.0000
ENST00000473358 .1 0.0000 0.0000 0.0000 0.0000 0.0000
ENST00000469289 .1 0.0000 1.6571 0.0000 0.0000 0.0000
ERR2675461 ERR2675464 ERR2675465 ERR2675468 ERR2675469
ENST00000456328 .2 4.784217 0.0000 0.000 0.0000 0.0000
ENST00000450305 .2 0.000000 0.0000 0.000 0.0000 0.0000
ENST00000488147 .1 148.232222 488.9641 287.104 240.2427 271.6737
ENST00000619216 .1 0.000000 0.0000 0.000 0.0000 0.0000
ENST00000473358 .1 0.000000 0.0000 0.000 0.0000 0.0000
ENST00000469289 .1 0.000000 0.0000 0.000 0.0000 0.0000
If the quantification file format is not supported, we can also load the files by specifying the required columns (i.e. txIdCol
, abundanceCol
, countsCol
and lengthCol
). We use the Salmon quantification files as an example
salmon/ERR2675454/quant.sf
Copy Name Length EffectiveLength TPM NumReads
ENST00000456328.2 1657 1455.216 0.000000 0.000
ENST00000450305.2 632 468.000 0.000000 0.000
ENST00000488147.1 1351 1031.467 5.868714 134.186
ENST00000619216.1 68 9.000 0.000000 0.000
ENST00000473358.1 712 548.000 0.000000 0.000
ENST00000469289.1 535 371.000 0.000000 0.000
ENST00000607096.1 138 26.000 0.000000 0.000
ENST00000417324.1 1187 1023.000 0.000000 0.000
ENST00000461467.1 590 426.000 0.000000 0.000
Copy txi.salmon <- tximport( files, type = "none" , txIn = TRUE , txOut = TRUE ,
txIdCol = "Name" , abundanceCol = "TPM" ,
countsCol = "NumReads" , lengthCol = "Length" ,
importer = function (x) readr :: read_tsv( x ))
Copy > head (txi.salmon $ counts)[, 1 : 10 ]
ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENST00000456328 .2 0.000 0.000 0.000 0.0 0.000
ENST00000450305 .2 0.000 0.000 0.000 0.0 0.000
ENST00000488147 .1 134.186 148.079 124.806 295.6 213.842
ENST00000619216 .1 0.000 0.000 0.000 0.0 0.000
ENST00000473358 .1 0.000 0.000 0.000 0.0 0.000
ENST00000469289 .1 0.000 1.000 0.000 0.0 0.000
ERR2675461 ERR2675464 ERR2675465 ERR2675468 ERR2675469
ENST00000456328 .2 5.202 0.000 0.000 0.00 0.00
ENST00000450305 .2 0.000 0.000 0.000 0.00 0.00
ENST00000488147 .1 98.824 351.035 196.086 182.09 205.51
ENST00000619216 .1 0.000 0.000 0.000 0.00 0.00
ENST00000473358 .1 0.000 0.000 0.000 0.00 0.00
ENST00000469289 .1 0.000 0.000 0.000 0.00 0.00