About tximport

Description

tximport imports transcript-level estimates from various external software and optionally summarizes abundances, counts, and transcript lengths to the gene level (default) or outputs transcript-level matrices (see the txOut argument).

https://rdrr.io/bioc/tximport/man/tximport.html

Supported software (i.e. the type argument):

  • salmon

  • sailfish

  • alevin

  • kallisto

  • rsem

  • stringtie

With type = "none", one can specify the columns containing required information

Scaling

The countsFromAbundance argument has 4 options, which control whether and how estimated counts are generated from the abundance estimates

  • no - no scaling (default)

  • scaledTPM - scaled up to library size

  • lengthScaledTPM - scaled using the average transcript length over samples and then the library size

  • dtuScaledTPM - scaled using the median transcript length among isoforms of a gene, and then the library size. dtuScaledTPM is designed for differential transcript usage (DTU) analysis in combination with txOut=TRUE, and it requires specifying a tx2gene data.frame

Examples

In /home/USER/SSAPs we execute R

Load a TxDb object and create the tx2gene data.frame from it (this is needed for gene-level summarization)

library(GenomicFeatures)

txdb.filename <- "/home/USER/db/refanno/gencode.v33.annotation.sqlite"
txdb <- loadDb(txdb.filename)

k <- keys(txdb, keytype = "TXNAME")
tx2gene <- select(txdb, k, "GENEID", "TXNAME")

Create a named vector files pointing to the quantification files

files <- file.path("salmon", list.files("salmon"), "quant.sf")
names(files) <- list.files("salmon")
> files
                  ERR2675454                   ERR2675455 
"salmon/ERR2675454/quant.sf" "salmon/ERR2675455/quant.sf" 
                  ERR2675458                   ERR2675459 
"salmon/ERR2675458/quant.sf" "salmon/ERR2675459/quant.sf" 
                  ERR2675460                   ERR2675461 
"salmon/ERR2675460/quant.sf" "salmon/ERR2675461/quant.sf" 
                  ERR2675464                   ERR2675465 
"salmon/ERR2675464/quant.sf" "salmon/ERR2675465/quant.sf" 
                  ERR2675468                   ERR2675469 
"salmon/ERR2675468/quant.sf" "salmon/ERR2675469/quant.sf" 
                  ERR2675472                   ERR2675473 
"salmon/ERR2675472/quant.sf" "salmon/ERR2675473/quant.sf" 
                  ERR2675476                   ERR2675477 
"salmon/ERR2675476/quant.sf" "salmon/ERR2675477/quant.sf" 
                  ERR2675478                   ERR2675479 
"salmon/ERR2675478/quant.sf" "salmon/ERR2675479/quant.sf" 
                  ERR2675480                   ERR2675481 
"salmon/ERR2675480/quant.sf" "salmon/ERR2675481/quant.sf" 
                  ERR2675484                   ERR2675485 
"salmon/ERR2675484/quant.sf" "salmon/ERR2675485/quant.sf" 

Import the quantification files using tximport(). By specifying txOut = TRUE, one can obtain transcript-level counts. By default, the function outputs gene-level summarization

library(tximport)

# requires tx2gene for gene-level summarization
txi.salmon.g <- tximport(files, type = "salmon", tx2gene = tx2gene)

txi.salmon.t <- tximport(files, type = "salmon", txOut = TRUE)
# Gene-level
> head(txi.salmon.g$counts)[,1:10]
                   ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENSG00000000003.15   1478.000   2160.627   1002.000   1889.194   1097.359
ENSG00000000005.6      44.000     26.000     19.000     49.000     22.000
ENSG00000000419.12   1070.685    917.091   1341.018    634.318    786.430
ENSG00000000457.14    502.911    487.025    400.202    479.016    434.075
ENSG00000000460.17    223.088    163.000    192.798     79.000     94.000
ENSG00000000938.13     59.000     32.000     62.000     38.000     53.000
                   ERR2675461 ERR2675464 ERR2675465 ERR2675468 ERR2675469
ENSG00000000003.15   1118.000   1097.442   1595.800   1251.481   1705.223
ENSG00000000005.6      39.000     32.000     31.000     44.000     32.000
ENSG00000000419.12    790.000    971.212    743.000    662.631    790.212
ENSG00000000457.14    357.535    336.014    245.154    381.113    542.008
ENSG00000000460.17    131.464    166.102     79.846    106.143    116.147
ENSG00000000938.13     34.000     24.000     23.000     27.000     21.000
# Transcript-level
> head(txi.salmon.t$counts)[,1:10]
                  ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENST00000456328.2      0.000      0.000      0.000        0.0      0.000
ENST00000450305.2      0.000      0.000      0.000        0.0      0.000
ENST00000488147.1    134.186    148.079    124.806      295.6    213.842
ENST00000619216.1      0.000      0.000      0.000        0.0      0.000
ENST00000473358.1      0.000      0.000      0.000        0.0      0.000
ENST00000469289.1      0.000      1.000      0.000        0.0      0.000
                  ERR2675461 ERR2675464 ERR2675465 ERR2675468 ERR2675469
ENST00000456328.2      5.202      0.000      0.000       0.00       0.00
ENST00000450305.2      0.000      0.000      0.000       0.00       0.00
ENST00000488147.1     98.824    351.035    196.086     182.09     205.51
ENST00000619216.1      0.000      0.000      0.000       0.00       0.00
ENST00000473358.1      0.000      0.000      0.000       0.00       0.00
ENST00000469289.1      0.000      0.000      0.000       0.00       0.00
# Performs scaling for DTU analysis
txi.salmon.s <- tximport(files, type = "salmon", 
txOut = TRUE, tx2gene = tx2gene, countsFromAbundance = "dtuScaledTPM")
# Transcript-level, dtuScaledTPM
> head(txi.salmon.s$counts)[,1:10]
                  ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENST00000456328.2     0.0000     0.0000     0.0000     0.0000     0.0000
ENST00000450305.2     0.0000     0.0000     0.0000     0.0000     0.0000
ENST00000488147.1   226.5014   252.2381   204.1243   373.8044   284.3047
ENST00000619216.1     0.0000     0.0000     0.0000     0.0000     0.0000
ENST00000473358.1     0.0000     0.0000     0.0000     0.0000     0.0000
ENST00000469289.1     0.0000     1.6571     0.0000     0.0000     0.0000
                  ERR2675461 ERR2675464 ERR2675465 ERR2675468 ERR2675469
ENST00000456328.2   4.784217     0.0000      0.000     0.0000     0.0000
ENST00000450305.2   0.000000     0.0000      0.000     0.0000     0.0000
ENST00000488147.1 148.232222   488.9641    287.104   240.2427   271.6737
ENST00000619216.1   0.000000     0.0000      0.000     0.0000     0.0000
ENST00000473358.1   0.000000     0.0000      0.000     0.0000     0.0000
ENST00000469289.1   0.000000     0.0000      0.000     0.0000     0.0000

If the quantification file format is not supported, we can also load the files by specifying the required columns (i.e. txIdCol, abundanceCol, countsCol and lengthCol). We use the Salmon quantification files as an example

salmon/ERR2675454/quant.sf
Name	Length	EffectiveLength	TPM	NumReads
ENST00000456328.2	1657	1455.216	0.000000	0.000
ENST00000450305.2	632	468.000	0.000000	0.000
ENST00000488147.1	1351	1031.467	5.868714	134.186
ENST00000619216.1	68	9.000	0.000000	0.000
ENST00000473358.1	712	548.000	0.000000	0.000
ENST00000469289.1	535	371.000	0.000000	0.000
ENST00000607096.1	138	26.000	0.000000	0.000
ENST00000417324.1	1187	1023.000	0.000000	0.000
ENST00000461467.1	590	426.000	0.000000	0.000
txi.salmon <- tximport(files, type = "none", txIn = TRUE, txOut = TRUE, 
txIdCol = "Name", abundanceCol = "TPM", 
countsCol = "NumReads", lengthCol = "Length",
importer = function(x) readr::read_tsv(x))
> head(txi.salmon$counts)[,1:10]
                  ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENST00000456328.2      0.000      0.000      0.000        0.0      0.000
ENST00000450305.2      0.000      0.000      0.000        0.0      0.000
ENST00000488147.1    134.186    148.079    124.806      295.6    213.842
ENST00000619216.1      0.000      0.000      0.000        0.0      0.000
ENST00000473358.1      0.000      0.000      0.000        0.0      0.000
ENST00000469289.1      0.000      1.000      0.000        0.0      0.000
                  ERR2675461 ERR2675464 ERR2675465 ERR2675468 ERR2675469
ENST00000456328.2      5.202      0.000      0.000       0.00       0.00
ENST00000450305.2      0.000      0.000      0.000       0.00       0.00
ENST00000488147.1     98.824    351.035    196.086     182.09     205.51
ENST00000619216.1      0.000      0.000      0.000       0.00       0.00
ENST00000473358.1      0.000      0.000      0.000       0.00       0.00
ENST00000469289.1      0.000      0.000      0.000       0.00       0.00

Last updated