mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-12-08 20:38:47 +00:00
@@ -1584,6 +1584,8 @@ R:
|
|||||||
aliases:
|
aliases:
|
||||||
- R
|
- R
|
||||||
primary_extension: .r
|
primary_extension: .r
|
||||||
|
aliases:
|
||||||
|
- Rscript
|
||||||
extensions:
|
extensions:
|
||||||
- .R
|
- .R
|
||||||
- .rsx
|
- .rsx
|
||||||
|
|||||||
@@ -649,6 +649,9 @@
|
|||||||
"Perl": [
|
"Perl": [
|
||||||
"ack"
|
"ack"
|
||||||
],
|
],
|
||||||
|
"R": [
|
||||||
|
"expr-dist"
|
||||||
|
],
|
||||||
"Ruby": [
|
"Ruby": [
|
||||||
"Appraisals",
|
"Appraisals",
|
||||||
"Capfile",
|
"Capfile",
|
||||||
@@ -688,8 +691,8 @@
|
|||||||
".gemrc"
|
".gemrc"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"tokens_total": 591725,
|
"tokens_total": 592149,
|
||||||
"languages_total": 719,
|
"languages_total": 720,
|
||||||
"tokens": {
|
"tokens": {
|
||||||
"ABAP": {
|
"ABAP": {
|
||||||
"*/**": 1,
|
"*/**": 1,
|
||||||
@@ -52298,39 +52301,39 @@
|
|||||||
},
|
},
|
||||||
"R": {
|
"R": {
|
||||||
"df.residual.mira": 1,
|
"df.residual.mira": 1,
|
||||||
"<": 24,
|
"<": 46,
|
||||||
"-": 28,
|
"-": 51,
|
||||||
"function": 14,
|
"function": 18,
|
||||||
"(": 163,
|
"(": 219,
|
||||||
"object": 12,
|
"object": 12,
|
||||||
"...": 4,
|
"...": 4,
|
||||||
")": 162,
|
")": 220,
|
||||||
"{": 35,
|
"{": 46,
|
||||||
"fit": 2,
|
"fit": 2,
|
||||||
"analyses": 1,
|
"analyses": 1,
|
||||||
"[": 13,
|
"[": 23,
|
||||||
"]": 13,
|
"]": 24,
|
||||||
"return": 8,
|
"return": 8,
|
||||||
"df.residual": 2,
|
"df.residual": 2,
|
||||||
"}": 35,
|
"}": 46,
|
||||||
"df.residual.lme": 1,
|
"df.residual.lme": 1,
|
||||||
"fixDF": 1,
|
"fixDF": 1,
|
||||||
"df.residual.mer": 1,
|
"df.residual.mer": 1,
|
||||||
"sum": 1,
|
"sum": 1,
|
||||||
"object@dims": 1,
|
"object@dims": 1,
|
||||||
"*": 2,
|
"*": 2,
|
||||||
"c": 9,
|
"c": 11,
|
||||||
"+": 3,
|
"+": 4,
|
||||||
"df.residual.default": 1,
|
"df.residual.default": 1,
|
||||||
"q": 2,
|
"q": 3,
|
||||||
"df": 3,
|
"df": 3,
|
||||||
"if": 13,
|
"if": 19,
|
||||||
"is.null": 2,
|
"is.null": 8,
|
||||||
"mk": 2,
|
"mk": 2,
|
||||||
"try": 3,
|
"try": 3,
|
||||||
"coef": 1,
|
"coef": 1,
|
||||||
"silent": 3,
|
"silent": 3,
|
||||||
"TRUE": 12,
|
"TRUE": 14,
|
||||||
"mn": 2,
|
"mn": 2,
|
||||||
"f": 9,
|
"f": 9,
|
||||||
"fitted": 1,
|
"fitted": 1,
|
||||||
@@ -52345,15 +52348,75 @@
|
|||||||
"length": 3,
|
"length": 3,
|
||||||
"k": 3,
|
"k": 3,
|
||||||
"max": 1,
|
"max": 1,
|
||||||
"SHEBANG#!Rscript": 1,
|
"SHEBANG#!Rscript": 2,
|
||||||
|
"#": 45,
|
||||||
|
"MedianNorm": 2,
|
||||||
|
"data": 11,
|
||||||
|
"geomeans": 3,
|
||||||
|
"<->": 1,
|
||||||
|
"exp": 1,
|
||||||
|
"rowMeans": 1,
|
||||||
|
"log": 5,
|
||||||
|
"apply": 2,
|
||||||
|
"2": 1,
|
||||||
|
"cnts": 2,
|
||||||
|
"median": 1,
|
||||||
|
"library": 1,
|
||||||
|
"print_usage": 2,
|
||||||
|
"file": 4,
|
||||||
|
"stderr": 1,
|
||||||
|
"cat": 1,
|
||||||
|
"spec": 2,
|
||||||
|
"matrix": 3,
|
||||||
|
"byrow": 3,
|
||||||
|
"ncol": 3,
|
||||||
|
"opt": 23,
|
||||||
|
"getopt": 1,
|
||||||
|
"help": 1,
|
||||||
|
"stdout": 1,
|
||||||
|
"status": 1,
|
||||||
|
"height": 7,
|
||||||
|
"out": 4,
|
||||||
|
"res": 6,
|
||||||
|
"width": 7,
|
||||||
|
"ylim": 7,
|
||||||
|
"read.table": 1,
|
||||||
|
"header": 1,
|
||||||
|
"sep": 4,
|
||||||
|
"quote": 1,
|
||||||
|
"nsamp": 8,
|
||||||
|
"dim": 1,
|
||||||
|
"outfile": 4,
|
||||||
|
"sprintf": 2,
|
||||||
|
"png": 2,
|
||||||
|
"h": 12,
|
||||||
|
"hist": 4,
|
||||||
|
"plot": 7,
|
||||||
|
"FALSE": 9,
|
||||||
|
"mids": 4,
|
||||||
|
"density": 4,
|
||||||
|
"type": 3,
|
||||||
|
"col": 4,
|
||||||
|
"rainbow": 4,
|
||||||
|
"main": 2,
|
||||||
|
"xlab": 2,
|
||||||
|
"ylab": 2,
|
||||||
|
"for": 3,
|
||||||
|
"i": 6,
|
||||||
|
"in": 8,
|
||||||
|
"lines": 6,
|
||||||
|
"devnum": 2,
|
||||||
|
"dev.off": 2,
|
||||||
|
"size.factors": 2,
|
||||||
|
"data.matrix": 1,
|
||||||
|
"data.norm": 3,
|
||||||
|
"t": 1,
|
||||||
|
"x": 3,
|
||||||
|
"/": 1,
|
||||||
"ParseDates": 2,
|
"ParseDates": 2,
|
||||||
"lines": 4,
|
|
||||||
"dates": 3,
|
"dates": 3,
|
||||||
"matrix": 2,
|
|
||||||
"unlist": 2,
|
"unlist": 2,
|
||||||
"strsplit": 3,
|
"strsplit": 3,
|
||||||
"ncol": 2,
|
|
||||||
"byrow": 2,
|
|
||||||
"days": 2,
|
"days": 2,
|
||||||
"times": 2,
|
"times": 2,
|
||||||
"hours": 2,
|
"hours": 2,
|
||||||
@@ -52374,7 +52437,6 @@
|
|||||||
"ggplot": 1,
|
"ggplot": 1,
|
||||||
"aes": 2,
|
"aes": 2,
|
||||||
"y": 1,
|
"y": 1,
|
||||||
"x": 1,
|
|
||||||
"geom_point": 1,
|
"geom_point": 1,
|
||||||
"size": 1,
|
"size": 1,
|
||||||
"Freq": 1,
|
"Freq": 1,
|
||||||
@@ -52382,12 +52444,8 @@
|
|||||||
"range": 1,
|
"range": 1,
|
||||||
"ggsave": 1,
|
"ggsave": 1,
|
||||||
"filename": 1,
|
"filename": 1,
|
||||||
"plot": 1,
|
|
||||||
"width": 1,
|
|
||||||
"height": 1,
|
|
||||||
"hello": 2,
|
"hello": 2,
|
||||||
"print": 1,
|
"print": 1,
|
||||||
"#": 42,
|
|
||||||
"module": 25,
|
"module": 25,
|
||||||
"code": 19,
|
"code": 19,
|
||||||
"available": 1,
|
"available": 1,
|
||||||
@@ -52409,7 +52467,6 @@
|
|||||||
"even": 1,
|
"even": 1,
|
||||||
"attach": 11,
|
"attach": 11,
|
||||||
"is": 7,
|
"is": 7,
|
||||||
"FALSE": 5,
|
|
||||||
"optionally": 1,
|
"optionally": 1,
|
||||||
"attached": 2,
|
"attached": 2,
|
||||||
"to": 8,
|
"to": 8,
|
||||||
@@ -52419,7 +52476,6 @@
|
|||||||
"defaults": 1,
|
"defaults": 1,
|
||||||
".": 5,
|
".": 5,
|
||||||
"However": 1,
|
"However": 1,
|
||||||
"in": 6,
|
|
||||||
"interactive": 2,
|
"interactive": 2,
|
||||||
"invoked": 1,
|
"invoked": 1,
|
||||||
"directly": 1,
|
"directly": 1,
|
||||||
@@ -52467,7 +52523,6 @@
|
|||||||
"first.": 1,
|
"first.": 1,
|
||||||
"That": 1,
|
"That": 1,
|
||||||
"local": 3,
|
"local": 3,
|
||||||
"file": 1,
|
|
||||||
"./a.r": 1,
|
"./a.r": 1,
|
||||||
"will": 2,
|
"will": 2,
|
||||||
"loaded.": 1,
|
"loaded.": 1,
|
||||||
@@ -52537,7 +52592,6 @@
|
|||||||
"parent": 9,
|
"parent": 9,
|
||||||
".BaseNamespaceEnv": 1,
|
".BaseNamespaceEnv": 1,
|
||||||
"paste": 3,
|
"paste": 3,
|
||||||
"sep": 3,
|
|
||||||
"source": 2,
|
"source": 2,
|
||||||
"chdir": 1,
|
"chdir": 1,
|
||||||
"envir": 5,
|
"envir": 5,
|
||||||
@@ -52590,7 +52644,6 @@
|
|||||||
"Reloading": 1,
|
"Reloading": 1,
|
||||||
"primarily": 1,
|
"primarily": 1,
|
||||||
"useful": 1,
|
"useful": 1,
|
||||||
"for": 1,
|
|
||||||
"testing": 1,
|
"testing": 1,
|
||||||
"during": 1,
|
"during": 1,
|
||||||
"module_ref": 3,
|
"module_ref": 3,
|
||||||
@@ -52612,8 +52665,7 @@
|
|||||||
"pts": 1,
|
"pts": 1,
|
||||||
"spsample": 1,
|
"spsample": 1,
|
||||||
"polyg": 1,
|
"polyg": 1,
|
||||||
"numpoints": 1,
|
"numpoints": 1
|
||||||
"type": 1
|
|
||||||
},
|
},
|
||||||
"Racket": {
|
"Racket": {
|
||||||
";": 3,
|
";": 3,
|
||||||
@@ -63056,7 +63108,7 @@
|
|||||||
"Protocol Buffer": 63,
|
"Protocol Buffer": 63,
|
||||||
"PureScript": 1652,
|
"PureScript": 1652,
|
||||||
"Python": 5715,
|
"Python": 5715,
|
||||||
"R": 1243,
|
"R": 1667,
|
||||||
"Racket": 331,
|
"Racket": 331,
|
||||||
"Ragel in Ruby Host": 593,
|
"Ragel in Ruby Host": 593,
|
||||||
"RDoc": 279,
|
"RDoc": 279,
|
||||||
@@ -63236,7 +63288,7 @@
|
|||||||
"Protocol Buffer": 1,
|
"Protocol Buffer": 1,
|
||||||
"PureScript": 4,
|
"PureScript": 4,
|
||||||
"Python": 7,
|
"Python": 7,
|
||||||
"R": 5,
|
"R": 6,
|
||||||
"Racket": 2,
|
"Racket": 2,
|
||||||
"Ragel in Ruby Host": 3,
|
"Ragel in Ruby Host": 3,
|
||||||
"RDoc": 1,
|
"RDoc": 1,
|
||||||
@@ -63287,5 +63339,5 @@
|
|||||||
"YAML": 2,
|
"YAML": 2,
|
||||||
"Zephir": 2
|
"Zephir": 2
|
||||||
},
|
},
|
||||||
"md5": "58816c8da227d1157f624a68c2f3ab55"
|
"md5": "fa38e2b617caaf230146a7adab264419"
|
||||||
}
|
}
|
||||||
101
samples/R/filenames/expr-dist
Executable file
101
samples/R/filenames/expr-dist
Executable file
@@ -0,0 +1,101 @@
|
|||||||
|
#!/usr/bin/env Rscript
|
||||||
|
|
||||||
|
# Copyright (c) 2013 Daniel S. Standage, released under MIT license
|
||||||
|
#
|
||||||
|
# expr-dist: plot distributions of expression values before and after
|
||||||
|
# normalization; visually confirm that normalization worked
|
||||||
|
# as expected
|
||||||
|
#
|
||||||
|
# Program input is a matrix of expression values, each row corresponding to a
|
||||||
|
# molecule (gene, transcript, etc) and each column giving that molecule's
|
||||||
|
# expression level or abundance. The program expects the rows and columns to be
|
||||||
|
# named, and was tested primarily on output produced by the
|
||||||
|
# 'rsem-generate-data-matrix' script distributed with the RSEM package.
|
||||||
|
#
|
||||||
|
# The program plots the distributions of the logged expression values by sample
|
||||||
|
# as provided, then normalizes the values, and finally plots the distribution of
|
||||||
|
# the logged normalized expression values by sample. The expectation is that all
|
||||||
|
# samples' distributions will have a similar shape but different medians prior
|
||||||
|
# to normalization, and that post normalization they will all have an identical
|
||||||
|
# median to facilitate cross-sample comparison.
|
||||||
|
|
||||||
|
|
||||||
|
# MedianNorm function borrowed from the EBSeq library version 1.1.6
|
||||||
|
# See http://www.bioconductor.org/packages/devel/bioc/html/EBSeq.html
|
||||||
|
MedianNorm <- function(data)
|
||||||
|
{
|
||||||
|
geomeans <- exp( rowMeans(log(data)) )
|
||||||
|
apply(data, 2, function(cnts) median((cnts/geomeans)[geomeans > 0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
library("getopt")
|
||||||
|
print_usage <- function(file=stderr())
|
||||||
|
{
|
||||||
|
cat("
|
||||||
|
expr-dist: see source code for full description
|
||||||
|
Usage: expr-dist [options] < expr-matrix.txt
|
||||||
|
Options:
|
||||||
|
-h|--help: print this help message and exit
|
||||||
|
-o|--out: STRING prefix for output files; default is 'expr-dist'
|
||||||
|
-r|--res: INT resolution (dpi) of generated graphics; default is 150
|
||||||
|
-t|--height: INT height (pixels) of generated graphics; default is 1200
|
||||||
|
-w|--width: INT width (pixels) of generated graphics; default is 1200
|
||||||
|
-y|--ylim: REAL the visible range of the Y axis depends on the first
|
||||||
|
distribution plotted; if other distributions are getting
|
||||||
|
cut off, use this setting to override the default\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
spec <- matrix( c("help", 'h', 0, "logical",
|
||||||
|
"out", 'o', 1, "character",
|
||||||
|
"res", 'r', 1, "integer",
|
||||||
|
"height", 't', 1, "integer",
|
||||||
|
"width", 'w', 1, "integer",
|
||||||
|
"ylim", 'y', 1, "double"),
|
||||||
|
byrow=TRUE, ncol=4)
|
||||||
|
opt <- getopt(spec)
|
||||||
|
if(!is.null(opt$help))
|
||||||
|
{
|
||||||
|
print_usage(file=stdout())
|
||||||
|
q(status=1)
|
||||||
|
}
|
||||||
|
if(is.null(opt$height)) { opt$height <- 1200 }
|
||||||
|
if(is.null(opt$out)) { opt$out <- "expr-dist" }
|
||||||
|
if(is.null(opt$res)) { opt$res <- 150 }
|
||||||
|
if(is.null(opt$width)) { opt$width <- 1200 }
|
||||||
|
if(!is.null(opt$ylim)) { opt$ylim <- c(0, opt$ylim) }
|
||||||
|
|
||||||
|
# Load data, determine number of samples
|
||||||
|
data <- read.table(file("stdin"), header=TRUE, sep="\t", quote="")
|
||||||
|
nsamp <- dim(data)[2] - 1
|
||||||
|
data <- data[,1:nsamp+1]
|
||||||
|
|
||||||
|
# Plot distribution of expression values before normalization
|
||||||
|
outfile <- sprintf("%s-median.png", opt$out)
|
||||||
|
png(outfile, height=opt$height, width=opt$width, res=opt$res)
|
||||||
|
h <- hist(log(data[,1]), plot=FALSE)
|
||||||
|
plot(h$mids, h$density, type="l", col=rainbow(nsamp)[1], main="",
|
||||||
|
xlab="Log expression value", ylab="Proportion of molecules", ylim=opt$ylim)
|
||||||
|
for(i in 2:nsamp)
|
||||||
|
{
|
||||||
|
h <- hist(log(data[,i]), plot=FALSE)
|
||||||
|
lines(h$mids, h$density, col=rainbow(nsamp)[i])
|
||||||
|
}
|
||||||
|
devnum <- dev.off()
|
||||||
|
|
||||||
|
# Normalize by median
|
||||||
|
size.factors <- MedianNorm(data.matrix(data))
|
||||||
|
data.norm <- t(apply(data, 1, function(x){ x / size.factors }))
|
||||||
|
|
||||||
|
# Plot distribution of normalized expression values
|
||||||
|
outfile <- sprintf("%s-median-norm.png", opt$out)
|
||||||
|
png(outfile, height=opt$height, width=opt$width, res=opt$res)
|
||||||
|
h <- hist(log(data.norm[,1]), plot=FALSE)
|
||||||
|
plot(h$mids, h$density, type="l", col=rainbow(nsamp)[1], main="",
|
||||||
|
xlab="Log normalized expression value", ylab="Proportion of molecules",
|
||||||
|
ylim=opt$ylim)
|
||||||
|
for(i in 2:nsamp)
|
||||||
|
{
|
||||||
|
h <- hist(log(data.norm[,i]), plot=FALSE)
|
||||||
|
lines(h$mids, h$density, col=rainbow(nsamp)[i])
|
||||||
|
}
|
||||||
|
devnum <- dev.off()
|
||||||
Reference in New Issue
Block a user