IT知识库 购物 网址 游戏 小说 歌词 快照 开发 股票 美女 新闻 笑话 | 汉字 软件 日历 阅读 下载 图书馆 编程 China
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
vbs/VBScript DOS/BAT hta htc python perl 游戏相关 VBA 远程脚本 ColdFusion ruby专题 autoit seraphzone PowerShell linux shell Lua Golang Erlang 其它教程 CSS/HTML/Xhtml html5 CSS XML/XSLT Dreamweaver教程 经验交流 开发者乐园 Android开发资料
站长资讯 .NET新手 ASP.NET C# WinForm Silverlight WCF CLR WPF XNA VisualStudio ASP.NET-MVC .NET控件开发 EntityFramework WinRT-Metro Java C++ PHP Delphi Python Ruby C语言 Erlang Go Swift Scala R语言 Verilog 其它语言 架构设计 面向对象 设计模式 领域驱动 Html-Css JavaScript jQuery HTML5 SharePoint GIS技术 SAP OracleERP DynamicsCRM K2 BPM 信息安全 企业信息 Android开发 iOS开发 WindowsPhone WindowsMobile 其他手机 敏捷开发 项目管理 软件工程 SQLServer Oracle MySQL NoSQL 其它数据库 Windows7 WindowsServer Linux
  IT知识库 -> R语言 -> R中矩阵运算 -> 正文阅读

[R语言]R中矩阵运算

R中矩阵运算
# 数据产生
# rnorm(n, mean = 0, sd = 1) 正态分布的随机数(r 代表随机,可以替换成dnorm, pnorm, qnorm 作不同计算。r= random = 随机, d= density = 密度, p= probability = 概率 , q =quantile = 分位)
# runif(n, min = 0, max = 1) 平均分布的随机数
# rep(1,5) 把1重复5次
# scale(1:5) 标准化数据
> a <- c(rnorm(5), rnorm(5,1), runif(5), runif(5,-1,1), 1:5, rep(0,5), c(2,10,11,13,4), scale(1:5)[1:5])
> a
 [1] -0.41253556  0.12192929 -0.47635888 -0.97171653  1.09162243  1.87789657
 [7] -0.11717937  2.92953522  1.33836620 -0.03269026  0.87540920  0.13005744
[13]  0.11900686  0.76663940  0.28407356 -0.91251181  0.17997973  0.50452258
[19]  0.25961316 -0.58052230  1.00000000  2.00000000  3.00000000  4.00000000
[25]  5.00000000  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000
[31]  2.00000000 10.00000000 11.00000000 13.00000000  4.00000000 -1.26491106
[37] -0.63245553  0.00000000  0.63245553  1.26491106
> a <- matrix(a, ncol=5, byrow=T)
> a
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
[3,]  0.8754092  0.1300574  0.1190069  0.7666394  0.28407356
[4,] -0.9125118  0.1799797  0.5045226  0.2596132 -0.58052230
[5,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
[6,]  0.0000000  0.0000000  0.0000000  0.0000000  0.00000000
[7,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
[8,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
 
# 求行的加和
> rowSums(a)
[1] -0.6470593  5.9959284  2.1751865 -0.5489186 15.0000000  0.0000000 40.0000000
[8]  0.0000000

# 去除全部为0的行
> a <- a[rowSums(abs(a))!=0,]
> a
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
[3,]  0.8754092  0.1300574  0.1190069  0.7666394  0.28407356
[4,] -0.9125118  0.1799797  0.5045226  0.2596132 -0.58052230
[5,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
[6,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
[7,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
 
# 矩阵运算,R默认针对整个数据进行常见运算
# 所有值都乘以2
> a * 2
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,] -0.8250711  0.2438586 -0.9527178 -1.9434331  2.18324487
[2,]  3.7557931 -0.2343587  5.8590704  2.6767324 -0.06538051
[3,]  1.7508184  0.2601149  0.2380137  1.5332788  0.56814712
[4,] -1.8250236  0.3599595  1.0090452  0.5192263 -1.16104460
[5,]  2.0000000  4.0000000  6.0000000  8.0000000 10.00000000
[6,]  4.0000000 20.0000000 22.0000000 26.0000000  8.00000000
[7,] -2.5298221 -1.2649111  0.0000000  1.2649111  2.52982213
 
# 所有值取绝对值,再取对数 (取对数前一般加一个数避免对0或负值取对数)
> log2(abs(a)+1)
          [,1]      [,2]      [,3]      [,4]      [,5]
[1,] 0.4982872 0.1659818 0.5620435 0.9794522 1.0646224
[2,] 1.5250147 0.1598608 1.9743587 1.2255009 0.0464076
[3,] 0.9072054 0.1763961 0.1622189 0.8210076 0.3607278
[4,] 0.9354687 0.2387621 0.5893058 0.3329807 0.6604014
[5,] 1.0000000 1.5849625 2.0000000 2.3219281 2.5849625
[6,] 1.5849625 3.4594316 3.5849625 3.8073549 2.3219281
[7,] 1.1794544 0.7070437 0.0000000 0.7070437 1.1794544
 
# 取出最大值、最小值、行数、列数
> max(a)
[1] 13
> min(a)
[1] -1.264911
> nrow(a)
[1] 7
> ncol(a)
[1] 5
 
# 增加一列或一行
# cbind: column bind
> cbind(a, 1:7)
           [,1]       [,2]       [,3]       [,4]        [,5] [,6]
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243    1
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026    2
[3,]  0.8754092  0.1300574  0.1190069  0.7666394  0.28407356    3
[4,] -0.9125118  0.1799797  0.5045226  0.2596132 -0.58052230    4
[5,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000    5
[6,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000    6
[7,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106    7
> cbind(a, seven=1:7)
                                                             seven
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243     1
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026     2
[3,]  0.8754092  0.1300574  0.1190069  0.7666394  0.28407356     3
[4,] -0.9125118  0.1799797  0.5045226  0.2596132 -0.58052230     4
[5,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000     5
[6,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000     6
[7,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106     7
 
# rbind: row bind
> rbind(a,1:5)
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
[3,]  0.8754092  0.1300574  0.1190069  0.7666394  0.28407356
[4,] -0.9125118  0.1799797  0.5045226  0.2596132 -0.58052230
[5,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
[6,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
[7,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
[8,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
 
# 计算每一行的mad (中值绝对偏差,一般认为比方差的鲁棒性更强,更少受异常值的影响,更能反映数据间的差异)1表示矩阵行,2表示矩阵列,也可以是c(1,2)
> apply(a,1,mad)
[1] 0.7923976 2.0327283 0.2447279 0.4811672 1.4826000 4.4478000 0.9376786
 
# 计算每一行的var (方差)
# apply表示对数据(第一个参数)的每一行 (第二个参数赋值为1) 或每一列 (2)操作。最后返回一个列表
> apply(a,1,var)
[1]  0.6160264  1.6811161  0.1298913  0.3659391  2.5000000 22.5000000  1.0000000
 
# 计算每一列的平均值
> apply(a,2,mean)
[1] 0.4519068 1.6689045 2.4395294 2.7179083 1.5753421
 
# 取出中值绝对偏差大于0.5的行
> b = a[apply(a,1,mad)>0.5,]
> b
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
[3,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
[4,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
[5,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
 
# 矩阵按照mad的大小降序排列
> c = b[order(apply(b,1,mad), decreasing=T),]
> c
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
[3,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
[4,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
[5,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
 
> rownames(c) <- paste('Gene', letters[1:5], sep="_")
> colnames(c) <- toupper(letters[1:5])
> c
                A          B          C          D           E
Gene_a  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
Gene_b  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
Gene_c  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
Gene_d -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
Gene_e -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
 
# 矩阵转置
> expr = t(c)
> expr
  Gene_a      Gene_b Gene_c     Gene_d     Gene_e
A      2  1.87789657      1 -1.2649111 -0.4125356
B     10 -0.11717937      2 -0.6324555  0.1219293
C     11  2.92953522      3  0.0000000 -0.4763589
D     13  1.33836620      4  0.6324555 -0.9717165
E      4 -0.03269026      5  1.2649111  1.0916224
 
# 矩阵值的替换
> expr2 = expr
> expr2[expr2<0] = 0
> expr2
  Gene_a   Gene_b Gene_c    Gene_d    Gene_e
A      2 1.877897      1 0.0000000 0.0000000
B     10 0.000000      2 0.0000000 0.1219293
C     11 2.929535      3 0.0000000 0.0000000
D     13 1.338366      4 0.6324555 0.0000000
E      4 0.000000      5 1.2649111 1.0916224
 
# 矩阵中只针对某一列替换
# expr2是个矩阵不是数据框,不能使用列名字索引
> expr2[expr2$Gene_b<1, "Gene_b"] <- 1
Error in expr2$Gene_b : $ operator is invalid for atomic vectors
# str是一个最为常用、好用的查看变量信息的工具,尤其是对特别复杂的变量,可以看清其层级结构,便于提取数据
> str(expr2)
 num [1:5, 1:5] 2 10 11 13 4 ...
 - attr(*, "dimnames")=List of 2
  ..$ : chr [1:5] "A" "B" "C" "D" ...
  ..$ : chr [1:5] "Gene_a" "Gene_b" "Gene_c" "Gene_d" ...
 
# 转换为数据框,再进行相应的操作
> expr2 <- as.data.frame(expr2)
> str(expr2)
'data.frame':    5 obs. of  5 variables:
 $ Gene_a: num  2 10 11 13 4
 $ Gene_b: num  1.88 1 2.93 1.34 1
 $ Gene_c: num  1 2 3 4 5
 $ Gene_d: num  0 0 0 0.632 1.265
 $ Gene_e: num  0 0.122 0 0 1.092
> expr2[expr2$Gene_b<1, "Gene_b"] <- 1
> expr2
  Gene_a   Gene_b Gene_c    Gene_d    Gene_e
A      2 1.877897      1 0.0000000 0.0000000
B     10 1.000000      2 0.0000000 0.1219293
C     11 2.929535      3 0.0000000 0.0000000
D     13 1.338366      4 0.6324555 0.0000000
E      4 1.000000      5 1.2649111 1.0916224

R中矩阵筛选合并

# 读入样品信息
> sampleInfo = "Samp;Group;Genotype
+ A;Control;WT
+ B;Control;WT
+ D;Treatment;Mutant
+ C;Treatment;Mutant
+ E;Treatment;WT
+ F;Treatment;WT"
> phenoData = read.table(text=sampleInfo, sep=";", header=T, row.names=1, quote="")
> phenoData
      Group Genotype
A   Control       WT
B   Control       WT
D Treatment   Mutant
C Treatment   Mutant
E Treatment       WT
F Treatment       WT
 
# 把样品信息按照基因表达矩阵中的样品信息排序,并只保留有基因表达信息的样品
> phenoData[match(rownames(expr), rownames(phenoData)),]
      Group Genotype
A   Control       WT
B   Control       WT
C Treatment   Mutant
D Treatment   Mutant
E Treatment       WT
 
# 注意顺序,%in%比match更好理解一些
> phenoData = phenoData[rownames(phenoData) %in% rownames(expr),]
> phenoData
      Group Genotype
A   Control       WT
B   Control       WT
C Treatment   Mutant
D Treatment   Mutant
E Treatment       WT
 
# 合并矩阵
# by=0 表示按照行的名字排序
# by=columnname 表示按照共有的某一列排序
# 合并后多出了新的一列Row.names
> merge_data = merge(expr, phenoData, by=0, all.x=T)
> merge_data
  Row.names Gene_a      Gene_b Gene_c     Gene_d     Gene_e     Group Genotype
1         A      2  1.87789657      1 -1.2649111 -0.4125356   Control       WT
2         B     10 -0.11717937      2 -0.6324555  0.1219293   Control       WT
3         C     11  2.92953522      3  0.0000000 -0.4763589 Treatment   Mutant
4         D     13  1.33836620      4  0.6324555 -0.9717165 Treatment   Mutant
5         E      4 -0.03269026      5  1.2649111  1.0916224 Treatment       WT
 
> rownames(merge_data) <- merge_data$Row.names
> merge_data 
  Row.names Gene_a      Gene_b Gene_c     Gene_d     Gene_e     Group Genotype
A         A      2  1.87789657      1 -1.2649111 -0.4125356   Control       WT
B         B     10 -0.11717937      2 -0.6324555  0.1219293   Control       WT
C         C     11  2.92953522      3  0.0000000 -0.4763589 Treatment   Mutant
D         D     13  1.33836620      4  0.6324555 -0.9717165 Treatment   Mutant
E         E      4 -0.03269026      5  1.2649111  1.0916224 Treatment       WT
 
# 去除一列;-1表示去除第一列
> merge_data = merge_data[,-1]
> merge_data
  Gene_a      Gene_b Gene_c     Gene_d     Gene_e     Group Genotype
A      2  1.87789657      1 -1.2649111 -0.4125356   Control       WT
B     10 -0.11717937      2 -0.6324555  0.1219293   Control       WT
C     11  2.92953522      3  0.0000000 -0.4763589 Treatment   Mutant
D     13  1.33836620      4  0.6324555 -0.9717165 Treatment   Mutant
E      4 -0.03269026      5  1.2649111  1.0916224 Treatment       WT
 
# 提取出所有的数值列。sapply()对列表或者向量使用函数
> merge_data[sapply(merge_data, is.numeric)]
  Gene_a      Gene_b Gene_c     Gene_d     Gene_e
A      2  1.87789657      1 -1.2649111 -0.4125356
B     10 -0.11717937      2 -0.6324555  0.1219293
C     11  2.92953522      3  0.0000000 -0.4763589
D     13  1.33836620      4  0.6324555 -0.9717165
E      4 -0.03269026      5  1.2649111  1.0916224

上一篇文章      下一篇文章      查看所有文章
加:2017-08-27 23:24:52  更:2017-08-27 23:24:54 
 
  R语言 最新文章
R的方差检验
Solving.Differential.Equations.in.R笔记1
云铝项目市场调查分析
东深新能源科技有限公司
ggplot2图形解析
R语言学习
R包
学习笔记TF037:实现强化学习策略网络
dplyr包
R语言数据可视化0—ggplot2介绍
技术频道: 站长资讯 .NET新手区 ASP.NET C# WinForm Silverlight WCF CLR WPF XNA Visual Studio ASP.NET MVC .NET控件开发 Entity Framework WinRT/Metro Java C++ PHP Delphi Python Ruby C语言 Erlang Go Swift Scala R语言 Verilog 其它语言 架构设计 面向对象 设计模式 领域驱动设计 Html/Css JavaScript jQuery HTML5 SharePoint GIS技术 SAP Oracle ERP Dynamics CRM K2 BPM 信息安全 企业信息化其他 Android开发 iOS开发 Windows Phone Windows Mobile 其他手机开发 敏捷开发 项目与团队管理 软件工程其他 SQL Server Oracle MySQL NoSQL 其它数据库 Windows 7 Windows Server Linux
脚本语言: vbs/VBScript DOS/BAT hta htc python perl 游戏相关 VBA 远程脚本 ColdFusion ruby专题 autoit seraphzone PowerShell linux shell Lua Golang Erlang 其它教程
网站开发: CSS/HTML/Xhtml html5 CSS XML/XSLT Dreamweaver教程 经验交流 开发者乐园 Android开发资料
360图书馆 软件开发资料 文字转语音 购物精选 软件下载 美食菜谱 新闻资讯 电影视频 小游戏 Chinese Culture 股票 三丰软件 开发 中国文化 网文精选 阅读网 看图 日历 万年历 2018年8日历
2018-8-15 3:17:38
多播视频美女直播
↓电视,电影,美女直播,迅雷资源↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT知识库