IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> 系统运维 -> linux NUMA 内存性能测试工具 numademo -> 正文阅读

[系统运维]linux NUMA 内存性能测试工具 numademo

memset

# numademo 1G memset
2 nodes available
Cannot determine CPU cache size
memory with no policy memset              Avg 26927.56 MB/s Max 26989.29 MB/s Min 26822.09 MB/s
local memory memset                       Avg 26884.54 MB/s Max 26954.06 MB/s Min 26847.57 MB/s
memory interleaved on all nodes memset    Avg 13196.72 MB/s Max 14128.55 MB/s Min 11570.00 MB/s
memory on node 1 memset                   Avg 26610.44 MB/s Max 26687.42 MB/s Min 26518.69 MB/s
memory on node 3 memset                   Avg 10000.71 MB/s Max 10138.34 MB/s Min 9965.12 MB/s
memory interleaved on 1 memset            Avg 26867.66 MB/s Max 26927.02 MB/s Min 26774.60 MB/s
setting preferred node to 1
memory without policy memset              Avg 26890.20 MB/s Max 26921.62 MB/s Min 26841.53 MB/s
setting preferred node to 3
memory without policy memset              Avg 9969.95 MB/s Max 9975.68 MB/s Min 9965.49 MB/s
manual interleaving to all nodes memset   Avg 13600.29 MB/s Max 13647.29 MB/s Min 13560.09 MB/s
manual interleaving on node 0/1 memset    Avg 26916.15 MB/s Max 26973.02 MB/s Min 26859.66 MB/s
current interleave node 1
running on node 1, preferred node 0
local memory memset                       Avg 26955.48 MB/s Max 26981.83 MB/s Min 26919.59 MB/s
memory interleaved on all nodes memset    Avg 13709.41 MB/s Max 13858.49 MB/s Min 13631.35 MB/s
memory interleaved on node 0/1 memset     Avg 25967.22 MB/s Max 26047.15 MB/s Min 25860.83 MB/s
alloc on node 3 memset                    Avg 9561.23 MB/s Max 9575.35 MB/s Min 9549.13 MB/s
local allocation memset                   Avg 25924.52 MB/s Max 26121.29 MB/s Min 25348.61 MB/s
setting wrong preferred node memset       Avg 9747.35 MB/s Max 9861.43 MB/s Min 9727.86 MB/s
setting correct preferred node memset     Avg 26019.13 MB/s Max 26105.41 MB/s Min 25932.66 MB/s
running on node 3, preferred node 0
local memory memset                       Avg 28741.57 MB/s Max 29209.52 MB/s Min 28157.81 MB/s
memory interleaved on all nodes memset    Avg 13450.03 MB/s Max 14381.18 MB/s Min 11495.43 MB/s
memory interleaved on node 0/1 memset     Avg 9307.51 MB/s Max 9323.27 MB/s Min 9279.75 MB/s
alloc on node 1 memset                    Avg 9314.34 MB/s Max 9323.75 MB/s Min 9301.86 MB/s
local allocation memset                   Avg 28251.23 MB/s Max 28531.92 MB/s Min 28154.12 MB/s
setting wrong preferred node memset       Avg 9461.36 MB/s Max 9470.13 MB/s Min 9434.68 MB/s
setting correct preferred node memset     Avg 28129.26 MB/s Max 28158.55 MB/s Min 28111.37 MB/s

memcpy

# numademo 1G memcpy
2 nodes available
Cannot determine CPU cache size
memory with no policy memcpy              Avg 12365.35 MB/s Max 13271.64 MB/s Min 9305.17 MB/s
local memory memcpy                       Avg 47384.48 MB/s Max 47550.68 MB/s Min 47007.35 MB/s
memory interleaved on all nodes memcpy    Avg 26573.56 MB/s Max 26950.00 MB/s Min 26323.01 MB/s
memory on node 1 memcpy                   Avg 38779.62 MB/s Max 39570.36 MB/s Min 34569.92 MB/s
memory on node 3 memcpy                   Avg 22022.94 MB/s Max 22042.66 MB/s Min 21980.84 MB/s
memory interleaved on 1 memcpy            Avg 39356.43 MB/s Max 39516.48 MB/s Min 39244.95 MB/s
setting preferred node to 1
memory without policy memcpy              Avg 11652.55 MB/s Max 11743.23 MB/s Min 11595.48 MB/s
setting preferred node to 3
memory without policy memcpy              Avg 6007.48 MB/s Max 6028.94 MB/s Min 5976.79 MB/s
manual interleaving to all nodes memcpy   Avg 8116.78 MB/s Max 8228.73 MB/s Min 8056.17 MB/s
manual interleaving on node 0/1 memcpy    Avg 11643.77 MB/s Max 11685.97 MB/s Min 11598.99 MB/s
current interleave node 1
running on node 1, preferred node 0
local memory memcpy                       Avg 47246.87 MB/s Max 47351.47 MB/s Min 47120.81 MB/s
memory interleaved on all nodes memcpy    Avg 25628.93 MB/s Max 25777.64 MB/s Min 25511.22 MB/s
memory interleaved on node 0/1 memcpy     Avg 47260.18 MB/s Max 47326.42 MB/s Min 47191.22 MB/s
alloc on node 3 memcpy                    Avg 5492.49 MB/s Max 5505.41 MB/s Min 5476.18 MB/s
local allocation memcpy                   Avg 13273.64 MB/s Max 13317.07 MB/s Min 13228.96 MB/s
setting wrong preferred node memcpy       Avg 5534.01 MB/s Max 5570.10 MB/s Min 5471.49 MB/s
setting correct preferred node memcpy     Avg 13328.18 MB/s Max 13360.32 MB/s Min 13280.17 MB/s
running on node 3, preferred node 0
local memory memcpy                       Avg 51844.04 MB/s Max 52146.17 MB/s Min 51577.57 MB/s
memory interleaved on all nodes memcpy    Avg 25641.60 MB/s Max 25901.38 MB/s Min 25498.50 MB/s
memory interleaved on node 0/1 memcpy     Avg 16918.19 MB/s Max 17028.65 MB/s Min 16322.23 MB/s
alloc on node 1 memcpy                    Avg 5488.25 MB/s Max 5529.25 MB/s Min 5456.53 MB/s
local allocation memcpy                   Avg 13455.33 MB/s Max 13481.26 MB/s Min 13429.83 MB/s
setting wrong preferred node memcpy       Avg 5479.94 MB/s Max 5521.80 MB/s Min 5437.69 MB/s
setting correct preferred node memcpy     Avg 13438.35 MB/s Max 13476.86 MB/s Min 13372.96 MB/s

forward

# numademo 1G forward
2 nodes available
Cannot determine CPU cache size
memory with no policy forward             Avg 10345.84 MB/s Max 10364.20 MB/s Min 10335.87 MB/s
local memory forward                      Avg 8701.35 MB/s Max 10354.61 MB/s Min 5448.94 MB/s
memory interleaved on all nodes forward   Avg 7890.15 MB/s Max 7908.36 MB/s Min 7879.34 MB/s
memory on node 1 forward                  Avg 11143.32 MB/s Max 11190.99 MB/s Min 10864.86 MB/s
memory on node 3 forward                  Avg 5448.25 MB/s Max 5450.66 MB/s Min 5443.20 MB/s
memory interleaved on 1 forward           Avg 10441.04 MB/s Max 11178.99 MB/s Min 7101.42 MB/s
setting preferred node to 1
memory without policy forward             Avg 11136.77 MB/s Max 11183.65 MB/s Min 10795.06 MB/s
setting preferred node to 3
memory without policy forward             Avg 5425.09 MB/s Max 5445.08 MB/s Min 5372.20 MB/s
manual interleaving to all nodes forward  Avg 7882.31 MB/s Max 7902.42 MB/s Min 7788.24 MB/s
manual interleaving on node 0/1 forward   Avg 11173.43 MB/s Max 11185.74 MB/s Min 11162.95 MB/s
current interleave node 1
running on node 1, preferred node 0
local memory forward                      Avg 10676.51 MB/s Max 10698.90 MB/s Min 10579.68 MB/s
memory interleaved on all nodes forward   Avg 7929.68 MB/s Max 7932.08 MB/s Min 7925.70 MB/s
memory interleaved on node 0/1 forward    Avg 10683.54 MB/s Max 10690.70 MB/s Min 10666.70 MB/s
alloc on node 3 forward                   Avg 5154.34 MB/s Max 5185.58 MB/s Min 5111.26 MB/s
local allocation forward                  Avg 10693.20 MB/s Max 10704.13 MB/s Min 10680.60 MB/s
setting wrong preferred node forward      Avg 5197.30 MB/s Max 5242.39 MB/s Min 5063.46 MB/s
setting correct preferred node forward    Avg 10321.32 MB/s Max 10564.79 MB/s Min 9863.69 MB/s
running on node 3, preferred node 0
local memory forward                      Avg 10257.98 MB/s Max 10415.48 MB/s Min 10195.43 MB/s
memory interleaved on all nodes forward   Avg 7922.00 MB/s Max 7931.32 MB/s Min 7886.46 MB/s
memory interleaved on node 0/1 forward    Avg 5256.75 MB/s Max 5259.03 MB/s Min 5254.40 MB/s
alloc on node 1 forward                   Avg 5254.91 MB/s Max 5257.49 MB/s Min 5252.52 MB/s
local allocation forward                  Avg 10398.64 MB/s Max 10429.44 MB/s Min 10316.11 MB/s
setting wrong preferred node forward      Avg 5251.63 MB/s Max 5259.11 MB/s Min 5240.17 MB/s
setting correct preferred node forward    Avg 10210.21 MB/s Max 10321.66 MB/s Min 10062.71 MB/s

backward

# numademo 1G backward
2 nodes available
Cannot determine CPU cache size
memory with no policy backward            Avg 11374.24 MB/s Max 11405.31 MB/s Min 11307.31 MB/s
local memory backward                     Avg 9176.80 MB/s Max 11528.26 MB/s Min 6193.14 MB/s
memory interleaved on all nodes backward  Avg 8696.17 MB/s Max 8740.55 MB/s Min 8539.59 MB/s
memory on node 1 backward                 Avg 12086.99 MB/s Max 12101.50 MB/s Min 12063.97 MB/s
memory on node 3 backward                 Avg 6256.84 MB/s Max 6259.53 MB/s Min 6252.50 MB/s
memory interleaved on 1 backward          Avg 12051.88 MB/s Max 12107.91 MB/s Min 11955.04 MB/s
setting preferred node to 1
memory without policy backward            Avg 11744.63 MB/s Max 11985.20 MB/s Min 10492.32 MB/s
setting preferred node to 3
memory without policy backward            Avg 6228.39 MB/s Max 6232.18 MB/s Min 6222.82 MB/s
manual interleaving to all nodes backward Avg 8369.83 MB/s Max 8728.12 MB/s Min 7170.37 MB/s
manual interleaving on node 0/1 backward  Avg 11720.50 MB/s Max 11970.77 MB/s Min 11542.38 MB/s
current interleave node 1
running on node 1, preferred node 0
local memory backward                     Avg 11772.67 MB/s Max 11956.77 MB/s Min 11313.14 MB/s
memory interleaved on all nodes backward  Avg 8859.78 MB/s Max 8872.21 MB/s Min 8839.27 MB/s
memory interleaved on node 0/1 backward   Avg 11917.91 MB/s Max 11927.55 MB/s Min 11907.58 MB/s
alloc on node 3 backward                  Avg 5936.62 MB/s Max 5963.34 MB/s Min 5930.01 MB/s
local allocation backward                 Avg 12051.57 MB/s Max 12081.21 MB/s Min 11919.74 MB/s
setting wrong preferred node backward     Avg 5958.82 MB/s Max 5974.86 MB/s Min 5930.21 MB/s
setting correct preferred node backward   Avg 12053.91 MB/s Max 12066.28 MB/s Min 12040.84 MB/s
running on node 3, preferred node 0
local memory backward                     Avg 10952.35 MB/s Max 11505.03 MB/s Min 9706.58 MB/s
memory interleaved on all nodes backward  Avg 8835.80 MB/s Max 8863.35 MB/s Min 8802.89 MB/s
memory interleaved on node 0/1 backward   Avg 6034.12 MB/s Max 6048.23 MB/s Min 6029.31 MB/s
alloc on node 1 backward                  Avg 6021.08 MB/s Max 6036.02 MB/s Min 5938.25 MB/s
local allocation backward                 Avg 11489.88 MB/s Max 11546.23 MB/s Min 11413.92 MB/s
setting wrong preferred node backward     Avg 6027.94 MB/s Max 6032.97 MB/s Min 6010.58 MB/s
setting correct preferred node backward   Avg 11558.66 MB/s Max 11563.39 MB/s Min 11549.21 MB/s

stream

# numademo 1G stream
2 nodes available
Cannot determine CPU cache size
memory with no policy STREAM              Copy 12495.89 MB/s Scale 12377.05 MB/s Add 13299.27 MB/s Triad 13203.44 MB/s
local memory STREAM                       Copy 12554.67 MB/s Scale 12572.49 MB/s Add 13398.67 MB/s Triad 13337.08 MB/s
memory interleaved on all nodes STREAM    Copy 8636.82 MB/s Scale 8579.04 MB/s Add 8451.73 MB/s Triad 8439.11 MB/s
memory on node 1 STREAM                   Copy 12487.00 MB/s Scale 12441.83 MB/s Add 13297.11 MB/s Triad 13227.63 MB/s
memory on node 3 STREAM                   Copy 5664.64 MB/s Scale 5788.39 MB/s Add 6105.62 MB/s Triad 6123.11 MB/s
memory interleaved on 1 STREAM            Copy 12485.44 MB/s Scale 12490.22 MB/s Add 13292.99 MB/s Triad 13227.32 MB/s
setting preferred node to 1
memory without policy STREAM              Copy 12499.84 MB/s Scale 12449.00 MB/s Add 13306.70 MB/s Triad 13209.59 MB/s
setting preferred node to 3
memory without policy STREAM              Copy 5768.75 MB/s Scale 5820.65 MB/s Add 6029.41 MB/s Triad 6084.35 MB/s
manual interleaving to all nodes STREAM   Copy 8630.78 MB/s Scale 8631.60 MB/s Add 8462.45 MB/s Triad 8465.79 MB/s
manual interleaving on node 0/1 STREAM    Copy 12532.87 MB/s Scale 12471.96 MB/s Add 13354.17 MB/s Triad 13242.34 MB/s
current interleave node 1
running on node 1, preferred node 0
local memory STREAM                       Copy 12459.18 MB/s Scale 12490.69 MB/s Add 13322.21 MB/s Triad 13247.24 MB/s
memory interleaved on all nodes STREAM    Copy 8701.27 MB/s Scale 8631.83 MB/s Add 8433.28 MB/s Triad 8436.93 MB/s
memory interleaved on node 0/1 STREAM     Copy 12528.69 MB/s Scale 12526.07 MB/s Add 13376.82 MB/s Triad 13308.31 MB/s
alloc on node 3 STREAM                    Copy 5600.20 MB/s Scale 5767.46 MB/s Add 6017.36 MB/s Triad 6008.52 MB/s
local allocation STREAM                   Copy 12436.62 MB/s Scale 12364.92 MB/s Add 13224.22 MB/s Triad 13100.31 MB/s
setting wrong preferred node STREAM       Copy 5686.68 MB/s Scale 5796.37 MB/s Add 6031.99 MB/s Triad 6039.48 MB/s
setting correct preferred node STREAM     Copy 12469.58 MB/s Scale 12422.37 MB/s Add 13309.45 MB/s Triad 13198.06 MB/s
running on node 3, preferred node 0
local memory STREAM                       Copy 12308.35 MB/s Scale 12295.39 MB/s Add 13311.81 MB/s Triad 13232.22 MB/s
memory interleaved on all nodes STREAM    Copy 8744.53 MB/s Scale 8660.44 MB/s Add 8469.46 MB/s Triad 8487.08 MB/s
memory interleaved on node 0/1 STREAM     Copy 5601.28 MB/s Scale 5716.38 MB/s Add 6033.88 MB/s Triad 6054.60 MB/s
alloc on node 1 STREAM                    Copy 5670.15 MB/s Scale 5778.17 MB/s Add 6057.92 MB/s Triad 6069.18 MB/s
local allocation STREAM                   Copy 12488.51 MB/s Scale 12474.60 MB/s Add 13312.29 MB/s Triad 13225.07 MB/s
setting wrong preferred node STREAM       Copy 5579.89 MB/s Scale 5641.59 MB/s Add 5976.38 MB/s Triad 5986.72 MB/s
setting correct preferred node STREAM     Copy 12473.25 MB/s Scale 12468.49 MB/s Add 13258.20 MB/s Triad 13157.95 MB/s

random2

# numademo 1m random2
2 nodes available
Cannot determine CPU cache size
memory with no policy random2             Avg 408.63 MB/s Max 415.94 MB/s Min 405.01 MB/s
local memory random2                      Avg 367.33 MB/s Max 369.74 MB/s Min 365.87 MB/s
memory interleaved on all nodes random2   Avg 395.76 MB/s Max 397.19 MB/s Min 392.87 MB/s
memory on node 1 random2                  Avg 491.92 MB/s Max 493.68 MB/s Min 486.58 MB/s
memory on node 3 random2                  Avg 358.87 MB/s Max 360.71 MB/s Min 356.42 MB/s
memory interleaved on 1 random2           Avg 492.75 MB/s Max 495.08 MB/s Min 487.94 MB/s
setting preferred node to 1
memory without policy random2             Avg 492.75 MB/s Max 494.15 MB/s Min 491.14 MB/s
setting preferred node to 3
memory without policy random2             Avg 364.29 MB/s Max 382.97 MB/s Min 354.97 MB/s
manual interleaving to all nodes random2  Avg 393.17 MB/s Max 420.61 MB/s Min 353.17 MB/s
manual interleaving on node 0/1 random2   Avg 449.09 MB/s Max 489.99 MB/s Min 392.87 MB/s
current interleave node 1
running on node 1, preferred node 0
local memory random2                      Avg 568.40 MB/s Max 633.58 MB/s Min 299.34 MB/s
memory interleaved on all nodes random2   Avg 489.83 MB/s Max 491.14 MB/s Min 487.71 MB/s
memory interleaved on node 0/1 random2    Avg 630.23 MB/s Max 634.73 MB/s Min 620.83 MB/s
alloc on node 3 random2                   Avg 429.55 MB/s Max 436.54 MB/s Min 408.01 MB/s
local allocation random2                  Avg 631.94 MB/s Max 636.66 MB/s Min 630.53 MB/s
setting wrong preferred node random2      Avg 433.30 MB/s Max 435.46 MB/s Min 431.34 MB/s
setting correct preferred node random2    Avg 633.20 MB/s Max 636.27 MB/s Min 627.14 MB/s
running on node 3, preferred node 0
local memory random2                      Avg 621.53 MB/s Max 624.90 MB/s Min 609.28 MB/s
memory interleaved on all nodes random2   Avg 499.49 MB/s Max 501.47 MB/s Min 491.60 MB/s
memory interleaved on node 0/1 random2    Avg 447.48 MB/s Max 449.07 MB/s Min 442.06 MB/s
alloc on node 1 random2                   Avg 445.18 MB/s Max 447.54 MB/s Min 441.51 MB/s
local allocation random2                  Avg 619.84 MB/s Max 624.90 MB/s Min 615.36 MB/s
setting wrong preferred node random2      Avg 447.08 MB/s Max 449.84 MB/s Min 444.12 MB/s
setting correct preferred node random2    Avg 620.64 MB/s Max 623.78 MB/s Min 616.81 MB/s

ptrchase

# numademo 1m ptrchase
2 nodes available
Cannot determine CPU cache size
memory with no policy ptrchase            Avg 111550.64 MB/s Max 116508.44 MB/s Min 104857.60 MB/s
local memory ptrchase                     Avg 109226.67 MB/s Max 116508.44 MB/s Min 95325.09 MB/s
memory interleaved on all nodes ptrchase  Avg 151967.54 MB/s Max 174762.67 MB/s Min 149796.57 MB/s
memory on node 1 ptrchase                 Avg 218453.33 MB/s Max 262144.00 MB/s Min 209715.20 MB/s
memory on node 3 ptrchase                 Avg 104857.60 MB/s Max 116508.44 MB/s Min 95325.09 MB/s
memory interleaved on 1 ptrchase          Avg 209715.20 MB/s Max 209715.20 MB/s Min 209715.20 MB/s
setting preferred node to 1
memory without policy ptrchase            Avg 209715.20 MB/s Max 262144.00 MB/s Min 174762.67 MB/s
setting preferred node to 3
memory without policy ptrchase            Avg 183960.70 MB/s Max 262144.00 MB/s Min 74898.29 MB/s
manual interleaving to all nodes ptrchase Avg 151967.54 MB/s Max 174762.67 MB/s Min 149796.57 MB/s
manual interleaving on node 0/1 ptrchase  Avg 132731.14 MB/s Max 149796.57 MB/s Min 116508.44 MB/s
current interleave node 1
running on node 1, preferred node 0
local memory ptrchase                     Avg 183960.70 MB/s Max 209715.20 MB/s Min 174762.67 MB/s
memory interleaved on all nodes ptrchase  Avg 194180.74 MB/s Max 209715.20 MB/s Min 174762.67 MB/s
memory interleaved on node 0/1 ptrchase   Avg 177724.75 MB/s Max 209715.20 MB/s Min 149796.57 MB/s
alloc on node 3 ptrchase                  Avg 213995.10 MB/s Max 262144.00 MB/s Min 209715.20 MB/s
local allocation ptrchase                 Avg 180788.97 MB/s Max 209715.20 MB/s Min 174762.67 MB/s
setting wrong preferred node ptrchase     Avg 213995.10 MB/s Max 262144.00 MB/s Min 209715.20 MB/s
setting correct preferred node ptrchase   Avg 177724.75 MB/s Max 209715.20 MB/s Min 174762.67 MB/s
running on node 3, preferred node 0
local memory ptrchase                     Avg 174762.67 MB/s Max 174762.67 MB/s Min 174762.67 MB/s
memory interleaved on all nodes ptrchase  Avg 205603.14 MB/s Max 209715.20 MB/s Min 174762.67 MB/s
memory interleaved on node 0/1 ptrchase   Avg 238312.73 MB/s Max 262144.00 MB/s Min 209715.20 MB/s
alloc on node 1 ptrchase                  Avg 197844.53 MB/s Max 262144.00 MB/s Min 104857.60 MB/s
local allocation ptrchase                 Avg 190650.18 MB/s Max 209715.20 MB/s Min 174762.67 MB/s
setting wrong preferred node ptrchase     Avg 227951.30 MB/s Max 262144.00 MB/s Min 209715.20 MB/s
setting correct preferred node ptrchase   Avg 190650.18 MB/s Max 209715.20 MB/s Min 174762.67 MB/s

附:

stream关键源码及算法说明(stream_lib.c)

Copy算法        
        times[0][k] = mysecond();
		for (j = 0; j < N; j++)
			c[j] = a[j];
		times[0][k] = mysecond() - times[0][k];

Scale算法
		times[1][k] = mysecond();
		for (j = 0; j < N; j++)
			b[j] = scalar * c[j];
		times[1][k] = mysecond() - times[1][k];

Add算法
		times[2][k] = mysecond();
		for (j = 0; j < N; j++)
			c[j] = a[j] + b[j];
		times[2][k] = mysecond() - times[2][k];

Triad算法
		times[3][k] = mysecond();
		for (j = 0; j < N; j++)
			a[j] = b[j] + scalar * c[j];
		times[3][k] = mysecond() - times[3][k];

numademo.c

/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
   Test/demo program for libnuma. This is also a more or less useful benchmark
   of the NUMA characteristics of your machine. It benchmarks most possible
   NUMA policy memory configurations with various benchmarks.
   Compile standalone with cc -O2 numademo.c -o numademo -lnuma -lm

   numactl is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public
   License as published by the Free Software Foundation; version
   2.

   numactl is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should find a copy of v2 of the GNU General Public License somewhere
   on your Linux system; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#define _GNU_SOURCE 1
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/time.h>
#include "numa.h"
#ifdef HAVE_STREAM_LIB
#include "stream_lib.h"
#endif
#ifdef HAVE_MT
#include "mt.h"
#endif
#ifdef HAVE_CLEAR_CACHE
#include "clearcache.h"
#else
/* Fallback used when clearcache.h is not compiled in: deliberately a no-op. */
static inline void clearcache(void *a, unsigned size)
{
	(void)a;
	(void)size;
}
#endif
/* Base value from which main() derives fract_nodes. */
#define FRACT_NODES 8
/* In regression mode test() only runs every FRACT_MASKS-th interleave mask. */
#define FRACT_MASKS 32
/* In regression mode test() skips nodes whose id is not a multiple of this. */
int fract_nodes;
/* Ids of the nodes that reported memory; allocated and filled by get_node_list(). */
int *node_to_use;
/* Size in bytes of every test buffer; parsed from the command line in main(). */
unsigned long msize;

/* Should get this from cpuinfo, but on !x86 it's not there */
enum {
	CACHELINESIZE = 64,	/* stride of the FORWARD and BACKWARD walks in memtest() */
};

/*
 * Benchmark kernels.  The enumerators double as indices into testname[]
 * (memtest() prints testname[thistest] and main() passes a testname[]
 * index to test()), so the optional entries are compiled in or out together
 * with their names below.  Without matching guards every enumerator after
 * BACKWARD would select the wrong name, and PTRCHASE could index past the
 * end of the table.
 */
enum test {
	MEMSET = 0,
	MEMCPY,
	FORWARD,
	BACKWARD,
#ifdef HAVE_STREAM_LIB
	STREAM,
#endif
#ifdef HAVE_MT
	RANDOM2,
#endif
	PTRCHASE,
} thistest;

/* Field separator for result lines; main() switches it to "," for -c. */
char *delim = " ";
/* Set by -f: keep going even when the NUMA API is unavailable. */
int force;
/* Set by -t: run only a subset of the node and mask combinations. */
int regression_testing=0;

/* Test names, indexed by enum test and terminated by NULL. */
char *testname[] = {
	"memset",
	"memcpy",
	"forward",
	"backward",
#ifdef HAVE_STREAM_LIB
	"stream",
#endif
#ifdef HAVE_MT
	"random2",
#endif
	"ptrchase",
	NULL,
};

/*
 * Print one result line.  With a whitespace delimiter the title is padded
 * to a 42-column field so the results line up; with any other delimiter the
 * title and result are simply joined by it.
 */
void output(char *title, char *result)
{
	if (isspace(delim[0])) {
		printf("%-42s%s\n", title, result);
		return;
	}
	printf("%s%s%s\n", title,delim, result);
}

#ifdef HAVE_STREAM_LIB
/*
 * Run the STREAM kernels on mem and print one result line labelled with
 * name.  The cache is cleared before and after the run.
 *
 * All formatting is bounded: callers pass names built in buffers larger
 * than title[], so an over-long label is truncated rather than written past
 * the end of the array.
 */
void do_stream(char *name, unsigned char *mem)
{
	int i;
	size_t len = 0;
	char title[100], buf[100];
	double res[STREAM_NRESULTS];

	stream_verbose = 0;
	clearcache(mem, msize);
	stream_init(mem);
	stream_test(res);
	snprintf(title, sizeof(title), "%s%s%s", name, delim, "STREAM");
	buf[0] = '\0';
	for (i = 0; i < STREAM_NRESULTS; i++) {
		/* every entry but the first is preceded by the delimiter */
		int n = snprintf(buf + len, sizeof(buf) - len,
				 "%s%s%s%.2f%sMB/s",
				 len ? delim : "",
				 stream_names[i], delim, res[i], delim);
		if (n < 0 || (size_t)n >= sizeof(buf) - len)
			break;	/* error or truncated: keep what fits */
		len += (size_t)n;
	}
	output(title, buf);
	clearcache(mem, msize);
}
#endif

/*
 * Set up a randomly distributed list to fool prefetchers.
 *
 * nexti/val share storage with next: ptrchase_init() first fills nexti and
 * val, sorts on val, and only then overwrites each element with its final
 * next pointer.
 */
union node {
	union node *next;
	struct {
		unsigned nexti;
		unsigned val;
	};
};

/*
 * qsort() comparator ordering nodes by ascending val.
 *
 * Returns -1, 0 or 1.  The values are compared rather than subtracted: an
 * unsigned difference narrowed to int has the wrong sign whenever the two
 * values are more than INT_MAX apart.
 */
static int cmp_node(const void *ap, const void *bp)
{
	const union node *a = ap;
	const union node *b = bp;

	return (a->val > b->val) - (a->val < b->val);
}

/*
 * Lay out a pointer chain inside mem for the pointer-chasing test.
 *
 * The buffer is treated as an array of union node.  Each element gets a
 * pseudo-random key (fixed seed, so runs are repeatable) and the index of
 * its successor; the array is then sorted by key, and finally every
 * successor index is replaced by the address of that array slot, or by NULL
 * when the index is past the end.  Returns the start of the buffer.
 */
void **ptrchase_init(unsigned char *mem)
{
	union node *list = (union node *)mem;
	long count = msize / sizeof(union node);
	long idx;

	srand(1234);
	idx = 0;
	while (idx < count) {
		list[idx].val = rand();
		list[idx].nexti = idx + 1;
		idx++;
	}

	qsort(list, count, sizeof(union node), cmp_node);

	for (idx = 0; idx < count; idx++) {
		union node *cur = list + idx;

		/* nexti must be read before next overwrites the same bytes */
		if (cur->nexti >= count)
			cur->next = NULL;
		else
			cur->next = &list[cur->nexti];
	}
	return (void **)list;
}

/* Collapse a timeval into a single count of microseconds. */
static inline unsigned long long timerfold(struct timeval *tv)
{
	unsigned long long usecs = tv->tv_sec * 1000000ULL;

	usecs += tv->tv_usec;
	return usecs;
}

/* Number of measured passes per test (one extra warm-up pass is not counted). */
#define LOOPS 10

/*
 * Run the currently selected benchmark (thistest) on the buffer mem of
 * msize bytes and print one result line labelled with name.
 *
 * The kernel runs LOOPS+1 times; pass 0 only faults the pages in and is
 * not measured.  Elapsed times are in microseconds, so msize divided by the
 * time is reported as MB/s.  The shortest pass yields the "Max" figure and
 * the longest the "Min" figure.
 *
 * mem is always released with numa_free() before returning, except when it
 * is NULL, in which case an error is printed and nothing else is done.
 */
void memtest(char *name, unsigned char *mem)
{
	long k;
	/* zeroed so a test with no matching case below reports 0 us, not garbage */
	struct timeval start = {0}, end = {0}, res;
	unsigned long long max, min, sum, r;
	int i;
	char title[128], result[128];

	if (!mem) {
		fprintf(stderr,
		"Failed to allocate %lu bytes of memory. Test \"%s\" exits.\n",
			msize, name);
		return;
	}

#ifdef HAVE_STREAM_LIB
	if (thistest == STREAM) {
		do_stream(name, mem);
		goto out;
	}
#endif

	max = 0;
	min = ~0ULL;	/* full range of the type, also where long is 32 bits */
	sum = 0;

	/*
	 * Note:  0th pass allocates the pages, don't measure
	 */
	for (i = 0; i < LOOPS+1; i++) {
		clearcache(mem, msize);
		switch (thistest) {
		case PTRCHASE:
		{
			void **ptr;
			ptr = ptrchase_init(mem);
			gettimeofday(&start,NULL);
			while (*ptr)
				ptr = (void **)*ptr;
			gettimeofday(&end,NULL);
			/* Side effect to trick the optimizer */
			*ptr = "bla";
			break;
		}

		case MEMSET:
			gettimeofday(&start,NULL);
			memset(mem, 0xff, msize);
			gettimeofday(&end,NULL);
			break;

		case MEMCPY:
			/*
			 * NOTE(review): only msize/2 bytes are copied, yet the
			 * rate below is computed from msize - confirm intended.
			 */
			gettimeofday(&start,NULL);
			memcpy(mem, mem + msize/2, msize/2);
			gettimeofday(&end,NULL);
			break;

		case FORWARD:
			/* simple kernel to just fetch cachelines and write them back.
			   will trigger hardware prefetch */
			gettimeofday(&start,NULL);
			for (k = 0; k < msize; k+=CACHELINESIZE)
				mem[k]++;
			gettimeofday(&end,NULL);
			break;

		case BACKWARD:
			gettimeofday(&start,NULL);
			for (k = msize-5; k > 0; k-=CACHELINESIZE)
				mem[k]--;
			gettimeofday(&end,NULL);
			break;

#ifdef HAVE_MT
		case RANDOM2:
		{
			unsigned * __restrict m = (unsigned *)mem;
			/* number of unsigned words in the buffer */
			unsigned nwords = msize / sizeof(unsigned);
			unsigned mask;

			mt_init();
			/* smallest all-ones mask that is >= nwords */
			mask = 1;
			while (mask < nwords)
				mask = (mask << 1) | 1;
			/*
			 * There's no guarantee all memory is touched, but
			 * we assume (hope) that the distribution of the MT
			 * is good enough to touch most.
			 */
			gettimeofday(&start,NULL);
			for (k = 0; k < nwords; k++) {
				unsigned idx = mt_random() & mask;
				if (idx >= nwords)
					idx -= nwords;
				m[idx]++;
			}
			gettimeofday(&end,NULL);
			break;
		}

#endif
		default:
			break;
		}

		if (!i)
			continue;  /* don't count allocation pass */

		timersub(&end, &start, &res);
		r = timerfold(&res);
		if (r > max) max = r;
		if (r < min) min = r;
		sum += r;
	}
	/* bounded: name may come from a buffer larger than title[] */
	snprintf(title, sizeof(title), "%s%s%s", name, delim, testname[thistest]);
#define H(t) (((double)msize) / ((double)t))
#define D3 delim,delim,delim
	snprintf(result, sizeof(result),
		"Avg%s%.2f%sMB/s%sMax%s%.2f%sMB/s%sMin%s%.2f%sMB/s",
		delim,
		H(sum/LOOPS),
		D3,
		H(min),
		D3,
		H(max),
		delim);
#undef H
#undef D3
	output(title,result);

#ifdef HAVE_STREAM_LIB
 out:
#endif
	/* Just to make sure that when we switch CPUs that the old guy
	   doesn't still keep it around. */
	clearcache(mem, msize);

	numa_free(mem, msize);
}

/*
 * Return the number of set bits in val.
 *
 * Each iteration clears the lowest set bit, so no shift is involved and a
 * value with its top bit set is handled without shifting by the full width
 * of the type.
 */
int popcnt(unsigned long val)
{
	int cnt = 0;

	while (val) {
		val &= val - 1;	/* drop the lowest set bit */
		cnt++;
	}
	return cnt;
}

/* Highest node id and number of configured nodes, both set by get_node_list(). */
int max_node, numnodes;

/*
 * Build node_to_use[], the list of node ids that report a non-zero memory
 * size, scanning ids 0..numa_max_node().
 *
 * Returns 0 when exactly numa_num_configured_nodes() such nodes were found,
 * and -1 when the counts differ or the list cannot be allocated.  The list
 * is never written past its allocated length.
 */
int get_node_list(void)
{
        int a, got_nodes = 0;
        long long free_node_sizes;

        numnodes = numa_num_configured_nodes();
        node_to_use = malloc(numnodes * sizeof(*node_to_use));
        if (!node_to_use && numnodes != 0) {
                fprintf(stderr, "Failed to allocate the node list\n");
                return -1;
        }
        max_node = numa_max_node();
        for (a = 0; a <= max_node; a++) {
                /* the 64-bit variant takes the long long * we pass */
                if (numa_node_size64(a, &free_node_sizes) <= 0)
                        continue;
                /* more nodes with memory than configured: a mismatch */
                if (got_nodes >= numnodes)
                        return -1;
                node_to_use[got_nodes++] = a;
        }
        if(got_nodes != numnodes)
                return -1;
        return 0;
}

/*
 * Run one benchmark kernel under a series of NUMA memory policies.
 *
 * Selects type as the current test and then calls memtest() once per
 * configuration, in this order: default allocation, local allocation,
 * interleaving over all nodes, allocation on each node, interleaving over
 * every multi-node mask, preferred-node policy for each node, manual
 * interleave masks, and finally a per-node section that pins execution to
 * the node and repeats a set of allocation policies from there.
 *
 * With regression_testing set, only nodes whose id is a multiple of
 * fract_nodes and only a sample of the interleave masks are exercised.
 * Every buffer is handed to memtest(), which frees it.
 */
void test(enum test type)
{
	unsigned long mask;
	int i, k;
	char buf[512];
	struct bitmask *nodes;

	nodes = numa_allocate_nodemask();
	thistest = type;

	if (regression_testing) {
		printf("\nTest %s doing 1 of %d nodes and 1 of %d masks.\n",
			testname[thistest], fract_nodes, FRACT_MASKS);
	}

	memtest("memory with no policy", numa_alloc(msize));
	memtest("local memory", numa_alloc_local(msize));

	memtest("memory interleaved on all nodes", numa_alloc_interleaved(msize));
	for (i = 0; i < numnodes; i++) {
		if (regression_testing && (node_to_use[i] % fract_nodes)) {
		/* for regression testing (-t) do only every eighth node */
			continue;
		}
		snprintf(buf, sizeof(buf), "memory on node %d", node_to_use[i]);
		memtest(buf, numa_alloc_onnode(msize, node_to_use[i]));
	}

	/* every mask over the low numnodes bits; single-node masks are skipped */
	for (mask = 1, i = 0; mask < (1UL<<numnodes); mask++, i++) {
		int w;
		char buf2[20];
		if (popcnt(mask) == 1)
			continue;
		if (regression_testing && (i > 50)) {
			break;
		}
		if (regression_testing && (i % FRACT_MASKS)) {
		/* for regression testing (-t)
			do only every 32nd mask permutation */
			continue;
		}
		numa_bitmask_clearall(nodes);
		for (w = 0; mask >> w; w++) {
			if ((mask >> w) & 1)
				numa_bitmask_setbit(nodes, w);
		}

		/* the label lists only those mask bits that are known node ids */
		snprintf(buf, sizeof(buf), "memory interleaved on");
		for (k = 0; k < numnodes; k++)
			if ((1UL<<node_to_use[k]) & mask) {
				snprintf(buf2, sizeof(buf2), " %d", node_to_use[k]);
				strcat(buf, buf2);
			}
		memtest(buf, numa_alloc_interleaved_subset(msize, nodes));
	}

	for (i = 0; i < numnodes; i++) {
		if (regression_testing && (node_to_use[i] % fract_nodes)) {
		/* for regression testing (-t) do only every eighth node */
			continue;
		}
		printf("setting preferred node to %d\n", node_to_use[i]);
		numa_set_preferred(node_to_use[i]);
		memtest("memory with preferred policy", numa_alloc(msize));
	}

	numa_set_interleave_mask(numa_all_nodes_ptr);
	memtest("manual interleaving to all nodes", numa_alloc(msize));

	/*
	 * The mask names nodes 0 and 1, so it needs at least two nodes; this
	 * matches the guard on the "memory interleaved on node 0/1" test below.
	 */
	if (numnodes >= 2) {
		numa_bitmask_clearall(nodes);
		numa_bitmask_setbit(nodes, 0);
		numa_bitmask_setbit(nodes, 1);
		numa_set_interleave_mask(nodes);
		memtest("manual interleaving on node 0/1", numa_alloc(msize));
		printf("current interleave node %d\n", numa_get_interleave_node());
	}

	numa_bitmask_free(nodes);

	/* switch interleaving off again before the per-node section */
	numa_set_interleave_mask(numa_no_nodes_ptr);

	nodes = numa_allocate_nodemask();

	for (i = 0; i < numnodes; i++) {
		int oldhn = numa_preferred();

		if (regression_testing && (node_to_use[i] % fract_nodes)) {
		/* for regression testing (-t) do only every eighth node */
			continue;
		}
		numa_run_on_node(node_to_use[i]);
		printf("running on node %d, preferred node %d\n",node_to_use[i], oldhn);

		memtest("local memory", numa_alloc_local(msize));

		memtest("memory interleaved on all nodes",
			numa_alloc_interleaved(msize));

		if (numnodes >= 2) {
			numa_bitmask_clearall(nodes);
			numa_bitmask_setbit(nodes, 0);
			numa_bitmask_setbit(nodes, 1);
			memtest("memory interleaved on node 0/1",
				numa_alloc_interleaved_subset(msize, nodes));
		}

		/* bind memory to each other node in turn while running here */
		for (k = 0; k < numnodes; k++) {
			if (node_to_use[k] == node_to_use[i])
				continue;
			if (regression_testing && (node_to_use[k] % fract_nodes)) {
			/* for regression testing (-t)
				do only every eighth node */
				continue;
			}
			snprintf(buf, sizeof(buf), "alloc on node %d", node_to_use[k]);
			numa_bitmask_clearall(nodes);
			numa_bitmask_setbit(nodes, node_to_use[k]);
			numa_set_membind(nodes);
			memtest(buf, numa_alloc(msize));
			numa_set_membind(numa_all_nodes_ptr);
		}

		numa_set_localalloc();
		memtest("local allocation", numa_alloc(msize));

		/* "wrong" is simply the next node in the list, wrapping around */
		numa_set_preferred(node_to_use[(i + 1) % numnodes]);
		memtest("setting wrong preferred node", numa_alloc(msize));
		numa_set_preferred(node_to_use[i]);
		memtest("setting correct preferred node", numa_alloc(msize));
		numa_set_preferred(-1);
		if (!delim[0])
			printf("\n\n\n");
	}
	numa_bitmask_free(nodes);
	/* numa_run_on_node_mask is not tested */
}

/*
 * Print the command-line synopsis followed by the names of the compiled-in
 * tests, then terminate the program with exit status 1.
 */
void usage(void)
{
	char **t;

	fputs("usage: numademo [-S] [-f] [-c] [-e] [-t] msize[kmg] {tests}\nNo tests means run all.\n", stdout);
	fputs("-c output CSV data. -f run even without NUMA API. -S run stupid tests. -e exit on error\n", stdout);
	fputs("-t regression test; do not run all node combinations\n", stdout);
	fputs("valid tests:", stdout);
	for (t = testname; *t; t++)
		printf(" %s", *t);
	fputs("\n", stdout);
	exit(1);
}

/* duplicated to make numademo standalone */
/*
 * Parse a size such as "4096", "0x1000", "64k", "16M" or "1g".
 *
 * The number is read with strtoul() in base 0 and scaled by an optional
 * K/M/G suffix (case-insensitive, powers of 1024).  Any other character
 * after the number is ignored.
 *
 * Returns the size in bytes, or 0 when the result does not fit in a
 * positive long.  That covers overflowing input and negative numbers,
 * which strtoul() wraps to a huge unsigned value.
 */
long memsize(char *s)
{
	/* largest positive long, written without needing <limits.h> */
	const unsigned long long_max = ~0UL >> 1;
	char *end;
	unsigned long length = strtoul(s, &end, 0);
	int shift = 0;

	/* cast: passing a negative plain char to toupper() is undefined */
	switch (toupper((unsigned char)*end)) {
	case 'G': shift = 30; break;
	case 'M': shift = 20; break;
	case 'K': shift = 10; break;
	default: break;
	}
	if (length > (long_max >> shift))
		return 0;
	return (long)(length << shift);
}

int main(int ac, char **av)
{
	int simple_tests = 0;

	while (av[1] && av[1][0] == '-') {
		ac--;
		switch (av[1][1]) {
		case 'c':
			delim = ",";
			break;
		case 'f':
			force = 1;
			break;
		case 'S':
			simple_tests = 1;
			break;
		case 'e':
			numa_exit_on_error = 1;
			numa_exit_on_warn = 1;
			break;
		case 't':
			regression_testing = 1;
			break;
		default:
			usage();
			break;
		}
		++av;
	}

	if (!av[1])
		usage();

	if (numa_available() < 0) {
		printf("your system does not support the numa API.\n");
		if (!force)
			exit(1);
	}
	if(get_node_list()){
		fprintf(stderr, "Configured Nodes does not match available memory nodes\n");
		exit(1);
	}

	printf("%d nodes available\n", numnodes);
	fract_nodes = (((numnodes-1)/8)*2) + FRACT_NODES;

	if (numnodes <= 3)
		regression_testing = 0; /* set -t auto-off for small systems */

	msize = memsize(av[1]);

	if (!msize)
		usage();

#ifdef HAVE_STREAM_LIB
	stream_setmem(msize);
#endif

	if (av[2] == NULL) {
		test(MEMSET);
		test(MEMCPY);
		if (simple_tests) {
			test(FORWARD);
			test(BACKWARD);
		}
#ifdef HAVE_MT
		test(RANDOM2);
#endif
#ifdef HAVE_STREAM_LIB
		test(STREAM);
#endif
		if (msize >= sizeof(union node)) {
			test(PTRCHASE);
		} else {
			fprintf(stderr, "You must set msize at least %lu bytes for ptrchase test.\n",
				sizeof(union node));
			exit(1);
		}
	} else {
		int k;
		for (k = 2; k < ac; k++) {
			int i;
			int found = 0;
			for (i = 0; testname[i]; i++) {
				if (!strcmp(testname[i],av[k])) {
					test(i);
					found = 1;
					break;
				}
			}
			if (!found) {
				fprintf(stderr,"unknown test `%s'\n", av[k]);
				usage();
			}
		}
	}
	free(node_to_use);
	return 0;
}

  系统运维 最新文章
配置小型公司网络WLAN基本业务(AC通过三层
如何在交付运维过程中建立风险底线意识,提
快速传输大文件,怎么通过网络传大文件给对
从游戏服务端角度分析移动同步(状态同步)
MySQL使用MyCat实现分库分表
如何用DWDM射频光纤技术实现200公里外的站点
国内顺畅下载k8s.gcr.io的镜像
自动化测试appium
ctfshow ssrf
Linux操作系统学习之实用指令(Centos7/8均
上一篇文章      下一篇文章      查看所有文章
加:2022-04-26 12:16:02  更:2022-04-26 12:18:36 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2025年1日历 -2025/1/6 22:42:39-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码