------------------- vsm --------------------
# Hardware hierarchy (unit count, id variable):
# L2B: 8 units, id $l2bid
# L1B: 8 units, id $l1bid
# MAB: 16 units, id $mabid
# PE: 4 units, id $subpeid
# $mabid * 4 + $subpeid = $peid
#
# Data movement steps and the instruction used for each:
# PDM -> DRAM single-unicast = mvp/n64 $p0@0 $d0@0
# L2BM(64) -> PDM concat(512) = mvd, issued 1 time
# L1BM(32) -> L2BM(256) concat = l2bmd, issued 8 times
# PE(4) -> L1BM(256) concat = l1bmd, issued 1 time
#
# Address mapping between memory levels:
# l1bmpos($lmpos,$peid) = $lmpos * 64 + $peid
# l2bmpos($l1bmpos) = 64 * $ncycle + 8 * $l1bid + $cycleoffset
# where
# $ncycle = $l1bmpos / 8
# $cycleoffset = $l1bmpos % 8
# pdmpos($l2bmpos) = 128 * $ncycle + 16 * $l2bid + $cycleoffset
# where
# $ncycle = $l2bmpos / 16
# $cycleoffset = $l2bmpos % 16
#
# Division/multiplication by 8 is expressed with shifts:
# /8 = >> 3
# *8 = << 3
#
# Load the unsigned immediate 3 into $r1/1000 and $s3.
# NOTE(review): presumably this is the shift amount for the >> 3 / << 3
# operations noted above -- confirm against the instructions that consume it.
# The meaning of the "/1000" suffix on $r1 is not visible in this chunk.
imm ui"3" $r1/1000 $s3