Matrix Square

Submission Date: 2024/9/18 23:02:48
Status: Accepted
Author: okaduki
Lines: 64

VSM

# Matrix Square kernel: 64 straight-line instructions (no branches) in four
# phases -- transpose, gbfn on operand B, gbfn on operand A, block matmul.
#
# Conventions visible in the operands below:
#   * Every register operand is a group of four addresses. The base groups are
#       +0  = [0,4,16,20]     +2  = [2,6,18,22]
#       +8  = [8,12,24,28]    +10 = [10,14,26,30]
#     and each has a twin at +32 ([32,36,48,52], [34,38,50,54], ...).
#   * The base groups go through $ly0 / $lx0, the +32 twins through $ly4 / $lx4.
#   * The destination is the last operand of every instruction.
# NOTE(review): the exact semantics of gmwrite/gmread/gbfn/gmmul/gmfma are not
# shown in this file; the descriptions below follow the operand patterns and
# the original author's section labels -- confirm against the ISA reference.

# --- Phase 1: transpose ($lm -> $ls) ---------------------------------------
# Each group is pushed into $ly0/$ly4 with gmwrite and pulled back out into
# $ls with gmread; per the original "transpose" label this round trip is what
# transposes the data inside a group.
# Group placement: +0 -> +0 and +10 -> +10 keep their addresses, while +2 and
# +8 trade places (consistent with a 2x2 block transpose).
# group +0 -> +0
gmwrite $lm[0,4,16,20] $ly0
gmwrite $lm[32,36,48,52] $ly4
gmread $ly0 $ls[0,4,16,20]
gmread $ly4 $ls[32,36,48,52]
# group +2 -> +8 (swapped)
gmwrite $lm[2,6,18,22] $ly0
gmwrite $lm[34,38,50,54] $ly4
gmread $ly0 $ls[8,12,24,28]
gmread $ly4 $ls[40,44,56,60]
# group +8 -> +2 (swapped)
gmwrite $lm[8,12,24,28] $ly0
gmwrite $lm[40,44,56,60] $ly4
gmread $ly0 $ls[2,6,18,22]
gmread $ly4 $ls[34,38,50,54]
# group +10 -> +10
gmwrite $lm[10,14,26,30] $ly0
gmwrite $lm[42,46,58,62] $ly4
gmread $ly0 $ls[10,14,26,30]
gmread $ly4 $ls[42,46,58,62]

# --- Phase 2: gbfn on B, in place in $ls -----------------------------------
# B is the transposed copy produced above; source and destination groups are
# identical, so $ls is overwritten with the gbfn result.
# NOTE(review): presumably gbfn converts the values into the format the matrix
# instructions expect -- TODO confirm.
gbfn $ls[0,4,16,20] $ls[0,4,16,20]
gbfn $ls[32,36,48,52] $ls[32,36,48,52]
gbfn $ls[2,6,18,22] $ls[2,6,18,22]
gbfn $ls[34,38,50,54] $ls[34,38,50,54]
gbfn $ls[8,12,24,28] $ls[8,12,24,28]
gbfn $ls[40,44,56,60] $ls[40,44,56,60]
gbfn $ls[10,14,26,30] $ls[10,14,26,30]
gbfn $ls[42,46,58,62] $ls[42,46,58,62]

# --- Phase 3: gbfn on A, $lm -> $lr ----------------------------------------
# A is the untransposed input; the result lands at the same addresses in $lr,
# and $lm is only read here.
gbfn $lm[0,4,16,20] $lr[0,4,16,20]
gbfn $lm[32,36,48,52] $lr[32,36,48,52]
gbfn $lm[2,6,18,22] $lr[2,6,18,22]
gbfn $lm[34,38,50,54] $lr[34,38,50,54]
gbfn $lm[8,12,24,28] $lr[8,12,24,28]
gbfn $lm[40,44,56,60] $lr[40,44,56,60]
gbfn $lm[10,14,26,30] $lr[10,14,26,30]
gbfn $lm[42,46,58,62] $lr[42,46,58,62]

# --- Phase 4: matmul (result in $ln) ---------------------------------------
# Each output group takes two steps of four instructions:
#   1. gmwrite an A group into $lx0/$lx4, then gmmul $lx with a B group into a
#      temporary at $lr[128 + output offset] (+32 for the twin).
#   2. gmwrite the second A group into $lx0/$lx4, then gmfma $lx with the
#      second B group, using the temporary as the third operand, into $ln.
# With A = $lr, B = $ls, and "*" standing for whatever product gmmul/gmfma
# compute, the four results follow the 2x2 block-product pattern:
#   ln[+0]  = A[+0]*B[+0] + A[+2] *B[+2]
#   ln[+8]  = A[+0]*B[+8] + A[+2] *B[+10]
#   ln[+2]  = A[+8]*B[+0] + A[+10]*B[+2]
#   ln[+10] = A[+8]*B[+8] + A[+10]*B[+10]
# $lx is reloaded before every multiply because the previous step overwrote it.

# ln[+0]: first product into the temporary at lr[128,...]
gmwrite $lr[0,4,16,20] $lx0
gmwrite $lr[32,36,48,52] $lx4
gmmul $lx $ls[0,4,16,20] $lr[128,132,144,148]
gmmul $lx $ls[32,36,48,52] $lr[160,164,176,180]
# ln[+0]: second product combined with the temporary
gmwrite $lr[2,6,18,22] $lx0
gmwrite $lr[34,38,50,54] $lx4
gmfma $lx $ls[2,6,18,22] $lr[128,132,144,148] $ln[0,4,16,20]
gmfma $lx $ls[34,38,50,54] $lr[160,164,176,180] $ln[32,36,48,52]
# ln[+8]: first product into the temporary at lr[136,...]
gmwrite $lr[0,4,16,20] $lx0
gmwrite $lr[32,36,48,52] $lx4
gmmul $lx $ls[8,12,24,28] $lr[136,140,152,156]
gmmul $lx $ls[40,44,56,60] $lr[168,172,184,188]
# ln[+8]: second product combined with the temporary
gmwrite $lr[2,6,18,22] $lx0
gmwrite $lr[34,38,50,54] $lx4
gmfma $lx $ls[10,14,26,30] $lr[136,140,152,156] $ln[8,12,24,28]
gmfma $lx $ls[42,46,58,62] $lr[168,172,184,188] $ln[40,44,56,60]
# ln[+2]: first product into the temporary at lr[130,...]
gmwrite $lr[8,12,24,28] $lx0
gmwrite $lr[40,44,56,60] $lx4
gmmul $lx $ls[0,4,16,20] $lr[130,134,146,150]
gmmul $lx $ls[32,36,48,52] $lr[162,166,178,182]
# ln[+2]: second product combined with the temporary
gmwrite $lr[10,14,26,30] $lx0
gmwrite $lr[42,46,58,62] $lx4
gmfma $lx $ls[2,6,18,22] $lr[130,134,146,150] $ln[2,6,18,22]
gmfma $lx $ls[34,38,50,54] $lr[162,166,178,182] $ln[34,38,50,54]
# ln[+10]: first product into the temporary at lr[138,...]
gmwrite $lr[8,12,24,28] $lx0
gmwrite $lr[40,44,56,60] $lx4
gmmul $lx $ls[8,12,24,28] $lr[138,142,154,158]
gmmul $lx $ls[40,44,56,60] $lr[170,174,186,190]
# ln[+10]: second product combined with the temporary
gmwrite $lr[10,14,26,30] $lx0
gmwrite $lr[42,46,58,62] $lx4
gmfma $lx $ls[10,14,26,30] $lr[138,142,154,158] $ln[10,14,26,30]
gmfma $lx $ls[42,46,58,62] $lr[170,174,186,190] $ln[42,46,58,62]
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Standard Output

ACCEPTED!! score=64 j=64 m=0 bytes=2424
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Standard Error

------------------- vsm --------------------
# ======= In(0): ((8_L2B:1), (4_PE:1, 2:1, 2_W:1), (16:2))@LM0 / Float =======
d set $lm0n0c0p0 1 422C000042140000 # values=[43.0, 37.0] / Float @[0,0,0],[0,1,0]
d set $lm4n0c0p0 1 42BE000042640000 # values=[95.0, 57.0] / Float @[0,0,1],[0,1,1]
d set $lm8n0c0p0 1 4296000042400000 # values=[75.0, 48.0] / Float @[0,0,2],[0,1,2]
d set $lm12n0c0p0 1 427C000042100000 # values=[63.0, 36.0] / Float @[0,0,3],[0,1,3]
d set $lm16n0c0p0 1 41C0000042820000 # values=[24.0, 65.0] / Float @[0,0,4],[0,1,4]
d set $lm20n0c0p0 1 41C0000041B00000 # values=[24.0, 22.0] / Float @[0,0,5],[0,1,5]
d set $lm24n0c0p0 1 4170000042100000 # values=[15.0, 36.0] / Float @[0,0,6],[0,1,6]
d set $lm28n0c0p0 1 42AE000042280000 # values=[87.0, 42.0] / Float @[0,0,7],[0,1,7]
d set $lm32n0c0p0 1 42800000424C0000 # values=[64.0, 51.0] / Float @[0,0,8],[0,1,8]
d set $lm36n0c0p0 1 4292000042A00000 # values=[73.0, 80.0] / Float @[0,0,9],[0,1,9]
d set $lm40n0c0p0 1 4130000041D80000 # values=[11.0, 27.0] / Float @[0,0,10],[0,1,10]
d set $lm44n0c0p0 1 42C2000042600000 # values=[97.0, 56.0] / Float @[0,0,11],[0,1,11]
d set $lm48n0c0p0 1 42A80000427C0000 # values=[84.0, 63.0] / Float @[0,0,12],[0,1,12]
d set $lm52n0c0p0 1 41E8000041600000 # values=[29.0, 14.0] / Float @[0,0,13],[0,1,13]
d set $lm56n0c0p0 1 41D0000042800000 # values=[26.0, 64.0] / Float @[0,0,14],[0,1,14]
d set $lm60n0c0p0 1 41D0000041C80000 # values=[26.0, 25.0] / Float @[0,0,15],[0,1,15]
d set $lm2n0c0p0 1 41700000426C0000 # values=[15.0, 59.0] / Float @[0,2,0],[0,3,0]
d set $lm6n0c0p0 1 42BE000041D00000 # values=[95.0, 26.0] / Float @[0,2,1],[0,3,1]
d set $lm10n0c0p0 1 42C0000042C20000 # values=[96.0, 97.0] / Float @[0,2,2],[0,3,2]
d set $lm14n0c0p0 1 42A40000429E0000 # values=[82.0, 79.0] / Float @[0,2,3],[0,3,3]
d set $lm18n0c0p0 1 4214000042BC0000 # values=[37.0, 94.0] / Float @[0,2,4],[0,3,4]
d set $lm22n0c0p0 1 4190000042B40000 # values=[18.0, 90.0] / Float @[0,2,5],[0,3,5]
d set $lm26n0c0p0 1 428E0000427C0000 # values=[71.0, 63.0] / Float @[0,2,6],[0,3,6]
d set $lm30n0c0p0 1 4244000042B80000 # values=[49.0, 92.0] / Float @[0,2,7],[0,3,7]
 
 
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX