Contains

Submission Date: 2024/9/23 16:29:44
Status: Accepted
Author: suzumushi
Lines: 53

VSM

# Overview (derived from the comments in this listing; NOTE(review): confirm
# against the problem statement): the ln[0:31] and lm[0:31] inputs are
# converted to floating point, packed, and compared pairwise by subtraction;
# a zero difference marks an equal pair. The per-element "found" flags are
# then written to ln[32:63].
# This is straight-line code: most lines pair two instructions with ';', and
# many operands are the forwarding registers $aluf / $mauf / $lbf, so the
# order of the lines must not be changed.
#
# Constants
imm i"16" $lr32v $ls32v $t # shift amount 16 in $lr32v, $ls32v and $t; $t is reused later as the initial min value. NOTE(review): the original remark said "s32 unused", but $ls32v is read by the final uilsr
imm f"8388608" $r48v $s48v # $[rs]48v: floating pattern (8388608 = 2^23); OR-ed into the integer inputs and subtracted again by the fvaddr instructions below
imm i"32" $lr40v $ls40v # shift amount 32
# convert ln[0:31] to half-precision floats
# ls[0:31] hold vectorized half-precision values
# ($lr16 and $lr24 are temporarily used to avoid register conflicts)
# Each fvaddr below reads $aluf, i.e. presumably the result of the ior
# issued on the previous line (software pipelining; confirm with the ISA manual).
ior $n1v2 $r48v $nowrite
ior $n9v2 $r48v $nowrite; fvaddr $aluf -$r48v $s0v2
ior $n17v2 $r48v $nowrite; fvaddr $aluf -$r48v $r16v2 # $s8 -> $r16
ior $n25v2 $r48v $nowrite; fvaddr $aluf -$r48v $s16v2
# convert lm[0:31] to half-precision floats (the ior operands switch to $m... here)
# lr[0:7] hold vectorized half-precision values
ior $m1v2 $r48v $nowrite; fvaddr $aluf -$r48v $r24v2 # $s24 -> $r24 (this fvaddr still writes the last ln group)
ior $m9v2 $r48v $nowrite; fvaddr $aluf -$r48v $r0v
ior $m17v2 $r48v $nowrite; fvaddr $aluf -$r48v $r4v
ior $m25v2 $r48v $nowrite; fvaddr $aluf -$r48v $s56v # $r8 -> $s56
# vectorization ls[0:31]
# Pattern: shift by $t (16), then by $ls40v / $lr40v (32), each paired with an
# hvaddr that combines a register with its "e"-suffixed form.
# NOTE(review): exact packing semantics of the "e" suffix are not visible here.
ullsr $ls0v $t $nowrite; fvaddr $aluf -$r48v $s60v # ditto (last lm group, redirected like the line above)
ullsr $lr16v $t $nowrite; hvaddr $aluf $ls0ve $ls0v
ullsr $mauf $ls40v $nowrite; hvaddr $aluf $lr16ve $lr16v
ullsr $mauf $lr40v $nowrite; hvaddr $aluf $ls0ve $ls0v
ullsr $ls16v $t $nowrite; hvaddr $aluf $lr16ve $ls8v
ullsr $lr24v $t $nowrite; hvaddr $aluf $ls16ve $ls16v
ullsr $mauf $ls40v $nowrite; hvaddr $aluf $lr24ve $lr24v
ullsr $mauf $lr40v $nowrite; hvaddr $aluf $ls16ve $ls16v
ullsr $ls56v $t $nowrite; hvaddr $aluf $lr24ve $ls24v
hvaddr $aluf $lr0ve $lr0v; snot $t $t # initialize results to non-0 values (sinc $t $t is ok); $t no longer holds the shift amount after this line
# compare lm[0:31] and ln[0:31] values with (ln - lm),
# so float/int 0 results means the same value
# compare results are updated by unsigned min
# One hvaddr per ls pair ($ls0e .. $ls30e, 16 lines); the paired usmin folds the
# previous difference ($mauf) into the running minimum ($t first, then $aluf).
hvaddr -$mauf $ls0e $nowrite; imm ui"0x0000FFFF" $ls56v # $ls56v: output mask (overwrites the value loaded into $s56 earlier)
hvaddr -$lr0v $ls2e $nowrite; usmin $mauf $t $nowrite
hvaddr -$lr0v $ls4e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls6e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls8e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls10e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls12e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls14e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls16e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls18e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls20e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls22e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls24e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls26e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls28e $nowrite; usmin $mauf $aluf $nowrite
hvaddr -$lr0v $ls30e $nowrite; usmin $mauf $aluf $nowrite
usmin $mauf $aluf $nowrite # fold in the last difference (from the $ls30e line)
# compile MAB and PE0-3 results
slnot $aluf $nowrite # convert results logic, 1 means the same value was found.
l1bmrlbor $aluf $lb0
l1bmm $lbi $lr16v # $lr16v: results register
# Three msr/sor rounds OR further partial results into $lr16v.
# NOTE(review): presumably one round per remaining PE; confirm msr semantics.
msr $lbf $nowrite
sor $aluf $lr16v $lr16v
msr $aluf $nowrite
sor $aluf $lr16v $lr16v
msr $aluf $nowrite
sor $aluf $lr16v $lr16v
# move results to ln[32:63]
iand $aluf $ls56v $lr24v # masked ($ls56v = 0x0000FFFF) part of the result -> $lr24v
uilsr $lr16v $ls32v $lr16v # result shifted right by $ls32v (16) -> $lr16v
ipassa $r24v $n49v2
ipassa $r28v $n57v2
ipassa $r16v $n33v2
ipassa $r20v $n41v2
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Standard Output

ACCEPTED!! score=53 j=53 m=0 bytes=2372
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Standard Error

------------------- vsm --------------------
# ======= In(0): ((8_L2B:1, 8_L1B:1, 16:1))@LM0 / ULong =======
d set $lm0n0c0b0 1 000000000000002F # values=[47] / ULong @[0]
d set $lm2n0c0b0 1 0000000000000066 # values=[102] / ULong @[1]
d set $lm4n0c0b0 1 000000000000029E # values=[670] / ULong @[2]
d set $lm6n0c0b0 1 0000000000000100 # values=[256] / ULong @[3]
d set $lm8n0c0b0 1 0000000000000021 # values=[33] / ULong @[4]
d set $lm10n0c0b0 1 0000000000000305 # values=[773] / ULong @[5]
d set $lm12n0c0b0 1 00000000000003B9 # values=[953] / ULong @[6]
d set $lm14n0c0b0 1 00000000000002F3 # values=[755] / ULong @[7]
d set $lm16n0c0b0 1 000000000000035B # values=[859] / ULong @[8]
d set $lm18n0c0b0 1 0000000000000160 # values=[352] / ULong @[9]
d set $lm20n0c0b0 1 00000000000000F4 # values=[244] / ULong @[10]
d set $lm22n0c0b0 1 000000000000005D # values=[93] / ULong @[11]
d set $lm24n0c0b0 1 00000000000003A2 # values=[930] / ULong @[12]
d set $lm26n0c0b0 1 0000000000000085 # values=[133] / ULong @[13]
d set $lm28n0c0b0 1 00000000000002B0 # values=[688] / ULong @[14]
d set $lm30n0c0b0 1 000000000000004C # values=[76] / ULong @[15]
d set $lm0n0c0b1 1 0000000000000240 # values=[576] / ULong @[16]
d set $lm2n0c0b1 1 0000000000000072 # values=[114] / ULong @[17]
d set $lm4n0c0b1 1 000000000000017A # values=[378] / ULong @[18]
d set $lm6n0c0b1 1 00000000000000FF # values=[255] / ULong @[19]
d set $lm8n0c0b1 1 000000000000030A # values=[778] / ULong @[20]
d set $lm10n0c0b1 1 000000000000011C # values=[284] / ULong @[21]
d set $lm12n0c0b1 1 0000000000000187 # values=[391] / ULong @[22]
d set $lm14n0c0b1 1 0000000000000128 # values=[296] / ULong @[23]
 
 
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX