Contains

Submission Date: 2024/9/21 17:06:19
Status: Accepted
Author: rillomas
Lines: 138

VSM

# Overview of the data layout and the plan:
# A is different per L1B (16 values per L1B, 8 L1Bs, 8 L2Bs, so 16*8*8=1024 values in total)
# B has the same values in every L1B, with 1024 values per L1B
# (16 values per PE, 4 PEs, 16 MABs, so 16*4*16=1024 values in total)
# This means we need to check whether each of the 16 values is contained in that L1B's copy of the 1024 values
# Check whether each of the 16 values is contained within a MAB and store 16 results (16*16=256 per L1B)
# Reduce the 16 results per value to one (16 results per L1B)
# Reduce the size of the input/search area to short, since short is enough
# Replicate input values to a short array
# Create long constants (only the lower 32 bits are written)
# The code below reads these as the long operands $ls258 (32), $ls260 (16),
# $ls256 (48) and $ls262 (0xff); presumably $s259/$s261/$s257/$s263 are the
# low words of those long registers -- TODO confirm against the ISA manual.
imm i"32" $s259
imm i"16" $s261
imm i"48" $s257
imm i"0xff" $s263
# d get $ls256n0c0b0m0p0 4
# Step 1: shift every input long in $lm left by 32 ($ls258) into $lr ...
# (the four instructions cover $lm0v/$lm8v/$lm16v/$lm24v, i.e. presumably all
# 16 input longs, 4 per vector operand)
llsl $lm0v $ls258 $lr0v
llsl $lm8v $ls258 $lr8v
llsl $lm16v $ls258 $lr16v
llsl $lm24v $ls258 $lr24v
# ... and OR the shifted copy with the original, so the value now sits in both
# 32-bit halves of each $lr long.
lor $lr0v $lm0v $lr0v
lor $lr8v $lm8v $lr8v
lor $lr16v $lm16v $lr16v
lor $lr24v $lm24v $lr24v
# d get $lr0n0c0b0m0p0 16
# Step 2: same trick with a shift of 16 ($ls260): shifted copy goes to $ls ...
llsl $lr0v $ls260 $ls0v
llsl $lr8v $ls260 $ls8v
llsl $lr16v $ls260 $ls16v
llsl $lr24v $ls260 $ls24v
# ... and the OR is written back over the input in $lm, which now holds the
# value replicated in all four 16-bit fields of each long.
# NOTE(review): no masking is done, so this relies on every input fitting in 16 bits.
lor $lr0v $ls0v $lm0v
lor $lr8v $ls8v $lm8v
lor $lr16v $ls16v $lm16v
lor $lr24v $ls24v $lm24v
# d get $lm0n0c0b0m0p0 16
# Convert search area to ushort
# Four source operands starting at $ln0/$ln2/$ln4/$ln6 are shifted left by
# 48 ($ls256), 32 ($ls258), 16 ($ls260) and 0 (lpassa) respectively, so that
# each lands in its own 16-bit field.
# NOTE(review): the "v8" suffix presumably selects a stride-8 access so that
# every 4th search long is picked up -- confirm against the ISA manual.
llsl $ln0v8 $ls256 $lr0v
llsl $ln2v8 $ls258 $ls0v
llsl $ln4v8 $ls260 $lr8v
lpassa $ln6v8 $ls8v
# d get $lr0n0c0b0m0p0 8
# d get $ls0n0c0b0m0p0 8
# OR the four shifted pieces together pairwise; the packed result overwrites $ln0v.
# The nops presumably give the previous result time to become readable -- TODO confirm.
lor $lr0v $ls0v $lr0v
nop
lor $lr8v $ls8v $ls0v
nop
lor $lr0v $ls0v $ln0v
# d get $ln0n0c0b0m0p0 4
# Subtract each value in M from N
# Each of the four packed search longs ($ln0, $ln2, $ln4, $ln6) is combined
# with all four replicated-input vectors ($lm0v..$lm24v), giving 16 result
# vectors laid out as:
#   $ln0 -> $lr0v..$lr24v    $ln2 -> $ls0v..$ls24v
#   $ln4 -> $lr32v..$lr56v   $ln6 -> $ls32v..$ls56v
# NOTE(review): ssub is presumably a lane-wise subtract on the 16-bit fields,
# which the packing above depends on -- confirm against the ISA manual.
# The two leading nops presumably wait for the packed $ln0v write to land.
nop
nop
ssub $ln0 $lm0v $lr0v
ssub $ln0 $lm8v $lr8v
ssub $ln0 $lm16v $lr16v
ssub $ln0 $lm24v $lr24v
ssub $ln2 $lm0v $ls0v
ssub $ln2 $lm8v $ls8v
ssub $ln2 $lm16v $ls16v
ssub $ln2 $lm24v $ls24v
ssub $ln4 $lm0v $lr32v
ssub $ln4 $lm8v $lr40v
ssub $ln4 $lm16v $lr48v
ssub $ln4 $lm24v $lr56v
ssub $ln6 $lm0v $ls32v
ssub $ln6 $lm8v $ls40v
ssub $ln6 $lm16v $ls48v
ssub $ln6 $lm24v $ls56v
# d get $lr0n0c0b0m0p0 64
# If the leftover is zero we have a match, so the output is 1
# If the leftover is not zero it's a mismatch, so we output 0
# (slnot is applied in place to all 16 result vectors.)
slnot $lr0v $lr0v
slnot $lr8v $lr8v
slnot $lr16v $lr16v
slnot $lr24v $lr24v
slnot $ls0v $ls0v
slnot $ls8v $ls8v
slnot $ls16v $ls16v
slnot $ls24v $ls24v
slnot $lr32v $lr32v
slnot $lr40v $lr40v
slnot $lr48v $lr48v
slnot $lr56v $lr56v
slnot $ls32v $ls32v
slnot $ls40v $ls40v
slnot $ls48v $ls48v
slnot $ls56v $ls56v
# d get $lr0n0c0b0m0p0 16
# d get $ls0n0c0b0m0p0 16
# d get $lr32n0c0b0m0p0 16
# d get $ls32n0c0b0m0p0 16
# Reduce each search result to 4 values (16 -> 4 per value, 4*16=64 total)
# First OR the $ln0 results with the $ln2 results (into $lr0v..$lr24v) ...
lor $lr0v $ls0v $lr0v
lor $lr8v $ls8v $lr8v
lor $lr16v $ls16v $lr16v
lor $lr24v $ls24v $lr24v
# ... then the $ln4 results with the $ln6 results (into $ls0v..$ls24v) ...
lor $lr32v $ls32v $ls0v
lor $lr40v $ls40v $ls8v
lor $lr48v $ls48v $ls16v
lor $lr56v $ls56v $ls24v
# ... and finally OR both partial results together into $lr0v..$lr24v.
lor $lr0v $ls0v $lr0v
lor $lr8v $ls8v $lr8v
lor $lr16v $ls16v $lr16v
lor $lr24v $ls24v $lr24v
# d get $lr0n0c0b0m0p0 16
# Reduce each search result to one 16bit value (4 -> 1) and
# merge all results to 4 long registers (16bit x 4 x4 = 16 results)
# Fold the upper 32 bits onto the lower 32 bits: shift right by 32 ($ls258) ...
llsr $lr0v $ls258 $ls0v
llsr $lr8v $ls258 $ls8v
llsr $lr16v $ls258 $ls16v
llsr $lr24v $ls258 $ls24v
# d get $ls0n0c0b0m0p0 16
# 4 -> 2 results in last 32bit
lor $lr0v $ls0v $lr0v
lor $lr8v $ls8v $lr8v
lor $lr16v $ls16v $lr16v
lor $lr24v $ls24v $lr24v
# d get $lr0n0c0b0m0p0 16
# 2 -> 1 result in last 16 bit
# Same fold again with a shift of 16 ($ls260).
llsr $lr0v $ls260 $ls0v
llsr $lr8v $ls260 $ls8v
llsr $lr16v $ls260 $ls16v
llsr $lr24v $ls260 $ls24v
lor $lr0v $ls0v $lr0v
lor $lr8v $ls8v $lr8v
lor $lr16v $ls16v $lr16v
lor $lr24v $ls24v $lr24v
# Mask with $ls262 (low word loaded with 0xff at the top) to drop the leftover
# upper fields; the masked per-value result is written to $lm0v..$lm24v.
# NOTE(review): assumes the upper word of $ls262 is zero -- confirm.
land $lr0v $ls262 $lm0v
land $lr8v $ls262 $lm8v
land $lr16v $ls262 $lm16v
land $lr24v $ls262 $lm24v
# d get $lm0n0c0b0m0p0 16
# Merge sixteen 16bit values to 4 consecutive long registers
# Same shift-and-OR packing as used for the search area, applied to the
# per-value results in $lm: operands starting at $lm0/$lm2/$lm4/$lm6 are
# shifted left by 48/32/16/0 and then ORed together into $lm0v.
# The nops presumably wait for the preceding writes to become readable -- TODO confirm.
nop
nop
llsl $lm0v8 $ls256 $lr0v
llsl $lm2v8 $ls258 $ls0v
llsl $lm4v8 $ls260 $lr8v
lpassa $lm6v8 $ls8v
# d get $lr0n0c0b0m0p0 8
# d get $ls0n0c0b0m0p0 8
lor $lr0v $ls0v $lr0v
nop
lor $lr8v $ls8v $ls0v
nop
lor $lr0v $ls0v $lm0v
# d get $lm0n0c0b0m0p0 4
# Reduce between all MABs
# NOTE(review): l1bmrsbor is presumably an OR-reduction of $lm0v across MABs
# into the L1B buffer at $lb0 -- confirm against the ISA manual.
nop
nop
l1bmrsbor $lm0v $lb0
# We have 16x4 values left so we broadcast that to each MAB
l1bmm $lbi $lr0v
# d get $lr0n0c0b0m0 4
# Now we need to merge between each PE for the final value
# Three chained msl steps pass the data along $lr0v -> $ls0v -> $lm0v -> $ln0v.
# NOTE(review): msl presumably moves data between neighbouring PEs, so the
# four registers end up holding the four PEs' contributions -- confirm.
nop
nop
msl $lr0v $ls0v
# d get $lr0n0c0b0m0 1
# d get $ls0n0c0b0m0 1
nop
msl $ls0v $lm0v
# d get $ls0n0c0b0m0 1
# d get $lm0n0c0b0m0 1
nop
nop
msl $lm0v $ln0v
# d get $lr0n0c0b0m0 1
# d get $ls0n0c0b0m0 1
# d get $lm0n0c0b0m0 1
# d get $ln0n0c0b0m0 1
# OR all four values
# ($lr0v | $ls0v) and ($lm0v | $ln0v) are formed first, then ORed into $lr0v.
lor $lr0v $ls0v $lr0v
nop
lor $lm0v $ln0v $ls0v
nop
nop
lor $ls0v $lr0v $lr0v
# d get $lr0n0c0b0m0p0 4
# Extend each short value to long and write to output
# Reload the shift constants and the 0xff mask.
# NOTE(review): unlike the top of the program, the immediates here target the
# long names $ls258/$ls260 rather than the low words $s259/$s261, and the mask
# goes to $r263 (read below as $lr262) instead of $s263. $ls256 (48) is not
# reloaded and is taken from the initial setup. Confirm both spellings are equivalent.
imm i"32" $ls258
imm i"16" $ls260
imm i"0xff" $r263
nop
nop
# Unpack: shift the packed results right by 48/32/16/0 so that each 16-bit
# field in turn lands in the low bits ($ls0v, $ls8v, $ls16v, $ls24v).
llsr $lr0v $ls256 $ls0v
llsr $lr0v $ls258 $ls8v
llsr $lr0v $ls260 $ls16v
lpassa $lr0v $ls24v
# d get $lr262n0c0b0m0p0 1
# d get $ls0n0c0b0m0p0 16
nop
# Mask each unpacked value with $lr262 and store to $ln32v..$ln56v.
# NOTE(review): the "v8" reads presumably interleave the four unpacked groups
# back into original value order; $ln32.. is presumably the output area -- confirm.
land $ls0v8 $lr262 $ln32v
land $ls2v8 $lr262 $ln40v
land $ls4v8 $lr262 $ln48v
land $ls6v8 $lr262 $ln56v
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Standard Output

ACCEPTED!! score=138 j=138 m=0 bytes=2718
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Standard Error

------------------- vsm --------------------
# ======= In(0): ((8_L2B:1, 8_L1B:1, 16:1))@LM0 / ULong =======
d set $lm0n0c0b0 1 000000000000002F # values=[47] / ULong @[0]
d set $lm2n0c0b0 1 0000000000000066 # values=[102] / ULong @[1]
d set $lm4n0c0b0 1 000000000000029E # values=[670] / ULong @[2]
d set $lm6n0c0b0 1 0000000000000100 # values=[256] / ULong @[3]
d set $lm8n0c0b0 1 0000000000000021 # values=[33] / ULong @[4]
d set $lm10n0c0b0 1 0000000000000305 # values=[773] / ULong @[5]
d set $lm12n0c0b0 1 00000000000003B9 # values=[953] / ULong @[6]
d set $lm14n0c0b0 1 00000000000002F3 # values=[755] / ULong @[7]
d set $lm16n0c0b0 1 000000000000035B # values=[859] / ULong @[8]
d set $lm18n0c0b0 1 0000000000000160 # values=[352] / ULong @[9]
d set $lm20n0c0b0 1 00000000000000F4 # values=[244] / ULong @[10]
d set $lm22n0c0b0 1 000000000000005D # values=[93] / ULong @[11]
d set $lm24n0c0b0 1 00000000000003A2 # values=[930] / ULong @[12]
d set $lm26n0c0b0 1 0000000000000085 # values=[133] / ULong @[13]
d set $lm28n0c0b0 1 00000000000002B0 # values=[688] / ULong @[14]
d set $lm30n0c0b0 1 000000000000004C # values=[76] / ULong @[15]
d set $lm0n0c0b1 1 0000000000000240 # values=[576] / ULong @[16]
d set $lm2n0c0b1 1 0000000000000072 # values=[114] / ULong @[17]
d set $lm4n0c0b1 1 000000000000017A # values=[378] / ULong @[18]
d set $lm6n0c0b1 1 00000000000000FF # values=[255] / ULong @[19]
d set $lm8n0c0b1 1 000000000000030A # values=[778] / ULong @[20]
d set $lm10n0c0b1 1 000000000000011C # values=[284] / ULong @[21]
d set $lm12n0c0b1 1 0000000000000187 # values=[391] / ULong @[22]
d set $lm14n0c0b1 1 0000000000000128 # values=[296] / ULong @[23]
 
 
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX