header

normal mode unavailable here

root/nn/match-A/out

--- Directories ---
dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu-animdim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-animdim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc-animdim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu-animdim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-animdim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc-anim
--- Files ---
dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dotsim.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npyext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_loss-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngext_raw-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-new.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dotsim-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero2-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=hetero3-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse+dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.png