header

normal mode unavailable here

root/nn/restoreJ-no-help-5/out

--- Directories ---
--- Files ---
.loss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.png.t5fyVwdim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=basic_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=extra_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded_loss_mse.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_cos.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_dot.npydim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False_loss_mse.npyloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc---.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu---.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp---.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-cos-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-dot-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.00390625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.015625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.0625-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu---.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=0.25-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp---.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=cos-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=dot-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mix-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.0001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=adam-bwd_lr=0.001-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.01-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=basic.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=deep-fc.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=extra.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=logexpgelu.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp-unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=loglinexp.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-model=unbounded.pngloss-mse-dim=[20,30]-dim_sample=1.0-loss=mse-opti=sgd-bwd_lr=0.1-σs=[id,gelu,exp(2**6),log(2**-8)]-θbatch=False.png