param dim_i, dim_o loss opti dim_sample bwd_σ value dim_i=20 dim_o=30 mse mse+cos mse,cos adam, lr=1e-5 adam, lr=1e-4 adam, lr=1e-3 adam, lr=1e-2 2^0 2^-2 2^-4 2^-6 2^-8 loglinexp confusion external loss bwd mse loss bwd cos loss bwd dot loss bwd dot sim loss