# Symbolic tensor expression, written as one Python call tree.
# NOTE(review): this looks like the output of an expression printer rather
# than hand-written code -- confirm before editing index tensors by hand.
#
# Every argument (positional or keyword) is comma-separated so the text
# parses as a single Python expression.
#
# Outline, outermost call first:
#   torch.sum(..., dim=-3)
#     custom.torch.fast_prod_positive_multi(..., groups=[(5, 1), (2, 1), (6, 1)], dim=-3)
#       custom.torch.shuffle(..., <index tensor>, dim=-3)
#         custom.torch.sum_multi(..., groups=[(1, 5), (5, 4), (6, 4)], dim=-3)
#           custom.torch.mul_along_dim(<coefficients>, <kernel stack>, dim=-3)
#   <coefficients>: nested scatter / index_select / mul_along_dim (dim=-1)
#       built from 'scale', the 'alpha_*' symbols and the 'w_additive_*' symbols.
#   <kernel stack>: torch.cat along dim=-3 of
#       matern(cdist_multi(x1 / lengthscale, x2 / lengthscale, groups=...), nu=2.5)
#       and exp(-cdist_multi(x1 / lengthscale, x2 / lengthscale, groups=...)),
#       then reordered by custom.torch.shuffle.
#
# Free symbols: scale, alpha_factorized_vs_joint, alpha_regime_vs_architecture,
# alpha_factorized_or_joint_{lr,momentum,wd,units_channels}_{factorized,joint},
# w_additive_{lr,momentum,wd,units_channels}_{factorized,joint},
# x1, x2, lengthscale.
torch.sum(
  custom.torch.fast_prod_positive_multi(
    custom.torch.shuffle(
      custom.torch.sum_multi(
        custom.torch.mul_along_dim(
          custom.torch.mul_along_dim(
            torch.index_select(
              custom.torch.mul_along_dim(
                torch.index_select(
                  torch.scatter(
                    torch.ones(torch.Size([20, 13])),
                    -1,
                    torch.expand(
                      tensor([[0, 5, 7]], device='cuda:0'),
                      torch.Size([20, 3]),
                    ),
                    custom.torch.mul_along_dim(
                      torch.index_select(
                        custom.torch.mul_along_dim(
                          torch.expand(
                            torch.unsqueeze(
                              symbol('scale'),
                              -1,
                            ),
                            (20, 2),
                          ),
                          custom.torch.heads_tails(symbol('alpha_factorized_vs_joint')),
                          dim=-1
                        ),
                        -1,
                        tensor([0, 0, 1], device='cuda:0'),
                      ),
                      torch.scatter(
                        torch.ones((20, 3)),
                        -1,
                        torch.expand(
                          tensor([[0, 1]], device='cuda:0'),
                          (20, 2),
                        ),
                        custom.torch.heads_tails(symbol('alpha_regime_vs_architecture')),
                      ),
                      dim=-1
                    ),
                  ),
                  -1,
                  tensor([ 0,  1,  5,  7,  8,  2,  2,  3,  3,  4,  4,  6,  6,  9,  9, 10, 10, 11,
                          11, 12, 12], device='cuda:0'),
                ),
                torch.scatter(
                  torch.ones((20, 21)),
                  -1,
                  torch.expand(
                    tensor([[ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]],
                           device='cuda:0'),
                    (20, 16),
                  ),
                  custom.torch.heads_tails(
                    torch.cat(
                      [symbol('alpha_factorized_or_joint_lr_factorized'),
                       symbol('alpha_factorized_or_joint_momentum_factorized'),
                       symbol('alpha_factorized_or_joint_wd_factorized'),
                       symbol('alpha_factorized_or_joint_units_channels_factorized'),
                       symbol('alpha_factorized_or_joint_lr_joint'),
                       symbol('alpha_factorized_or_joint_momentum_joint'),
                       symbol('alpha_factorized_or_joint_wd_joint'),
                       symbol('alpha_factorized_or_joint_units_channels_joint')],
                      dim=-1
                    )
                  ),
                ),
                dim=-1
              ),
              -1,
              tensor([ 0,  1,  2,  3,  4,  5,  5,  5,  5,  6, 11, 11, 11, 11, 12, 13, 13, 13,
                      13, 14, 19, 19, 19, 19, 20,  7,  7,  7,  7,  7,  8,  9,  9,  9,  9,  9,
                      10, 15, 15, 15, 15, 15, 16, 17, 17, 17, 17, 17, 18], device='cuda:0'),
            ),
            torch.scatter(
              torch.ones((20, 49)),
              -1,
              torch.expand(
                tensor([[ 5,  6,  7,  8, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 10, 11, 12, 13,
                         15, 16, 17, 18, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 20, 21, 22, 23]],
                       device='cuda:0'),
                (20, 36),
              ),
              torch.cat(
                [symbol('w_additive_lr_factorized'),
                 symbol('w_additive_momentum_factorized'),
                 symbol('w_additive_wd_factorized'),
                 symbol('w_additive_units_channels_factorized'),
                 symbol('w_additive_lr_joint'),
                 symbol('w_additive_momentum_joint'),
                 symbol('w_additive_wd_joint'),
                 symbol('w_additive_units_channels_joint')],
                dim=-1
              ),
            ),
            dim=-1
          ),
          custom.torch.shuffle(
            torch.cat(
              [matern(
                custom.torch.cdist_multi(
                  operator.truediv(
                    torch.index_select(
                      symbol('x1'),
                      -1,
                      tensor([ 0,  1, 21,  7,  8, 10,  9, 11, 12, 13, 14, 15,  2,  3,  4,  5,  6, 20,
                              16, 17, 18, 19,  7,  8, 10,  9, 11, 12, 13, 14, 15,  2,  3,  4,  5,  6,
                              16, 17, 18, 19,  7,  8, 10,  9, 16, 17, 18, 19,  0,  1, 21, 20,  7,  8,
                              10,  9, 16, 17, 18, 19, 11, 12, 13, 14, 15,  2,  3,  4,  5,  6, 11, 12,
                              13, 14, 15,  2,  3,  4,  5,  6], device='cuda:0'),
                    ),
                    torch.unsqueeze(
                      torch.index_select(
                        symbol('lengthscale'),
                        -1,
                        tensor([ 0,  1,  2,  7,  8,  9, 10, 15, 16, 17, 18, 19, 25, 26, 27, 28, 29, 35,
                                36, 37, 38, 39, 52, 53, 54, 55, 60, 61, 62, 63, 64, 70, 71, 72, 73, 74,
                                80, 81, 82, 83, 11, 12, 13, 14, 40, 41, 42, 43, 44, 45, 46, 47, 56, 57,
                                58, 59, 84, 85, 86, 87, 20, 21, 22, 23, 24, 30, 31, 32, 33, 34, 65, 66,
                                67, 68, 69, 75, 76, 77, 78, 79], device='cuda:0'),
                      ),
                      -2,
                    ),
                  ),
                  operator.truediv(
                    torch.index_select(
                      symbol('x2'),
                      -1,
                      tensor([ 0,  1, 21,  7,  8, 10,  9, 11, 12, 13, 14, 15,  2,  3,  4,  5,  6, 20,
                              16, 17, 18, 19,  7,  8, 10,  9, 11, 12, 13, 14, 15,  2,  3,  4,  5,  6,
                              16, 17, 18, 19,  7,  8, 10,  9, 16, 17, 18, 19,  0,  1, 21, 20,  7,  8,
                              10,  9, 16, 17, 18, 19, 11, 12, 13, 14, 15,  2,  3,  4,  5,  6, 11, 12,
                              13, 14, 15,  2,  3,  4,  5,  6], device='cuda:0'),
                    ),
                    torch.unsqueeze(
                      torch.index_select(
                        symbol('lengthscale'),
                        -1,
                        tensor([ 0,  1,  2,  7,  8,  9, 10, 15, 16, 17, 18, 19, 25, 26, 27, 28, 29, 35,
                                36, 37, 38, 39, 52, 53, 54, 55, 60, 61, 62, 63, 64, 70, 71, 72, 73, 74,
                                80, 81, 82, 83, 11, 12, 13, 14, 40, 41, 42, 43, 44, 45, 46, 47, 56, 57,
                                58, 59, 84, 85, 86, 87, 20, 21, 22, 23, 24, 30, 31, 32, 33, 34, 65, 66,
                                67, 68, 69, 75, 76, 77, 78, 79], device='cuda:0'),
                      ),
                      -2,
                    ),
                  ),
                  groups=[((3, 2), 1), ((1, 2), 37), ((4, 2), 5), ((5, 2), 4)]
                ),
                nu=2.5
              ),
               torch.exp(
                operator.neg(
                  custom.torch.cdist_multi(
                    operator.truediv(
                      torch.index_select(
                        symbol('x1'),
                        -1,
                        tensor([22, 23, 24, 25, 22, 23, 24, 25], device='cuda:0'),
                      ),
                      torch.unsqueeze(
                        torch.index_select(
                          symbol('lengthscale'),
                          -1,
                          tensor([ 3,  4,  5,  6, 48, 49, 50, 51], device='cuda:0'),
                        ),
                        -2,
                      ),
                    ),
                    operator.truediv(
                      torch.index_select(
                        symbol('x2'),
                        -1,
                        tensor([22, 23, 24, 25, 22, 23, 24, 25], device='cuda:0'),
                      ),
                      torch.unsqueeze(
                        torch.index_select(
                          symbol('lengthscale'),
                          -1,
                          tensor([ 3,  4,  5,  6, 48, 49, 50, 51], device='cuda:0'),
                        ),
                        -2,
                      ),
                    ),
                    groups=[((4, 1), 2)]
                  )
                )
              )],
              dim=-3
            ),
            tensor([ 0, 47, 15, 40, 48,  1,  2,  3,  4, 38, 16, 17, 18, 19, 39, 20, 21, 22,
                    23, 41, 34, 35, 36, 37, 42,  5,  6,  7,  8,  9, 43, 10, 11, 12, 13, 14,
                    44, 24, 25, 26, 27, 28, 45, 29, 30, 31, 32, 33, 46], device='cuda:0'),
            dim=-3
          ),
          dim=-3
        ),
        groups=[(1, 5), (5, 4), (6, 4)], dim=-3
      ),
      tensor([ 0,  1,  5,  9, 10,  2,  6,  3,  4,  7, 11, 12,  8], device='cuda:0'),
      dim=-3
    ),
    groups=[(5, 1), (2, 1), (6, 1)], dim=-3
  ),
  dim=-3
)