#pragma once #include "../extensions.h" torch::Tensor non_diag_mask_cpu(torch::Tensor row, torch::Tensor col, int64_t M, int64_t N, int64_t k);