Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ cmake -DGGML_METAL=ON -DBUILD_SHARED_LIBS=Off ..
cmake --build . --config Release
```

Once built, pass the `--n-gpu-layers` flag with a value greater than 0 to the executable.

### Using cuBLAS

Inference can be offloaded to a CUDA backend with cuBLAS.
Expand Down
50 changes: 27 additions & 23 deletions decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,61 +12,65 @@

struct encodec_decoder_block {
// upsampling layers
struct ggml_tensor *us_conv_w;
struct ggml_tensor *us_conv_b;
struct ggml_tensor * us_conv_w;
struct ggml_tensor * us_conv_b;

// conv1
struct ggml_tensor *conv_1_w;
struct ggml_tensor *conv_1_b;
struct ggml_tensor * conv_1_w;
struct ggml_tensor * conv_1_b;

// conv2
struct ggml_tensor *conv_2_w;
struct ggml_tensor *conv_2_b;
struct ggml_tensor * conv_2_w;
struct ggml_tensor * conv_2_b;

// shortcut
struct ggml_tensor *conv_sc_w;
struct ggml_tensor *conv_sc_b;
struct ggml_tensor * conv_sc_w;
struct ggml_tensor * conv_sc_b;
};

struct encodec_decoder {
struct ggml_tensor *init_conv_w;
struct ggml_tensor *init_conv_b;
struct ggml_tensor * init_conv_w;
struct ggml_tensor * init_conv_b;

encodec_lstm lstm;

struct ggml_tensor *final_conv_w;
struct ggml_tensor *final_conv_b;
struct ggml_tensor * final_conv_w;
struct ggml_tensor * final_conv_b;

std::vector<encodec_decoder_block> blocks;
};

struct ggml_tensor *encodec_forward_decoder(
const struct encodec_decoder *decoder, struct ggml_context *ctx0,
struct ggml_tensor *quantized_out, const int *ratios, const int kernel_size, const int res_kernel_size,
const int stride) {
struct ggml_tensor * encodec_forward_decoder(
const struct encodec_decoder * decoder,
struct ggml_context * ctx0,
struct ggml_tensor * quantized_out,
const int * ratios,
const int kernel_size,
const int res_kernel_size,
const int stride) {

if (!quantized_out) {
fprintf(stderr, "%s: null input tensor\n", __func__);
return NULL;
}

struct ggml_tensor *inpL = strided_conv_1d(
struct ggml_tensor * inpL = strided_conv_1d(
ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride);

// lstm
{
struct ggml_tensor *cur = inpL;
struct ggml_tensor * cur = inpL;

const encodec_lstm lstm = decoder->lstm;

// first lstm layer
char l0_prefix[7] = "dec_l0";
struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
struct ggml_tensor * hs1 = forward_pass_lstm_unilayer(
ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix);

// second lstm layer
char l1_prefix[7] = "dec_l1";
struct ggml_tensor *out = forward_pass_lstm_unilayer(
struct ggml_tensor * out = forward_pass_lstm_unilayer(
ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix);

inpL = ggml_add(ctx0, inpL, out);
Expand All @@ -81,10 +85,10 @@ struct ggml_tensor *encodec_forward_decoder(
inpL = strided_conv_transpose_1d(
ctx0, inpL, block.us_conv_w, block.us_conv_b, ratios[layer_ix]);

struct ggml_tensor *current = inpL;
struct ggml_tensor * current = inpL;

// shortcut
struct ggml_tensor *shortcut = strided_conv_1d(
struct ggml_tensor * shortcut = strided_conv_1d(
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);

// conv1
Expand All @@ -106,7 +110,7 @@ struct ggml_tensor *encodec_forward_decoder(
// final conv
inpL = ggml_elu(ctx0, inpL);

struct ggml_tensor *decoded_inp = strided_conv_1d(
struct ggml_tensor * decoded_inp = strided_conv_1d(
ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride);

return decoded_inp;
Expand Down
Loading
Loading