This Discord bot joins voice channels and listens to users speaking. It records short snippets of voice audio, converts them to the correct format, and sends them to OpenAI's Whisper API for transcription. The bot then posts the transcribed text back into the text channel, tagging the user who spoke. Fair warning: transcription accuracy is far from perfect, especially with noisy audio.
When a user types `!join` in a text channel, the bot joins their current voice channel. It listens for users to start speaking, records their audio until they pause for a moment, then converts that audio to a WAV file and sends it to the Whisper transcription API. After receiving the transcript, it posts it as a message so everyone in the server can read what was said.
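The "tagging" part relies on Discord's mention syntax: a message containing `<@USER_ID>` is rendered as a clickable mention of that user. A minimal sketch of the reply format the bot uses (the helper name here is hypothetical, not part of the bot code):

```javascript
// Hypothetical helper mirroring how the bot formats its reply.
// Discord renders <@USER_ID> as a mention of that user.
function formatTranscriptMessage(userId, transcript) {
  return `<@${userId}> said: ${transcript}`;
}

console.log(formatTranscriptMessage('123456789', 'hello world'));
// → <@123456789> said: hello world
```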
1. Get your Discord Bot Token: Create a Discord bot at the Discord Developer Portal and copy its token.
2. Get your OpenAI API Key: Sign up or log in at the OpenAI Platform, create an API key, and copy it.
3. Replace `YOUR_DISCORD_BOT_TOKEN` and `YOUR_OPENAI_API_KEY` in the bot code below with your actual keys.
4. Make sure you have Node.js installed, and install the dependencies: `discord.js`, `@discordjs/voice`, `prism-media`, `fluent-ffmpeg`, `ffmpeg-static`, `node-fetch`, and `form-data`.
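Assuming you use npm, step 4's dependencies can be installed in one command. Note that `node-fetch` is pinned to v2 here because the bot code loads it with `require()`, and node-fetch v3 is ESM-only; `ffmpeg-static` bundles an FFmpeg binary, so no separate system-wide FFmpeg install is needed.

```shell
npm install discord.js @discordjs/voice prism-media fluent-ffmpeg ffmpeg-static node-fetch@2 form-data
```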
5. Run your bot with `node index.js`.
6. Join a voice channel, then type `!join` in a text channel to activate the transcription.
7. Speak in the voice channel. After you stop talking, the bot should post your transcribed speech.
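In the bot itself, FFmpeg handles the raw-PCM-to-WAV conversion mentioned above. For reference, that conversion amounts to prepending a 44-byte WAV header to the raw 48kHz stereo 16-bit PCM that the Opus decoder emits. A sketch of what that header contains (illustrative only, not part of the bot code):

```javascript
// Build the 44-byte WAV header for raw s16le PCM, the same container
// format ffmpeg's `-f wav` output produces for the decoded Discord audio.
function wavHeader(pcmByteLength, sampleRate = 48000, channels = 2, bitsPerSample = 16) {
  const blockAlign = channels * bitsPerSample / 8;   // bytes per sample frame
  const byteRate = sampleRate * blockAlign;          // bytes of audio per second
  const header = Buffer.alloc(44);
  header.write('RIFF', 0);
  header.writeUInt32LE(36 + pcmByteLength, 4);       // total file size minus 8
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16);                      // fmt chunk size
  header.writeUInt16LE(1, 20);                       // format 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);
  header.write('data', 36);
  header.writeUInt32LE(pcmByteLength, 40);           // raw PCM payload size
  return header;
}

const h = wavHeader(192000); // one second of 48kHz stereo 16-bit audio
console.log(h.toString('ascii', 0, 4)); // RIFF
console.log(h.readUInt32LE(28));        // 192000 (bytes per second)
```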
Want to test the Whisper Voice Transcription Bot live? Join our public testing Discord server:
Download the complete bot folder with all code and setup files. Note: You still need to install dependencies (like Node modules and FFmpeg) after downloading.
If you encounter any problems or have questions, feel free to DM me on Discord: choppedtbh
```javascript
const { Client, GatewayIntentBits } = require('discord.js');
const { joinVoiceChannel, EndBehaviorType } = require('@discordjs/voice');
const prism = require('prism-media');
const ffmpeg = require('fluent-ffmpeg');
const fs = require('fs');
const path = require('path');
const fetch = require('node-fetch');
const FormData = require('form-data');
const ffmpegPath = require('ffmpeg-static');

// Point fluent-ffmpeg at the binary bundled by ffmpeg-static,
// so no system-wide FFmpeg install is required.
ffmpeg.setFfmpegPath(ffmpegPath);

const DISCORD_TOKEN = 'YOUR_DISCORD_BOT_TOKEN';
const OPENAI_API_KEY = 'YOUR_OPENAI_API_KEY';

const client = new Client({
  intents: [
    GatewayIntentBits.Guilds,
    GatewayIntentBits.GuildVoiceStates,
    GatewayIntentBits.GuildMessages,
    GatewayIntentBits.MessageContent,
  ],
});

client.once('ready', () => {
  console.log(`Logged in as ${client.user.tag}`);
});

async function joinAndRecord(channel, textChannel) {
  const connection = joinVoiceChannel({
    channelId: channel.id,
    guildId: channel.guild.id,
    adapterCreator: channel.guild.voiceAdapterCreator,
  });

  connection.receiver.speaking.on('start', (userId) => {
    console.log(`Listening to ${userId}`);

    // End the recording after 1 second of silence.
    const audioStream = connection.receiver.subscribe(userId, {
      end: {
        behavior: EndBehaviorType.AfterSilence,
        duration: 1000,
      },
    });

    // Decode Opus packets to raw 48kHz stereo 16-bit PCM.
    const pcmStream = new prism.opus.Decoder({ frameSize: 960, channels: 2, rate: 48000 });
    const outputStream = audioStream.pipe(pcmStream);

    const chunks = [];
    outputStream.on('data', (chunk) => {
      chunks.push(chunk);
    });

    outputStream.on('end', async () => {
      console.log(`Finished receiving audio from ${userId}, converting and transcribing...`);
      const buffer = Buffer.concat(chunks);

      try {
        const rawFilePath = path.join(__dirname, `temp_${userId}.raw`);
        const wavFilePath = path.join(__dirname, `temp_${userId}.wav`);
        fs.writeFileSync(rawFilePath, buffer);

        // Wrap the raw PCM in a WAV container for the Whisper API.
        await new Promise((resolve, reject) => {
          ffmpeg(rawFilePath)
            .inputOptions(['-f s16le', '-ar 48000', '-ac 2'])
            .outputOptions(['-f wav'])
            .on('end', resolve)
            .on('error', reject)
            .save(wavFilePath);
        });

        const transcript = await transcribeAudio(wavFilePath);
        await textChannel.send(`<@${userId}> said: ${transcript}`);

        // Clean up temporary files.
        fs.unlinkSync(rawFilePath);
        fs.unlinkSync(wavFilePath);
      } catch (error) {
        console.error('Error during transcription:', error);
        textChannel.send(`Failed to transcribe audio from <@${userId}>.`);
      }
    });
  });
}

async function transcribeAudio(filePath) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', 'whisper-1');

  const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${OPENAI_API_KEY}`,
    },
    body: formData,
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`OpenAI API error: ${error}`);
  }

  const data = await response.json();
  return data.text;
}

client.on('messageCreate', async (message) => {
  if (!message.guild) return;
  if (!message.content.startsWith('!join')) return;

  const voiceChannel = message.member.voice.channel;
  if (!voiceChannel) {
    return message.reply('You need to be in a voice channel first!');
  }

  try {
    await joinAndRecord(voiceChannel, message.channel);
    message.reply(`Joined and recording in ${voiceChannel.name}`);
  } catch (error) {
    console.error(error);
    message.reply('Failed to join or record voice channel.');
  }
});

client.login(DISCORD_TOKEN);
```