How to record speaker and microphone sounds simultaneously with python in windows?

Asked

Viewed 654 times

1

I tried to record the desktop sound with PyAudio, but I can only capture either the microphone or the speaker, never both.

I also downloaded the portaudio, but I’m not sure I can get the sound of the speaker and the sound of the microphone with it simultaneously

I noticed that when I changed the input device to stereo mixer I was able to record the audio from the speaker but I stopped recording the microphone.

This is the code I’m using:

import pyaudio
import wave
import threading
import time
import subprocess

# Capture settings shared by the recorder class below.
CHUNK = 1024  # frames requested per stream.read() call (also frames_per_buffer)
FORMAT = pyaudio.paInt16  # sample format passed to p.open() and get_sample_size()
CHANNELS = 2  # channel count used for both the input stream and the WAV header
RATE = 44100  # sample rate passed to p.open() and written to the WAV header
RECORD_SECONDS = 5  # not referenced by the recorder class below
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"  # path the captured audio is written to

class recorder:
    """Capture loopback audio on a background thread until told to stop.

    Call ``record()`` to start capturing and ``stop_recording()`` to end it;
    the captured frames are then written to ``WAVE_OUTPUT_FILENAME``.
    """

    def __init__(self):
        self.going = False  # loop flag polled by the capture thread
        self.process = None  # threading.Thread running _record, or None
        self.filename = "ScreenCapture.mpg"

    def record(self, filename):
        """Start a new capture thread, stopping any capture still running.

        ``filename`` is stored on the instance; note that ``_record`` writes
        its output to ``WAVE_OUTPUT_FILENAME``, not to this name.
        """
        previous = self.process
        if previous is not None and previous.is_alive():
            # Wait for the old capture to finish, otherwise the flag set below
            # would revive its loop and two threads would write the same file.
            self.going = False
            previous.join()

        self.filename = filename
        # Raise the flag before the thread starts so that a stop_recording()
        # issued right after record() cannot be overwritten by the thread.
        self.going = True
        self.process = threading.Thread(target=self._record)
        self.process.start()

    def _record(self):
        """Read chunks while ``self.going`` is true, then write the WAV file."""
        p = pyaudio.PyAudio()
        stream = None
        frames = []
        try:
            sample_width = p.get_sample_size(FORMAT)
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK,
                            as_loopback=True)

            while self.going:
                frames.append(stream.read(CHUNK))
        finally:
            # Release the device even when opening or reading fails.
            if stream is not None:
                stream.stop_stream()
                stream.close()
            p.terminate()

        with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))

    def stop_recording(self):
        """Ask the capture thread to leave its loop after the current chunk."""
        self.going = False

What should I do?

I updated the code following ederwander’s advice, but it still does not work. I don’t know whether the problem is the code or my notebook:

import pyaudio
import wave
import threading
import time
import subprocess
#from pydub import AudioSegment
import numpy as np

# Capture settings shared by the recorder class below.
CHUNK = 1024  # frames requested per stream.read() call (also frames_per_buffer)
FORMAT = pyaudio.paInt16  # sample format passed to p.open() and get_sample_size()
# CHANNELS is no longer a constant: the recorder hard-codes the channel count
# per stream (2 for the loopback stream, 1 for the microphone stream).
RATE = 44100  # sample rate passed to p.open() and written to the WAV header
RECORD_SECONDS = 5  # not referenced by the recorder class below
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"  # path the mixed audio is written to

# Module-level PyAudio instance, used here to enumerate the audio devices.
p = pyaudio.PyAudio()

# Print every device index with its name so the right input_device_index
# values can be picked for this machine.
for i in range(0, p.get_device_count()):
    print(i, p.get_device_info_by_index(i)['name'])


class recorder:
    """Record speaker (loopback) and microphone audio together.

    Both inputs are captured on a background thread until
    ``stop_recording()`` is called, then mixed down to a mono 16-bit WAV file
    written to ``WAVE_OUTPUT_FILENAME``.
    """

    def __init__(self, speaker_device_index=0, mic_device_index=1):
        """Create an idle recorder.

        speaker_device_index: PyAudio device index opened with
            ``as_loopback=True`` (stereo).
        mic_device_index: PyAudio device index of the microphone (mono).
        Device indices are machine-specific; the defaults match the values
        that used to be hard-coded.
        """
        self.going = False  # loop flag polled by the capture thread
        self.process = None  # threading.Thread running _record, or None
        self.filename = "ScreenCapture.mpg"
        self.speaker_device_index = speaker_device_index
        self.mic_device_index = mic_device_index

    def record(self, filename):
        """Start a new capture thread, stopping any capture still running.

        ``filename`` is stored on the instance; note that ``_record`` writes
        its output to ``WAVE_OUTPUT_FILENAME``, not to this name.
        """
        previous = self.process
        if previous is not None and previous.is_alive():
            # Wait for the old capture to finish, otherwise the flag set below
            # would revive its loop and two threads would write the same file.
            self.going = False
            previous.join()

        self.filename = filename
        # Raise the flag before the thread starts so that a stop_recording()
        # issued right after record() cannot be overwritten by the thread.
        self.going = True
        self.process = threading.Thread(target=self._record)
        self.process.start()

    @staticmethod
    def _mix_to_mono(speaker_bytes, mic_bytes):
        """Mix interleaved stereo int16 audio with mono int16 audio.

        Returns the mono int16 samples (left + right + microphone) as bytes.
        """
        # Widen to int32 first: summing int16 arrays wraps around silently,
        # which would make the clip below useless.
        stereo = np.frombuffer(speaker_bytes, dtype='int16').astype(np.int32)
        mic = np.frombuffer(mic_bytes, dtype='int16').astype(np.int32)

        left = stereo[::2]
        right = stereo[1::2]

        # Guard against inputs of slightly different length.
        count = min(len(left), len(right), len(mic))
        mix = left[:count] + right[:count] + mic[:count]

        # Limit the sum to the 16-bit range before narrowing back to int16.
        limited = np.clip(mix, -32767, 32766)
        return limited.astype(np.int16).tobytes()

    def _record(self):
        """Capture both inputs while ``self.going`` is true, then write the mix."""
        # Use a private PyAudio instance: terminating a shared one here would
        # make every later recording fail.
        audio = pyaudio.PyAudio()
        opened = []
        frames = []   # raw chunks from the loopback (speaker) stream
        frames2 = []  # raw chunks from the microphone stream
        try:
            sample_width = audio.get_sample_size(FORMAT)

            speaker_stream = audio.open(
                format=FORMAT,
                channels=2,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_device_index=self.speaker_device_index,
                as_loopback=True)
            opened.append(speaker_stream)

            mic_stream = audio.open(
                format=FORMAT,
                channels=1,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_device_index=self.mic_device_index)
            opened.append(mic_stream)

            print("* recording")

            while self.going:
                frames.append(speaker_stream.read(CHUNK))
                # Store the microphone chunk (the speaker chunk used to be
                # appended here by mistake).
                frames2.append(mic_stream.read(CHUNK))

            print("* done recording")
        finally:
            # Release the devices even when opening or reading fails.
            for stream in opened:
                stream.stop_stream()
                stream.close()
            audio.terminate()

        encoded = self._mix_to_mono(b''.join(frames), b''.join(frames2))

        with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(encoded)

    def stop_recording(self):
        """Ask the capture thread to leave its loop after the current chunk."""
        self.going = False

1 answer

1


First, install NumPy via pip; `pip install numpy` should be enough. We need it to decode the audio quickly. This code also uses it to convert lists into arrays, and uses its `clip` function, which works as an audio limiter: when you mix audio you simply sum the sample arrays, and in doing so you will inevitably go beyond the minimum or maximum value of a short int, so the out-of-range samples must be forced back within the limits with `clip`.

I kept the code very clean and commented each part so you can follow what is going on. At the beginning I added a `for` loop so that PyAudio shows which audio interfaces exist on my OS:

C:\Python37>python.exe gravando.py
0 Mapeador de som da Microsoft - Input
1 Microfone (Realtek High Definit
2 Mixagem estéreo (Realtek High D
3 Mapeador de som da Microsoft - Output
4 Alto-falantes (Realtek High Def
5 Alto-falantes (Realtek High Definition Audio)
6 Microfone (Realtek High Definition Audio)
7 Mixagem estéreo (Realtek High Definition Audio)
8 Speakers (Realtek HD Audio output)
9 Mixagem estéreo (Realtek HD Audio Stereo input)
10 Microfone (Realtek HD Audio Mic input)
11 Entrada (Realtek HD Audio Line input)

This is the output I get when I run the script. I’m using index 0 to capture the OS audio via loopback and index 1 to capture the microphone. Inside the loop I read from both streams; after the capture ends I run the whole decoding process: converting the stereo audio to mono, clipping the audio, and finally writing everything in mono at 44100 Hz to a .wav file ...

import pyaudio
import wave
import numpy as np


CHUNK = 1024  # frames requested from each stream per read() call
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 2
WAVE_OUTPUT_FILENAME = "tmp.wav"

# Device indices are machine-specific: pick them from the list printed below.
SPEAKER_DEVICE_INDEX = 0  # opened with as_loopback=True to capture OS audio
MIC_DEVICE_INDEX = 1      # microphone input


p = pyaudio.PyAudio()

opened = []
frames = []   # raw chunks from the loopback (speaker) stream
frames2 = []  # raw chunks from the microphone stream

try:
    # List every device index with its name.
    for i in range(0, p.get_device_count()):
        print(i, p.get_device_info_by_index(i)['name'])

    sample_width = p.get_sample_size(FORMAT)

    # Stream using as_loopback to capture the sound of the OS.
    stream = p.open(
        format=FORMAT,
        channels=2,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
        input_device_index=SPEAKER_DEVICE_INDEX,
        as_loopback=True)
    opened.append(stream)

    # Stream using the microphone input device.
    stream2 = p.open(
        format=FORMAT,
        channels=1,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
        input_device_index=MIC_DEVICE_INDEX)
    opened.append(stream2)

    for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        frames.append(stream.read(CHUNK))
        frames2.append(stream2.read(CHUNK))
finally:
    # Stop all streams and shut PyAudio down, even if capturing failed.
    for opened_stream in opened:
        opened_stream.stop_stream()
        opened_stream.close()
    p.terminate()


# Decode both captures. Widen to int32 first: summing int16 arrays wraps
# around silently, which would make the clip below useless.
speaker = np.frombuffer(b''.join(frames), dtype='int16').astype(np.int32)
microphone = np.frombuffer(b''.join(frames2), dtype='int16').astype(np.int32)

# The speaker capture is interleaved stereo: even samples are the left
# channel, odd samples are the right channel.
left = speaker[::2]
right = speaker[1::2]

# Mix everything down to mono: left + right + the (already mono) microphone.
count = min(len(left), len(right), len(microphone))
mix = left[:count] + right[:count] + microphone[:count]

# Make sure no value goes beyond the limits of a short int.
signal = np.clip(mix, -32767, 32766)

# Encode the samples again as raw 16-bit data.
encoded = signal.astype(np.int16).tobytes()


# Write the mixed audio as a mono WAV file.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(1)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(encoded)

If everything went well you should have an audio file with the Microphone + SPEAKERS capture

PS: if you do not want the .wav file to be mono, you can add the microphone data to one of the channels (right or left), or simply mix the microphone data into both channels (I made the output mono because it is much simpler ...).

  • Hasn’t worked yet.

  • I put the code in the description

  • It worked now!! I changed the OS input_device_index.

  • Someone please crown ederwander king of the internet

  • I just haven’t been able to use inside my project yet but the code is correct

Browse other questions tagged

You are not signed in. Login or sign up in order to post.