"""Streamlit app: automatic speech recognition (ASR) with the Hugging Face Whisper pipeline.

Lets the user upload a WAV/MP3 file (or use a bundled sample) and shows the
Whisper transcription.
"""

import os
import tempfile

import streamlit as st
import torch
import torchaudio
from transformers import pipeline

st.title("ASR with Hugging Face Whisper")


@st.cache_resource
def _load_asr_pipeline():
    """Build the Whisper ASR pipeline once per session.

    Cached with ``st.cache_resource`` so the (very large) model is not
    re-downloaded/re-instantiated on every Streamlit rerun.
    """
    return pipeline(
        task="automatic-speech-recognition",
        model="openai/whisper-large-v2",
        # Classic pipeline convention: GPU index, or -1 for CPU.
        device=0 if torch.cuda.is_available() else -1,
    )


asr = _load_asr_pipeline()


def _transcribe(waveform, sample_rate):
    """Transcribe a ``(channels, samples)`` float waveform; return the text.

    The HF ASR pipeline expects 1-D mono audio together with its sampling
    rate, passed as ``{"raw": ..., "sampling_rate": ...}`` so it can resample
    to the model's expected rate internally. The result is a dict with a
    ``"text"`` key (not a list of segments).
    """
    mono = waveform.mean(dim=0)  # downmix multi-channel audio to mono
    result = asr({"raw": mono.numpy(), "sampling_rate": sample_rate})
    return result["text"]


uploaded_audio = st.file_uploader("Upload an audio file (wav/mp3)")

if uploaded_audio:
    # torchaudio.load accepts the file-like object Streamlit provides.
    audio_data, sample_rate = torchaudio.load(uploaded_audio)

    with st.spinner("Performing ASR..."):
        text = _transcribe(audio_data, sample_rate)

    st.subheader("Transcription:")
    st.write(text)

st.write("Instructions:")
st.write("1. Upload an audio file in WAV or MP3 format.")
st.write("2. Click the 'Perform ASR' button to transcribe the audio.")

st.write("Sample Audio for Testing:")
sample_audio = "Wave_files_demos_Welcome.wav"
st.audio(sample_audio, format="audio/wav")

# Resolve the sample file relative to the current working directory.
sample_audio_path = os.path.join(os.getcwd(), sample_audio)

if st.button("Transcribe Sample Audio"):
    sample_audio_data, sample_audio_rate = torchaudio.load(sample_audio_path)

    with st.spinner("Performing ASR..."):
        sample_text = _transcribe(sample_audio_data, sample_audio_rate)

    st.subheader("Transcription (Sample Audio):")
    st.write(sample_text)