Automatic Speech Recognition
Transformers
Safetensors
Danish
qwen3_asr
text-generation
audio
speech
danish
qwen3-asr
trust-remote-code
custom-code
custom_code
Instructions to use capacit-ai/saga with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use capacit-ai/saga with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition", model="capacit-ai/saga", trust_remote_code=True)
```

```python
# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("capacit-ai/saga", trust_remote_code=True, dtype="auto")
```

- Notebooks
- Google Colab
- Kaggle
| import numpy as np | |
| from transformers import ProcessorMixin | |
class SagaProcessor(ProcessorMixin):
    """Processor bundling a feature extractor and a tokenizer for Saga.

    Besides what ``ProcessorMixin`` provides, this class offers two helpers:
    ``process_audio`` (linear-interpolation resampling to ``target_sr``) and
    ``get_prompt`` (the fixed text prompt built from the tokenizer's chat
    template).
    """

    attributes = ["feature_extractor", "tokenizer"]

    def __init__(self, feature_extractor, tokenizer, **kwargs):
        """Store the components via ``ProcessorMixin`` and set the target rate."""
        super().__init__(feature_extractor, tokenizer, **kwargs)
        # Sample rate (Hz) that ``process_audio`` resamples to.
        self.target_sr = 16000

    def process_audio(self, audio, sampling_rate):
        """Resample ``audio`` to ``self.target_sr`` by linear interpolation.

        Args:
            audio: Waveform samples. On the resampling path it is converted
                with ``np.asarray``; ``np.interp`` requires it to be 1-D
                (presumably mono audio -- confirm against callers).
            sampling_rate: Sample rate of ``audio``; must be positive.

        Returns:
            ``audio`` itself, untouched, when ``int(sampling_rate)`` equals
            ``self.target_sr``. Otherwise a new ``float32`` array of
            ``round(len(audio) * target_sr / sampling_rate)`` samples, or an
            empty ``float32`` array when that length would be 0 or 1.

        Raises:
            ValueError: If ``sampling_rate`` is zero or negative.
        """
        if int(sampling_rate) == self.target_sr:
            # Already at the target rate: hand the input back as-is
            # (no copy, no dtype conversion).
            return audio

        rate = float(sampling_rate)
        if rate <= 0.0:
            # A zero rate used to surface as a bare ZeroDivisionError and a
            # negative one silently produced an empty array.
            raise ValueError(
                f"sampling_rate must be positive, got {sampling_rate!r}"
            )

        # Accept plain sequences as well as arrays.
        samples = np.asarray(audio)
        src_len = samples.shape[0]
        dst_len = int(round(src_len * (float(self.target_sr) / rate)))
        if dst_len <= 1:
            # Degenerate result: too short to interpolate meaningfully.
            return np.zeros((0,), dtype=np.float32)

        # Place source and destination samples on a shared normalised
        # [0, 1) axis, then interpolate the source onto the destination grid.
        src_x = np.linspace(0.0, 1.0, num=src_len, endpoint=False)
        dst_x = np.linspace(0.0, 1.0, num=dst_len, endpoint=False)
        return np.interp(dst_x, src_x, samples).astype(np.float32)

    def get_prompt(self):
        """Return the text prompt used for transcription.

        The prompt is the tokenizer's chat template rendered (as a string,
        with the generation prompt appended) for an empty system message and
        a user message holding a single audio placeholder, followed by the
        fixed suffix ``"language Danish<asr_text>"``.
        """
        messages = [
            {"role": "system", "content": ""},
            {"role": "user", "content": [{"type": "audio", "audio": ""}]},
        ]
        prompt = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
        # NOTE(review): the suffix presumably pins the output language and
        # opens the transcript segment -- confirm against the model's format.
        return prompt + "language Danish<asr_text>"