Instructions to use dphn/dolphin-2.1-mistral-7b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use dphn/dolphin-2.1-mistral-7b with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="dphn/dolphin-2.1-mistral-7b")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("dphn/dolphin-2.1-mistral-7b")
model = AutoModelForCausalLM.from_pretrained("dphn/dolphin-2.1-mistral-7b")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Inference
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use dphn/dolphin-2.1-mistral-7b with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "dphn/dolphin-2.1-mistral-7b"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "dphn/dolphin-2.1-mistral-7b",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/dphn/dolphin-2.1-mistral-7b
- SGLang
How to use dphn/dolphin-2.1-mistral-7b with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "dphn/dolphin-2.1-mistral-7b" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "dphn/dolphin-2.1-mistral-7b",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "dphn/dolphin-2.1-mistral-7b" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "dphn/dolphin-2.1-mistral-7b",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
- Docker Model Runner
How to use dphn/dolphin-2.1-mistral-7b with Docker Model Runner:
docker model run hf.co/dphn/dolphin-2.1-mistral-7b
tokenizer = AutoTokenizer.from_pretrained("ehartford/dolphin-2.1-mistral-7b") results in unk error related to tokens greater than 32000
ValueError Traceback (most recent call last)
Cell In[57], line 1
----> 1 tokenizer = AutoTokenizer.from_pretrained("ehartford/dolphin-2.1-mistral-7b")
File H:\py310-venv\lib\site-packages\transformers\models\auto\tokenization_auto.py:694, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
690 if tokenizer_class is None:
691 raise ValueError(
692 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
693 )
--> 694 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
696 # Otherwise we have to be creative.
697 # if model is an encoder decoder, the encoder tokenizer class is used by default
698 if isinstance(config, EncoderDecoderConfig):
File H:\py310-venv\lib\site-packages\transformers\tokenization_utils_base.py:1812, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
1809 else:
1810 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1812 return cls._from_pretrained(
1813 resolved_vocab_files,
1814 pretrained_model_name_or_path,
1815 init_configuration,
1816 *init_inputs,
1817 use_auth_token=use_auth_token,
1818 cache_dir=cache_dir,
1819 local_files_only=local_files_only,
1820 _commit_hash=commit_hash,
1821 _is_local=is_local,
1822 **kwargs,
1823 )
File H:\py310-venv\lib\site-packages\transformers\tokenization_utils_base.py:1844, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
1842 has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None
1843 if (from_slow or not has_tokenizer_file) and cls.slow_tokenizer_class is not None:
-> 1844 slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
1845 copy.deepcopy(resolved_vocab_files),
1846 pretrained_model_name_or_path,
1847 copy.deepcopy(init_configuration),
1848 *init_inputs,
1849 use_auth_token=use_auth_token,
1850 cache_dir=cache_dir,
1851 local_files_only=local_files_only,
1852 _commit_hash=_commit_hash,
1853 **(copy.deepcopy(kwargs)),
1854 )
1855 else:
1856 slow_tokenizer = None
File H:\py310-venv\lib\site-packages\transformers\tokenization_utils_base.py:2031, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2024 raise ValueError(
2025 f"Wrong index found for {token}: should be {tokenizer.convert_tokens_to_ids(token)} but found "
2026 f"{index}."
2027 )
2028 elif not has_tokenizer_file and index != current_index:
2029 # Tokenizer slow: added token cannot already be in the vocabulary so its index needs to be the
2030 # current length of the tokenizer.
-> 2031 raise ValueError(
2032 f"Non-consecutive added token '{token}' found. "
2033 f"Should have index {current_index} but has index {index} in saved vocabulary."
2034 )
2036 is_special = bool(token in special_tokens)
2037 if is_last_special is None or is_last_special == is_special:
ValueError: Non-consecutive added token '<unk>' found. Should have index 32000 but has index 0 in saved vocabulary.
I get this same error trying to deploy on Hugging Face endpoints.
same