refactor: add models for openai-compatible platforms (#471)

pull/472/head
sigoden 1 month ago committed by GitHub
parent 8aa18b59f5
commit 5eae392dbd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -70,6 +70,7 @@ clients:
# See https://docs.mistral.ai/
- type: openai-compatible
name: mistral
api_base: https://api.mistral.ai/v1
api_key: xxx # ENV: {client}_API_KEY
# See https://docs.cohere.com/docs/the-cohere-platform
@ -77,11 +78,15 @@ clients:
api_key: xxx # ENV: {client}_API_KEY
# See https://docs.perplexity.ai/docs/getting-started
- type: perplexity
- type: openai-compatible
name: perplexity
api_base: https://api.perplexity.ai
api_key: pplx-xxx # ENV: {client}_API_KEY
# See https://console.groq.com/docs/quickstart
- type: groq
- type: openai-compatible
name: groq
api_base: https://api.groq.com/openai/v1
api_key: gsk_xxx # ENV: {client}_API_KEY
# See https://github.com/jmorganca/ollama
@ -137,71 +142,43 @@ clients:
api_key: sk-xxx # ENV: {client}_API_KEY
# See https://platform.moonshot.cn/docs/intro
- type: moonshot
- type: openai-compatible
name: moonshot
api_base: https://api.moonshot.cn/v1
api_key: sk-xxx # ENV: {client}_API_KEY
# See https://docs.endpoints.anyscale.com/
- type: openai-compatible
name: anyscale
api_base: https://api.endpoints.anyscale.com/v1
api_key: xxx # ENV: {client}_API_KEY
models:
# https://docs.endpoints.anyscale.com/text-generation/query-a-model#select-a-model
- name: meta-llama/Meta-Llama-3-70B-Instruct
max_input_tokens: 8192
input_price: 1
output_price: 1
# See https://deepinfra.com/docs
- type: openai-compatible
name: deepinfra
api_base: https://api.deepinfra.com/v1/openai
api_key: xxx # ENV: {client}_API_KEY
models:
# https://deepinfra.com/models
- name: meta-llama/Meta-Llama-3-70B-Instruct
max_input_tokens: 8192
input_price: 0.59
output_price: 0.79
# See https://readme.fireworks.ai/docs/quickstart
- type: openai-compatible
name: fireworks
api_base: https://api.fireworks.ai/inference/v1
api_key: xxx # ENV: {client}_API_KEY
models:
# https://fireworks.ai/models
- name: accounts/fireworks/models/llama-v3-70b-instruct
max_input_tokens: 8192
input_price: 0.9
output_price: 0.9
# See https://openrouter.ai/docs#quick-start
- type: openai-compatible
name: openrouter
api_base: https://openrouter.ai/api/v1
api_key: xxx # ENV: {client}_API_KEY
models:
# https://openrouter.ai/docs#models
- name: meta-llama/llama-3-70b-instruct
max_input_tokens: 8192
input_price: 0.81
output_price: 0.81
# See https://octo.ai/docs/getting-started/quickstart
- type: openai-compatible
name: octoai
api_base: https://text.octoai.run/v1
api_key: xxx # ENV: {client}_API_KEY
models:
# https://octo.ai/docs/getting-started/inference-models
- name: meta-llama-3-70b-instruct
max_input_tokens: 8192
input_price: 0.86
output_price: 0.86
# See https://docs.together.ai/docs/quickstart
- type: openai-compatible
name: together
api_key: xxx # ENV: {client}_API_KEY
models:
# https://docs.together.ai/docs/inference-models
- name: meta-llama/Llama-3-70b-chat-hf
max_input_tokens: 8192
input_price: 0.9
output_price: 0.9
api_base: https://api.together.xyz/v1
api_key: xxx # ENV: {client}_API_KEY

@ -1,7 +1,3 @@
# NOTES:
# - This model list is scheduled to be updated with each new aichat release. Please do not submit PR to add new models.
# - This model list does not include models officially marked as legacy or beta.
- platform: openai
# docs:
# - https://platform.openai.com/docs/models
@ -129,10 +125,6 @@
max_input_tokens: 32000
input_price: 2
output_price: 6
- name: mistral-medium-latest
max_input_tokens: 32000
input_price: 2.7
output_price: 8.1
- name: mistral-large-latest
max_input_tokens: 32000
input_price: 8
@ -310,16 +302,6 @@
input_price: 0.25
output_price: 1.25
supports_vision: true
- name: meta.llama2-13b-chat-v1
max_input_tokens: 4096
max_output_tokens: 2048
input_price: 0.75
output_price: 1
- name: meta.llama2-70b-chat-v1
max_input_tokens: 4096
max_output_tokens: 2048
input_price: 1.95
output_price: 2.56
- name: meta.llama3-8b-instruct-v1:0
max_input_tokens: 8192
max_output_tokens: 4096
@ -351,21 +333,23 @@
# - https://developers.cloudflare.com/workers-ai/models/
# - https://developers.cloudflare.com/workers-ai/platform/pricing/
# notes:
# - get max_output_tokens info from models doc
# - unable to get max_output_tokens info
models:
- name: '@cf/meta/llama-2-7b-chat-fp16'
max_input_tokens: 3072
max_output_tokens: 2500
input_price: 0.56
output_price: 6.6
- name: '@cf/meta/llama-2-7b-chat-int8'
max_input_tokens: 2048
max_output_tokens: 1800
input_price: 0.16
output_price: 0.24
- name: '@cf/mistral/mistral-7b-instruct-v0.1'
input_price: 0.11
output_price: 0.19
- name: '@cf/meta/llama-3-8b-instruct'
max_input_tokens: 4096
max_output_tokens: 4096
- name: '@cf/mistral/mistral-7b-instruct-v0.2-lora'
max_input_tokens: 4096
max_output_tokens: 4096
- name: '@cf/google/gemma-7b-it-lora'
max_input_tokens: 4096
max_output_tokens: 4096
- name: '@cf/qwen/qwen1.5-14b-chat-awq'
max_input_tokens: 4096
max_output_tokens: 4096
- name: '@hf/nexusflow/starling-lm-7b-beta'
max_input_tokens: 4096
max_output_tokens: 4096
- platform: replicate
# docs:
@ -482,3 +466,310 @@
max_input_tokens: 128000
input_price: 8.4
output_price: 8.4
- platform: anyscale
# docs:
# - https://docs.endpoints.anyscale.com/text-generation/query-a-model/#select-a-model
# - https://docs.endpoints.anyscale.com/pricing
models:
- name: meta-llama/Meta-Llama-3-8B-Instruct
max_input_tokens: 8192
input_price: 0.15
output_price: 0.15
- name: meta-llama/Meta-Llama-3-70B-Instruct
max_input_tokens: 8192
input_price: 1.0
output_price: 1.0
- name: codellama/CodeLlama-70b-Instruct-hf
max_input_tokens: 4096
input_price: 1.0
output_price: 1.0
- name: mistralai/Mistral-7B-Instruct-v0.1
max_input_tokens: 16384
input_price: 0.15
output_price: 0.15
- name: mistralai/Mixtral-8x7B-Instruct-v0.1
max_input_tokens: 32768
input_price: 0.50
output_price: 0.50
- name: mistralai/Mixtral-8x22B-Instruct-v0.1
max_input_tokens: 65536
input_price: 0.90
output_price: 0.90
- name: google/gemma-7b-it
max_input_tokens: 8192
input_price: 0.15
output_price: 0.15
- platform: deepinfra
# docs:
# - https://deepinfra.com/models
# - https://deepinfra.com/pricing
models:
- name: meta-llama/Meta-Llama-3-8B-Instruct
max_input_tokens: 8192
input_price: 0.08
output_price: 0.08
- name: meta-llama/Meta-Llama-3-70B-Instruct
max_input_tokens: 8192
input_price: 0.59
output_price: 0.79
- name: mistralai/Mistral-7B-Instruct-v0.2
max_input_tokens: 32768
input_price: 0.07
output_price: 0.07
- name: mistralai/Mixtral-8x7B-Instruct-v0.1
max_input_tokens: 32768
input_price: 0.24
output_price: 0.24
- name: mistralai/Mixtral-8x22B-Instruct-v0.1
max_input_tokens: 65536
input_price: 0.65
output_price: 0.65
- name: google/gemma-1.1-7b-it
max_input_tokens: 8192
input_price: 0.07
output_price: 0.07
- name: databricks/dbrx-instruct
max_input_tokens: 32768
input_price: 0.6
output_price: 0.6
- name: 01-ai/Yi-34B-Chat
max_input_tokens: 4096
input_price: 0.6
output_price: 0.6
- platform: fireworks
# docs:
# - https://fireworks.ai/models
# - https://fireworks.ai/pricing
models:
- name: accounts/fireworks/models/llama-v3-8b-instruct
max_input_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: accounts/fireworks/models/llama-v3-70b-instruct
max_input_tokens: 8192
input_price: 0.9
output_price: 0.9
- name: accounts/fireworks/models/mistral-7b-instruct-v0p2
max_input_tokens: 32768
input_price: 0.2
output_price: 0.2
- name: accounts/fireworks/models/mixtral-8x7b-instruct
max_input_tokens: 32768
input_price: 0.5
output_price: 0.5
- name: accounts/fireworks/models/mixtral-8x22b-instruct
max_input_tokens: 65536
input_price: 0.9
output_price: 0.9
- name: accounts/fireworks/models/qwen-72b-chat
max_input_tokens: 4096
input_price: 0.9
output_price: 0.9
- name: accounts/fireworks/models/gemma-7b-it
max_input_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: accounts/fireworks/models/dbrx-instruct
max_input_tokens: 32768
input_price: 1.6
output_price: 1.6
- platform: openrouter
# docs:
# - https://openrouter.ai/docs#models
models:
- name: meta-llama/llama-3-8b-instruct
max_input_tokens: 8192
input_price: 0.1
output_price: 0.1
- name: meta-llama/llama-3-8b-instruct:nitro
max_input_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: meta-llama/llama-3-8b-instruct:extended
max_input_tokens: 16384
input_price: 0.275
output_price: 0.283
- name: meta-llama/llama-3-70b-instruct
max_input_tokens: 8192
input_price: 0.81
output_price: 0.81
- name: meta-llama/llama-3-70b-instruct:nitro
max_input_tokens: 8192
input_price: 0.9
output_price: 0.9
- name: mistralai/mistral-7b-instruct:free
max_input_tokens: 32768
input_price: 0.0
output_price: 0.0
- name: codellama/codellama-70b-instruct
max_input_tokens: 2048
input_price: 0.81
output_price: 0.81
- name: google/gemma-7b-it:free
max_input_tokens: 8192
input_price: 0.0
output_price: 0.0
- name: 01-ai/yi-34b-chat
max_input_tokens: 4096
input_price: 0.72
output_price: 0.72
- name: openai/gpt-3.5-turbo
max_input_tokens: 16385
input_price: 0.5
output_price: 1.5
- name: openai/gpt-4-turbo
max_input_tokens: 128000
input_price: 10
output_price: 30
supports_vision: true
- name: openai/gpt-4-turbo-preview
max_input_tokens: 128000
input_price: 10
output_price: 30
- name: gpt-4-vision-preview
max_input_tokens: 128000
max_output_tokens: 4096
input_price: 10
output_price: 30
supports_vision: true
- name: openai/gpt-4
max_input_tokens: 8192
input_price: 30
output_price: 60
- name: openai/gpt-4-32k
max_input_tokens: 32768
input_price: 60
output_price: 120
- name: google/gemini-pro
max_input_tokens: 91728
input_price: 0.125
output_price: 0.375
- name: google/gemini-pro-vision
max_input_tokens: 45875
input_price: 0.125
output_price: 0.375
supports_vision: true
- name: google/gemini-pro-1.5
max_input_tokens: 2800000
input_price: 2.5
output_price: 7.5
supports_vision: true
- name: anthropic/claude-3-opus
max_input_tokens: 200000
input_price: 15
output_price: 75
supports_vision: true
- name: anthropic/claude-3-sonnet
max_input_tokens: 200000
input_price: 3
output_price: 15
supports_vision: true
- name: anthropic/claude-3-haiku
max_input_tokens: 200000
input_price: 0.25
output_price: 1.25
supports_vision: true
- name: mistralai/mixtral-8x7b-instruct
max_input_tokens: 32768
input_price: 0.24
output_price: 0.24
- name: mistralai/mixtral-8x22b-instruct
max_input_tokens: 65536
input_price: 0.65
output_price: 0.65
- name: mistralai/mistral-small
max_input_tokens: 32000
input_price: 2
output_price: 6
- name: mistralai/mistral-large
max_input_tokens: 32000
input_price: 8
output_price: 24
- name: databricks/dbrx-instruct
max_input_tokens: 32768
input_price: 0.6
output_price: 0.6
- name: cohere/command-r
max_input_tokens: 128000
input_price: 0.5
output_price: 1.5
- name: cohere/command-r-plus
max_input_tokens: 128000
input_price: 3
output_price: 15
- platform: octoai
# docs:
# - https://octo.ai/docs/getting-started/inference-models
# - https://octo.ai/pricing/text-gen-solution/
models:
- name: meta-llama-3-8b-instruct
max_input_tokens: 8192
input_price: 0.13
output_price: 0.13
- name: meta-llama-3-70b-instruct
max_input_tokens: 8192
input_price: 0.86
output_price: 0.86
- name: mistral-7b-instruct
max_input_tokens: 32768
input_price: 0.13
output_price: 0.13
- name: mixtral-8x7b-instruct
max_input_tokens: 32768
input_price: 0.34
output_price: 0.34
- name: mixtral-8x22b-instruct
max_input_tokens: 65536
input_price: 0.86
output_price: 0.86
- platform: together
# docs:
# - https://docs.together.ai/docs/inference-models
# - https://www.together.ai/pricing
models:
- name: meta-llama/Llama-3-8b-chat-hf
max_input_tokens: 8000
input_price: 0.2
output_price: 0.2
- name: meta-llama/Llama-3-70b-chat-hf
max_input_tokens: 8000
input_price: 0.9
output_price: 0.9
- name: mistralai/Mistral-7B-Instruct-v0.2
max_input_tokens: 32768
input_price: 0.2
output_price: 0.2
- name: mistralai/Mixtral-8x7B-Instruct-v0.1
max_input_tokens: 32768
input_price: 0.9
output_price: 0.9
- name: mistralai/Mixtral-8x22B-Instruct-v0.1
max_input_tokens: 65536
input_price: 1.2
output_price: 1.2
- name: google/gemma-7b-it
max_input_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: Qwen/Qwen1.5-72B-Chat
max_input_tokens: 32768
input_price: 0.9
output_price: 0.9
- name: databricks/dbrx-instruct
max_input_tokens: 32768
input_price: 1.2
output_price: 1.2
- name: zero-one-ai/Yi-34B-Chat
max_input_tokens: 4096
input_price: 0.8
output_price: 0.8
- name: allenai/OLMo-7B-Instruct
max_input_tokens: 2048
input_price: 0.2
output_price: 0.2

@ -2,7 +2,7 @@ use super::claude::{claude_build_body, claude_extract_completion};
use super::{
catch_error, generate_prompt, BedrockClient, Client, CompletionDetails, ExtraConfig, Model,
ModelConfig, PromptAction, PromptFormat, PromptKind, SendData, SseHandler,
LLAMA2_PROMPT_FORMAT, LLAMA3_PROMPT_FORMAT,
LLAMA3_PROMPT_FORMAT, MISTRAL_PROMPT_FORMAT,
};
use crate::utils::{base64_decode, encode_uri, hex_encode, hmac_sha256, sha256};
@ -140,7 +140,7 @@ async fn send_message(
match model_category {
ModelCategory::Anthropic => claude_extract_completion(&data),
ModelCategory::MetaLlama2 | ModelCategory::MetaLlama3 => llama_extract_completion(&data),
ModelCategory::MetaLlama3 => llama_extract_completion(&data),
ModelCategory::Mistral => mistral_extrat_completion(&data),
}
}
@ -183,7 +183,7 @@ async fn send_message_streaming(
}
}
}
ModelCategory::MetaLlama2 | ModelCategory::MetaLlama3 => {
ModelCategory::MetaLlama3 => {
if let Some(text) = data["generation"].as_str() {
handler.text(text)?;
}
@ -220,7 +220,6 @@ fn build_body(data: SendData, model: &Model, model_category: &ModelCategory) ->
body["anthropic_version"] = "bedrock-2023-05-31".into();
Ok(body)
}
ModelCategory::MetaLlama2 => meta_llama_build_body(data, model, LLAMA2_PROMPT_FORMAT),
ModelCategory::MetaLlama3 => meta_llama_build_body(data, model, LLAMA3_PROMPT_FORMAT),
ModelCategory::Mistral => mistral_build_body(data, model),
}
@ -256,7 +255,7 @@ fn mistral_build_body(data: SendData, model: &Model) -> Result<Value> {
top_p,
stream: _,
} = data;
let prompt = generate_prompt(&messages, LLAMA2_PROMPT_FORMAT)?;
let prompt = generate_prompt(&messages, MISTRAL_PROMPT_FORMAT)?;
let mut body = json!({ "prompt": prompt });
if let Some(v) = model.max_output_tokens {
@ -294,7 +293,6 @@ fn mistral_extrat_completion(data: &Value) -> Result<(String, CompletionDetails)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ModelCategory {
Anthropic,
MetaLlama2,
MetaLlama3,
Mistral,
}
@ -305,8 +303,6 @@ impl FromStr for ModelCategory {
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
if s.starts_with("anthropic.") {
Ok(ModelCategory::Anthropic)
} else if s.starts_with("meta.llama2") {
Ok(ModelCategory::MetaLlama2)
} else if s.starts_with("meta.llama3") {
Ok(ModelCategory::MetaLlama3)
} else if s.starts_with("mistral") {

@ -390,10 +390,11 @@ pub fn create_openai_compatible_client_config(client: &str) -> Result<Option<(St
.find(|(name, _)| client == *name)
{
None => Ok(None),
Some((name, _)) => {
Some((name, api_base)) => {
let mut config = json!({
"type": "openai-compatible",
"name": name,
"api_base": api_base,
});
let prompts = if ALL_CLIENT_MODELS.iter().any(|v| &v.platform == name) {
vec![("api_key", "API Key:", false, PromptKind::String)]

@ -22,7 +22,7 @@ pub const GENERIC_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
end: "### Assistant\n",
};
pub const LLAMA2_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
pub const MISTRAL_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
begin: "",
system_pre_message: "[INST] <<SYS>>",
system_post_message: "<</SYS>> [/INST]",
@ -136,7 +136,7 @@ pub fn smart_prompt_format(model_name: &str) -> PromptFormat<'static> {
|| model_name.contains("mistral")
|| model_name.contains("mixtral")
{
LLAMA2_PROMPT_FORMAT
MISTRAL_PROMPT_FORMAT
} else if model_name.contains("phi3") || model_name.contains("phi-3") {
PHI3_PROMPT_FORMAT
} else if model_name.contains("command-r") {

Loading…
Cancel
Save