chore: Sync main to dev 0.5.1 #3076

Merged · 6 commits · Jun 21, 2024
2 changes: 1 addition & 1 deletion core/src/types/api/index.ts
@@ -169,5 +169,5 @@ export const APIEvents = [
 export type PayloadType = {
   messages: ChatCompletionMessage[]
   model: string
-  stream: Boolean
+  stream: boolean
 }
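The only change here replaces TypeScript's `Boolean` object-wrapper type with the `boolean` primitive on the `stream` field. A quick illustration (not from the PR) of why the wrapper type is rarely what you want:

```ts
// Boolean (the wrapper) also admits object instances, and every object is truthy,
// so a "false" flag can still pass an if-check. The boolean primitive cannot.
const wrapped: Boolean = new Boolean(false)
const primitive: boolean = false

console.log(wrapped ? 'runs' : 'skipped')   // "runs": the Boolean object itself is truthy
console.log(primitive ? 'runs' : 'skipped') // "skipped"
```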
2 changes: 1 addition & 1 deletion extensions/inference-anthropic-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-anthropic-extension",
   "productName": "Anthropic Inference Engine",
-  "version": "1.0.0",
+  "version": "1.0.1",
   "description": "This extension enables Anthropic chat completion API calls",
   "main": "dist/index.js",
   "module": "dist/module.js",
34 changes: 29 additions & 5 deletions extensions/inference-anthropic-extension/src/index.ts
@@ -19,6 +19,7 @@ enum Settings {
 }
 
 type AnthropicPayloadType = {
+  stream: boolean
   model?: string
   max_tokens?: number
   messages?: Array<{ role: string; content: string }>
@@ -86,16 +87,22 @@ export default class JanInferenceAnthropicExtension extends RemoteOAIEngine {
   // Override the transformPayload method to convert the payload to the required format
   transformPayload = (payload: PayloadType): AnthropicPayloadType => {
     if (!payload.messages || payload.messages.length === 0) {
-      return { max_tokens: this.maxTokens, messages: [], model: payload.model }
+      return {
+        max_tokens: this.maxTokens,
+        messages: [],
+        model: payload.model,
+        stream: payload.stream,
+      }
     }
 
     const convertedData: AnthropicPayloadType = {
       max_tokens: this.maxTokens,
       messages: [],
       model: payload.model,
+      stream: payload.stream,
     }
 
-    payload.messages.forEach((item, index) => {
+    payload.messages.forEach((item) => {
       if (item.role === ChatCompletionRole.User) {
         convertedData.messages.push({
           role: 'user',
@@ -112,13 +119,30 @@ export default class JanInferenceAnthropicExtension extends RemoteOAIEngine {
     return convertedData
   }
 
+  // Sample returned stream data from anthropic
+  // {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
+  // {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"} }
+  // {"type":"content_block_stop","index":0 }
+  // {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":12} }
+
   // Override the transformResponse method to convert the response to the required format
   transformResponse = (data: any): string => {
+    // handling stream response
+    if (typeof data === 'string' && data.trim().length === 0) return ''
+    if (typeof data === 'string' && data.startsWith('event: ')) return ''
+    if (typeof data === 'string' && data.startsWith('data: ')) {
+      data = data.replace('data: ', '')
+      const parsedData = JSON.parse(data)
+      if (parsedData.type !== 'content_block_delta') return ''
+      return parsedData.delta?.text ?? ''
+    }
+
+    // non stream response
     if (data.content && data.content.length > 0 && data.content[0].text) {
       return data.content[0].text
+    } else {
+      console.error('Invalid response format:', data)
+      return ''
     }
-
-    console.error('Invalid response format:', data)
-    return ''
   }
 }
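For reference, a minimal standalone sketch of the streaming logic added above, detached from the extension class (hypothetical usage, simplified error handling): only `content_block_delta` events contribute text, and everything else resolves to an empty string.

```ts
// Sketch only: mirrors the transformResponse branch added in this PR.
const transformResponse = (data: any): string => {
  if (typeof data === 'string' && data.trim().length === 0) return ''
  if (typeof data === 'string' && data.startsWith('event: ')) return ''
  if (typeof data === 'string' && data.startsWith('data: ')) {
    const parsed = JSON.parse(data.replace('data: ', ''))
    if (parsed.type !== 'content_block_delta') return ''
    return parsed.delta?.text ?? ''
  }
  // non-stream response: Anthropic returns an array of content blocks
  return data?.content?.[0]?.text ?? ''
}

// Using the sample stream line quoted in the diff's comments:
console.log(
  transformResponse(
    'data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}'
  )
) // "Hello"
```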
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
     "llama_model_path": "aya-23-35B-Q4_K_M.gguf",
-    "ngl": 40
+    "ngl": 41
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
     "llama_model_path": "aya-23-8B-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
     "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 32000,
     "prompt_template": "{system_message} [INST] {prompt} [/INST]",
     "llama_model_path": "Codestral-22B-v0.1-Q4_K_M.gguf",
-    "ngl": 56
+    "ngl": 57
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 131072,
     "prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
     "llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf",
-    "ngl": 40
+    "ngl": 41
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
     "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
-    "ngl": 24
+    "ngl": 25
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
     "llama_model_path": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
-    "ngl": 62
+    "ngl": 63
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
     "llama_model_path": "gemma-2b-it-q4_k_m.gguf",
-    "ngl": 18
+    "ngl": 19
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
     "llama_model_path": "gemma-7b-it-q4_K_M.gguf",
-    "ngl": 28
+    "ngl": 29
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
     "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf",
-    "ngl": 80
+    "ngl": 81
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
     "llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
     "llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 2048,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf",
-    "ngl": 22
+    "ngl": 23
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "{system_message} [INST] {prompt} [/INST]",
     "llama_model_path": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
     "llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
     "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "max_tokens": 4096,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 128000,
     "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
     "llama_model_path": "Phi-3-medium-128k-instruct-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "max_tokens": 128000,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 16384,
     "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
     "llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf",
-    "ngl": 48
+    "ngl": 49
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "Qwen2-7B-Instruct-Q4_K_M.gguf",
-    "ngl": 28
+    "ngl": 29
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>",
     "llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "stealth-v1.3.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
     "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
-    "ngl": 22
+    "ngl": 23
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "trinity-v1.2.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]",
     "llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
     "llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf",
-    "ngl": 40
+    "ngl": 41
   },
   "parameters": {
     "temperature": 0.7,
2 changes: 1 addition & 1 deletion (model.json, path not shown)
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "yi-34b-chat.Q4_K_M.gguf",
-    "ngl": 60
+    "ngl": 61
   },
   "parameters": {
     "temperature": 0.7,
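A hedged reading of the model.json hunks above (the PR itself does not explain them): `ngl` is the llama.cpp-style setting for how many layers to offload to the GPU, and each preset is bumped by exactly one, which looks like it accounts for the model's output layer on top of its transformer blocks. Illustrative fragment based on the Meta-Llama-3-8B preset above, with the assumed interpretation in comments:

```ts
// Assumption, not stated in the PR: ngl counts GPU-offloaded layers.
const settings = {
  ctx_len: 8192,
  llama_model_path: 'Meta-Llama-3-8B-Instruct-Q4_K_M.gguf',
  ngl: 33, // was 32; presumably all 32 transformer blocks plus the output layer
}
```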
7 changes: 7 additions & 0 deletions web/hooks/useActiveModel.ts
@@ -7,6 +7,7 @@ import { toaster } from '@/containers/Toast'
 
 import { LAST_USED_MODEL_ID } from './useRecommendedModel'
 
+import { vulkanEnabledAtom } from '@/helpers/atoms/AppConfig.atom'
 import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
@@ -34,6 +35,7 @@ export function useActiveModel() {
   const downloadedModels = useAtomValue(downloadedModelsAtom)
   const setLoadModelError = useSetAtom(loadModelErrorAtom)
   const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)
+  const isVulkanEnabled = useAtomValue(vulkanEnabledAtom)
 
   const downloadedModelsRef = useRef<Model[]>([])
 
@@ -90,6 +92,11 @@
       }
     }
 
+    if (isVulkanEnabled) {
+      // @ts-expect-error flash_attn is newly added and will be migrate to cortex in the future
+      model.settings['flash_attn'] = false
+    }
+
     localStorage.setItem(LAST_USED_MODEL_ID, model.id)
     const engine = EngineManager.instance().get(model.engine)
     return engine
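One behavioural note on the hook change above: when the Vulkan backend is enabled, `flash_attn` is forced off in the model settings before they reach the engine, presumably because flash attention is not supported on that backend (the PR does not state the reason). A self-contained sketch of the pattern, with simplified stand-in types rather than the app's real ones:

```ts
// Sketch under stated assumptions: a backend-specific override applied to
// model settings just before they are handed to the inference engine.
type ModelSettings = Record<string, unknown>

function applyBackendOverrides(settings: ModelSettings, vulkanEnabled: boolean): ModelSettings {
  if (vulkanEnabled) {
    // Mirrors the guard added in useActiveModel.ts
    return { ...settings, flash_attn: false }
  }
  return settings
}

console.log(applyBackendOverrides({ ngl: 33, flash_attn: true }, true))
// { ngl: 33, flash_attn: false }
```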
4 changes: 2 additions & 2 deletions web/screens/Settings/MyModels/MyModelList/index.tsx
@@ -101,7 +101,7 @@ const MyModelList = ({ model }: Props) => {
           {toGibibytes(model.metadata.size)}
         </Badge>
 
-        <div className="flex items-center gap-x-4">
+        <div className="relative flex items-center gap-x-4">
           {stateModel.loading && stateModel.model?.id === model.id ? (
             <Badge
               className="inline-flex items-center space-x-2"
@@ -144,7 +144,7 @@ const MyModelList = ({ model }: Props) => {
           </Button>
           {more && (
             <div
-              className="absolute right-4 top-10 z-20 w-52 overflow-hidden rounded-lg border border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))] shadow-lg"
+              className="absolute right-8 top-0 z-20 w-52 overflow-hidden rounded-lg border border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))] shadow-lg"
              ref={setMenu}
            >
              <Tooltip
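The two class changes above appear to work as a pair: adding `relative` to the row container makes it the positioning context, so the dropdown's `absolute right-8 top-0` anchors against that row rather than a more distant ancestor. A minimal JSX sketch of the pattern, simplified and not the component's real markup:

```tsx
// Simplified Tailwind pattern: the `relative` parent becomes the containing
// block for the absolutely positioned menu.
const RowWithMenu = () => (
  <div className="relative flex items-center gap-x-4">
    <button>More</button>
    <div className="absolute right-8 top-0 z-20 w-52 rounded-lg border shadow-lg">
      menu items
    </div>
  </div>
)

export default RowWithMenu
```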