Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add nitro vulkan to support AMD GPU/ APU and Intel Arc GPU #2056

Merged
merged 5 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 28,5 @@ extensions/inference-nitro-extension/bin/*/*.exp
extensions/inference-nitro-extension/bin/*/*.lib
extensions/inference-nitro-extension/bin/saved-*
extensions/inference-nitro-extension/bin/*.tar.gz

extensions/inference-nitro-extension/bin/vulkaninfoSDK.exe
extensions/inference-nitro-extension/bin/vulkaninfo
2 changes: 1 addition & 1 deletion extensions/inference-nitro-extension/download.bat
Original file line number Diff line number Diff line change
@@ -1,3 1,3 @@
@echo off
set /p NITRO_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin
2 changes: 1 addition & 1 deletion extensions/inference-nitro-extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 8,7 @@
"license": "AGPL-3.0",
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod x ./bin/linux-cuda-11-7/nitro",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod x ./bin/linux-vulkan/nitro && download https://delta.jan.ai/vulkaninfo -o ./bin && chmod x ./bin/vulkaninfo",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod x ./bin/mac-x64/nitro",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 1,11 @@
import { writeFileSync, existsSync, readFileSync } from 'fs'
import { exec } from 'child_process'
import { exec, spawn } from 'child_process'
import path from 'path'
import { getJanDataFolderPath } from '@janhq/core/node'
import { getJanDataFolderPath, log } from '@janhq/core/node'

/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFALT_SETTINGS = {
notify: true,
Expand All @@ -21,12 22,17 @@ const DEFALT_SETTINGS = {
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: {
enabled: true,
gpu_in_use: '1',
},
}

/**
* Path to the settings file
**/
export const NVIDIA_INFO_FILE = path.join(
export const GPU_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
Expand All @@ -52,10 58,10 @@ export async function updateNvidiaInfo() {
if (process.platform !== 'darwin') {
let data
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
data = DEFALT_SETTINGS
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2))
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
}
updateNvidiaDriverInfo()
updateGpuInfo()
Expand All @@ -79,7 85,7 @@ export async function updateNvidiaDriverInfo(): Promise<void> {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))

if (!error) {
const firstLine = stdout.split('\n')[0].trim()
Expand All @@ -89,7 95,7 @@ export async function updateNvidiaDriverInfo(): Promise<void> {
data['nvidia_driver'].exist = false
}

writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2))
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
Promise.resolve()
}
)
Expand Down Expand Up @@ -158,42 164,77 @@ export function updateCudaExistence(
* Get GPU information
*/
export async function updateGpuInfo(): Promise<void> {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
(error, stdout) => {
let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))

if (!error) {
// Get GPU info and gpu has higher memory first
let highestVram = 0
let highestVramId = '0'
let gpus = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name }
})

data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = ''
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))

// Cuda
if (data['vulkan'] === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
(error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d ):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g

let gpus = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
gpus.push({ id, vram: 0, name })
}
data.gpus = gpus

if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}

data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
}
Promise.resolve()
}

if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
data.gpus_in_use = [data['gpu_highest_vram']]
)
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
(error, stdout) => {
if (!error) {
log(stdout)
// Get GPU info and gpu has higher memory first
let highestVram = 0
let highestVramId = '0'
let gpus = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name }
})

data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = ''
}

if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
data.gpus_in_use = [data['gpu_highest_vram']]
}

data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
Promise.resolve()
}

data = updateCudaExistence(data)
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2))
Promise.resolve()
}
)
)
}
}
36 changes: 25 additions & 11 deletions extensions/inference-nitro-extension/src/node/execute.ts
Original file line number Diff line number Diff line change
@@ -1,10 1,11 @@
import { readFileSync } from 'fs'
import * as path from 'path'
import { NVIDIA_INFO_FILE } from './nvidia'
import { GPU_INFO_FILE } from './accelerator'

export interface NitroExecutableOptions {
executablePath: string
cudaVisibleDevices: string
vkVisibleDevices: string
}
/**
* Find which executable file to run based on the current platform.
Expand All @@ -13,24 14,30 @@ export interface NitroExecutableOptions {
export const executableNitroFile = (): NitroExecutableOptions => {
let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default
let cudaVisibleDevices = ''
let vkVisibleDevices = ''
let binaryName = 'nitro'
/**
* The binary folder is different for each platform.
*/
if (process.platform === 'win32') {
/**
* For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
* For Windows: win-cpu, win-vulkan, win-cuda-11-7, win-cuda-12-0
*/
let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
if (nvidiaInfo['run_mode'] === 'cpu') {
let gpuInfo = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
if (gpuInfo['run_mode'] === 'cpu') {
binaryFolder = path.join(binaryFolder, 'win-cpu')
} else {
if (nvidiaInfo['cuda'].version === '11') {
if (gpuInfo['cuda']?.version === '11') {
binaryFolder = path.join(binaryFolder, 'win-cuda-11-7')
} else {
binaryFolder = path.join(binaryFolder, 'win-cuda-12-0')
}
cudaVisibleDevices = nvidiaInfo['gpus_in_use'].join(',')
cudaVisibleDevices = gpuInfo['gpus_in_use'].join(',')
}
if (gpuInfo['vulkan'] === true) {
binaryFolder = path.join(__dirname, '..', 'bin')
binaryFolder = path.join(binaryFolder, 'win-vulkan')
vkVisibleDevices = gpuInfo['gpus_in_use'].toString()
}
binaryName = 'nitro.exe'
} else if (process.platform === 'darwin') {
Expand All @@ -44,22 51,29 @@ export const executableNitroFile = (): NitroExecutableOptions => {
}
} else {
/**
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
* For Linux: linux-cpu, linux-vulkan, linux-cuda-11-7, linux-cuda-12-0
*/
let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
if (nvidiaInfo['run_mode'] === 'cpu') {
let gpuInfo = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
if (gpuInfo['run_mode'] === 'cpu') {
binaryFolder = path.join(binaryFolder, 'linux-cpu')
} else {
if (nvidiaInfo['cuda'].version === '11') {
if (gpuInfo['cuda']?.version === '11') {
binaryFolder = path.join(binaryFolder, 'linux-cuda-11-7')
} else {
binaryFolder = path.join(binaryFolder, 'linux-cuda-12-0')
}
cudaVisibleDevices = nvidiaInfo['gpus_in_use'].join(',')
cudaVisibleDevices = gpuInfo['gpus_in_use'].join(',')
}

if (gpuInfo['vulkan'] === true) {
binaryFolder = path.join(__dirname, '..', 'bin')
binaryFolder = path.join(binaryFolder, 'win-vulkan')
vkVisibleDevices = gpuInfo['gpus_in_use'].toString()
}
}
return {
executablePath: path.join(binaryFolder, binaryName),
cudaVisibleDevices,
vkVisibleDevices,
}
}
6 changes: 5 additions & 1 deletion extensions/inference-nitro-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 4,7 @@ import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import { log, getSystemResourceInfo } from '@janhq/core/node'
import { getNitroProcessInfo, updateNvidiaInfo } from './nvidia'
import { getNitroProcessInfo, updateNvidiaInfo } from './accelerator'
import {
Model,
InferenceEngine,
Expand Down Expand Up @@ -345,6 345,10 @@ function spawnNitroProcess(): Promise<any> {
env: {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
// Vulkan - Support 1 device at a time for now
...(executableOptions.vkVisibleDevices?.length > 0 && {
GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
}),
},
}
)
Expand Down
4 changes: 2 additions & 2 deletions extensions/monitoring-extension/src/module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 37,10 @@ const getCurrentLoad = () =>
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data['gpus_in_use'].join(',')
if (gpuIds !== '') {
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, stderr) => {
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
reject(error)
Expand Down
13 changes: 13 additions & 0 deletions web/context/FeatureToggle.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 5,9 @@ interface FeatureToggleContextType {
ignoreSSL: boolean
proxy: string
proxyEnabled: boolean
vulkanEnabled: boolean
setExperimentalFeature: (on: boolean) => void
setVulkanEnabled: (on: boolean) => void
setIgnoreSSL: (on: boolean) => void
setProxy: (value: string) => void
setProxyEnabled: (on: boolean) => void
Expand All @@ -16,7 18,9 @@ const initialContext: FeatureToggleContextType = {
ignoreSSL: false,
proxy: '',
proxyEnabled: false,
vulkanEnabled: false,
setExperimentalFeature: () => {},
setVulkanEnabled: () => {},
setIgnoreSSL: () => {},
setProxy: () => {},
setProxyEnabled: () => {},
Expand All @@ -31,13 35,15 @@ export default function FeatureToggleWrapper({
children: ReactNode
}) {
const EXPERIMENTAL_FEATURE = 'experimentalFeature'
const VULKAN_ENABLED = 'vulkanEnabled'
const IGNORE_SSL = 'ignoreSSLFeature'
const HTTPS_PROXY_FEATURE = 'httpsProxyFeature'
const PROXY_FEATURE_ENABLED = 'proxyFeatureEnabled'

const [experimentalFeature, directSetExperimentalFeature] =
useState<boolean>(false)
const [proxyEnabled, directSetProxyEnabled] = useState<boolean>(false)
const [vulkanEnabled, directEnableVulkan] = useState<boolean>(false)
const [ignoreSSL, directSetIgnoreSSL] = useState<boolean>(false)
const [proxy, directSetProxy] = useState<string>('')

Expand All @@ -57,6 63,11 @@ export default function FeatureToggleWrapper({
directSetExperimentalFeature(on)
}

const setVulkanEnabled = (on: boolean) => {
localStorage.setItem(VULKAN_ENABLED, on ? 'true' : 'false')
directEnableVulkan(on)
}

const setIgnoreSSL = (on: boolean) => {
localStorage.setItem(IGNORE_SSL, on ? 'true' : 'false')
directSetIgnoreSSL(on)
Expand All @@ -79,7 90,9 @@ export default function FeatureToggleWrapper({
ignoreSSL,
proxy,
proxyEnabled,
vulkanEnabled,
setExperimentalFeature,
setVulkanEnabled,
setIgnoreSSL,
setProxy,
setProxyEnabled,
Expand Down
Loading
Loading