Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: User Selectable GPUs and GPU-based Model Recommendations #1730

Merged
merged 9 commits into from
Feb 6, 2024
16 changes: 8 additions & 8 deletions extensions/inference-nitro-extension/src/node/execute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ export const executableNitroFile = (): NitroExecutableOptions => {
if (nvidiaInfo["run_mode"] === "cpu") {
binaryFolder = path.join(binaryFolder, "win-cpu");
} else {
if (nvidiaInfo["cuda"].version === "12") {
binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
} else {
if (nvidiaInfo["cuda"].version === "11") {
binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
} else {
binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
}
cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
cudaVisibleDevices = nvidiaInfo["gpus_in_use"].join(",");
}
binaryName = "nitro.exe";
} else if (process.platform === "darwin") {
Expand All @@ -50,12 +50,12 @@ export const executableNitroFile = (): NitroExecutableOptions => {
if (nvidiaInfo["run_mode"] === "cpu") {
binaryFolder = path.join(binaryFolder, "linux-cpu");
} else {
if (nvidiaInfo["cuda"].version === "12") {
binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
} else {
if (nvidiaInfo["cuda"].version === "11") {
binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
} else {
binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
}
cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
cudaVisibleDevices = nvidiaInfo["gpus_in_use"].join(",");
}
}
return {
Expand Down
63 changes: 30 additions & 33 deletions extensions/inference-nitro-extension/src/node/nvidia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ const DEFALT_SETTINGS = {
},
gpus: [],
gpu_highest_vram: "",
gpus_in_use: [],
is_initial: true,
};

/**
Expand Down Expand Up @@ -48,11 +50,15 @@ export interface NitroProcessInfo {
*/
export async function updateNvidiaInfo() {
if (process.platform !== "darwin") {
await Promise.all([
updateNvidiaDriverInfo(),
updateCudaExistence(),
updateGpuInfo(),
]);
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
}
updateNvidiaDriverInfo();
updateGpuInfo();
}
}

Expand All @@ -73,12 +79,7 @@ export async function updateNvidiaDriverInfo(): Promise<void> {
exec(
"nvidia-smi --query-gpu=driver_version --format=csv,noheader",
(error, stdout) => {
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));

if (!error) {
const firstLine = stdout.split("\n")[0].trim();
Expand Down Expand Up @@ -107,7 +108,7 @@ export function checkFileExistenceInPaths(
/**
* Validate cuda for linux and windows
*/
export function updateCudaExistence() {
export function updateCudaExistence(data: Record<string, any> = DEFALT_SETTINGS): Record<string, any> {
let filesCuda12: string[];
let filesCuda11: string[];
let paths: string[];
Expand Down Expand Up @@ -141,34 +142,24 @@ export function updateCudaExistence() {
cudaVersion = "12";
}

let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}

data["cuda"].exist = cudaExists;
data["cuda"].version = cudaVersion;
if (cudaExists) {
console.log(data["is_initial"], data["gpus_in_use"]);
if (cudaExists && data["is_initial"] && data["gpus_in_use"].length > 0) {
data.run_mode = "gpu";
}
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
data.is_initial = false;
return data;
}

/**
* Get GPU information
*/
export async function updateGpuInfo(): Promise<void> {
exec(
"nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
"nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits",
(error, stdout) => {
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));

if (!error) {
// Get GPU info and gpu has higher memory first
Expand All @@ -178,21 +169,27 @@ export async function updateGpuInfo(): Promise<void> {
.trim()
.split("\n")
.map((line) => {
let [id, vram] = line.split(", ");
let [id, vram, name] = line.split(", ");
vram = vram.replace(/\r/g, "");
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram);
highestVramId = id;
}
return { id, vram };
return { id, vram, name };
});

data["gpus"] = gpus;
data["gpu_highest_vram"] = highestVramId;
data.gpus = gpus;
data.gpu_highest_vram = highestVramId;
} else {
data["gpus"] = [];
data.gpus = [];
data.gpu_highest_vram = "";
}

if (!data["gpus_in_use"] || data["gpus_in_use"].length === 0) {
data.gpus_in_use = [data["gpu_highest_vram"]];
}

data = updateCudaExistence(data);
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
Promise.resolve();
}
Expand Down
5 changes: 3 additions & 2 deletions extensions/monitoring-extension/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@janhq/monitoring-extension",
"version": "1.0.9",
"version": "1.0.10",
"description": "This extension provides system health and OS level data",
"main": "dist/index.js",
"module": "dist/module.js",
Expand All @@ -26,6 +26,7 @@
"README.md"
],
"bundleDependencies": [
"node-os-utils"
"node-os-utils",
"@janhq/core"
]
}
54 changes: 47 additions & 7 deletions extensions/monitoring-extension/src/module.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
const nodeOsUtils = require("node-os-utils");
const getJanDataFolderPath = require("@janhq/core/node").getJanDataFolderPath;
const path = require("path");
const { readFileSync } = require("fs");
const exec = require("child_process").exec;

const NVIDIA_INFO_FILE = path.join(
getJanDataFolderPath(),
"settings",
"settings.json"
);

const getResourcesInfo = () =>
new Promise((resolve) => {
Expand All @@ -16,18 +26,48 @@ const getResourcesInfo = () =>
});

const getCurrentLoad = () =>
new Promise((resolve) => {
new Promise((resolve, reject) => {
nodeOsUtils.cpu.usage().then((cpuPercentage) => {
const response = {
cpu: {
usage: cpuPercentage,
},
let data = {
run_mode: "cpu",
gpus_in_use: [],
};
resolve(response);
if (process.platform !== "darwin") {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
}
if (data.run_mode === "gpu" && data.gpus_in_use.length > 0) {
const gpuIds = data["gpus_in_use"].join(",");
if (gpuIds !== "") {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, stderr) => {
if (error) {
console.error(`exec error: ${error}`);
reject(error);
return;
}
const gpuInfo = stdout.trim().split("\n").map((line) => {
const [id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization] = line.split(", ").map(item => item.replace(/\r/g, ""));
return { id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization };
});
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo
});
}
);
} else {
// Handle the case where gpuIds is empty
resolve({ cpu: { usage: cpuPercentage }, gpu: [] });
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({ cpu: { usage: cpuPercentage }, gpu: [] });
}
});
});

module.exports = {
getResourcesInfo,
getCurrentLoad,
};
};
20 changes: 19 additions & 1 deletion web/containers/Layout/BottomBar/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

const BottomBar = () => {
const { activeModel, stateModel } = useActiveModel()
const { ram, cpu } = useGetSystemResources()
const { ram, cpu, gpus } = useGetSystemResources()
const progress = useAtomValue(appDownloadProgress)
const downloadedModels = useAtomValue(downloadedModelsAtom)

Expand All @@ -57,6 +57,13 @@
const setShowSelectModelModal = useSetAtom(showSelectModelModalAtom)
const [serverEnabled] = useAtom(serverEnabledAtom)

const calculateGpuMemoryUsage = (gpu: Record<string, never>) => {
const total = parseInt(gpu.memoryTotal)
const free = parseInt(gpu.memoryFree)
if (!total || !free) return 0
return Math.round(((total - free) / total) * 100)
}

return (
<div className="fixed bottom-0 left-16 z-20 flex h-12 w-[calc(100%-64px)] items-center justify-between border-t border-border bg-background/80 px-3">
<div className="flex flex-shrink-0 items-center gap-x-2">
Expand Down Expand Up @@ -119,6 +126,17 @@
<SystemItem name="CPU:" value={`${cpu}%`} />
<SystemItem name="Mem:" value={`${ram}%`} />
</div>
{gpus.length > 0 && (
<div className="flex items-center gap-x-2">
{gpus.map((gpu, index) => (
<SystemItem
key={index}
name={`GPU ${gpu.id}:`}
value={`${gpu.utilization}% Util, ${calculateGpuMemoryUsage(gpu)}% Mem`}
/>
))}
</div>
)}
{/* VERSION is defined by webpack, please see next.config.js */}
<span className="text-xs text-muted-foreground">
Jan v{VERSION ?? ''}
Expand Down
2 changes: 2 additions & 0 deletions web/helpers/atoms/SystemBar.atom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ export const usedRamAtom = atom<number>(0)
export const availableRamAtom = atom<number>(0)

export const cpuUsageAtom = atom<number>(0)

export const nvidiaTotalVramAtom = atom<number>(0)
20 changes: 19 additions & 1 deletion web/hooks/useGetSystemResources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,19 @@
cpuUsageAtom,
totalRamAtom,
usedRamAtom,
nvidiaTotalVramAtom,
} from '@/helpers/atoms/SystemBar.atom'

export default function useGetSystemResources() {
const [ram, setRam] = useState<number>(0)
const [cpu, setCPU] = useState<number>(0)

const [gpus, setGPUs] = useState<Record<string, never>[]>([])
const setTotalRam = useSetAtom(totalRamAtom)
const setUsedRam = useSetAtom(usedRamAtom)
const setAvailableRam = useSetAtom(availableRamAtom)
const setCpuUsage = useSetAtom(cpuUsageAtom)
const setTotalNvidiaVram = useSetAtom(nvidiaTotalVramAtom)

const getSystemResources = async () => {
if (
Expand Down Expand Up @@ -48,12 +52,25 @@
)
setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))

const gpus = currentLoadInfor?.gpu ?? []
setGPUs(gpus)

let totalNvidiaVram = 0
if (gpus.length > 0) {
totalNvidiaVram = gpus.reduce(
(total: number, gpu: { memoryTotal: string }) =>
total + Number(gpu.memoryTotal),
0
)
}
setTotalNvidiaVram(totalNvidiaVram)
}

useEffect(() => {
getSystemResources()

// Fetch interval - every 0.5s
// Fetch interval - every 2s
// TODO: Will we really need this?
// There is a possibility that this will be removed and replaced by the process event hook?
const intervalId = setInterval(() => {
Expand All @@ -69,5 +86,6 @@
totalRamAtom,
ram,
cpu,
gpus,
}
}
3 changes: 3 additions & 0 deletions web/hooks/useSettings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,17 @@
const saveSettings = async ({
runMode,
notify,
gpusInUse,
}: {
runMode?: string | undefined
notify?: boolean | undefined
gpusInUse?: string[] | undefined
}) => {
const settingsFile = await joinPath(['file://settings', 'settings.json'])
const settings = await readSettings()
if (runMode != null) settings.run_mode = runMode
if (notify != null) settings.notify = notify
if (gpusInUse != null) settings.gpus_in_use = gpusInUse
await fs.writeFileSync(settingsFile, JSON.stringify(settings))
}

Expand Down
Loading
Loading