custom_llm.py
from typing import Any, Dict, List, Optional, Type, Union

from litellm.types.utils import ModelResponse
from pydantic import BaseModel

from holmes.core.llm import LLM
from holmes.core.tool_calling_llm import ToolCallingLLM
from holmes.core.tools import Tool, ToolExecutor
from holmes.plugins.prompts import load_and_render_prompt
from holmes.plugins.toolsets import load_builtin_toolsets


class MyCustomLLM(LLM):
    """A minimal LLM stub that always returns the same canned answer."""

    def get_context_window_size(self) -> int:
        return 128000

    def get_maximum_output_token(self) -> int:
        return 4096

    def count_tokens_for_message(self, messages: list[dict]) -> int:
        # Token counting does not matter for a stub; report a constant.
        return 1

    def completion(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Tool]] = None,
        tool_choice: Optional[Union[str, dict]] = None,
        response_format: Optional[Union[dict, Type[BaseModel]]] = None,
        temperature: Optional[float] = None,
        drop_params: Optional[bool] = None,
    ) -> ModelResponse:
        # Return a litellm-style ModelResponse with a single canned choice
        # and no tool calls, so the tool-calling loop stops immediately.
        return ModelResponse(
            choices=[{
                "finish_reason": "stop",
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "There are no issues with your cluster",
                },
            }],
            usage={
                "prompt_tokens": 0,  # integers; a real backend would report real counts
                "completion_tokens": 0,
                "total_tokens": 0,
            },
        )
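

# Optional: a sketch of a completion() that delegates to a real model through
# litellm (already a Holmes dependency). Hedged, not part of the original
# example: the class name and model id below are assumptions, and litellm
# needs provider credentials (e.g. OPENAI_API_KEY) set in the environment.
import litellm


class LiteLLMBackedLLM(MyCustomLLM):
    # count_tokens_for_message and the window/output sizes are inherited from
    # the stub above; only the actual model call is swapped out here.
    def completion(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Tool]] = None,
        tool_choice: Optional[Union[str, dict]] = None,
        response_format: Optional[Union[dict, Type[BaseModel]]] = None,
        temperature: Optional[float] = None,
        drop_params: Optional[bool] = None,
    ) -> ModelResponse:
        return litellm.completion(
            model="gpt-4o",  # assumption: any litellm-supported model id works here
            messages=messages,
            tools=tools,
            temperature=temperature,
        )

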
def ask_holmes():
    prompt = "what issues do I have in my cluster"

    # Render the built-in "generic ask" system prompt.
    system_prompt = load_and_render_prompt("builtin://generic_ask.jinja2")

    # Wire the stub LLM into Holmes' tool-calling loop with the built-in toolsets.
    tool_executor = ToolExecutor(load_builtin_toolsets())
    ai = ToolCallingLLM(
        tool_executor,
        max_steps=10,
        llm=MyCustomLLM(),
    )

    response = ai.prompt_call(system_prompt, prompt)
    print(response.model_dump())


if __name__ == "__main__":
    ask_holmes()
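
# When run directly (python custom_llm.py), the printed dump should contain
# the stub's canned answer, "There are no issues with your cluster", since
# the fake LLM finishes with "stop" and never requests any tool calls.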