Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: reduce document selection overhead #3464

Draft
wants to merge 3 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions backend/apps/rag/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from typing import Optional

from apps.webui.models.documents import Documents
from utils.misc import get_last_user_message, add_or_update_system_message
from config import SRC_LOG_LEVELS, CHROMA_CLIENT

Expand Down Expand Up @@ -236,6 +237,21 @@ def generate_multiple(query, f):
return lambda query: generate_multiple(query, func)


def _extract_collection_names(file):
if file["type"] == "collection":
# keep for backward compatibility
return file["collection_names"]
elif file["type"] == "all_documents":
return [document.collection_name for document in Documents.get_docs()]
elif file["type"] == "tag":
return [
document.collection_name
for document in Documents.get_docs_by_tag(file["name"])
]
else:
return [file["collection_name"]]
Comment on lines 241 to 252
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no need to have elif here since all your branches are returning something.

Suggested change
if file["type"] == "collection":
# keep for backward compatibility
return file["collection_names"]
elif file["type"] == "all_documents":
return [document.collection_name for document in Documents.get_docs()]
elif file["type"] == "tag":
return [
document.collection_name
for document in Documents.get_docs_by_tag(file["name"])
]
else:
return [file["collection_name"]]
if file["type"] == "collection":
# keep for backward compatibility
return file["collection_names"]
if file["type"] == "all_documents":
return [document.collection_name for document in Documents.get_docs()]
if file["type"] == "tag":
return [
document.collection_name
for document in Documents.get_docs_by_tag(file["name"])
]
return [file["collection_name"]]



def get_rag_context(
files,
messages,
Expand All @@ -254,11 +270,7 @@ def get_rag_context(
for file in files:
context = None

collection_names = (
file["collection_names"]
if file["type"] == "collection"
else [file["collection_name"]]
)
collection_names = _extract_collection_names(file)

collection_names = set(collection_names).difference(extracted_collections)
if not collection_names:
Expand Down
9 changes: 9 additions & 0 deletions backend/apps/webui/models/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ def get_docs(self) -> List[DocumentModel]:
DocumentModel.model_validate(doc) for doc in db.query(Document).all()
]

def get_docs_by_tag(self, tag: str) -> List[DocumentModel]:
    """Return the documents that carry a tag named exactly ``tag``.

    The database query only checks that ``tag`` occurs somewhere in the
    ``content`` column, which also matches documents where the text is part
    of a longer tag or of unrelated content. Each candidate row is therefore
    checked again against the names in its tag list.

    Args:
        tag: The tag name to look for.

    Returns:
        The matching documents as ``DocumentModel`` instances.
    """
    import json  # local import keeps this method self-contained

    def _has_tag(content) -> bool:
        # Presumably ``content`` is JSON shaped like
        # {"tags": [{"name": ...}, ...]} -- TODO confirm against the code
        # that writes this column. When the shape cannot be determined the
        # row is kept, so the result then equals the plain substring query.
        if isinstance(content, str):
            try:
                content = json.loads(content)
            except ValueError:
                return True
        if not isinstance(content, dict):
            return True
        tags = content.get("tags") or []
        if not isinstance(tags, list):
            return True
        return any(
            isinstance(entry, dict) and entry.get("name") == tag
            for entry in tags
        )

    with get_db() as db:
        # autoescape=True makes "%" and "_" in ``tag`` match literally
        # instead of acting as LIKE wildcards.
        # NOTE(review): the prefilter needs the tag name to appear verbatim
        # in the stored text; names that the serializer escapes would be
        # missed -- confirm how ``content`` is written.
        candidates = (
            db.query(Document)
            .filter(Document.content.contains(tag, autoescape=True))
            .all()
        )
        return [
            DocumentModel.model_validate(doc)
            for doc in candidates
            if _has_tag(doc.content)
        ]

def update_doc_by_name(
self, name: str, form_data: DocumentUpdateForm
) -> Optional[DocumentModel]:
Expand Down
16 changes: 12 additions & 4 deletions src/lib/components/chat/Chat.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -678,10 +678,14 @@
files = [
...files,
...(lastUserMessage?.files?.filter((item) =>
['doc', 'file', 'collection', 'web_search_results'].includes(item.type)
['doc', 'file', 'collection', 'web_search_results', 'tag', 'all_documents'].includes(
item.type
)
) ?? []),
...(responseMessage?.files?.filter((item) =>
['doc', 'file', 'collection', 'web_search_results'].includes(item.type)
['doc', 'file', 'collection', 'web_search_results', 'tag', 'all_documents'].includes(
item.type
)
) ?? [])
].filter(
// Remove duplicates
Expand Down Expand Up @@ -923,10 +927,14 @@
files = [
...files,
...(lastUserMessage?.files?.filter((item) =>
['doc', 'file', 'collection', 'web_search_results'].includes(item.type)
['doc', 'file', 'collection', 'web_search_results', 'tag', 'all_documents'].includes(
item.type
)
) ?? []),
...(responseMessage?.files?.filter((item) =>
['doc', 'file', 'collection', 'web_search_results'].includes(item.type)
['doc', 'file', 'collection', 'web_search_results', 'tag', 'all_documents'].includes(
item.type
)
) ?? [])
].filter(
// Remove duplicates
Expand Down
2 changes: 1 addition & 1 deletion src/lib/components/chat/MessageInput.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@
<div class=" text-gray-500 text-sm">{$i18n.t('Document')}</div>
</div>
</div>
{:else if file.type === 'collection'}
{:else if file.type === 'all_documents' || file.type === 'tag'}
<div
class="h-16 w-[15rem] flex items-center space-x-3 px-2.5 dark:bg-gray-600 rounded-xl border border-gray-200 dark:border-none"
>
Expand Down
23 changes: 13 additions & 10 deletions src/lib/components/chat/MessageInput/Documents.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@
...($documents.length > 0
? [
{
name: 'All Documents',
type: 'collection',
title: $i18n.t('All Documents'),
collection_names: $documents.map((doc) => doc.collection_name)
name: $i18n.t('All Documents'),
type: 'all_documents'
}
]
: []),
Expand All @@ -34,11 +32,8 @@
return [...new Set([...a, ...(e?.content?.tags ?? []).map((tag) => tag.name)])];
}, [])
.map((tag) => ({
name: tag,
type: 'collection',
collection_names: $documents
.filter((doc) => (doc?.content?.tags ?? []).map((tag) => tag.name).includes(tag))
.map((doc) => doc.collection_name)
type: 'tag',
name: tag
}))
];

Expand Down Expand Up @@ -136,14 +131,22 @@
}}
on:focus={() => {}}
>
{#if doc.type === 'collection'}
{#if doc.type === 'all_documents'}
<div class=" font-medium text-black dark:text-gray-100 line-clamp-1">
{doc?.title ?? `#${doc.name}`}
</div>

<div class=" text-xs text-gray-600 dark:text-gray-100 line-clamp-1">
{$i18n.t('Collection')}
</div>
{:else if doc.type === 'tag'}
<div class=" font-medium text-black dark:text-gray-100 line-clamp-1">
#{doc.name}
</div>

<div class=" text-xs text-gray-600 dark:text-gray-100 line-clamp-1">
{$i18n.t('Tag')}
</div>
{:else}
<div class=" font-medium text-black dark:text-gray-100 line-clamp-1">
#{doc.name} ({doc.filename})
Expand Down
31 changes: 30 additions & 1 deletion src/lib/components/chat/Messages/UserMessage.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,36 @@
<div class=" text-gray-500 text-sm">{$i18n.t('Document')}</div>
</div>
</button>
{:else if file.type === 'collection'}
{:else if file.type === 'all_documents'}
<button
class="h-16 w-72 flex items-center space-x-3 px-2.5 dark:bg-gray-600 rounded-xl border border-gray-200 dark:border-none text-left"
type="button"
>
<div class="p-2.5 bg-red-400 text-white rounded-lg">
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24"
fill="currentColor"
class="w-6 h-6"
>
<path
d="M7.5 3.375c0-1.036.84-1.875 1.875-1.875h.375a3.75 3.75 0 0 1 3.75 3.75v1.875C13.5 8.161 14.34 9 15.375 9h1.875A3.75 3.75 0 0 1 21 12.75v3.375C21 17.16 20.16 18 19.125 18h-9.75A1.875 1.875 0 0 1 7.5 16.125V3.375Z"
/>
<path
d="M15 5.25a5.23 5.23 0 0 0-1.279-3.434 9.768 9.768 0 0 1 6.963 6.963A5.23 5.23 0 0 0 17.25 7.5h-1.875A.375.375 0 0 1 15 7.125V5.25ZM4.875 6H6v10.125A3.375 3.375 0 0 0 9.375 19.5H16.5v1.125c0 1.035-.84 1.875-1.875 1.875h-9.75A1.875 1.875 0 0 1 3 20.625V7.875C3 6.839 3.84 6 4.875 6Z"
/>
</svg>
</div>

<div class="flex flex-col justify-center -space-y-0.5">
<div class=" dark:text-gray-100 text-sm font-medium line-clamp-1">
{`#${$i18n.t('All Documents')}`}
</div>

<div class=" text-gray-500 text-sm">{$i18n.t('Collection')}</div>
</div>
</button>
{:else if file.type === 'collection' || file.type === 'tag'}
<button
class="h-16 w-72 flex items-center space-x-3 px-2.5 dark:bg-gray-600 rounded-xl border border-gray-200 dark:border-none text-left"
type="button"
Expand Down
Loading