diff --git a/backend/open_webui/routers/folders.py b/backend/open_webui/routers/folders.py index ebd0c0cb17..7dda918821 100644 --- a/backend/open_webui/routers/folders.py +++ b/backend/open_webui/routers/folders.py @@ -16,8 +16,6 @@ from open_webui.models.folders import ( Folders, ) from open_webui.models.chats import Chats -from open_webui.models.files import Files -from open_webui.models.knowledge import Knowledges from open_webui.config import UPLOAD_DIR @@ -32,6 +30,7 @@ from fastapi.responses import FileResponse, StreamingResponse from open_webui.utils.auth import get_admin_user, get_verified_user from open_webui.utils.access_control import has_permission +from open_webui.utils.access_control.files import get_accessible_folder_files log = logging.getLogger(__name__) @@ -75,20 +74,10 @@ async def get_folders( if folder.parent_id and not await Folders.get_folder_by_id_and_user_id(folder.parent_id, user.id, db=db): folder = await Folders.update_folder_parent_id_by_id_and_user_id(folder.id, user.id, None, db=db) - if folder.data: - if 'files' in folder.data: - valid_files = [] - for file in folder.data['files']: - if file.get('type') == 'file': - if await Files.check_access_by_user_id(file.get('id'), user.id, 'read', db=db): - valid_files.append(file) - elif file.get('type') == 'collection': - if await Knowledges.check_access_by_user_id(file.get('id'), user.id, 'read', db=db): - valid_files.append(file) - else: - valid_files.append(file) - - folder.data['files'] = valid_files + if folder.data and 'files' in folder.data: + accessible_files = await get_accessible_folder_files(folder.data['files'], user, db=db) + if len(accessible_files) != len(folder.data.get('files', [])): + folder.data['files'] = accessible_files await Folders.update_folder_by_id_and_user_id( folder.id, user.id, FolderUpdateForm(data=folder.data), db=db ) @@ -173,6 +162,16 @@ async def update_folder_name_by_id( detail=ERROR_MESSAGES.DEFAULT('Folder already exists'), ) + # Validate read access to every file/collection being attached. + # Folder files are consumed by chat middleware as RAG context. + if form_data.data and isinstance(form_data.data.get('files'), list): + accessible_files = await get_accessible_folder_files(form_data.data['files'], user, db=db) + if len(accessible_files) != len(form_data.data['files']): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) + try: folder = await Folders.update_folder_by_id_and_user_id(id, user.id, form_data, db=db) return folder diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index f503169fc0..8ff987b610 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -31,6 +31,7 @@ from open_webui.storage.provider import Storage from open_webui.constants import ERROR_MESSAGES from open_webui.utils.auth import get_verified_user, get_admin_user from open_webui.utils.access_control import has_permission, filter_allowed_access_grants +from open_webui.utils.access_control.files import has_access_to_file from open_webui.models.access_grants import AccessGrants @@ -656,6 +657,14 @@ async def add_file_to_knowledge_by_id( detail=ERROR_MESSAGES.FILE_NOT_PROCESSED, ) + # KB write-access alone is not enough — caller must also be able to read the file. + if file.user_id != user.id and user.role != 'admin': + if not await has_access_to_file(file.id, 'read', user, db=db): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) + # Add content to the vector database try: await process_file( @@ -1017,6 +1026,15 @@ async def add_files_to_knowledge_batch( detail=f'File {missing_ids[0]} not found', ) + # Per-file read-access check — same gate as the single-file endpoint. + if user.role != 'admin': + for file in files: + if file.user_id != user.id and not await has_access_to_file(file.id, 'read', user, db=db): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) + # Process files try: result = await process_files_batch( diff --git a/backend/open_webui/utils/access_control/files.py b/backend/open_webui/utils/access_control/files.py index a48dfeb0f1..fb318e3c66 100644 --- a/backend/open_webui/utils/access_control/files.py +++ b/backend/open_webui/utils/access_control/files.py @@ -87,3 +87,38 @@ async def has_access_to_file( return True return False + + +async def get_accessible_folder_files( + entries: list[dict] | None, + user: UserModel, + db: AsyncSession | None = None, +) -> list[dict]: + """Filter folder.data['files'] entries to those the caller can read. + + Each entry is expected to have 'type' ('file' or 'collection') and 'id'. + Admins bypass all checks. Unknown types are kept as-is. + """ + if not entries: + return [] + if user.role == 'admin': + return list(entries) + + accessible: list[dict] = [] + for entry in entries: + if not isinstance(entry, dict): + continue + entry_type = entry.get('type') + entry_id = entry.get('id') + if not entry_id: + accessible.append(entry) + continue + if entry_type == 'file': + if await has_access_to_file(entry_id, 'read', user, db=db): + accessible.append(entry) + elif entry_type == 'collection': + if await Knowledges.check_access_by_user_id(entry_id, user.id, 'read', db=db): + accessible.append(entry) + else: + accessible.append(entry) + return accessible diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 3e25effa9a..60796fa22a 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -106,6 +106,7 @@ from open_webui.utils.tools import ( get_terminal_tools, ) from open_webui.utils.access_control import has_connection_access +from open_webui.utils.access_control.files import get_accessible_folder_files from open_webui.utils.plugin import load_function_module_by_id from open_webui.utils.filter import ( get_sorted_filter_ids, @@ -2407,15 +2408,17 @@ async def process_chat_payload(request, form_data, user, metadata, model): if 'system_prompt' in folder.data: form_data = await apply_system_prompt_to_body(folder.data['system_prompt'], form_data, metadata, user) if 'files' in folder.data: + # Defensive: filter to entries the caller can still read. + allowed_files = await get_accessible_folder_files(folder.data['files'], user) if metadata.get('params', {}).get('function_calling') != 'native': form_data['files'] = [ - *folder.data['files'], + *allowed_files, *form_data.get('files', []), ] else: # Native FC: skip RAG injection, builtin tools # will read folder knowledge from metadata. - metadata['folder_knowledge'] = folder.data['files'] + metadata['folder_knowledge'] = allowed_files # Model "Knowledge" handling user_message = get_last_user_message(form_data['messages']) @@ -2615,7 +2618,7 @@ async def process_chat_payload(request, form_data, user, metadata, model): folder = await Folders.get_folder_by_id_and_user_id(folder_id, user.id) if folder and folder.data and 'files' in folder.data: files = [f for f in files if f.get('id', None) != folder_id] - files = [*files, *folder.data['files']] + files = [*files, *await get_accessible_folder_files(folder.data['files'], user)] # files = [*files, *[{"type": "url", "url": url, "name": url} for url in urls]] # Remove duplicate files based on their content