<!--
Files
kotaemon/reference/indices/index.html

5362 lines
223 KiB
HTML
-->
<!doctype html>
<html lang="en" class="no-js">
<head>
  <meta charset="utf-8">
  <!-- Page metadata: viewport, previous/next page relations, favicon, generator, title. -->
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <link rel="prev" href="../embeddings/tei_endpoint_embed/">
  <link rel="next" href="base/">
  <link rel="icon" href="../../assets/images/favicon.png">
  <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.40">
  <title>Indices - kotaemon Docs</title>
  <!-- Theme stylesheets (main layout and colour palette). -->
  <link rel="stylesheet" href="../../assets/stylesheets/main.8c3ca2c6.min.css">
  <link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
  <!--
    Both scripts carry neither async nor defer, so they block parsing and run in order.
    NOTE(review): presumably the theme script must run before the body renders to apply
    the colour scheme; confirm before adding defer.
    The type attribute is omitted because JavaScript is the default for script elements.
  -->
  <script src="../../assets/pymdownx-extras/material-extra-theme-TVq-kNRT.js"></script>
  <script src="../../assets/pymdownx-extras/material-extra-3rdparty-E-i8w1WA.js"></script>
  <!-- Web fonts: preconnect to the font file host, then load the Roboto / Roboto Mono stylesheet. -->
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
  <!-- Sets the CSS custom properties that name the text font and the code font. -->
  <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
  <!-- Additional stylesheets for plugins and site-specific extras. -->
  <link rel="stylesheet" href="../../css/timeago.css">
  <link rel="stylesheet" href="../../assets/_mkdocstrings.css">
  <link rel="stylesheet" href="../../extra/css/code_select.css">
  <link rel="stylesheet" href="../../assets/pymdownx-extras/extra-fb5a2a1c86.css">
  <!--
    Inline helpers defined as globals:
      __md_scope : URL of "../.." resolved against the current location.
      __md_hash  : folds a string into an integer via (h << 5) - h + charCode per character.
      __md_get   : JSON-parses the item stored under scope.pathname + "." + key
                   (storage defaults to localStorage, scope defaults to __md_scope).
      __md_set   : JSON-stringifies a value into the same key; any exception thrown by
                   setItem is caught and ignored.
  -->
  <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="dracula" data-md-color-primary="deep-purple" data-md-color-accent="deep-purple">
<!-- State checkboxes for the drawer and the search overlay; label elements with for="__drawer" / for="__search" on this page toggle them. -->
<input type="checkbox" id="__drawer" class="md-toggle" data-md-toggle="drawer" autocomplete="off">
<input type="checkbox" id="__search" class="md-toggle" data-md-toggle="search" autocomplete="off">
<!-- Overlay label bound to the drawer checkbox. -->
<label for="__drawer" class="md-overlay"></label>
<!-- Skip link; its href targets the #indices fragment. -->
<div data-md-component="skip">
  <a class="md-skip" href="#indices">
    Skip to content
  </a>
</div>
<!-- Announcement slot; it has no content on this page. -->
<div data-md-component="announce">
</div>
<header class="md-header" data-md-component="header">
  <!-- Site header: logo, drawer toggle, titles, colour-scheme switcher, search, repository link. -->
  <!-- The landmark label is plain text; it previously exposed the raw key "header.title". -->
  <nav class="md-header__inner md-grid" aria-label="Header">
    <!-- Logo link back to the site root. -->
    <a href="../.." title="kotaemon Docs" class="md-header__button md-logo" aria-label="kotaemon Docs">
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
    </a>
    <!-- Label bound to the #__drawer checkbox. -->
    <label class="md-header__button md-icon" for="__drawer">
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
    </label>
    <!-- Two title slots: the site name and the current page topic. -->
    <div class="md-header__title" data-md-component="header-title">
      <div class="md-header__ellipsis">
        <div class="md-header__topic">
          <span class="md-ellipsis">
            kotaemon Docs
          </span>
        </div>
        <div class="md-header__topic" data-md-component="header-topic">
          <span class="md-ellipsis">
            Indices
          </span>
        </div>
      </div>
    </div>
    <div class="md-header__options">
      <!--
        Colour-scheme switcher. Every link invokes toggleScheme(), which is not defined in
        this part of the file. Each link has an explicit aria-label because a title
        attribute alone is not a reliable accessible name for an icon-only control.
        NOTE(review): these are in-page actions and would be better as button elements;
        they stay anchors here because the theme's CSS/JS hooks are not visible - confirm
        before converting.
      -->
      <div class="md-header-nav__scheme md-header-nav__button md-source__icon md-icon">
        <a href="javascript:toggleScheme();" title="Light mode" class="light-mode" aria-label="Light mode">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 7a5 5 0 0 1 5 5 5 5 0 0 1-5 5 5 5 0 0 1-5-5 5 5 0 0 1 5-5m0 2a3 3 0 0 0-3 3 3 3 0 0 0 3 3 3 3 0 0 0 3-3 3 3 0 0 0-3-3m0-7 2.39 3.42C13.65 5.15 12.84 5 12 5s-1.65.15-2.39.42zM3.34 7l4.16-.35A7.2 7.2 0 0 0 5.94 8.5c-.44.74-.69 1.5-.83 2.29zm.02 10 1.76-3.77a7.131 7.131 0 0 0 2.38 4.14zM20.65 7l-1.77 3.79a7.02 7.02 0 0 0-2.38-4.15zm-.01 10-4.14.36c.59-.51 1.12-1.14 1.54-1.86.42-.73.69-1.5.83-2.29zM12 22l-2.41-3.44c.74.27 1.55.44 2.41.44.82 0 1.63-.17 2.37-.44z"/></svg>
        </a>
        <a href="javascript:toggleScheme();" title="Dark mode" class="dark-mode" aria-label="Dark mode">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m17.75 4.09-2.53 1.94.91 3.06-2.63-1.81-2.63 1.81.91-3.06-2.53-1.94L12.44 4l1.06-3 1.06 3zm3.5 6.91-1.64 1.25.59 1.98-1.7-1.17-1.7 1.17.59-1.98L15.75 11l2.06-.05L18.5 9l.69 1.95zm-2.28 4.95c.83-.08 1.72 1.1 1.19 1.85-.32.45-.66.87-1.08 1.27C15.17 23 8.84 23 4.94 19.07c-3.91-3.9-3.91-10.24 0-14.14.4-.4.82-.76 1.27-1.08.75-.53 1.93.36 1.85 1.19-.27 2.86.69 5.83 2.89 8.02a9.96 9.96 0 0 0 8.02 2.89m-1.64 2.02a12.08 12.08 0 0 1-7.8-3.47c-2.17-2.19-3.33-5-3.49-7.82-2.81 3.14-2.7 7.96.31 10.98 3.02 3.01 7.84 3.12 10.98.31"/></svg>
        </a>
        <a href="javascript:toggleScheme();" title="System preference" class="system-mode" aria-label="System preference">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M7.5 2c-1.79 1.15-3 3.18-3 5.5s1.21 4.35 3.03 5.5C4.46 13 2 10.54 2 7.5A5.5 5.5 0 0 1 7.5 2m11.57 1.5 1.43 1.43L4.93 20.5 3.5 19.07zm-6.18 2.43L11.41 5 9.97 6l.42-1.7L9 3.24l1.75-.12.58-1.65L12 3.1l1.73.03-1.35 1.13zm-3.3 3.61-1.16-.73-1.12.78.34-1.32-1.09-.83 1.36-.09.45-1.29.51 1.27 1.36.03-1.05.87zM19 13.5a5.5 5.5 0 0 1-5.5 5.5c-1.22 0-2.35-.4-3.26-1.07l7.69-7.69c.67.91 1.07 2.04 1.07 3.26m-4.4 6.58 2.77-1.15-.24 3.35zm4.33-2.7 1.15-2.77 2.2 2.54zm1.15-4.96-1.14-2.78 3.34.24zM9.63 18.93l2.77 1.15-2.53 2.19z"/></svg>
        </a>
      </div>
    </div>
    <!-- Label bound to the #__search checkbox. -->
    <label class="md-header__button md-icon" for="__search">
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
    </label>
    <!-- Search dialog: query form, share/clear controls, and the result list container. -->
    <div class="md-search" data-md-component="search" role="dialog">
      <label class="md-search__overlay" for="__search"></label>
      <div class="md-search__inner" role="search">
        <form class="md-search__form" name="search">
          <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
          <label class="md-search__icon md-icon" for="__search">
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
          </label>
          <nav class="md-search__options" aria-label="Search">
            <a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
            </a>
            <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
            </button>
          </nav>
          <div class="md-search__suggest" data-md-component="search-suggest"></div>
        </form>
        <div class="md-search__output">
          <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
            <div class="md-search-result" data-md-component="search-result">
              <div class="md-search-result__meta">
                Initializing search
              </div>
              <ol class="md-search-result__list" role="presentation"></ol>
            </div>
          </div>
        </div>
      </div>
    </div>
    <!-- Link to the source repository. -->
    <div class="md-header__source">
      <a href="https://github.com/Cinnamon/kotaemon" title="Go to repository" class="md-source" data-md-component="source">
        <div class="md-source__icon md-icon">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8M97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
        </div>
        <div class="md-source__repository">
          Cinnamon/kotaemon
        </div>
      </a>
    </div>
  </nav>
</header>
<div class="md-container" data-md-component="container">
<nav class="md-tabs" data-md-component="tabs" aria-label="Tabs">
  <!-- Top-level section tabs; the "API Reference" item carries the active modifier class. -->
  <div class="md-grid">
    <ul class="md-tabs__list">
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="../..">
          Getting Started
        </a>
      </li>
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="../../development/">
          Development
        </a>
      </li>
      <li class="md-tabs__item md-tabs__item--active">
        <a class="md-tabs__link" href="../agents/">
          API Reference
        </a>
      </li>
      <!-- The remaining tabs link to external sites. -->
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="https://github.com/Cinnamon/kotaemon/releases">
          Changelogs
        </a>
      </li>
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="https://github.com/Cinnamon/kotaemon/issues">
          Issue Tracker
        </a>
      </li>
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="https://huggingface.co/spaces/cin-model/kotaemon-demo">
          Live Demo
        </a>
      </li>
    </ul>
  </div>
</nav>
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="kotaemon Docs" class="md-nav__button md-logo" aria-label="kotaemon Docs" data-md-component="logo">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
</a>
kotaemon Docs
</label>
<div class="md-nav__source">
<a href="https://github.com/Cinnamon/kotaemon" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8M97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
</div>
<div class="md-source__repository">
Cinnamon/kotaemon
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" >
<div class="md-nav__link md-nav__container">
<a href="../.." class="md-nav__link ">
<span class="md-ellipsis">
Getting Started
</span>
</a>
<label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1">
<span class="md-nav__icon md-icon"></span>
Getting Started
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../usage/" class="md-nav__link">
<span class="md-ellipsis">
Basic Usage
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
<div class="md-nav__link md-nav__container">
<a href="../../development/" class="md-nav__link ">
<span class="md-ellipsis">
Development
</span>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
Development
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../pages/app/customize-flows/" class="md-nav__link">
<span class="md-ellipsis">
Customize flow logic
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../development/create-a-component/" class="md-nav__link">
<span class="md-ellipsis">
Creating a Component
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_3" >
<label class="md-nav__link" for="__nav_2_3" id="__nav_2_3_label" tabindex="0">
<span class="md-ellipsis">
Components
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2_3">
<span class="md-nav__icon md-icon"></span>
Components
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_3_1" >
<label class="md-nav__link" for="__nav_2_3_1" id="__nav_2_3_1_label" tabindex="0">
<span class="md-ellipsis">
Index
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_2_3_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2_3_1">
<span class="md-nav__icon md-icon"></span>
Index
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../pages/app/index/file/" class="md-nav__link">
<span class="md-ellipsis">
File index
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_3_2" >
<div class="md-nav__link md-nav__container">
<a href="../../pages/app/settings/overview/" class="md-nav__link ">
<span class="md-ellipsis">
Settings
</span>
</a>
<label class="md-nav__link " for="__nav_2_3_2" id="__nav_2_3_2_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_2_3_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2_3_2">
<span class="md-nav__icon md-icon"></span>
Settings
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../pages/app/settings/user-settings/" class="md-nav__link">
<span class="md-ellipsis">
User settings
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_3_3" >
<label class="md-nav__link" for="__nav_2_3_3" id="__nav_2_3_3_label" tabindex="0">
<span class="md-ellipsis">
Extension
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_2_3_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2_3_3">
<span class="md-nav__icon md-icon"></span>
Extension
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../pages/app/ext/user-management/" class="md-nav__link">
<span class="md-ellipsis">
User management
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../development/contributing/" class="md-nav__link">
<span class="md-ellipsis">
Contributing
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
<div class="md-nav__link md-nav__container">
<a href="../agents/" class="md-nav__link ">
<span class="md-ellipsis">
API Reference
</span>
</a>
<!-- No tabindex on this label: an empty tabindex value is not a valid integer and is ignored by browsers, so leaving the attribute out keeps the label out of the tab order exactly as before while making the markup valid. -->
<label class="md-nav__link " for="__nav_3" id="__nav_3_label">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
API Reference
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_2" >
<div class="md-nav__link md-nav__container">
<a href="../base/" class="md-nav__link ">
<span class="md-ellipsis">
Base
</span>
</a>
<label class="md-nav__link " for="__nav_3_2" id="__nav_3_2_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_2">
<span class="md-nav__icon md-icon"></span>
Base
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../base/component/" class="md-nav__link">
<span class="md-ellipsis">
Component
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../base/schema/" class="md-nav__link">
<span class="md-ellipsis">
Schema
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_3" >
<div class="md-nav__link md-nav__container">
<a href="../chatbot/" class="md-nav__link ">
<span class="md-ellipsis">
Chatbot
</span>
</a>
<label class="md-nav__link " for="__nav_3_3" id="__nav_3_3_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_3">
<span class="md-nav__icon md-icon"></span>
Chatbot
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../chatbot/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../chatbot/simple_respondent/" class="md-nav__link">
<span class="md-ellipsis">
Simple Respondent
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../cli/" class="md-nav__link">
<span class="md-ellipsis">
CLI
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_5" >
<div class="md-nav__link md-nav__container">
<a href="../embeddings/" class="md-nav__link ">
<span class="md-ellipsis">
Embeddings
</span>
</a>
<label class="md-nav__link " for="__nav_3_5" id="__nav_3_5_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_5">
<span class="md-nav__icon md-icon"></span>
Embeddings
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../embeddings/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../embeddings/endpoint_based/" class="md-nav__link">
<span class="md-ellipsis">
Endpoint Based
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../embeddings/fastembed/" class="md-nav__link">
<span class="md-ellipsis">
Fastembed
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../embeddings/langchain_based/" class="md-nav__link">
<span class="md-ellipsis">
Langchain Based
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../embeddings/openai/" class="md-nav__link">
<span class="md-ellipsis">
Openai
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../embeddings/tei_endpoint_embed/" class="md-nav__link">
<span class="md-ellipsis">
Tei Endpoint Embed
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_6" checked>
<div class="md-nav__link md-nav__container">
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
Indices
</span>
</a>
<label class="md-nav__link md-nav__link--active" for="__nav_3_6" id="__nav_3_6_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_6_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_3_6">
<span class="md-nav__icon md-icon"></span>
Indices
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_6_2" >
<div class="md-nav__link md-nav__container">
<a href="extractors/" class="md-nav__link ">
<span class="md-ellipsis">
Extractors
</span>
</a>
<label class="md-nav__link " for="__nav_3_6_2" id="__nav_3_6_2_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_6_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_6_2">
<span class="md-nav__icon md-icon"></span>
Extractors
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="extractors/doc_parsers/" class="md-nav__link">
<span class="md-ellipsis">
Doc Parsers
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_6_3" >
<div class="md-nav__link md-nav__container">
<a href="ingests/" class="md-nav__link ">
<span class="md-ellipsis">
Ingests
</span>
</a>
<label class="md-nav__link " for="__nav_3_6_3" id="__nav_3_6_3_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_6_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_6_3">
<span class="md-nav__icon md-icon"></span>
Ingests
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="ingests/files/" class="md-nav__link">
<span class="md-ellipsis">
Files
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_6_4" >
<div class="md-nav__link md-nav__container">
<a href="qa/" class="md-nav__link ">
<span class="md-ellipsis">
Qa
</span>
</a>
<label class="md-nav__link " for="__nav_3_6_4" id="__nav_3_6_4_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_6_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_6_4">
<span class="md-nav__icon md-icon"></span>
Qa
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="qa/citation/" class="md-nav__link">
<span class="md-ellipsis">
Citation
</span>
</a>
</li>
<li class="md-nav__item">
<a href="qa/citation_qa/" class="md-nav__link">
<span class="md-ellipsis">
Citation Qa
</span>
</a>
</li>
<li class="md-nav__item">
<a href="qa/citation_qa_inline/" class="md-nav__link">
<span class="md-ellipsis">
Citation Qa Inline
</span>
</a>
</li>
<li class="md-nav__item">
<a href="qa/format_context/" class="md-nav__link">
<span class="md-ellipsis">
Format Context
</span>
</a>
</li>
<li class="md-nav__item">
<a href="qa/utils/" class="md-nav__link">
<span class="md-ellipsis">
Utils
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_6_5" >
<div class="md-nav__link md-nav__container">
<a href="rankings/" class="md-nav__link ">
<span class="md-ellipsis">
Rankings
</span>
</a>
<label class="md-nav__link " for="__nav_3_6_5" id="__nav_3_6_5_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_6_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_6_5">
<span class="md-nav__icon md-icon"></span>
Rankings
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="rankings/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="rankings/cohere/" class="md-nav__link">
<span class="md-ellipsis">
Cohere
</span>
</a>
</li>
<li class="md-nav__item">
<a href="rankings/llm/" class="md-nav__link">
<span class="md-ellipsis">
Llm
</span>
</a>
</li>
<li class="md-nav__item">
<a href="rankings/llm_scoring/" class="md-nav__link">
<span class="md-ellipsis">
Llm Scoring
</span>
</a>
</li>
<li class="md-nav__item">
<a href="rankings/llm_trulens/" class="md-nav__link">
<span class="md-ellipsis">
Llm Trulens
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_6_6" >
<div class="md-nav__link md-nav__container">
<a href="retrievers/" class="md-nav__link ">
<span class="md-ellipsis">
Retrievers
</span>
</a>
<label class="md-nav__link " for="__nav_3_6_6" id="__nav_3_6_6_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_6_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_6_6">
<span class="md-nav__icon md-icon"></span>
Retrievers
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="retrievers/jina_web_search/" class="md-nav__link">
<span class="md-ellipsis">
Jina Web Search
</span>
</a>
</li>
<li class="md-nav__item">
<a href="retrievers/tavily_web_search/" class="md-nav__link">
<span class="md-ellipsis">
Tavily Web Search
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="splitters/" class="md-nav__link">
<span class="md-ellipsis">
Splitters
</span>
</a>
</li>
<li class="md-nav__item">
<a href="vectorindex/" class="md-nav__link">
<span class="md-ellipsis">
Vectorindex
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_7" >
<div class="md-nav__link md-nav__container">
<a href="../llms/" class="md-nav__link ">
<span class="md-ellipsis">
LLMs
</span>
</a>
<label class="md-nav__link " for="__nav_3_7" id="__nav_3_7_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_7">
<span class="md-nav__icon md-icon"></span>
LLMs
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../llms/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../llms/branching/" class="md-nav__link">
<span class="md-ellipsis">
Branching
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_7_3" >
<div class="md-nav__link md-nav__container">
<a href="../llms/chats/" class="md-nav__link ">
<span class="md-ellipsis">
Chats
</span>
</a>
<label class="md-nav__link " for="__nav_3_7_3" id="__nav_3_7_3_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_7_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_7_3">
<span class="md-nav__icon md-icon"></span>
Chats
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../llms/chats/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../llms/chats/endpoint_based/" class="md-nav__link">
<span class="md-ellipsis">
Endpoint Based
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../llms/chats/langchain_based/" class="md-nav__link">
<span class="md-ellipsis">
Langchain Based
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../llms/chats/llamacpp/" class="md-nav__link">
<span class="md-ellipsis">
Llamacpp
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../llms/chats/openai/" class="md-nav__link">
<span class="md-ellipsis">
Openai
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_7_4" >
<div class="md-nav__link md-nav__container">
<a href="../llms/completions/" class="md-nav__link ">
<span class="md-ellipsis">
Completions
</span>
</a>
<label class="md-nav__link " for="__nav_3_7_4" id="__nav_3_7_4_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_7_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_7_4">
<span class="md-nav__icon md-icon"></span>
Completions
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../llms/completions/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../llms/completions/langchain_based/" class="md-nav__link">
<span class="md-ellipsis">
Langchain Based
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../llms/cot/" class="md-nav__link">
<span class="md-ellipsis">
Cot
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../llms/linear/" class="md-nav__link">
<span class="md-ellipsis">
Linear
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_7_7" >
<div class="md-nav__link md-nav__container">
<a href="../llms/prompts/" class="md-nav__link ">
<span class="md-ellipsis">
Prompts
</span>
</a>
<label class="md-nav__link " for="__nav_3_7_7" id="__nav_3_7_7_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_7_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_7_7">
<span class="md-nav__icon md-icon"></span>
Prompts
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../llms/prompts/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../llms/prompts/template/" class="md-nav__link">
<span class="md-ellipsis">
Template
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_8" >
<div class="md-nav__link md-nav__container">
<a href="../loaders/" class="md-nav__link ">
<span class="md-ellipsis">
Loaders
</span>
</a>
<label class="md-nav__link " for="__nav_3_8" id="__nav_3_8_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_8_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_8">
<span class="md-nav__icon md-icon"></span>
Loaders
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../loaders/adobe_loader/" class="md-nav__link">
<span class="md-ellipsis">
Adobe Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/azureai_document_intelligence_loader/" class="md-nav__link">
<span class="md-ellipsis">
Azureai Document Intelligence Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/composite_loader/" class="md-nav__link">
<span class="md-ellipsis">
Composite Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/docling_loader/" class="md-nav__link">
<span class="md-ellipsis">
Docling Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/docx_loader/" class="md-nav__link">
<span class="md-ellipsis">
Docx Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/excel_loader/" class="md-nav__link">
<span class="md-ellipsis">
Excel Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/html_loader/" class="md-nav__link">
<span class="md-ellipsis">
Html Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/mathpix_loader/" class="md-nav__link">
<span class="md-ellipsis">
Mathpix Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/ocr_loader/" class="md-nav__link">
<span class="md-ellipsis">
Ocr Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/pdf_loader/" class="md-nav__link">
<span class="md-ellipsis">
Pdf Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/txt_loader/" class="md-nav__link">
<span class="md-ellipsis">
Txt Loader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/unstructured_loader/" class="md-nav__link">
<span class="md-ellipsis">
Unstructured Loader
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_8_14" >
<div class="md-nav__link md-nav__container">
<a href="../loaders/utils/" class="md-nav__link ">
<span class="md-ellipsis">
Utils
</span>
</a>
<label class="md-nav__link " for="__nav_3_8_14" id="__nav_3_8_14_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_8_14_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_8_14">
<span class="md-nav__icon md-icon"></span>
Utils
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../loaders/utils/adobe/" class="md-nav__link">
<span class="md-ellipsis">
Adobe
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/utils/box/" class="md-nav__link">
<span class="md-ellipsis">
Box
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/utils/gpt4v/" class="md-nav__link">
<span class="md-ellipsis">
Gpt4V
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/utils/pdf_ocr/" class="md-nav__link">
<span class="md-ellipsis">
Pdf Ocr
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../loaders/utils/table/" class="md-nav__link">
<span class="md-ellipsis">
Table
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../loaders/web_loader/" class="md-nav__link">
<span class="md-ellipsis">
Web Loader
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_9" >
<div class="md-nav__link md-nav__container">
<a href="../parsers/" class="md-nav__link ">
<span class="md-ellipsis">
Parsers
</span>
</a>
<label class="md-nav__link " for="__nav_3_9" id="__nav_3_9_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_9_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_9">
<span class="md-nav__icon md-icon"></span>
Parsers
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../parsers/regex_extractor/" class="md-nav__link">
<span class="md-ellipsis">
Regex Extractor
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_10" >
<div class="md-nav__link md-nav__container">
<a href="../rerankings/" class="md-nav__link ">
<span class="md-ellipsis">
Rerankings
</span>
</a>
<label class="md-nav__link " for="__nav_3_10" id="__nav_3_10_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_10_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_10">
<span class="md-nav__icon md-icon"></span>
Rerankings
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../rerankings/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../rerankings/cohere/" class="md-nav__link">
<span class="md-ellipsis">
Cohere
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../rerankings/tei_fast_rerank/" class="md-nav__link">
<span class="md-ellipsis">
Tei Fast Rerank
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_11" >
<div class="md-nav__link md-nav__container">
<a href="../storages/" class="md-nav__link ">
<span class="md-ellipsis">
Storages
</span>
</a>
<label class="md-nav__link " for="__nav_3_11" id="__nav_3_11_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_11_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_11">
<span class="md-nav__icon md-icon"></span>
Storages
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_11_1" >
<div class="md-nav__link md-nav__container">
<a href="../storages/docstores/" class="md-nav__link ">
<span class="md-ellipsis">
Docstores
</span>
</a>
<label class="md-nav__link " for="__nav_3_11_1" id="__nav_3_11_1_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_11_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_11_1">
<span class="md-nav__icon md-icon"></span>
Docstores
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../storages/docstores/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/docstores/elasticsearch/" class="md-nav__link">
<span class="md-ellipsis">
Elasticsearch
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/docstores/in_memory/" class="md-nav__link">
<span class="md-ellipsis">
In Memory
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/docstores/lancedb/" class="md-nav__link">
<span class="md-ellipsis">
Lancedb
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/docstores/simple_file/" class="md-nav__link">
<span class="md-ellipsis">
Simple File
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_11_2" >
<div class="md-nav__link md-nav__container">
<a href="../storages/vectorstores/" class="md-nav__link ">
<span class="md-ellipsis">
Vectorstores
</span>
</a>
<label class="md-nav__link " for="__nav_3_11_2" id="__nav_3_11_2_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_11_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_11_2">
<span class="md-nav__icon md-icon"></span>
Vectorstores
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../storages/vectorstores/base/" class="md-nav__link">
<span class="md-ellipsis">
Base
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/vectorstores/chroma/" class="md-nav__link">
<span class="md-ellipsis">
Chroma
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/vectorstores/in_memory/" class="md-nav__link">
<span class="md-ellipsis">
In Memory
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/vectorstores/lancedb/" class="md-nav__link">
<span class="md-ellipsis">
Lancedb
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/vectorstores/milvus/" class="md-nav__link">
<span class="md-ellipsis">
Milvus
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/vectorstores/qdrant/" class="md-nav__link">
<span class="md-ellipsis">
Qdrant
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../storages/vectorstores/simple_file/" class="md-nav__link">
<span class="md-ellipsis">
Simple File
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="https://github.com/Cinnamon/kotaemon/releases" class="md-nav__link">
<span class="md-ellipsis">
Changelogs
</span>
</a>
</li>
<li class="md-nav__item">
<a href="https://github.com/Cinnamon/kotaemon/issues" class="md-nav__link">
<span class="md-ellipsis">
Issue Tracker
</span>
</a>
</li>
<li class="md-nav__item">
<a href="https://huggingface.co/spaces/cin-model/kotaemon-demo" class="md-nav__link">
<span class="md-ellipsis">
Live Demo
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Page contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Page contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#indices" class="md-nav__link">
<span class="md-ellipsis">
indices
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#indices.VectorIndexing" class="md-nav__link">
<span class="md-ellipsis">
VectorIndexing
</span>
</a>
<nav class="md-nav" aria-label="VectorIndexing">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#indices.VectorIndexing.to_retrieval_pipeline" class="md-nav__link">
<span class="md-ellipsis">
to_retrieval_pipeline
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#indices.VectorRetrieval" class="md-nav__link">
<span class="md-ellipsis">
VectorRetrieval
</span>
</a>
<nav class="md-nav" aria-label="VectorRetrieval">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#indices.VectorRetrieval.run" class="md-nav__link">
<span class="md-ellipsis">
run
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<a href="https://github.com/Cinnamon/kotaemon/edit/main/libs/kotaemon/kotaemon/indices/__init__.py" title="Edit this page" class="md-content__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20.71 7.04c.39-.39.39-1.04 0-1.41l-2.34-2.34c-.37-.39-1.02-.39-1.41 0l-1.84 1.83 3.75 3.75M3 17.25V21h3.75L17.81 9.93l-3.75-3.75z"/></svg>
</a>
<h1>Indices</h1>
<div class="doc doc-object doc-module">
<a id="indices"></a>
<div class="doc doc-contents first">
<div class="doc doc-children">
<div class="doc doc-object doc-class">
<h2 id="indices.VectorIndexing" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">VectorIndexing</span>
<a href="#indices.VectorIndexing" class="headerlink" title="Permanent link"></a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code>BaseIndexing</code></p>
<p>Ingest the documents, run them through the embedding model, and store the
resulting embeddings in a vector store.</p>
<details class="this-pipeline-supports-the-following-set-of-inputs" open>
<summary>This pipeline supports the following set of inputs</summary>
<ul>
<li>List of documents</li>
<li>List of texts</li>
</ul>
</details>
<details class="quote">
<summary>Source code in <code>libs/kotaemon/kotaemon/indices/vectorindex.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-21"> 21</a></span>
<span class="normal"><a href="#__codelineno-0-22"> 22</a></span>
<span class="normal"><a href="#__codelineno-0-23"> 23</a></span>
<span class="normal"><a href="#__codelineno-0-24"> 24</a></span>
<span class="normal"><a href="#__codelineno-0-25"> 25</a></span>
<span class="normal"><a href="#__codelineno-0-26"> 26</a></span>
<span class="normal"><a href="#__codelineno-0-27"> 27</a></span>
<span class="normal"><a href="#__codelineno-0-28"> 28</a></span>
<span class="normal"><a href="#__codelineno-0-29"> 29</a></span>
<span class="normal"><a href="#__codelineno-0-30"> 30</a></span>
<span class="normal"><a href="#__codelineno-0-31"> 31</a></span>
<span class="normal"><a href="#__codelineno-0-32"> 32</a></span>
<span class="normal"><a href="#__codelineno-0-33"> 33</a></span>
<span class="normal"><a href="#__codelineno-0-34"> 34</a></span>
<span class="normal"><a href="#__codelineno-0-35"> 35</a></span>
<span class="normal"><a href="#__codelineno-0-36"> 36</a></span>
<span class="normal"><a href="#__codelineno-0-37"> 37</a></span>
<span class="normal"><a href="#__codelineno-0-38"> 38</a></span>
<span class="normal"><a href="#__codelineno-0-39"> 39</a></span>
<span class="normal"><a href="#__codelineno-0-40"> 40</a></span>
<span class="normal"><a href="#__codelineno-0-41"> 41</a></span>
<span class="normal"><a href="#__codelineno-0-42"> 42</a></span>
<span class="normal"><a href="#__codelineno-0-43"> 43</a></span>
<span class="normal"><a href="#__codelineno-0-44"> 44</a></span>
<span class="normal"><a href="#__codelineno-0-45"> 45</a></span>
<span class="normal"><a href="#__codelineno-0-46"> 46</a></span>
<span class="normal"><a href="#__codelineno-0-47"> 47</a></span>
<span class="normal"><a href="#__codelineno-0-48"> 48</a></span>
<span class="normal"><a href="#__codelineno-0-49"> 49</a></span>
<span class="normal"><a href="#__codelineno-0-50"> 50</a></span>
<span class="normal"><a href="#__codelineno-0-51"> 51</a></span>
<span class="normal"><a href="#__codelineno-0-52"> 52</a></span>
<span class="normal"><a href="#__codelineno-0-53"> 53</a></span>
<span class="normal"><a href="#__codelineno-0-54"> 54</a></span>
<span class="normal"><a href="#__codelineno-0-55"> 55</a></span>
<span class="normal"><a href="#__codelineno-0-56"> 56</a></span>
<span class="normal"><a href="#__codelineno-0-57"> 57</a></span>
<span class="normal"><a href="#__codelineno-0-58"> 58</a></span>
<span class="normal"><a href="#__codelineno-0-59"> 59</a></span>
<span class="normal"><a href="#__codelineno-0-60"> 60</a></span>
<span class="normal"><a href="#__codelineno-0-61"> 61</a></span>
<span class="normal"><a href="#__codelineno-0-62"> 62</a></span>
<span class="normal"><a href="#__codelineno-0-63"> 63</a></span>
<span class="normal"><a href="#__codelineno-0-64"> 64</a></span>
<span class="normal"><a href="#__codelineno-0-65"> 65</a></span>
<span class="normal"><a href="#__codelineno-0-66"> 66</a></span>
<span class="normal"><a href="#__codelineno-0-67"> 67</a></span>
<span class="normal"><a href="#__codelineno-0-68"> 68</a></span>
<span class="normal"><a href="#__codelineno-0-69"> 69</a></span>
<span class="normal"><a href="#__codelineno-0-70"> 70</a></span>
<span class="normal"><a href="#__codelineno-0-71"> 71</a></span>
<span class="normal"><a href="#__codelineno-0-72"> 72</a></span>
<span class="normal"><a href="#__codelineno-0-73"> 73</a></span>
<span class="normal"><a href="#__codelineno-0-74"> 74</a></span>
<span class="normal"><a href="#__codelineno-0-75"> 75</a></span>
<span class="normal"><a href="#__codelineno-0-76"> 76</a></span>
<span class="normal"><a href="#__codelineno-0-77"> 77</a></span>
<span class="normal"><a href="#__codelineno-0-78"> 78</a></span>
<span class="normal"><a href="#__codelineno-0-79"> 79</a></span>
<span class="normal"><a href="#__codelineno-0-80"> 80</a></span>
<span class="normal"><a href="#__codelineno-0-81"> 81</a></span>
<span class="normal"><a href="#__codelineno-0-82"> 82</a></span>
<span class="normal"><a href="#__codelineno-0-83"> 83</a></span>
<span class="normal"><a href="#__codelineno-0-84"> 84</a></span>
<span class="normal"><a href="#__codelineno-0-85"> 85</a></span>
<span class="normal"><a href="#__codelineno-0-86"> 86</a></span>
<span class="normal"><a href="#__codelineno-0-87"> 87</a></span>
<span class="normal"><a href="#__codelineno-0-88"> 88</a></span>
<span class="normal"><a href="#__codelineno-0-89"> 89</a></span>
<span class="normal"><a href="#__codelineno-0-90"> 90</a></span>
<span class="normal"><a href="#__codelineno-0-91"> 91</a></span>
<span class="normal"><a href="#__codelineno-0-92"> 92</a></span>
<span class="normal"><a href="#__codelineno-0-93"> 93</a></span>
<span class="normal"><a href="#__codelineno-0-94"> 94</a></span>
<span class="normal"><a href="#__codelineno-0-95"> 95</a></span>
<span class="normal"><a href="#__codelineno-0-96"> 96</a></span>
<span class="normal"><a href="#__codelineno-0-97"> 97</a></span>
<span class="normal"><a href="#__codelineno-0-98"> 98</a></span>
<span class="normal"><a href="#__codelineno-0-99"> 99</a></span>
<span class="normal"><a href="#__codelineno-0-100">100</a></span>
<span class="normal"><a href="#__codelineno-0-101">101</a></span>
<span class="normal"><a href="#__codelineno-0-102">102</a></span>
<span class="normal"><a href="#__codelineno-0-103">103</a></span>
<span class="normal"><a href="#__codelineno-0-104">104</a></span>
<span class="normal"><a href="#__codelineno-0-105">105</a></span>
<span class="normal"><a href="#__codelineno-0-106">106</a></span>
<span class="normal"><a href="#__codelineno-0-107">107</a></span>
<span class="normal"><a href="#__codelineno-0-108">108</a></span>
<span class="normal"><a href="#__codelineno-0-109">109</a></span>
<span class="normal"><a href="#__codelineno-0-110">110</a></span>
<span class="normal"><a href="#__codelineno-0-111">111</a></span>
<span class="normal"><a href="#__codelineno-0-112">112</a></span>
<span class="normal"><a href="#__codelineno-0-113">113</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-21"><a id="__codelineno-0-21" name="__codelineno-0-21"></a><span class="k">class</span> <span class="nc">VectorIndexing</span><span class="p">(</span><span class="n">BaseIndexing</span><span class="p">):</span>
</span><span id="__span-0-22"><a id="__codelineno-0-22" name="__codelineno-0-22"></a><span class="w"> </span><span class="sd">"""Ingest the document, run through the embedding, and store the embedding in a</span>
</span><span id="__span-0-23"><a id="__codelineno-0-23" name="__codelineno-0-23"></a><span class="sd"> vector store.</span>
</span><span id="__span-0-24"><a id="__codelineno-0-24" name="__codelineno-0-24"></a>
</span><span id="__span-0-25"><a id="__codelineno-0-25" name="__codelineno-0-25"></a><span class="sd"> This pipeline supports the following set of inputs:</span>
</span><span id="__span-0-26"><a id="__codelineno-0-26" name="__codelineno-0-26"></a><span class="sd"> - List of documents</span>
</span><span id="__span-0-27"><a id="__codelineno-0-27" name="__codelineno-0-27"></a><span class="sd"> - List of texts</span>
</span><span id="__span-0-28"><a id="__codelineno-0-28" name="__codelineno-0-28"></a><span class="sd"> """</span>
</span><span id="__span-0-29"><a id="__codelineno-0-29" name="__codelineno-0-29"></a>
</span><span id="__span-0-30"><a id="__codelineno-0-30" name="__codelineno-0-30"></a> <span class="n">cache_dir</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">flowsettings</span><span class="p">,</span> <span class="s2">"KH_CHUNKS_OUTPUT_DIR"</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
</span><span id="__span-0-31"><a id="__codelineno-0-31" name="__codelineno-0-31"></a> <span class="n">vector_store</span><span class="p">:</span> <span class="n">BaseVectorStore</span>
</span><span id="__span-0-32"><a id="__codelineno-0-32" name="__codelineno-0-32"></a> <span class="n">doc_store</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">BaseDocumentStore</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-0-33"><a id="__codelineno-0-33" name="__codelineno-0-33"></a> <span class="n">embedding</span><span class="p">:</span> <span class="n">BaseEmbeddings</span>
</span><span id="__span-0-34"><a id="__codelineno-0-34" name="__codelineno-0-34"></a> <span class="n">count_</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span>
</span><span id="__span-0-35"><a id="__codelineno-0-35" name="__codelineno-0-35"></a>
</span><span id="__span-0-36"><a id="__codelineno-0-36" name="__codelineno-0-36"></a> <span class="k">def</span> <span class="nf">to_retrieval_pipeline</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
</span><span id="__span-0-37"><a id="__codelineno-0-37" name="__codelineno-0-37"></a><span class="w"> </span><span class="sd">"""Convert the indexing pipeline to a retrieval pipeline"""</span>
</span><span id="__span-0-38"><a id="__codelineno-0-38" name="__codelineno-0-38"></a> <span class="k">return</span> <span class="n">VectorRetrieval</span><span class="p">(</span>
</span><span id="__span-0-39"><a id="__codelineno-0-39" name="__codelineno-0-39"></a> <span class="n">vector_store</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">vector_store</span><span class="p">,</span>
</span><span id="__span-0-40"><a id="__codelineno-0-40" name="__codelineno-0-40"></a> <span class="n">doc_store</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="p">,</span>
</span><span id="__span-0-41"><a id="__codelineno-0-41" name="__codelineno-0-41"></a> <span class="n">embedding</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">embedding</span><span class="p">,</span>
</span><span id="__span-0-42"><a id="__codelineno-0-42" name="__codelineno-0-42"></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
</span><span id="__span-0-43"><a id="__codelineno-0-43" name="__codelineno-0-43"></a> <span class="p">)</span>
</span><span id="__span-0-44"><a id="__codelineno-0-44" name="__codelineno-0-44"></a>
</span><span id="__span-0-45"><a id="__codelineno-0-45" name="__codelineno-0-45"></a> <span class="k">def</span> <span class="nf">write_chunk_to_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">docs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">Document</span><span class="p">]):</span>
</span><span id="__span-0-46"><a id="__codelineno-0-46" name="__codelineno-0-46"></a> <span class="c1"># save the chunks content into markdown format</span>
</span><span id="__span-0-47"><a id="__codelineno-0-47" name="__codelineno-0-47"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cache_dir</span><span class="p">:</span>
</span><span id="__span-0-48"><a id="__codelineno-0-48" name="__codelineno-0-48"></a> <span class="n">file_name</span> <span class="o">=</span> <span class="n">docs</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"file_name"</span><span class="p">)</span>
</span><span id="__span-0-49"><a id="__codelineno-0-49" name="__codelineno-0-49"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">file_name</span><span class="p">:</span>
</span><span id="__span-0-50"><a id="__codelineno-0-50" name="__codelineno-0-50"></a> <span class="k">return</span>
</span><span id="__span-0-51"><a id="__codelineno-0-51" name="__codelineno-0-51"></a>
</span><span id="__span-0-52"><a id="__codelineno-0-52" name="__codelineno-0-52"></a> <span class="n">file_name</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="n">file_name</span><span class="p">)</span>
</span><span id="__span-0-53"><a id="__codelineno-0-53" name="__codelineno-0-53"></a> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">docs</span><span class="p">)):</span>
</span><span id="__span-0-54"><a id="__codelineno-0-54" name="__codelineno-0-54"></a> <span class="n">markdown_content</span> <span class="o">=</span> <span class="s2">""</span>
</span><span id="__span-0-55"><a id="__codelineno-0-55" name="__codelineno-0-55"></a> <span class="k">if</span> <span class="s2">"page_label"</span> <span class="ow">in</span> <span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="p">:</span>
</span><span id="__span-0-56"><a id="__codelineno-0-56" name="__codelineno-0-56"></a> <span class="n">page_label</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"page_label"</span><span class="p">])</span>
</span><span id="__span-0-57"><a id="__codelineno-0-57" name="__codelineno-0-57"></a> <span class="n">markdown_content</span> <span class="o">+=</span> <span class="sa">f</span><span class="s2">"Page label: </span><span class="si">{</span><span class="n">page_label</span><span class="si">}</span><span class="s2">"</span>
</span><span id="__span-0-58"><a id="__codelineno-0-58" name="__codelineno-0-58"></a> <span class="k">if</span> <span class="s2">"file_name"</span> <span class="ow">in</span> <span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="p">:</span>
</span><span id="__span-0-59"><a id="__codelineno-0-59" name="__codelineno-0-59"></a> <span class="n">filename</span> <span class="o">=</span> <span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"file_name"</span><span class="p">]</span>
</span><span id="__span-0-60"><a id="__codelineno-0-60" name="__codelineno-0-60"></a> <span class="n">markdown_content</span> <span class="o">+=</span> <span class="sa">f</span><span class="s2">"</span><span class="se">\n</span><span class="s2">File name: </span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s2">"</span>
</span><span id="__span-0-61"><a id="__codelineno-0-61" name="__codelineno-0-61"></a> <span class="k">if</span> <span class="s2">"section"</span> <span class="ow">in</span> <span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="p">:</span>
</span><span id="__span-0-62"><a id="__codelineno-0-62" name="__codelineno-0-62"></a> <span class="n">section</span> <span class="o">=</span> <span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"section"</span><span class="p">]</span>
</span><span id="__span-0-63"><a id="__codelineno-0-63" name="__codelineno-0-63"></a> <span class="n">markdown_content</span> <span class="o">+=</span> <span class="sa">f</span><span class="s2">"</span><span class="se">\n</span><span class="s2">Section: </span><span class="si">{</span><span class="n">section</span><span class="si">}</span><span class="s2">"</span>
</span><span id="__span-0-64"><a id="__codelineno-0-64" name="__codelineno-0-64"></a> <span class="k">if</span> <span class="s2">"type"</span> <span class="ow">in</span> <span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="p">:</span>
</span><span id="__span-0-65"><a id="__codelineno-0-65" name="__codelineno-0-65"></a> <span class="k">if</span> <span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span> <span class="o">==</span> <span class="s2">"image"</span><span class="p">:</span>
</span><span id="__span-0-66"><a id="__codelineno-0-66" name="__codelineno-0-66"></a> <span class="n">image_origin</span> <span class="o">=</span> <span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"image_origin"</span><span class="p">]</span>
</span><span id="__span-0-67"><a id="__codelineno-0-67" name="__codelineno-0-67"></a> <span class="n">image_origin</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'&lt;p&gt;&lt;img src="</span><span class="si">{</span><span class="n">image_origin</span><span class="si">}</span><span class="s1">"&gt;&lt;/p&gt;'</span>
</span><span id="__span-0-68"><a id="__codelineno-0-68" name="__codelineno-0-68"></a> <span class="n">markdown_content</span> <span class="o">+=</span> <span class="sa">f</span><span class="s2">"</span><span class="se">\n</span><span class="s2">Image origin: </span><span class="si">{</span><span class="n">image_origin</span><span class="si">}</span><span class="s2">"</span>
</span><span id="__span-0-69"><a id="__codelineno-0-69" name="__codelineno-0-69"></a> <span class="k">if</span> <span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="p">:</span>
</span><span id="__span-0-70"><a id="__codelineno-0-70" name="__codelineno-0-70"></a> <span class="n">markdown_content</span> <span class="o">+=</span> <span class="sa">f</span><span class="s2">"</span><span class="se">\n</span><span class="s2">text:</span><span class="se">\n</span><span class="si">{</span><span class="n">docs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="si">}</span><span class="s2">"</span>
</span><span id="__span-0-71"><a id="__codelineno-0-71" name="__codelineno-0-71"></a>
</span><span id="__span-0-72"><a id="__codelineno-0-72" name="__codelineno-0-72"></a> <span class="k">with</span> <span class="nb">open</span><span class="p">(</span>
</span><span id="__span-0-73"><a id="__codelineno-0-73" name="__codelineno-0-73"></a> <span class="n">Path</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cache_dir</span><span class="p">)</span> <span class="o">/</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">file_name</span><span class="o">.</span><span class="n">stem</span><span class="si">}</span><span class="s2">_</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">count_</span><span class="o">+</span><span class="n">i</span><span class="si">}</span><span class="s2">.md"</span><span class="p">,</span>
</span><span id="__span-0-74"><a id="__codelineno-0-74" name="__codelineno-0-74"></a> <span class="s2">"w"</span><span class="p">,</span>
</span><span id="__span-0-75"><a id="__codelineno-0-75" name="__codelineno-0-75"></a> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8"</span><span class="p">,</span>
</span><span id="__span-0-76"><a id="__codelineno-0-76" name="__codelineno-0-76"></a> <span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
</span><span id="__span-0-77"><a id="__codelineno-0-77" name="__codelineno-0-77"></a> <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">markdown_content</span><span class="p">)</span>
</span><span id="__span-0-78"><a id="__codelineno-0-78" name="__codelineno-0-78"></a>
</span><span id="__span-0-79"><a id="__codelineno-0-79" name="__codelineno-0-79"></a> <span class="k">def</span> <span class="nf">add_to_docstore</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">docs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">Document</span><span class="p">]):</span>
</span><span id="__span-0-80"><a id="__codelineno-0-80" name="__codelineno-0-80"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="p">:</span>
</span><span id="__span-0-81"><a id="__codelineno-0-81" name="__codelineno-0-81"></a> <span class="nb">print</span><span class="p">(</span><span class="s2">"Adding documents to doc store"</span><span class="p">)</span>
</span><span id="__span-0-82"><a id="__codelineno-0-82" name="__codelineno-0-82"></a> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">docs</span><span class="p">)</span>
</span><span id="__span-0-83"><a id="__codelineno-0-83" name="__codelineno-0-83"></a>
</span><span id="__span-0-84"><a id="__codelineno-0-84" name="__codelineno-0-84"></a> <span class="k">def</span> <span class="nf">add_to_vectorstore</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">docs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">Document</span><span class="p">]):</span>
</span><span id="__span-0-85"><a id="__codelineno-0-85" name="__codelineno-0-85"></a> <span class="c1"># in case we want to skip embedding</span>
</span><span id="__span-0-86"><a id="__codelineno-0-86" name="__codelineno-0-86"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">vector_store</span><span class="p">:</span>
</span><span id="__span-0-87"><a id="__codelineno-0-87" name="__codelineno-0-87"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Getting embeddings for </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">docs</span><span class="p">)</span><span class="si">}</span><span class="s2"> nodes"</span><span class="p">)</span>
</span><span id="__span-0-88"><a id="__codelineno-0-88" name="__codelineno-0-88"></a> <span class="n">embeddings</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span><span class="n">docs</span><span class="p">)</span>
</span><span id="__span-0-89"><a id="__codelineno-0-89" name="__codelineno-0-89"></a> <span class="nb">print</span><span class="p">(</span><span class="s2">"Adding embeddings to vector store"</span><span class="p">)</span>
</span><span id="__span-0-90"><a id="__codelineno-0-90" name="__codelineno-0-90"></a> <span class="bp">self</span><span class="o">.</span><span class="n">vector_store</span><span class="o">.</span><span class="n">add</span><span class="p">(</span>
</span><span id="__span-0-91"><a id="__codelineno-0-91" name="__codelineno-0-91"></a> <span class="n">embeddings</span><span class="o">=</span><span class="n">embeddings</span><span class="p">,</span>
</span><span id="__span-0-92"><a id="__codelineno-0-92" name="__codelineno-0-92"></a> <span class="n">ids</span><span class="o">=</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">doc_id</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">docs</span><span class="p">],</span>
</span><span id="__span-0-93"><a id="__codelineno-0-93" name="__codelineno-0-93"></a> <span class="p">)</span>
</span><span id="__span-0-94"><a id="__codelineno-0-94" name="__codelineno-0-94"></a>
</span><span id="__span-0-95"><a id="__codelineno-0-95" name="__codelineno-0-95"></a> <span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">text</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">|</span> <span class="n">Document</span> <span class="o">|</span> <span class="nb">list</span><span class="p">[</span><span class="n">Document</span><span class="p">]):</span>
</span><span id="__span-0-96"><a id="__codelineno-0-96" name="__codelineno-0-96"></a> <span class="n">input_</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">Document</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-97"><a id="__codelineno-0-97" name="__codelineno-0-97"></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
</span><span id="__span-0-98"><a id="__codelineno-0-98" name="__codelineno-0-98"></a> <span class="n">text</span> <span class="o">=</span> <span class="p">[</span><span class="n">text</span><span class="p">]</span>
</span><span id="__span-0-99"><a id="__codelineno-0-99" name="__codelineno-0-99"></a>
</span><span id="__span-0-100"><a id="__codelineno-0-100" name="__codelineno-0-100"></a> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">cast</span><span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
</span><span id="__span-0-101"><a id="__codelineno-0-101" name="__codelineno-0-101"></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
</span><span id="__span-0-102"><a id="__codelineno-0-102" name="__codelineno-0-102"></a> <span class="n">input_</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Document</span><span class="p">(</span><span class="n">text</span><span class="o">=</span><span class="n">item</span><span class="p">,</span> <span class="n">id_</span><span class="o">=</span><span class="nb">str</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())))</span>
</span><span id="__span-0-103"><a id="__codelineno-0-103" name="__codelineno-0-103"></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="n">Document</span><span class="p">):</span>
</span><span id="__span-0-104"><a id="__codelineno-0-104" name="__codelineno-0-104"></a> <span class="n">input_</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
</span><span id="__span-0-105"><a id="__codelineno-0-105" name="__codelineno-0-105"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-106"><a id="__codelineno-0-106" name="__codelineno-0-106"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
</span><span id="__span-0-107"><a id="__codelineno-0-107" name="__codelineno-0-107"></a> <span class="sa">f</span><span class="s2">"Invalid input type </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">item</span><span class="p">)</span><span class="si">}</span><span class="s2">, should be str or Document"</span>
</span><span id="__span-0-108"><a id="__codelineno-0-108" name="__codelineno-0-108"></a> <span class="p">)</span>
</span><span id="__span-0-109"><a id="__codelineno-0-109" name="__codelineno-0-109"></a>
</span><span id="__span-0-110"><a id="__codelineno-0-110" name="__codelineno-0-110"></a> <span class="bp">self</span><span class="o">.</span><span class="n">add_to_vectorstore</span><span class="p">(</span><span class="n">input_</span><span class="p">)</span>
</span><span id="__span-0-111"><a id="__codelineno-0-111" name="__codelineno-0-111"></a> <span class="bp">self</span><span class="o">.</span><span class="n">add_to_docstore</span><span class="p">(</span><span class="n">input_</span><span class="p">)</span>
</span><span id="__span-0-112"><a id="__codelineno-0-112" name="__codelineno-0-112"></a> <span class="bp">self</span><span class="o">.</span><span class="n">write_chunk_to_file</span><span class="p">(</span><span class="n">input_</span><span class="p">)</span>
</span><span id="__span-0-113"><a id="__codelineno-0-113" name="__codelineno-0-113"></a> <span class="bp">self</span><span class="o">.</span><span class="n">count_</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">input_</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="indices.VectorIndexing.to_retrieval_pipeline" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">to_retrieval_pipeline</span>
<a href="#indices.VectorIndexing.to_retrieval_pipeline" class="headerlink" title="Permanent link"></a></h3>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">to_retrieval_pipeline</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Convert the indexing pipeline to a retrieval pipeline</p>
<details class="quote">
<summary>Source code in <code>libs/kotaemon/kotaemon/indices/vectorindex.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-36">36</a></span>
<span class="normal"><a href="#__codelineno-0-37">37</a></span>
<span class="normal"><a href="#__codelineno-0-38">38</a></span>
<span class="normal"><a href="#__codelineno-0-39">39</a></span>
<span class="normal"><a href="#__codelineno-0-40">40</a></span>
<span class="normal"><a href="#__codelineno-0-41">41</a></span>
<span class="normal"><a href="#__codelineno-0-42">42</a></span>
<span class="normal"><a href="#__codelineno-0-43">43</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-36"><a id="__codelineno-0-36" name="__codelineno-0-36"></a><span class="k">def</span> <span class="nf">to_retrieval_pipeline</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
</span><span id="__span-0-37"><a id="__codelineno-0-37" name="__codelineno-0-37"></a><span class="w"> </span><span class="sd">"""Convert the indexing pipeline to a retrieval pipeline"""</span>
</span><span id="__span-0-38"><a id="__codelineno-0-38" name="__codelineno-0-38"></a> <span class="k">return</span> <span class="n">VectorRetrieval</span><span class="p">(</span>
</span><span id="__span-0-39"><a id="__codelineno-0-39" name="__codelineno-0-39"></a> <span class="n">vector_store</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">vector_store</span><span class="p">,</span>
</span><span id="__span-0-40"><a id="__codelineno-0-40" name="__codelineno-0-40"></a> <span class="n">doc_store</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="p">,</span>
</span><span id="__span-0-41"><a id="__codelineno-0-41" name="__codelineno-0-41"></a> <span class="n">embedding</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">embedding</span><span class="p">,</span>
</span><span id="__span-0-42"><a id="__codelineno-0-42" name="__codelineno-0-42"></a> <span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
</span><span id="__span-0-43"><a id="__codelineno-0-43" name="__codelineno-0-43"></a> <span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="indices.VectorRetrieval" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">VectorRetrieval</span>
<a href="#indices.VectorRetrieval" class="headerlink" title="Permanent link"></a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code>BaseRetrieval</code></p>
<p>Retrieve a list of documents from the vector store</p>
<details class="quote">
<summary>Source code in <code>libs/kotaemon/kotaemon/indices/vectorindex.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-116">116</a></span>
<span class="normal"><a href="#__codelineno-0-117">117</a></span>
<span class="normal"><a href="#__codelineno-0-118">118</a></span>
<span class="normal"><a href="#__codelineno-0-119">119</a></span>
<span class="normal"><a href="#__codelineno-0-120">120</a></span>
<span class="normal"><a href="#__codelineno-0-121">121</a></span>
<span class="normal"><a href="#__codelineno-0-122">122</a></span>
<span class="normal"><a href="#__codelineno-0-123">123</a></span>
<span class="normal"><a href="#__codelineno-0-124">124</a></span>
<span class="normal"><a href="#__codelineno-0-125">125</a></span>
<span class="normal"><a href="#__codelineno-0-126">126</a></span>
<span class="normal"><a href="#__codelineno-0-127">127</a></span>
<span class="normal"><a href="#__codelineno-0-128">128</a></span>
<span class="normal"><a href="#__codelineno-0-129">129</a></span>
<span class="normal"><a href="#__codelineno-0-130">130</a></span>
<span class="normal"><a href="#__codelineno-0-131">131</a></span>
<span class="normal"><a href="#__codelineno-0-132">132</a></span>
<span class="normal"><a href="#__codelineno-0-133">133</a></span>
<span class="normal"><a href="#__codelineno-0-134">134</a></span>
<span class="normal"><a href="#__codelineno-0-135">135</a></span>
<span class="normal"><a href="#__codelineno-0-136">136</a></span>
<span class="normal"><a href="#__codelineno-0-137">137</a></span>
<span class="normal"><a href="#__codelineno-0-138">138</a></span>
<span class="normal"><a href="#__codelineno-0-139">139</a></span>
<span class="normal"><a href="#__codelineno-0-140">140</a></span>
<span class="normal"><a href="#__codelineno-0-141">141</a></span>
<span class="normal"><a href="#__codelineno-0-142">142</a></span>
<span class="normal"><a href="#__codelineno-0-143">143</a></span>
<span class="normal"><a href="#__codelineno-0-144">144</a></span>
<span class="normal"><a href="#__codelineno-0-145">145</a></span>
<span class="normal"><a href="#__codelineno-0-146">146</a></span>
<span class="normal"><a href="#__codelineno-0-147">147</a></span>
<span class="normal"><a href="#__codelineno-0-148">148</a></span>
<span class="normal"><a href="#__codelineno-0-149">149</a></span>
<span class="normal"><a href="#__codelineno-0-150">150</a></span>
<span class="normal"><a href="#__codelineno-0-151">151</a></span>
<span class="normal"><a href="#__codelineno-0-152">152</a></span>
<span class="normal"><a href="#__codelineno-0-153">153</a></span>
<span class="normal"><a href="#__codelineno-0-154">154</a></span>
<span class="normal"><a href="#__codelineno-0-155">155</a></span>
<span class="normal"><a href="#__codelineno-0-156">156</a></span>
<span class="normal"><a href="#__codelineno-0-157">157</a></span>
<span class="normal"><a href="#__codelineno-0-158">158</a></span>
<span class="normal"><a href="#__codelineno-0-159">159</a></span>
<span class="normal"><a href="#__codelineno-0-160">160</a></span>
<span class="normal"><a href="#__codelineno-0-161">161</a></span>
<span class="normal"><a href="#__codelineno-0-162">162</a></span>
<span class="normal"><a href="#__codelineno-0-163">163</a></span>
<span class="normal"><a href="#__codelineno-0-164">164</a></span>
<span class="normal"><a href="#__codelineno-0-165">165</a></span>
<span class="normal"><a href="#__codelineno-0-166">166</a></span>
<span class="normal"><a href="#__codelineno-0-167">167</a></span>
<span class="normal"><a href="#__codelineno-0-168">168</a></span>
<span class="normal"><a href="#__codelineno-0-169">169</a></span>
<span class="normal"><a href="#__codelineno-0-170">170</a></span>
<span class="normal"><a href="#__codelineno-0-171">171</a></span>
<span class="normal"><a href="#__codelineno-0-172">172</a></span>
<span class="normal"><a href="#__codelineno-0-173">173</a></span>
<span class="normal"><a href="#__codelineno-0-174">174</a></span>
<span class="normal"><a href="#__codelineno-0-175">175</a></span>
<span class="normal"><a href="#__codelineno-0-176">176</a></span>
<span class="normal"><a href="#__codelineno-0-177">177</a></span>
<span class="normal"><a href="#__codelineno-0-178">178</a></span>
<span class="normal"><a href="#__codelineno-0-179">179</a></span>
<span class="normal"><a href="#__codelineno-0-180">180</a></span>
<span class="normal"><a href="#__codelineno-0-181">181</a></span>
<span class="normal"><a href="#__codelineno-0-182">182</a></span>
<span class="normal"><a href="#__codelineno-0-183">183</a></span>
<span class="normal"><a href="#__codelineno-0-184">184</a></span>
<span class="normal"><a href="#__codelineno-0-185">185</a></span>
<span class="normal"><a href="#__codelineno-0-186">186</a></span>
<span class="normal"><a href="#__codelineno-0-187">187</a></span>
<span class="normal"><a href="#__codelineno-0-188">188</a></span>
<span class="normal"><a href="#__codelineno-0-189">189</a></span>
<span class="normal"><a href="#__codelineno-0-190">190</a></span>
<span class="normal"><a href="#__codelineno-0-191">191</a></span>
<span class="normal"><a href="#__codelineno-0-192">192</a></span>
<span class="normal"><a href="#__codelineno-0-193">193</a></span>
<span class="normal"><a href="#__codelineno-0-194">194</a></span>
<span class="normal"><a href="#__codelineno-0-195">195</a></span>
<span class="normal"><a href="#__codelineno-0-196">196</a></span>
<span class="normal"><a href="#__codelineno-0-197">197</a></span>
<span class="normal"><a href="#__codelineno-0-198">198</a></span>
<span class="normal"><a href="#__codelineno-0-199">199</a></span>
<span class="normal"><a href="#__codelineno-0-200">200</a></span>
<span class="normal"><a href="#__codelineno-0-201">201</a></span>
<span class="normal"><a href="#__codelineno-0-202">202</a></span>
<span class="normal"><a href="#__codelineno-0-203">203</a></span>
<span class="normal"><a href="#__codelineno-0-204">204</a></span>
<span class="normal"><a href="#__codelineno-0-205">205</a></span>
<span class="normal"><a href="#__codelineno-0-206">206</a></span>
<span class="normal"><a href="#__codelineno-0-207">207</a></span>
<span class="normal"><a href="#__codelineno-0-208">208</a></span>
<span class="normal"><a href="#__codelineno-0-209">209</a></span>
<span class="normal"><a href="#__codelineno-0-210">210</a></span>
<span class="normal"><a href="#__codelineno-0-211">211</a></span>
<span class="normal"><a href="#__codelineno-0-212">212</a></span>
<span class="normal"><a href="#__codelineno-0-213">213</a></span>
<span class="normal"><a href="#__codelineno-0-214">214</a></span>
<span class="normal"><a href="#__codelineno-0-215">215</a></span>
<span class="normal"><a href="#__codelineno-0-216">216</a></span>
<span class="normal"><a href="#__codelineno-0-217">217</a></span>
<span class="normal"><a href="#__codelineno-0-218">218</a></span>
<span class="normal"><a href="#__codelineno-0-219">219</a></span>
<span class="normal"><a href="#__codelineno-0-220">220</a></span>
<span class="normal"><a href="#__codelineno-0-221">221</a></span>
<span class="normal"><a href="#__codelineno-0-222">222</a></span>
<span class="normal"><a href="#__codelineno-0-223">223</a></span>
<span class="normal"><a href="#__codelineno-0-224">224</a></span>
<span class="normal"><a href="#__codelineno-0-225">225</a></span>
<span class="normal"><a href="#__codelineno-0-226">226</a></span>
<span class="normal"><a href="#__codelineno-0-227">227</a></span>
<span class="normal"><a href="#__codelineno-0-228">228</a></span>
<span class="normal"><a href="#__codelineno-0-229">229</a></span>
<span class="normal"><a href="#__codelineno-0-230">230</a></span>
<span class="normal"><a href="#__codelineno-0-231">231</a></span>
<span class="normal"><a href="#__codelineno-0-232">232</a></span>
<span class="normal"><a href="#__codelineno-0-233">233</a></span>
<span class="normal"><a href="#__codelineno-0-234">234</a></span>
<span class="normal"><a href="#__codelineno-0-235">235</a></span>
<span class="normal"><a href="#__codelineno-0-236">236</a></span>
<span class="normal"><a href="#__codelineno-0-237">237</a></span>
<span class="normal"><a href="#__codelineno-0-238">238</a></span>
<span class="normal"><a href="#__codelineno-0-239">239</a></span>
<span class="normal"><a href="#__codelineno-0-240">240</a></span>
<span class="normal"><a href="#__codelineno-0-241">241</a></span>
<span class="normal"><a href="#__codelineno-0-242">242</a></span>
<span class="normal"><a href="#__codelineno-0-243">243</a></span>
<span class="normal"><a href="#__codelineno-0-244">244</a></span>
<span class="normal"><a href="#__codelineno-0-245">245</a></span>
<span class="normal"><a href="#__codelineno-0-246">246</a></span>
<span class="normal"><a href="#__codelineno-0-247">247</a></span>
<span class="normal"><a href="#__codelineno-0-248">248</a></span>
<span class="normal"><a href="#__codelineno-0-249">249</a></span>
<span class="normal"><a href="#__codelineno-0-250">250</a></span>
<span class="normal"><a href="#__codelineno-0-251">251</a></span>
<span class="normal"><a href="#__codelineno-0-252">252</a></span>
<span class="normal"><a href="#__codelineno-0-253">253</a></span>
<span class="normal"><a href="#__codelineno-0-254">254</a></span>
<span class="normal"><a href="#__codelineno-0-255">255</a></span>
<span class="normal"><a href="#__codelineno-0-256">256</a></span>
<span class="normal"><a href="#__codelineno-0-257">257</a></span>
<span class="normal"><a href="#__codelineno-0-258">258</a></span>
<span class="normal"><a href="#__codelineno-0-259">259</a></span>
<span class="normal"><a href="#__codelineno-0-260">260</a></span>
<span class="normal"><a href="#__codelineno-0-261">261</a></span>
<span class="normal"><a href="#__codelineno-0-262">262</a></span>
<span class="normal"><a href="#__codelineno-0-263">263</a></span>
<span class="normal"><a href="#__codelineno-0-264">264</a></span>
<span class="normal"><a href="#__codelineno-0-265">265</a></span>
<span class="normal"><a href="#__codelineno-0-266">266</a></span>
<span class="normal"><a href="#__codelineno-0-267">267</a></span>
<span class="normal"><a href="#__codelineno-0-268">268</a></span>
<span class="normal"><a href="#__codelineno-0-269">269</a></span>
<span class="normal"><a href="#__codelineno-0-270">270</a></span>
<span class="normal"><a href="#__codelineno-0-271">271</a></span>
<span class="normal"><a href="#__codelineno-0-272">272</a></span>
<span class="normal"><a href="#__codelineno-0-273">273</a></span>
<span class="normal"><a href="#__codelineno-0-274">274</a></span>
<span class="normal"><a href="#__codelineno-0-275">275</a></span>
<span class="normal"><a href="#__codelineno-0-276">276</a></span>
<span class="normal"><a href="#__codelineno-0-277">277</a></span>
<span class="normal"><a href="#__codelineno-0-278">278</a></span>
<span class="normal"><a href="#__codelineno-0-279">279</a></span>
<span class="normal"><a href="#__codelineno-0-280">280</a></span>
<span class="normal"><a href="#__codelineno-0-281">281</a></span>
<span class="normal"><a href="#__codelineno-0-282">282</a></span>
<span class="normal"><a href="#__codelineno-0-283">283</a></span>
<span class="normal"><a href="#__codelineno-0-284">284</a></span>
<span class="normal"><a href="#__codelineno-0-285">285</a></span>
<span class="normal"><a href="#__codelineno-0-286">286</a></span>
<span class="normal"><a href="#__codelineno-0-287">287</a></span>
<span class="normal"><a href="#__codelineno-0-288">288</a></span>
<span class="normal"><a href="#__codelineno-0-289">289</a></span>
<span class="normal"><a href="#__codelineno-0-290">290</a></span>
<span class="normal"><a href="#__codelineno-0-291">291</a></span>
<span class="normal"><a href="#__codelineno-0-292">292</a></span>
<span class="normal"><a href="#__codelineno-0-293">293</a></span>
<span class="normal"><a href="#__codelineno-0-294">294</a></span>
<span class="normal"><a href="#__codelineno-0-295">295</a></span>
<span class="normal"><a href="#__codelineno-0-296">296</a></span>
<span class="normal"><a href="#__codelineno-0-297">297</a></span>
<span class="normal"><a href="#__codelineno-0-298">298</a></span>
<span class="normal"><a href="#__codelineno-0-299">299</a></span>
<span class="normal"><a href="#__codelineno-0-300">300</a></span>
<span class="normal"><a href="#__codelineno-0-301">301</a></span>
<span class="normal"><a href="#__codelineno-0-302">302</a></span>
<span class="normal"><a href="#__codelineno-0-303">303</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-116"><a id="__codelineno-0-116" name="__codelineno-0-116"></a><span class="k">class</span> <span class="nc">VectorRetrieval</span><span class="p">(</span><span class="n">BaseRetrieval</span><span class="p">):</span>
</span><span id="__span-0-117"><a id="__codelineno-0-117" name="__codelineno-0-117"></a><span class="w"> </span><span class="sd">"""Retrieve list of documents from vector store"""</span>
</span><span id="__span-0-118"><a id="__codelineno-0-118" name="__codelineno-0-118"></a>
</span><span id="__span-0-119"><a id="__codelineno-0-119" name="__codelineno-0-119"></a> <span class="n">vector_store</span><span class="p">:</span> <span class="n">BaseVectorStore</span>
</span><span id="__span-0-120"><a id="__codelineno-0-120" name="__codelineno-0-120"></a> <span class="n">doc_store</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">BaseDocumentStore</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-0-121"><a id="__codelineno-0-121" name="__codelineno-0-121"></a> <span class="n">embedding</span><span class="p">:</span> <span class="n">BaseEmbeddings</span>
</span><span id="__span-0-122"><a id="__codelineno-0-122" name="__codelineno-0-122"></a> <span class="n">rerankers</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">BaseReranking</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-123"><a id="__codelineno-0-123" name="__codelineno-0-123"></a> <span class="n">top_k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span>
</span><span id="__span-0-124"><a id="__codelineno-0-124" name="__codelineno-0-124"></a> <span class="n">first_round_top_k_mult</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span>
</span><span id="__span-0-125"><a id="__codelineno-0-125" name="__codelineno-0-125"></a> <span class="n">retrieval_mode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"hybrid"</span> <span class="c1"># vector, text, hybrid</span>
</span><span id="__span-0-126"><a id="__codelineno-0-126" name="__codelineno-0-126"></a>
</span><span id="__span-0-127"><a id="__codelineno-0-127" name="__codelineno-0-127"></a> <span class="k">def</span> <span class="nf">_filter_docs</span><span class="p">(</span>
</span><span id="__span-0-128"><a id="__codelineno-0-128" name="__codelineno-0-128"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">documents</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">],</span> <span class="n">top_k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-0-129"><a id="__codelineno-0-129" name="__codelineno-0-129"></a> <span class="p">):</span>
</span><span id="__span-0-130"><a id="__codelineno-0-130" name="__codelineno-0-130"></a> <span class="k">if</span> <span class="n">top_k</span><span class="p">:</span>
</span><span id="__span-0-131"><a id="__codelineno-0-131" name="__codelineno-0-131"></a> <span class="n">documents</span> <span class="o">=</span> <span class="n">documents</span><span class="p">[:</span><span class="n">top_k</span><span class="p">]</span>
</span><span id="__span-0-132"><a id="__codelineno-0-132" name="__codelineno-0-132"></a> <span class="k">return</span> <span class="n">documents</span>
</span><span id="__span-0-133"><a id="__codelineno-0-133" name="__codelineno-0-133"></a>
</span><span id="__span-0-134"><a id="__codelineno-0-134" name="__codelineno-0-134"></a> <span class="k">def</span> <span class="nf">run</span><span class="p">(</span>
</span><span id="__span-0-135"><a id="__codelineno-0-135" name="__codelineno-0-135"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">text</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="n">Document</span><span class="p">,</span> <span class="n">top_k</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span>
</span><span id="__span-0-136"><a id="__codelineno-0-136" name="__codelineno-0-136"></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">]:</span>
</span><span id="__span-0-137"><a id="__codelineno-0-137" name="__codelineno-0-137"></a><span class="w"> </span><span class="sd">"""Retrieve a list of documents from vector store</span>
</span><span id="__span-0-138"><a id="__codelineno-0-138" name="__codelineno-0-138"></a>
</span><span id="__span-0-139"><a id="__codelineno-0-139" name="__codelineno-0-139"></a><span class="sd"> Args:</span>
</span><span id="__span-0-140"><a id="__codelineno-0-140" name="__codelineno-0-140"></a><span class="sd"> text: the text to retrieve similar documents</span>
</span><span id="__span-0-141"><a id="__codelineno-0-141" name="__codelineno-0-141"></a><span class="sd"> top_k: number of top similar documents to return</span>
</span><span id="__span-0-142"><a id="__codelineno-0-142" name="__codelineno-0-142"></a>
</span><span id="__span-0-143"><a id="__codelineno-0-143" name="__codelineno-0-143"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-144"><a id="__codelineno-0-144" name="__codelineno-0-144"></a><span class="sd"> list[RetrievedDocument]: list of retrieved documents</span>
</span><span id="__span-0-145"><a id="__codelineno-0-145" name="__codelineno-0-145"></a><span class="sd"> """</span>
</span><span id="__span-0-146"><a id="__codelineno-0-146" name="__codelineno-0-146"></a> <span class="k">if</span> <span class="n">top_k</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-147"><a id="__codelineno-0-147" name="__codelineno-0-147"></a> <span class="n">top_k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">top_k</span>
</span><span id="__span-0-148"><a id="__codelineno-0-148" name="__codelineno-0-148"></a>
</span><span id="__span-0-149"><a id="__codelineno-0-149" name="__codelineno-0-149"></a> <span class="n">do_extend</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"do_extend"</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span><span id="__span-0-150"><a id="__codelineno-0-150" name="__codelineno-0-150"></a> <span class="n">thumbnail_count</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"thumbnail_count"</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
</span><span id="__span-0-151"><a id="__codelineno-0-151" name="__codelineno-0-151"></a>
</span><span id="__span-0-152"><a id="__codelineno-0-152" name="__codelineno-0-152"></a> <span class="k">if</span> <span class="n">do_extend</span><span class="p">:</span>
</span><span id="__span-0-153"><a id="__codelineno-0-153" name="__codelineno-0-153"></a> <span class="n">top_k_first_round</span> <span class="o">=</span> <span class="n">top_k</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">first_round_top_k_mult</span>
</span><span id="__span-0-154"><a id="__codelineno-0-154" name="__codelineno-0-154"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-155"><a id="__codelineno-0-155" name="__codelineno-0-155"></a> <span class="n">top_k_first_round</span> <span class="o">=</span> <span class="n">top_k</span>
</span><span id="__span-0-156"><a id="__codelineno-0-156" name="__codelineno-0-156"></a>
</span><span id="__span-0-157"><a id="__codelineno-0-157" name="__codelineno-0-157"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-158"><a id="__codelineno-0-158" name="__codelineno-0-158"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
</span><span id="__span-0-159"><a id="__codelineno-0-159" name="__codelineno-0-159"></a> <span class="s2">"doc_store is not provided. Please provide a doc_store to "</span>
</span><span id="__span-0-160"><a id="__codelineno-0-160" name="__codelineno-0-160"></a> <span class="s2">"retrieve the documents"</span>
</span><span id="__span-0-161"><a id="__codelineno-0-161" name="__codelineno-0-161"></a> <span class="p">)</span>
</span><span id="__span-0-162"><a id="__codelineno-0-162" name="__codelineno-0-162"></a>
</span><span id="__span-0-163"><a id="__codelineno-0-163" name="__codelineno-0-163"></a> <span class="n">result</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-164"><a id="__codelineno-0-164" name="__codelineno-0-164"></a> <span class="c1"># TODO: should declare scope directly in the run params</span>
</span><span id="__span-0-165"><a id="__codelineno-0-165" name="__codelineno-0-165"></a> <span class="n">scope</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"scope"</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
</span><span id="__span-0-166"><a id="__codelineno-0-166" name="__codelineno-0-166"></a> <span class="n">emb</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span>
</span><span id="__span-0-167"><a id="__codelineno-0-167" name="__codelineno-0-167"></a>
</span><span id="__span-0-168"><a id="__codelineno-0-168" name="__codelineno-0-168"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">retrieval_mode</span> <span class="o">==</span> <span class="s2">"vector"</span><span class="p">:</span>
</span><span id="__span-0-169"><a id="__codelineno-0-169" name="__codelineno-0-169"></a> <span class="n">emb</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span><span class="n">text</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">embedding</span>
</span><span id="__span-0-170"><a id="__codelineno-0-170" name="__codelineno-0-170"></a> <span class="n">_</span><span class="p">,</span> <span class="n">scores</span><span class="p">,</span> <span class="n">ids</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vector_store</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span><span id="__span-0-171"><a id="__codelineno-0-171" name="__codelineno-0-171"></a> <span class="n">embedding</span><span class="o">=</span><span class="n">emb</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k_first_round</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span>
</span><span id="__span-0-172"><a id="__codelineno-0-172" name="__codelineno-0-172"></a> <span class="p">)</span>
</span><span id="__span-0-173"><a id="__codelineno-0-173" name="__codelineno-0-173"></a> <span class="n">docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ids</span><span class="p">)</span>
</span><span id="__span-0-174"><a id="__codelineno-0-174" name="__codelineno-0-174"></a> <span class="n">result</span> <span class="o">=</span> <span class="p">[</span>
</span><span id="__span-0-175"><a id="__codelineno-0-175" name="__codelineno-0-175"></a> <span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(),</span> <span class="n">score</span><span class="o">=</span><span class="n">score</span><span class="p">)</span>
</span><span id="__span-0-176"><a id="__codelineno-0-176" name="__codelineno-0-176"></a> <span class="k">for</span> <span class="n">doc</span><span class="p">,</span> <span class="n">score</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">docs</span><span class="p">,</span> <span class="n">scores</span><span class="p">)</span>
</span><span id="__span-0-177"><a id="__codelineno-0-177" name="__codelineno-0-177"></a> <span class="p">]</span>
</span><span id="__span-0-178"><a id="__codelineno-0-178" name="__codelineno-0-178"></a> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">retrieval_mode</span> <span class="o">==</span> <span class="s2">"text"</span><span class="p">:</span>
</span><span id="__span-0-179"><a id="__codelineno-0-179" name="__codelineno-0-179"></a> <span class="n">query</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">text</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">Document</span><span class="p">)</span> <span class="k">else</span> <span class="n">text</span>
</span><span id="__span-0-180"><a id="__codelineno-0-180" name="__codelineno-0-180"></a> <span class="n">docs</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-181"><a id="__codelineno-0-181" name="__codelineno-0-181"></a> <span class="k">if</span> <span class="n">scope</span><span class="p">:</span>
</span><span id="__span-0-182"><a id="__codelineno-0-182" name="__codelineno-0-182"></a> <span class="n">docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span><span id="__span-0-183"><a id="__codelineno-0-183" name="__codelineno-0-183"></a> <span class="n">query</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k_first_round</span><span class="p">,</span> <span class="n">doc_ids</span><span class="o">=</span><span class="n">scope</span>
</span><span id="__span-0-184"><a id="__codelineno-0-184" name="__codelineno-0-184"></a> <span class="p">)</span>
</span><span id="__span-0-185"><a id="__codelineno-0-185" name="__codelineno-0-185"></a> <span class="n">result</span> <span class="o">=</span> <span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(),</span> <span class="n">score</span><span class="o">=-</span><span class="mf">1.0</span><span class="p">)</span> <span class="k">for</span> <span class="n">doc</span> <span class="ow">in</span> <span class="n">docs</span><span class="p">]</span>
</span><span id="__span-0-186"><a id="__codelineno-0-186" name="__codelineno-0-186"></a> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">retrieval_mode</span> <span class="o">==</span> <span class="s2">"hybrid"</span><span class="p">:</span>
</span><span id="__span-0-187"><a id="__codelineno-0-187" name="__codelineno-0-187"></a> <span class="c1"># similarity search section</span>
</span><span id="__span-0-188"><a id="__codelineno-0-188" name="__codelineno-0-188"></a> <span class="n">emb</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span><span class="n">text</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">embedding</span>
</span><span id="__span-0-189"><a id="__codelineno-0-189" name="__codelineno-0-189"></a> <span class="n">vs_docs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-190"><a id="__codelineno-0-190" name="__codelineno-0-190"></a> <span class="n">vs_ids</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-191"><a id="__codelineno-0-191" name="__codelineno-0-191"></a> <span class="n">vs_scores</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-192"><a id="__codelineno-0-192" name="__codelineno-0-192"></a>
</span><span id="__span-0-193"><a id="__codelineno-0-193" name="__codelineno-0-193"></a> <span class="k">def</span> <span class="nf">query_vectorstore</span><span class="p">():</span>
</span><span id="__span-0-194"><a id="__codelineno-0-194" name="__codelineno-0-194"></a> <span class="k">nonlocal</span> <span class="n">vs_docs</span>
</span><span id="__span-0-195"><a id="__codelineno-0-195" name="__codelineno-0-195"></a> <span class="k">nonlocal</span> <span class="n">vs_scores</span>
</span><span id="__span-0-196"><a id="__codelineno-0-196" name="__codelineno-0-196"></a> <span class="k">nonlocal</span> <span class="n">vs_ids</span>
</span><span id="__span-0-197"><a id="__codelineno-0-197" name="__codelineno-0-197"></a>
</span><span id="__span-0-198"><a id="__codelineno-0-198" name="__codelineno-0-198"></a> <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
</span><span id="__span-0-199"><a id="__codelineno-0-199" name="__codelineno-0-199"></a> <span class="n">_</span><span class="p">,</span> <span class="n">vs_scores</span><span class="p">,</span> <span class="n">vs_ids</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vector_store</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span><span id="__span-0-200"><a id="__codelineno-0-200" name="__codelineno-0-200"></a> <span class="n">embedding</span><span class="o">=</span><span class="n">emb</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k_first_round</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span>
</span><span id="__span-0-201"><a id="__codelineno-0-201" name="__codelineno-0-201"></a> <span class="p">)</span>
</span><span id="__span-0-202"><a id="__codelineno-0-202" name="__codelineno-0-202"></a> <span class="k">if</span> <span class="n">vs_ids</span><span class="p">:</span>
</span><span id="__span-0-203"><a id="__codelineno-0-203" name="__codelineno-0-203"></a> <span class="n">vs_docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">vs_ids</span><span class="p">)</span>
</span><span id="__span-0-204"><a id="__codelineno-0-204" name="__codelineno-0-204"></a>
</span><span id="__span-0-205"><a id="__codelineno-0-205" name="__codelineno-0-205"></a> <span class="c1"># full-text search section</span>
</span><span id="__span-0-206"><a id="__codelineno-0-206" name="__codelineno-0-206"></a> <span class="n">ds_docs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-207"><a id="__codelineno-0-207" name="__codelineno-0-207"></a>
</span><span id="__span-0-208"><a id="__codelineno-0-208" name="__codelineno-0-208"></a> <span class="k">def</span> <span class="nf">query_docstore</span><span class="p">():</span>
</span><span id="__span-0-209"><a id="__codelineno-0-209" name="__codelineno-0-209"></a> <span class="k">nonlocal</span> <span class="n">ds_docs</span>
</span><span id="__span-0-210"><a id="__codelineno-0-210" name="__codelineno-0-210"></a>
</span><span id="__span-0-211"><a id="__codelineno-0-211" name="__codelineno-0-211"></a> <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
</span><span id="__span-0-212"><a id="__codelineno-0-212" name="__codelineno-0-212"></a> <span class="n">query</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">text</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">Document</span><span class="p">)</span> <span class="k">else</span> <span class="n">text</span>
</span><span id="__span-0-213"><a id="__codelineno-0-213" name="__codelineno-0-213"></a> <span class="k">if</span> <span class="n">scope</span><span class="p">:</span>
</span><span id="__span-0-214"><a id="__codelineno-0-214" name="__codelineno-0-214"></a> <span class="n">ds_docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span><span id="__span-0-215"><a id="__codelineno-0-215" name="__codelineno-0-215"></a> <span class="n">query</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k_first_round</span><span class="p">,</span> <span class="n">doc_ids</span><span class="o">=</span><span class="n">scope</span>
</span><span id="__span-0-216"><a id="__codelineno-0-216" name="__codelineno-0-216"></a> <span class="p">)</span>
</span><span id="__span-0-217"><a id="__codelineno-0-217" name="__codelineno-0-217"></a>
</span><span id="__span-0-218"><a id="__codelineno-0-218" name="__codelineno-0-218"></a> <span class="n">vs_query_thread</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Thread</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="n">query_vectorstore</span><span class="p">)</span>
</span><span id="__span-0-219"><a id="__codelineno-0-219" name="__codelineno-0-219"></a> <span class="n">ds_query_thread</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Thread</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="n">query_docstore</span><span class="p">)</span>
</span><span id="__span-0-220"><a id="__codelineno-0-220" name="__codelineno-0-220"></a>
</span><span id="__span-0-221"><a id="__codelineno-0-221" name="__codelineno-0-221"></a> <span class="n">vs_query_thread</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>
</span><span id="__span-0-222"><a id="__codelineno-0-222" name="__codelineno-0-222"></a> <span class="n">ds_query_thread</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>
</span><span id="__span-0-223"><a id="__codelineno-0-223" name="__codelineno-0-223"></a>
</span><span id="__span-0-224"><a id="__codelineno-0-224" name="__codelineno-0-224"></a> <span class="n">vs_query_thread</span><span class="o">.</span><span class="n">join</span><span class="p">()</span>
</span><span id="__span-0-225"><a id="__codelineno-0-225" name="__codelineno-0-225"></a> <span class="n">ds_query_thread</span><span class="o">.</span><span class="n">join</span><span class="p">()</span>
</span><span id="__span-0-226"><a id="__codelineno-0-226" name="__codelineno-0-226"></a>
</span><span id="__span-0-227"><a id="__codelineno-0-227" name="__codelineno-0-227"></a> <span class="n">result</span> <span class="o">=</span> <span class="p">[</span>
</span><span id="__span-0-228"><a id="__codelineno-0-228" name="__codelineno-0-228"></a> <span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(),</span> <span class="n">score</span><span class="o">=-</span><span class="mf">1.0</span><span class="p">)</span>
</span><span id="__span-0-229"><a id="__codelineno-0-229" name="__codelineno-0-229"></a> <span class="k">for</span> <span class="n">doc</span> <span class="ow">in</span> <span class="n">ds_docs</span>
</span><span id="__span-0-230"><a id="__codelineno-0-230" name="__codelineno-0-230"></a> <span class="k">if</span> <span class="n">doc</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">vs_ids</span>
</span><span id="__span-0-231"><a id="__codelineno-0-231" name="__codelineno-0-231"></a> <span class="p">]</span>
</span><span id="__span-0-232"><a id="__codelineno-0-232" name="__codelineno-0-232"></a> <span class="n">result</span> <span class="o">+=</span> <span class="p">[</span>
</span><span id="__span-0-233"><a id="__codelineno-0-233" name="__codelineno-0-233"></a> <span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(),</span> <span class="n">score</span><span class="o">=</span><span class="n">score</span><span class="p">)</span>
</span><span id="__span-0-234"><a id="__codelineno-0-234" name="__codelineno-0-234"></a> <span class="k">for</span> <span class="n">doc</span><span class="p">,</span> <span class="n">score</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">vs_docs</span><span class="p">,</span> <span class="n">vs_scores</span><span class="p">)</span>
</span><span id="__span-0-235"><a id="__codelineno-0-235" name="__codelineno-0-235"></a> <span class="p">]</span>
</span><span id="__span-0-236"><a id="__codelineno-0-236" name="__codelineno-0-236"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Got </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">vs_docs</span><span class="p">)</span><span class="si">}</span><span class="s2"> from vectorstore"</span><span class="p">)</span>
</span><span id="__span-0-237"><a id="__codelineno-0-237" name="__codelineno-0-237"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Got </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">ds_docs</span><span class="p">)</span><span class="si">}</span><span class="s2"> from docstore"</span><span class="p">)</span>
</span><span id="__span-0-238"><a id="__codelineno-0-238" name="__codelineno-0-238"></a>
</span><span id="__span-0-239"><a id="__codelineno-0-239" name="__codelineno-0-239"></a> <span class="c1"># use additional reranker to re-order the document list</span>
</span><span id="__span-0-240"><a id="__codelineno-0-240" name="__codelineno-0-240"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">rerankers</span> <span class="ow">and</span> <span class="n">text</span><span class="p">:</span>
</span><span id="__span-0-241"><a id="__codelineno-0-241" name="__codelineno-0-241"></a> <span class="k">for</span> <span class="n">reranker</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">rerankers</span><span class="p">:</span>
</span><span id="__span-0-242"><a id="__codelineno-0-242" name="__codelineno-0-242"></a> <span class="c1"># if reranker is LLMReranking, limit the document with top_k items only</span>
</span><span id="__span-0-243"><a id="__codelineno-0-243" name="__codelineno-0-243"></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">reranker</span><span class="p">,</span> <span class="n">LLMReranking</span><span class="p">):</span>
</span><span id="__span-0-244"><a id="__codelineno-0-244" name="__codelineno-0-244"></a> <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filter_docs</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k</span><span class="p">)</span>
</span><span id="__span-0-245"><a id="__codelineno-0-245" name="__codelineno-0-245"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">reranker</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">documents</span><span class="o">=</span><span class="n">result</span><span class="p">,</span> <span class="n">query</span><span class="o">=</span><span class="n">text</span><span class="p">)</span>
</span><span id="__span-0-246"><a id="__codelineno-0-246" name="__codelineno-0-246"></a>
</span><span id="__span-0-247"><a id="__codelineno-0-247" name="__codelineno-0-247"></a> <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filter_docs</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k</span><span class="p">)</span>
</span><span id="__span-0-248"><a id="__codelineno-0-248" name="__codelineno-0-248"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Got raw </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">)</span><span class="si">}</span><span class="s2"> retrieved documents"</span><span class="p">)</span>
</span><span id="__span-0-249"><a id="__codelineno-0-249" name="__codelineno-0-249"></a>
</span><span id="__span-0-250"><a id="__codelineno-0-250" name="__codelineno-0-250"></a> <span class="c1"># add page thumbnails to the result if exists</span>
</span><span id="__span-0-251"><a id="__codelineno-0-251" name="__codelineno-0-251"></a> <span class="n">thumbnail_doc_ids</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
</span><span id="__span-0-252"><a id="__codelineno-0-252" name="__codelineno-0-252"></a> <span class="c1"># we should copy the text from retrieved text chunk</span>
</span><span id="__span-0-253"><a id="__codelineno-0-253" name="__codelineno-0-253"></a> <span class="c1"># to the thumbnail to get relevant LLM score correctly</span>
</span><span id="__span-0-254"><a id="__codelineno-0-254" name="__codelineno-0-254"></a> <span class="n">text_thumbnail_docs</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">RetrievedDocument</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
</span><span id="__span-0-255"><a id="__codelineno-0-255" name="__codelineno-0-255"></a>
</span><span id="__span-0-256"><a id="__codelineno-0-256" name="__codelineno-0-256"></a> <span class="n">non_thumbnail_docs</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-257"><a id="__codelineno-0-257" name="__codelineno-0-257"></a> <span class="n">raw_thumbnail_docs</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-258"><a id="__codelineno-0-258" name="__codelineno-0-258"></a> <span class="k">for</span> <span class="n">doc</span> <span class="ow">in</span> <span class="n">result</span><span class="p">:</span>
</span><span id="__span-0-259"><a id="__codelineno-0-259" name="__codelineno-0-259"></a> <span class="k">if</span> <span class="n">doc</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"type"</span><span class="p">)</span> <span class="o">==</span> <span class="s2">"thumbnail"</span><span class="p">:</span>
</span><span id="__span-0-260"><a id="__codelineno-0-260" name="__codelineno-0-260"></a> <span class="c1"># change type to image to display on UI</span>
</span><span id="__span-0-261"><a id="__codelineno-0-261" name="__codelineno-0-261"></a> <span class="n">doc</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"image"</span>
</span><span id="__span-0-262"><a id="__codelineno-0-262" name="__codelineno-0-262"></a> <span class="n">raw_thumbnail_docs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
</span><span id="__span-0-263"><a id="__codelineno-0-263" name="__codelineno-0-263"></a> <span class="k">continue</span>
</span><span id="__span-0-264"><a id="__codelineno-0-264" name="__codelineno-0-264"></a> <span class="k">if</span> <span class="p">(</span>
</span><span id="__span-0-265"><a id="__codelineno-0-265" name="__codelineno-0-265"></a> <span class="s2">"thumbnail_doc_id"</span> <span class="ow">in</span> <span class="n">doc</span><span class="o">.</span><span class="n">metadata</span>
</span><span id="__span-0-266"><a id="__codelineno-0-266" name="__codelineno-0-266"></a> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">thumbnail_doc_ids</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">thumbnail_count</span>
</span><span id="__span-0-267"><a id="__codelineno-0-267" name="__codelineno-0-267"></a> <span class="p">):</span>
</span><span id="__span-0-268"><a id="__codelineno-0-268" name="__codelineno-0-268"></a> <span class="n">thumbnail_id</span> <span class="o">=</span> <span class="n">doc</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"thumbnail_doc_id"</span><span class="p">]</span>
</span><span id="__span-0-269"><a id="__codelineno-0-269" name="__codelineno-0-269"></a> <span class="n">thumbnail_doc_ids</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">thumbnail_id</span><span class="p">)</span>
</span><span id="__span-0-270"><a id="__codelineno-0-270" name="__codelineno-0-270"></a> <span class="n">text_thumbnail_docs</span><span class="p">[</span><span class="n">thumbnail_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">doc</span>
</span><span id="__span-0-271"><a id="__codelineno-0-271" name="__codelineno-0-271"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-272"><a id="__codelineno-0-272" name="__codelineno-0-272"></a> <span class="n">non_thumbnail_docs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
</span><span id="__span-0-273"><a id="__codelineno-0-273" name="__codelineno-0-273"></a>
</span><span id="__span-0-274"><a id="__codelineno-0-274" name="__codelineno-0-274"></a> <span class="n">linked_thumbnail_docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">thumbnail_doc_ids</span><span class="p">))</span>
</span><span id="__span-0-275"><a id="__codelineno-0-275" name="__codelineno-0-275"></a> <span class="nb">print</span><span class="p">(</span>
</span><span id="__span-0-276"><a id="__codelineno-0-276" name="__codelineno-0-276"></a> <span class="s2">"thumbnail docs"</span><span class="p">,</span>
</span><span id="__span-0-277"><a id="__codelineno-0-277" name="__codelineno-0-277"></a> <span class="nb">len</span><span class="p">(</span><span class="n">linked_thumbnail_docs</span><span class="p">),</span>
</span><span id="__span-0-278"><a id="__codelineno-0-278" name="__codelineno-0-278"></a> <span class="s2">"non-thumbnail docs"</span><span class="p">,</span>
</span><span id="__span-0-279"><a id="__codelineno-0-279" name="__codelineno-0-279"></a> <span class="nb">len</span><span class="p">(</span><span class="n">non_thumbnail_docs</span><span class="p">),</span>
</span><span id="__span-0-280"><a id="__codelineno-0-280" name="__codelineno-0-280"></a> <span class="s2">"raw-thumbnail docs"</span><span class="p">,</span>
</span><span id="__span-0-281"><a id="__codelineno-0-281" name="__codelineno-0-281"></a> <span class="nb">len</span><span class="p">(</span><span class="n">raw_thumbnail_docs</span><span class="p">),</span>
</span><span id="__span-0-282"><a id="__codelineno-0-282" name="__codelineno-0-282"></a> <span class="p">)</span>
</span><span id="__span-0-283"><a id="__codelineno-0-283" name="__codelineno-0-283"></a> <span class="n">additional_docs</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-284"><a id="__codelineno-0-284" name="__codelineno-0-284"></a>
</span><span id="__span-0-285"><a id="__codelineno-0-285" name="__codelineno-0-285"></a> <span class="k">for</span> <span class="n">thumbnail_doc</span> <span class="ow">in</span> <span class="n">linked_thumbnail_docs</span><span class="p">:</span>
</span><span id="__span-0-286"><a id="__codelineno-0-286" name="__codelineno-0-286"></a> <span class="n">text_doc</span> <span class="o">=</span> <span class="n">text_thumbnail_docs</span><span class="p">[</span><span class="n">thumbnail_doc</span><span class="o">.</span><span class="n">doc_id</span><span class="p">]</span>
</span><span id="__span-0-287"><a id="__codelineno-0-287" name="__codelineno-0-287"></a> <span class="n">doc_dict</span> <span class="o">=</span> <span class="n">thumbnail_doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
</span><span id="__span-0-288"><a id="__codelineno-0-288" name="__codelineno-0-288"></a> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"_id"</span><span class="p">]</span> <span class="o">=</span> <span class="n">text_doc</span><span class="o">.</span><span class="n">doc_id</span>
</span><span id="__span-0-289"><a id="__codelineno-0-289" name="__codelineno-0-289"></a> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"content"</span><span class="p">]</span> <span class="o">=</span> <span class="n">text_doc</span><span class="o">.</span><span class="n">content</span>
</span><span id="__span-0-290"><a id="__codelineno-0-290" name="__codelineno-0-290"></a> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"metadata"</span><span class="p">][</span><span class="s2">"type"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"image"</span>
</span><span id="__span-0-291"><a id="__codelineno-0-291" name="__codelineno-0-291"></a> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">text_doc</span><span class="o">.</span><span class="n">metadata</span><span class="p">:</span>
</span><span id="__span-0-292"><a id="__codelineno-0-292" name="__codelineno-0-292"></a> <span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"metadata"</span><span class="p">]:</span>
</span><span id="__span-0-293"><a id="__codelineno-0-293" name="__codelineno-0-293"></a> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"metadata"</span><span class="p">][</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">text_doc</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
</span><span id="__span-0-294"><a id="__codelineno-0-294" name="__codelineno-0-294"></a>
</span><span id="__span-0-295"><a id="__codelineno-0-295" name="__codelineno-0-295"></a> <span class="n">additional_docs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc_dict</span><span class="p">,</span> <span class="n">score</span><span class="o">=</span><span class="n">text_doc</span><span class="o">.</span><span class="n">score</span><span class="p">))</span>
</span><span id="__span-0-296"><a id="__codelineno-0-296" name="__codelineno-0-296"></a>
</span><span id="__span-0-297"><a id="__codelineno-0-297" name="__codelineno-0-297"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">additional_docs</span> <span class="o">+</span> <span class="n">non_thumbnail_docs</span>
</span><span id="__span-0-298"><a id="__codelineno-0-298" name="__codelineno-0-298"></a>
</span><span id="__span-0-299"><a id="__codelineno-0-299" name="__codelineno-0-299"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">result</span><span class="p">:</span>
</span><span id="__span-0-300"><a id="__codelineno-0-300" name="__codelineno-0-300"></a> <span class="c1"># return output from raw retrieved thumbnails</span>
</span><span id="__span-0-301"><a id="__codelineno-0-301" name="__codelineno-0-301"></a> <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filter_docs</span><span class="p">(</span><span class="n">raw_thumbnail_docs</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">thumbnail_count</span><span class="p">)</span>
</span><span id="__span-0-302"><a id="__codelineno-0-302" name="__codelineno-0-302"></a>
</span><span id="__span-0-303"><a id="__codelineno-0-303" name="__codelineno-0-303"></a> <span class="k">return</span> <span class="n">result</span>
</span></code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="indices.VectorRetrieval.run" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">run</span>
<a href="#indices.VectorRetrieval.run" class="headerlink" title="Permanent link"></a></h3>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">run</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Retrieve a list of documents from the vector store.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>text</code>
</td>
<td>
<code>str | Document</code>
</td>
<td>
<div class="doc-md-description">
<p>the text to retrieve similar documents for</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>top_k</code>
</td>
<td>
<code>Optional[int]</code>
</td>
<td>
<div class="doc-md-description">
<p>number of top similar documents to return</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>list[RetrievedDocument]</code>
</td>
<td>
<div class="doc-md-description">
<p>list of retrieved documents</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>libs/kotaemon/kotaemon/indices/vectorindex.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-134">134</a></span>
<span class="normal"><a href="#__codelineno-0-135">135</a></span>
<span class="normal"><a href="#__codelineno-0-136">136</a></span>
<span class="normal"><a href="#__codelineno-0-137">137</a></span>
<span class="normal"><a href="#__codelineno-0-138">138</a></span>
<span class="normal"><a href="#__codelineno-0-139">139</a></span>
<span class="normal"><a href="#__codelineno-0-140">140</a></span>
<span class="normal"><a href="#__codelineno-0-141">141</a></span>
<span class="normal"><a href="#__codelineno-0-142">142</a></span>
<span class="normal"><a href="#__codelineno-0-143">143</a></span>
<span class="normal"><a href="#__codelineno-0-144">144</a></span>
<span class="normal"><a href="#__codelineno-0-145">145</a></span>
<span class="normal"><a href="#__codelineno-0-146">146</a></span>
<span class="normal"><a href="#__codelineno-0-147">147</a></span>
<span class="normal"><a href="#__codelineno-0-148">148</a></span>
<span class="normal"><a href="#__codelineno-0-149">149</a></span>
<span class="normal"><a href="#__codelineno-0-150">150</a></span>
<span class="normal"><a href="#__codelineno-0-151">151</a></span>
<span class="normal"><a href="#__codelineno-0-152">152</a></span>
<span class="normal"><a href="#__codelineno-0-153">153</a></span>
<span class="normal"><a href="#__codelineno-0-154">154</a></span>
<span class="normal"><a href="#__codelineno-0-155">155</a></span>
<span class="normal"><a href="#__codelineno-0-156">156</a></span>
<span class="normal"><a href="#__codelineno-0-157">157</a></span>
<span class="normal"><a href="#__codelineno-0-158">158</a></span>
<span class="normal"><a href="#__codelineno-0-159">159</a></span>
<span class="normal"><a href="#__codelineno-0-160">160</a></span>
<span class="normal"><a href="#__codelineno-0-161">161</a></span>
<span class="normal"><a href="#__codelineno-0-162">162</a></span>
<span class="normal"><a href="#__codelineno-0-163">163</a></span>
<span class="normal"><a href="#__codelineno-0-164">164</a></span>
<span class="normal"><a href="#__codelineno-0-165">165</a></span>
<span class="normal"><a href="#__codelineno-0-166">166</a></span>
<span class="normal"><a href="#__codelineno-0-167">167</a></span>
<span class="normal"><a href="#__codelineno-0-168">168</a></span>
<span class="normal"><a href="#__codelineno-0-169">169</a></span>
<span class="normal"><a href="#__codelineno-0-170">170</a></span>
<span class="normal"><a href="#__codelineno-0-171">171</a></span>
<span class="normal"><a href="#__codelineno-0-172">172</a></span>
<span class="normal"><a href="#__codelineno-0-173">173</a></span>
<span class="normal"><a href="#__codelineno-0-174">174</a></span>
<span class="normal"><a href="#__codelineno-0-175">175</a></span>
<span class="normal"><a href="#__codelineno-0-176">176</a></span>
<span class="normal"><a href="#__codelineno-0-177">177</a></span>
<span class="normal"><a href="#__codelineno-0-178">178</a></span>
<span class="normal"><a href="#__codelineno-0-179">179</a></span>
<span class="normal"><a href="#__codelineno-0-180">180</a></span>
<span class="normal"><a href="#__codelineno-0-181">181</a></span>
<span class="normal"><a href="#__codelineno-0-182">182</a></span>
<span class="normal"><a href="#__codelineno-0-183">183</a></span>
<span class="normal"><a href="#__codelineno-0-184">184</a></span>
<span class="normal"><a href="#__codelineno-0-185">185</a></span>
<span class="normal"><a href="#__codelineno-0-186">186</a></span>
<span class="normal"><a href="#__codelineno-0-187">187</a></span>
<span class="normal"><a href="#__codelineno-0-188">188</a></span>
<span class="normal"><a href="#__codelineno-0-189">189</a></span>
<span class="normal"><a href="#__codelineno-0-190">190</a></span>
<span class="normal"><a href="#__codelineno-0-191">191</a></span>
<span class="normal"><a href="#__codelineno-0-192">192</a></span>
<span class="normal"><a href="#__codelineno-0-193">193</a></span>
<span class="normal"><a href="#__codelineno-0-194">194</a></span>
<span class="normal"><a href="#__codelineno-0-195">195</a></span>
<span class="normal"><a href="#__codelineno-0-196">196</a></span>
<span class="normal"><a href="#__codelineno-0-197">197</a></span>
<span class="normal"><a href="#__codelineno-0-198">198</a></span>
<span class="normal"><a href="#__codelineno-0-199">199</a></span>
<span class="normal"><a href="#__codelineno-0-200">200</a></span>
<span class="normal"><a href="#__codelineno-0-201">201</a></span>
<span class="normal"><a href="#__codelineno-0-202">202</a></span>
<span class="normal"><a href="#__codelineno-0-203">203</a></span>
<span class="normal"><a href="#__codelineno-0-204">204</a></span>
<span class="normal"><a href="#__codelineno-0-205">205</a></span>
<span class="normal"><a href="#__codelineno-0-206">206</a></span>
<span class="normal"><a href="#__codelineno-0-207">207</a></span>
<span class="normal"><a href="#__codelineno-0-208">208</a></span>
<span class="normal"><a href="#__codelineno-0-209">209</a></span>
<span class="normal"><a href="#__codelineno-0-210">210</a></span>
<span class="normal"><a href="#__codelineno-0-211">211</a></span>
<span class="normal"><a href="#__codelineno-0-212">212</a></span>
<span class="normal"><a href="#__codelineno-0-213">213</a></span>
<span class="normal"><a href="#__codelineno-0-214">214</a></span>
<span class="normal"><a href="#__codelineno-0-215">215</a></span>
<span class="normal"><a href="#__codelineno-0-216">216</a></span>
<span class="normal"><a href="#__codelineno-0-217">217</a></span>
<span class="normal"><a href="#__codelineno-0-218">218</a></span>
<span class="normal"><a href="#__codelineno-0-219">219</a></span>
<span class="normal"><a href="#__codelineno-0-220">220</a></span>
<span class="normal"><a href="#__codelineno-0-221">221</a></span>
<span class="normal"><a href="#__codelineno-0-222">222</a></span>
<span class="normal"><a href="#__codelineno-0-223">223</a></span>
<span class="normal"><a href="#__codelineno-0-224">224</a></span>
<span class="normal"><a href="#__codelineno-0-225">225</a></span>
<span class="normal"><a href="#__codelineno-0-226">226</a></span>
<span class="normal"><a href="#__codelineno-0-227">227</a></span>
<span class="normal"><a href="#__codelineno-0-228">228</a></span>
<span class="normal"><a href="#__codelineno-0-229">229</a></span>
<span class="normal"><a href="#__codelineno-0-230">230</a></span>
<span class="normal"><a href="#__codelineno-0-231">231</a></span>
<span class="normal"><a href="#__codelineno-0-232">232</a></span>
<span class="normal"><a href="#__codelineno-0-233">233</a></span>
<span class="normal"><a href="#__codelineno-0-234">234</a></span>
<span class="normal"><a href="#__codelineno-0-235">235</a></span>
<span class="normal"><a href="#__codelineno-0-236">236</a></span>
<span class="normal"><a href="#__codelineno-0-237">237</a></span>
<span class="normal"><a href="#__codelineno-0-238">238</a></span>
<span class="normal"><a href="#__codelineno-0-239">239</a></span>
<span class="normal"><a href="#__codelineno-0-240">240</a></span>
<span class="normal"><a href="#__codelineno-0-241">241</a></span>
<span class="normal"><a href="#__codelineno-0-242">242</a></span>
<span class="normal"><a href="#__codelineno-0-243">243</a></span>
<span class="normal"><a href="#__codelineno-0-244">244</a></span>
<span class="normal"><a href="#__codelineno-0-245">245</a></span>
<span class="normal"><a href="#__codelineno-0-246">246</a></span>
<span class="normal"><a href="#__codelineno-0-247">247</a></span>
<span class="normal"><a href="#__codelineno-0-248">248</a></span>
<span class="normal"><a href="#__codelineno-0-249">249</a></span>
<span class="normal"><a href="#__codelineno-0-250">250</a></span>
<span class="normal"><a href="#__codelineno-0-251">251</a></span>
<span class="normal"><a href="#__codelineno-0-252">252</a></span>
<span class="normal"><a href="#__codelineno-0-253">253</a></span>
<span class="normal"><a href="#__codelineno-0-254">254</a></span>
<span class="normal"><a href="#__codelineno-0-255">255</a></span>
<span class="normal"><a href="#__codelineno-0-256">256</a></span>
<span class="normal"><a href="#__codelineno-0-257">257</a></span>
<span class="normal"><a href="#__codelineno-0-258">258</a></span>
<span class="normal"><a href="#__codelineno-0-259">259</a></span>
<span class="normal"><a href="#__codelineno-0-260">260</a></span>
<span class="normal"><a href="#__codelineno-0-261">261</a></span>
<span class="normal"><a href="#__codelineno-0-262">262</a></span>
<span class="normal"><a href="#__codelineno-0-263">263</a></span>
<span class="normal"><a href="#__codelineno-0-264">264</a></span>
<span class="normal"><a href="#__codelineno-0-265">265</a></span>
<span class="normal"><a href="#__codelineno-0-266">266</a></span>
<span class="normal"><a href="#__codelineno-0-267">267</a></span>
<span class="normal"><a href="#__codelineno-0-268">268</a></span>
<span class="normal"><a href="#__codelineno-0-269">269</a></span>
<span class="normal"><a href="#__codelineno-0-270">270</a></span>
<span class="normal"><a href="#__codelineno-0-271">271</a></span>
<span class="normal"><a href="#__codelineno-0-272">272</a></span>
<span class="normal"><a href="#__codelineno-0-273">273</a></span>
<span class="normal"><a href="#__codelineno-0-274">274</a></span>
<span class="normal"><a href="#__codelineno-0-275">275</a></span>
<span class="normal"><a href="#__codelineno-0-276">276</a></span>
<span class="normal"><a href="#__codelineno-0-277">277</a></span>
<span class="normal"><a href="#__codelineno-0-278">278</a></span>
<span class="normal"><a href="#__codelineno-0-279">279</a></span>
<span class="normal"><a href="#__codelineno-0-280">280</a></span>
<span class="normal"><a href="#__codelineno-0-281">281</a></span>
<span class="normal"><a href="#__codelineno-0-282">282</a></span>
<span class="normal"><a href="#__codelineno-0-283">283</a></span>
<span class="normal"><a href="#__codelineno-0-284">284</a></span>
<span class="normal"><a href="#__codelineno-0-285">285</a></span>
<span class="normal"><a href="#__codelineno-0-286">286</a></span>
<span class="normal"><a href="#__codelineno-0-287">287</a></span>
<span class="normal"><a href="#__codelineno-0-288">288</a></span>
<span class="normal"><a href="#__codelineno-0-289">289</a></span>
<span class="normal"><a href="#__codelineno-0-290">290</a></span>
<span class="normal"><a href="#__codelineno-0-291">291</a></span>
<span class="normal"><a href="#__codelineno-0-292">292</a></span>
<span class="normal"><a href="#__codelineno-0-293">293</a></span>
<span class="normal"><a href="#__codelineno-0-294">294</a></span>
<span class="normal"><a href="#__codelineno-0-295">295</a></span>
<span class="normal"><a href="#__codelineno-0-296">296</a></span>
<span class="normal"><a href="#__codelineno-0-297">297</a></span>
<span class="normal"><a href="#__codelineno-0-298">298</a></span>
<span class="normal"><a href="#__codelineno-0-299">299</a></span>
<span class="normal"><a href="#__codelineno-0-300">300</a></span>
<span class="normal"><a href="#__codelineno-0-301">301</a></span>
<span class="normal"><a href="#__codelineno-0-302">302</a></span>
<span class="normal"><a href="#__codelineno-0-303">303</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-134"><a id="__codelineno-0-134" name="__codelineno-0-134"></a><span class="k">def</span> <span class="nf">run</span><span class="p">(</span>
</span><span id="__span-0-135"><a id="__codelineno-0-135" name="__codelineno-0-135"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">text</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="n">Document</span><span class="p">,</span> <span class="n">top_k</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span>
</span><span id="__span-0-136"><a id="__codelineno-0-136" name="__codelineno-0-136"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">]:</span>
</span><span id="__span-0-137"><a id="__codelineno-0-137" name="__codelineno-0-137"></a><span class="w"> </span><span class="sd">"""Retrieve a list of documents from vector store</span>
</span><span id="__span-0-138"><a id="__codelineno-0-138" name="__codelineno-0-138"></a>
</span><span id="__span-0-139"><a id="__codelineno-0-139" name="__codelineno-0-139"></a><span class="sd"> Args:</span>
</span><span id="__span-0-140"><a id="__codelineno-0-140" name="__codelineno-0-140"></a><span class="sd"> text: the text to retrieve similar documents</span>
</span><span id="__span-0-141"><a id="__codelineno-0-141" name="__codelineno-0-141"></a><span class="sd"> top_k: number of top similar documents to return</span>
</span><span id="__span-0-142"><a id="__codelineno-0-142" name="__codelineno-0-142"></a>
</span><span id="__span-0-143"><a id="__codelineno-0-143" name="__codelineno-0-143"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-144"><a id="__codelineno-0-144" name="__codelineno-0-144"></a><span class="sd"> list[RetrievedDocument]: list of retrieved documents</span>
</span><span id="__span-0-145"><a id="__codelineno-0-145" name="__codelineno-0-145"></a><span class="sd"> """</span>
</span><span id="__span-0-146"><a id="__codelineno-0-146" name="__codelineno-0-146"></a> <span class="k">if</span> <span class="n">top_k</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-147"><a id="__codelineno-0-147" name="__codelineno-0-147"></a> <span class="n">top_k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">top_k</span>
</span><span id="__span-0-148"><a id="__codelineno-0-148" name="__codelineno-0-148"></a>
</span><span id="__span-0-149"><a id="__codelineno-0-149" name="__codelineno-0-149"></a> <span class="n">do_extend</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"do_extend"</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
</span><span id="__span-0-150"><a id="__codelineno-0-150" name="__codelineno-0-150"></a> <span class="n">thumbnail_count</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"thumbnail_count"</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
</span><span id="__span-0-151"><a id="__codelineno-0-151" name="__codelineno-0-151"></a>
</span><span id="__span-0-152"><a id="__codelineno-0-152" name="__codelineno-0-152"></a> <span class="k">if</span> <span class="n">do_extend</span><span class="p">:</span>
</span><span id="__span-0-153"><a id="__codelineno-0-153" name="__codelineno-0-153"></a> <span class="n">top_k_first_round</span> <span class="o">=</span> <span class="n">top_k</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">first_round_top_k_mult</span>
</span><span id="__span-0-154"><a id="__codelineno-0-154" name="__codelineno-0-154"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-155"><a id="__codelineno-0-155" name="__codelineno-0-155"></a> <span class="n">top_k_first_round</span> <span class="o">=</span> <span class="n">top_k</span>
</span><span id="__span-0-156"><a id="__codelineno-0-156" name="__codelineno-0-156"></a>
</span><span id="__span-0-157"><a id="__codelineno-0-157" name="__codelineno-0-157"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-158"><a id="__codelineno-0-158" name="__codelineno-0-158"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
</span><span id="__span-0-159"><a id="__codelineno-0-159" name="__codelineno-0-159"></a> <span class="s2">"doc_store is not provided. Please provide a doc_store to "</span>
</span><span id="__span-0-160"><a id="__codelineno-0-160" name="__codelineno-0-160"></a> <span class="s2">"retrieve the documents"</span>
</span><span id="__span-0-161"><a id="__codelineno-0-161" name="__codelineno-0-161"></a> <span class="p">)</span>
</span><span id="__span-0-162"><a id="__codelineno-0-162" name="__codelineno-0-162"></a>
</span><span id="__span-0-163"><a id="__codelineno-0-163" name="__codelineno-0-163"></a> <span class="n">result</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-164"><a id="__codelineno-0-164" name="__codelineno-0-164"></a> <span class="c1"># TODO: should declare scope directly in the run params</span>
</span><span id="__span-0-165"><a id="__codelineno-0-165" name="__codelineno-0-165"></a> <span class="n">scope</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"scope"</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
</span><span id="__span-0-166"><a id="__codelineno-0-166" name="__codelineno-0-166"></a> <span class="n">emb</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span>
</span><span id="__span-0-167"><a id="__codelineno-0-167" name="__codelineno-0-167"></a>
</span><span id="__span-0-168"><a id="__codelineno-0-168" name="__codelineno-0-168"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">retrieval_mode</span> <span class="o">==</span> <span class="s2">"vector"</span><span class="p">:</span>
</span><span id="__span-0-169"><a id="__codelineno-0-169" name="__codelineno-0-169"></a> <span class="n">emb</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span><span class="n">text</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">embedding</span>
</span><span id="__span-0-170"><a id="__codelineno-0-170" name="__codelineno-0-170"></a> <span class="n">_</span><span class="p">,</span> <span class="n">scores</span><span class="p">,</span> <span class="n">ids</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vector_store</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span><span id="__span-0-171"><a id="__codelineno-0-171" name="__codelineno-0-171"></a> <span class="n">embedding</span><span class="o">=</span><span class="n">emb</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k_first_round</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span>
</span><span id="__span-0-172"><a id="__codelineno-0-172" name="__codelineno-0-172"></a> <span class="p">)</span>
</span><span id="__span-0-173"><a id="__codelineno-0-173" name="__codelineno-0-173"></a> <span class="n">docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ids</span><span class="p">)</span>
</span><span id="__span-0-174"><a id="__codelineno-0-174" name="__codelineno-0-174"></a> <span class="n">result</span> <span class="o">=</span> <span class="p">[</span>
</span><span id="__span-0-175"><a id="__codelineno-0-175" name="__codelineno-0-175"></a> <span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(),</span> <span class="n">score</span><span class="o">=</span><span class="n">score</span><span class="p">)</span>
</span><span id="__span-0-176"><a id="__codelineno-0-176" name="__codelineno-0-176"></a> <span class="k">for</span> <span class="n">doc</span><span class="p">,</span> <span class="n">score</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">docs</span><span class="p">,</span> <span class="n">scores</span><span class="p">)</span>
</span><span id="__span-0-177"><a id="__codelineno-0-177" name="__codelineno-0-177"></a> <span class="p">]</span>
</span><span id="__span-0-178"><a id="__codelineno-0-178" name="__codelineno-0-178"></a> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">retrieval_mode</span> <span class="o">==</span> <span class="s2">"text"</span><span class="p">:</span>
</span><span id="__span-0-179"><a id="__codelineno-0-179" name="__codelineno-0-179"></a> <span class="n">query</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">text</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">Document</span><span class="p">)</span> <span class="k">else</span> <span class="n">text</span>
</span><span id="__span-0-180"><a id="__codelineno-0-180" name="__codelineno-0-180"></a> <span class="n">docs</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-181"><a id="__codelineno-0-181" name="__codelineno-0-181"></a> <span class="k">if</span> <span class="n">scope</span><span class="p">:</span>
</span><span id="__span-0-182"><a id="__codelineno-0-182" name="__codelineno-0-182"></a> <span class="n">docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span><span id="__span-0-183"><a id="__codelineno-0-183" name="__codelineno-0-183"></a> <span class="n">query</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k_first_round</span><span class="p">,</span> <span class="n">doc_ids</span><span class="o">=</span><span class="n">scope</span>
</span><span id="__span-0-184"><a id="__codelineno-0-184" name="__codelineno-0-184"></a> <span class="p">)</span>
</span><span id="__span-0-185"><a id="__codelineno-0-185" name="__codelineno-0-185"></a> <span class="n">result</span> <span class="o">=</span> <span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(),</span> <span class="n">score</span><span class="o">=-</span><span class="mf">1.0</span><span class="p">)</span> <span class="k">for</span> <span class="n">doc</span> <span class="ow">in</span> <span class="n">docs</span><span class="p">]</span>
</span><span id="__span-0-186"><a id="__codelineno-0-186" name="__codelineno-0-186"></a> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">retrieval_mode</span> <span class="o">==</span> <span class="s2">"hybrid"</span><span class="p">:</span>
</span><span id="__span-0-187"><a id="__codelineno-0-187" name="__codelineno-0-187"></a> <span class="c1"># similarity search section</span>
</span><span id="__span-0-188"><a id="__codelineno-0-188" name="__codelineno-0-188"></a> <span class="n">emb</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span><span class="n">text</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">embedding</span>
</span><span id="__span-0-189"><a id="__codelineno-0-189" name="__codelineno-0-189"></a> <span class="n">vs_docs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-190"><a id="__codelineno-0-190" name="__codelineno-0-190"></a> <span class="n">vs_ids</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-191"><a id="__codelineno-0-191" name="__codelineno-0-191"></a> <span class="n">vs_scores</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-192"><a id="__codelineno-0-192" name="__codelineno-0-192"></a>
</span><span id="__span-0-193"><a id="__codelineno-0-193" name="__codelineno-0-193"></a> <span class="k">def</span> <span class="nf">query_vectorstore</span><span class="p">():</span>
</span><span id="__span-0-194"><a id="__codelineno-0-194" name="__codelineno-0-194"></a> <span class="k">nonlocal</span> <span class="n">vs_docs</span>
</span><span id="__span-0-195"><a id="__codelineno-0-195" name="__codelineno-0-195"></a> <span class="k">nonlocal</span> <span class="n">vs_scores</span>
</span><span id="__span-0-196"><a id="__codelineno-0-196" name="__codelineno-0-196"></a> <span class="k">nonlocal</span> <span class="n">vs_ids</span>
</span><span id="__span-0-197"><a id="__codelineno-0-197" name="__codelineno-0-197"></a>
</span><span id="__span-0-198"><a id="__codelineno-0-198" name="__codelineno-0-198"></a> <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
</span><span id="__span-0-199"><a id="__codelineno-0-199" name="__codelineno-0-199"></a> <span class="n">_</span><span class="p">,</span> <span class="n">vs_scores</span><span class="p">,</span> <span class="n">vs_ids</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vector_store</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span><span id="__span-0-200"><a id="__codelineno-0-200" name="__codelineno-0-200"></a> <span class="n">embedding</span><span class="o">=</span><span class="n">emb</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k_first_round</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span>
</span><span id="__span-0-201"><a id="__codelineno-0-201" name="__codelineno-0-201"></a> <span class="p">)</span>
</span><span id="__span-0-202"><a id="__codelineno-0-202" name="__codelineno-0-202"></a> <span class="k">if</span> <span class="n">vs_ids</span><span class="p">:</span>
</span><span id="__span-0-203"><a id="__codelineno-0-203" name="__codelineno-0-203"></a> <span class="n">vs_docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">vs_ids</span><span class="p">)</span>
</span><span id="__span-0-204"><a id="__codelineno-0-204" name="__codelineno-0-204"></a>
</span><span id="__span-0-205"><a id="__codelineno-0-205" name="__codelineno-0-205"></a> <span class="c1"># full-text search section</span>
</span><span id="__span-0-206"><a id="__codelineno-0-206" name="__codelineno-0-206"></a> <span class="n">ds_docs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">RetrievedDocument</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-207"><a id="__codelineno-0-207" name="__codelineno-0-207"></a>
</span><span id="__span-0-208"><a id="__codelineno-0-208" name="__codelineno-0-208"></a> <span class="k">def</span> <span class="nf">query_docstore</span><span class="p">():</span>
</span><span id="__span-0-209"><a id="__codelineno-0-209" name="__codelineno-0-209"></a> <span class="k">nonlocal</span> <span class="n">ds_docs</span>
</span><span id="__span-0-210"><a id="__codelineno-0-210" name="__codelineno-0-210"></a>
</span><span id="__span-0-211"><a id="__codelineno-0-211" name="__codelineno-0-211"></a> <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
</span><span id="__span-0-212"><a id="__codelineno-0-212" name="__codelineno-0-212"></a> <span class="n">query</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">text</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">Document</span><span class="p">)</span> <span class="k">else</span> <span class="n">text</span>
</span><span id="__span-0-213"><a id="__codelineno-0-213" name="__codelineno-0-213"></a> <span class="k">if</span> <span class="n">scope</span><span class="p">:</span>
</span><span id="__span-0-214"><a id="__codelineno-0-214" name="__codelineno-0-214"></a> <span class="n">ds_docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span><span id="__span-0-215"><a id="__codelineno-0-215" name="__codelineno-0-215"></a> <span class="n">query</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k_first_round</span><span class="p">,</span> <span class="n">doc_ids</span><span class="o">=</span><span class="n">scope</span>
</span><span id="__span-0-216"><a id="__codelineno-0-216" name="__codelineno-0-216"></a> <span class="p">)</span>
</span><span id="__span-0-217"><a id="__codelineno-0-217" name="__codelineno-0-217"></a>
</span><span id="__span-0-218"><a id="__codelineno-0-218" name="__codelineno-0-218"></a> <span class="n">vs_query_thread</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Thread</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="n">query_vectorstore</span><span class="p">)</span>
</span><span id="__span-0-219"><a id="__codelineno-0-219" name="__codelineno-0-219"></a> <span class="n">ds_query_thread</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Thread</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="n">query_docstore</span><span class="p">)</span>
</span><span id="__span-0-220"><a id="__codelineno-0-220" name="__codelineno-0-220"></a>
</span><span id="__span-0-221"><a id="__codelineno-0-221" name="__codelineno-0-221"></a> <span class="n">vs_query_thread</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>
</span><span id="__span-0-222"><a id="__codelineno-0-222" name="__codelineno-0-222"></a> <span class="n">ds_query_thread</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>
</span><span id="__span-0-223"><a id="__codelineno-0-223" name="__codelineno-0-223"></a>
</span><span id="__span-0-224"><a id="__codelineno-0-224" name="__codelineno-0-224"></a> <span class="n">vs_query_thread</span><span class="o">.</span><span class="n">join</span><span class="p">()</span>
</span><span id="__span-0-225"><a id="__codelineno-0-225" name="__codelineno-0-225"></a> <span class="n">ds_query_thread</span><span class="o">.</span><span class="n">join</span><span class="p">()</span>
</span><span id="__span-0-226"><a id="__codelineno-0-226" name="__codelineno-0-226"></a>
</span><span id="__span-0-227"><a id="__codelineno-0-227" name="__codelineno-0-227"></a> <span class="n">result</span> <span class="o">=</span> <span class="p">[</span>
</span><span id="__span-0-228"><a id="__codelineno-0-228" name="__codelineno-0-228"></a> <span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(),</span> <span class="n">score</span><span class="o">=-</span><span class="mf">1.0</span><span class="p">)</span>
</span><span id="__span-0-229"><a id="__codelineno-0-229" name="__codelineno-0-229"></a> <span class="k">for</span> <span class="n">doc</span> <span class="ow">in</span> <span class="n">ds_docs</span>
</span><span id="__span-0-230"><a id="__codelineno-0-230" name="__codelineno-0-230"></a> <span class="k">if</span> <span class="n">doc</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">vs_ids</span>
</span><span id="__span-0-231"><a id="__codelineno-0-231" name="__codelineno-0-231"></a> <span class="p">]</span>
</span><span id="__span-0-232"><a id="__codelineno-0-232" name="__codelineno-0-232"></a> <span class="n">result</span> <span class="o">+=</span> <span class="p">[</span>
</span><span id="__span-0-233"><a id="__codelineno-0-233" name="__codelineno-0-233"></a> <span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(),</span> <span class="n">score</span><span class="o">=</span><span class="n">score</span><span class="p">)</span>
</span><span id="__span-0-234"><a id="__codelineno-0-234" name="__codelineno-0-234"></a> <span class="k">for</span> <span class="n">doc</span><span class="p">,</span> <span class="n">score</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">vs_docs</span><span class="p">,</span> <span class="n">vs_scores</span><span class="p">)</span>
</span><span id="__span-0-235"><a id="__codelineno-0-235" name="__codelineno-0-235"></a> <span class="p">]</span>
</span><span id="__span-0-236"><a id="__codelineno-0-236" name="__codelineno-0-236"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Got </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">vs_docs</span><span class="p">)</span><span class="si">}</span><span class="s2"> from vectorstore"</span><span class="p">)</span>
</span><span id="__span-0-237"><a id="__codelineno-0-237" name="__codelineno-0-237"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Got </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">ds_docs</span><span class="p">)</span><span class="si">}</span><span class="s2"> from docstore"</span><span class="p">)</span>
</span><span id="__span-0-238"><a id="__codelineno-0-238" name="__codelineno-0-238"></a>
</span><span id="__span-0-239"><a id="__codelineno-0-239" name="__codelineno-0-239"></a> <span class="c1"># use additional reranker to re-order the document list</span>
</span><span id="__span-0-240"><a id="__codelineno-0-240" name="__codelineno-0-240"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">rerankers</span> <span class="ow">and</span> <span class="n">text</span><span class="p">:</span>
</span><span id="__span-0-241"><a id="__codelineno-0-241" name="__codelineno-0-241"></a> <span class="k">for</span> <span class="n">reranker</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">rerankers</span><span class="p">:</span>
</span><span id="__span-0-242"><a id="__codelineno-0-242" name="__codelineno-0-242"></a> <span class="c1"># if reranker is LLMReranking, limit the document with top_k items only</span>
</span><span id="__span-0-243"><a id="__codelineno-0-243" name="__codelineno-0-243"></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">reranker</span><span class="p">,</span> <span class="n">LLMReranking</span><span class="p">):</span>
</span><span id="__span-0-244"><a id="__codelineno-0-244" name="__codelineno-0-244"></a> <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filter_docs</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k</span><span class="p">)</span>
</span><span id="__span-0-245"><a id="__codelineno-0-245" name="__codelineno-0-245"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">reranker</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">documents</span><span class="o">=</span><span class="n">result</span><span class="p">,</span> <span class="n">query</span><span class="o">=</span><span class="n">text</span><span class="p">)</span>
</span><span id="__span-0-246"><a id="__codelineno-0-246" name="__codelineno-0-246"></a>
</span><span id="__span-0-247"><a id="__codelineno-0-247" name="__codelineno-0-247"></a> <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filter_docs</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">top_k</span><span class="p">)</span>
</span><span id="__span-0-248"><a id="__codelineno-0-248" name="__codelineno-0-248"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Got raw </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">)</span><span class="si">}</span><span class="s2"> retrieved documents"</span><span class="p">)</span>
</span><span id="__span-0-249"><a id="__codelineno-0-249" name="__codelineno-0-249"></a>
</span><span id="__span-0-250"><a id="__codelineno-0-250" name="__codelineno-0-250"></a> <span class="c1"># add page thumbnails to the result if exists</span>
</span><span id="__span-0-251"><a id="__codelineno-0-251" name="__codelineno-0-251"></a> <span class="n">thumbnail_doc_ids</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
</span><span id="__span-0-252"><a id="__codelineno-0-252" name="__codelineno-0-252"></a> <span class="c1"># we should copy the text from retrieved text chunk</span>
</span><span id="__span-0-253"><a id="__codelineno-0-253" name="__codelineno-0-253"></a> <span class="c1"># to the thumbnail to get relevant LLM score correctly</span>
</span><span id="__span-0-254"><a id="__codelineno-0-254" name="__codelineno-0-254"></a> <span class="n">text_thumbnail_docs</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">RetrievedDocument</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
</span><span id="__span-0-255"><a id="__codelineno-0-255" name="__codelineno-0-255"></a>
</span><span id="__span-0-256"><a id="__codelineno-0-256" name="__codelineno-0-256"></a> <span class="n">non_thumbnail_docs</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-257"><a id="__codelineno-0-257" name="__codelineno-0-257"></a> <span class="n">raw_thumbnail_docs</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-258"><a id="__codelineno-0-258" name="__codelineno-0-258"></a> <span class="k">for</span> <span class="n">doc</span> <span class="ow">in</span> <span class="n">result</span><span class="p">:</span>
</span><span id="__span-0-259"><a id="__codelineno-0-259" name="__codelineno-0-259"></a> <span class="k">if</span> <span class="n">doc</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"type"</span><span class="p">)</span> <span class="o">==</span> <span class="s2">"thumbnail"</span><span class="p">:</span>
</span><span id="__span-0-260"><a id="__codelineno-0-260" name="__codelineno-0-260"></a> <span class="c1"># change type to image to display on UI</span>
</span><span id="__span-0-261"><a id="__codelineno-0-261" name="__codelineno-0-261"></a> <span class="n">doc</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"image"</span>
</span><span id="__span-0-262"><a id="__codelineno-0-262" name="__codelineno-0-262"></a> <span class="n">raw_thumbnail_docs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
</span><span id="__span-0-263"><a id="__codelineno-0-263" name="__codelineno-0-263"></a> <span class="k">continue</span>
</span><span id="__span-0-264"><a id="__codelineno-0-264" name="__codelineno-0-264"></a> <span class="k">if</span> <span class="p">(</span>
</span><span id="__span-0-265"><a id="__codelineno-0-265" name="__codelineno-0-265"></a> <span class="s2">"thumbnail_doc_id"</span> <span class="ow">in</span> <span class="n">doc</span><span class="o">.</span><span class="n">metadata</span>
</span><span id="__span-0-266"><a id="__codelineno-0-266" name="__codelineno-0-266"></a> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">thumbnail_doc_ids</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">thumbnail_count</span>
</span><span id="__span-0-267"><a id="__codelineno-0-267" name="__codelineno-0-267"></a> <span class="p">):</span>
</span><span id="__span-0-268"><a id="__codelineno-0-268" name="__codelineno-0-268"></a> <span class="n">thumbnail_id</span> <span class="o">=</span> <span class="n">doc</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"thumbnail_doc_id"</span><span class="p">]</span>
</span><span id="__span-0-269"><a id="__codelineno-0-269" name="__codelineno-0-269"></a> <span class="n">thumbnail_doc_ids</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">thumbnail_id</span><span class="p">)</span>
</span><span id="__span-0-270"><a id="__codelineno-0-270" name="__codelineno-0-270"></a> <span class="n">text_thumbnail_docs</span><span class="p">[</span><span class="n">thumbnail_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">doc</span>
</span><span id="__span-0-271"><a id="__codelineno-0-271" name="__codelineno-0-271"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-272"><a id="__codelineno-0-272" name="__codelineno-0-272"></a> <span class="n">non_thumbnail_docs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
</span><span id="__span-0-273"><a id="__codelineno-0-273" name="__codelineno-0-273"></a>
</span><span id="__span-0-274"><a id="__codelineno-0-274" name="__codelineno-0-274"></a> <span class="n">linked_thumbnail_docs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_store</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">thumbnail_doc_ids</span><span class="p">))</span>
</span><span id="__span-0-275"><a id="__codelineno-0-275" name="__codelineno-0-275"></a> <span class="nb">print</span><span class="p">(</span>
</span><span id="__span-0-276"><a id="__codelineno-0-276" name="__codelineno-0-276"></a> <span class="s2">"thumbnail docs"</span><span class="p">,</span>
</span><span id="__span-0-277"><a id="__codelineno-0-277" name="__codelineno-0-277"></a> <span class="nb">len</span><span class="p">(</span><span class="n">linked_thumbnail_docs</span><span class="p">),</span>
</span><span id="__span-0-278"><a id="__codelineno-0-278" name="__codelineno-0-278"></a> <span class="s2">"non-thumbnail docs"</span><span class="p">,</span>
</span><span id="__span-0-279"><a id="__codelineno-0-279" name="__codelineno-0-279"></a> <span class="nb">len</span><span class="p">(</span><span class="n">non_thumbnail_docs</span><span class="p">),</span>
</span><span id="__span-0-280"><a id="__codelineno-0-280" name="__codelineno-0-280"></a> <span class="s2">"raw-thumbnail docs"</span><span class="p">,</span>
</span><span id="__span-0-281"><a id="__codelineno-0-281" name="__codelineno-0-281"></a> <span class="nb">len</span><span class="p">(</span><span class="n">raw_thumbnail_docs</span><span class="p">),</span>
</span><span id="__span-0-282"><a id="__codelineno-0-282" name="__codelineno-0-282"></a> <span class="p">)</span>
</span><span id="__span-0-283"><a id="__codelineno-0-283" name="__codelineno-0-283"></a> <span class="n">additional_docs</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="__span-0-284"><a id="__codelineno-0-284" name="__codelineno-0-284"></a>
</span><span id="__span-0-285"><a id="__codelineno-0-285" name="__codelineno-0-285"></a> <span class="k">for</span> <span class="n">thumbnail_doc</span> <span class="ow">in</span> <span class="n">linked_thumbnail_docs</span><span class="p">:</span>
</span><span id="__span-0-286"><a id="__codelineno-0-286" name="__codelineno-0-286"></a> <span class="n">text_doc</span> <span class="o">=</span> <span class="n">text_thumbnail_docs</span><span class="p">[</span><span class="n">thumbnail_doc</span><span class="o">.</span><span class="n">doc_id</span><span class="p">]</span>
</span><span id="__span-0-287"><a id="__codelineno-0-287" name="__codelineno-0-287"></a> <span class="n">doc_dict</span> <span class="o">=</span> <span class="n">thumbnail_doc</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
</span><span id="__span-0-288"><a id="__codelineno-0-288" name="__codelineno-0-288"></a> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"_id"</span><span class="p">]</span> <span class="o">=</span> <span class="n">text_doc</span><span class="o">.</span><span class="n">doc_id</span>
</span><span id="__span-0-289"><a id="__codelineno-0-289" name="__codelineno-0-289"></a> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"content"</span><span class="p">]</span> <span class="o">=</span> <span class="n">text_doc</span><span class="o">.</span><span class="n">content</span>
</span><span id="__span-0-290"><a id="__codelineno-0-290" name="__codelineno-0-290"></a> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"metadata"</span><span class="p">][</span><span class="s2">"type"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"image"</span>
</span><span id="__span-0-291"><a id="__codelineno-0-291" name="__codelineno-0-291"></a> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">text_doc</span><span class="o">.</span><span class="n">metadata</span><span class="p">:</span>
</span><span id="__span-0-292"><a id="__codelineno-0-292" name="__codelineno-0-292"></a> <span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"metadata"</span><span class="p">]:</span>
</span><span id="__span-0-293"><a id="__codelineno-0-293" name="__codelineno-0-293"></a> <span class="n">doc_dict</span><span class="p">[</span><span class="s2">"metadata"</span><span class="p">][</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">text_doc</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
</span><span id="__span-0-294"><a id="__codelineno-0-294" name="__codelineno-0-294"></a>
</span><span id="__span-0-295"><a id="__codelineno-0-295" name="__codelineno-0-295"></a> <span class="n">additional_docs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">RetrievedDocument</span><span class="p">(</span><span class="o">**</span><span class="n">doc_dict</span><span class="p">,</span> <span class="n">score</span><span class="o">=</span><span class="n">text_doc</span><span class="o">.</span><span class="n">score</span><span class="p">))</span>
</span><span id="__span-0-296"><a id="__codelineno-0-296" name="__codelineno-0-296"></a>
</span><span id="__span-0-297"><a id="__codelineno-0-297" name="__codelineno-0-297"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">additional_docs</span> <span class="o">+</span> <span class="n">non_thumbnail_docs</span>
</span><span id="__span-0-298"><a id="__codelineno-0-298" name="__codelineno-0-298"></a>
</span><span id="__span-0-299"><a id="__codelineno-0-299" name="__codelineno-0-299"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">result</span><span class="p">:</span>
</span><span id="__span-0-300"><a id="__codelineno-0-300" name="__codelineno-0-300"></a> <span class="c1"># return output from raw retrieved thumbnails</span>
</span><span id="__span-0-301"><a id="__codelineno-0-301" name="__codelineno-0-301"></a> <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filter_docs</span><span class="p">(</span><span class="n">raw_thumbnail_docs</span><span class="p">,</span> <span class="n">top_k</span><span class="o">=</span><span class="n">thumbnail_count</span><span class="p">)</span>
</span><span id="__span-0-302"><a id="__codelineno-0-302" name="__codelineno-0-302"></a>
</span><span id="__span-0-303"><a id="__codelineno-0-303" name="__codelineno-0-303"></a> <span class="k">return</span> <span class="n">result</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Source-file facts for this page: one "Last update" entry and one "Created" entry. -->
<aside class="md-source-file">
<span class="md-source-file__fact">
<span class="md-icon" title="Last update">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4 2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2z"/></svg>
</span>
<!-- The same date is emitted twice: an empty "timeago" span that carries the full
     timestamp in its datetime attribute (presumably filled in at runtime by the
     timeago scripts loaded at the end of the body; TODO confirm), followed directly
     by a plain ISO date. The two spans are intentionally adjacent with no whitespace. -->
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-timeago"><span class="timeago" datetime="2025-02-02T08:38:33+00:00" locale="en"></span></span><span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-iso_date">2025-02-02</span>
</span>
<span class="md-source-file__fact">
<span class="md-icon" title="Created">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M14.47 15.08 11 13V7h1.5v5.25l3.08 1.83c-.41.28-.79.62-1.11 1m-1.39 4.84c-.36.05-.71.08-1.08.08-4.42 0-8-3.58-8-8s3.58-8 8-8 8 3.58 8 8c0 .37-.03.72-.08 1.08.69.1 1.33.32 1.92.64.1-.56.16-1.13.16-1.72 0-5.5-4.5-10-10-10S2 6.5 2 12s4.47 10 10 10c.59 0 1.16-.06 1.72-.16-.32-.59-.54-1.23-.64-1.92M18 15v3h-3v2h3v3h2v-3h3v-2h-3v-3z"/></svg>
</span>
<!-- Same timeago + ISO date pair as above, here for the creation date. -->
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-timeago"><span class="timeago" datetime="2025-02-02T08:38:33+00:00" locale="en"></span></span><span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-iso_date">2025-02-02</span>
</span>
</aside>
</article>
</div>
<script>
  // Re-select content tabs whose label text matches an entry stored under "__tabs".
  // Variables stay `var` at script scope so the same globals exist as before.
  var tabs = __md_get("__tabs");
  if (Array.isArray(tabs)) {
    nextSet: for (var set of document.querySelectorAll(".tabbed-set")) {
      var labels = set.querySelector(".tabbed-labels");
      for (var tab of tabs) {
        for (var label of labels.getElementsByTagName("label")) {
          if (label.innerText.trim() === tab) {
            // Check the input this label points at, then move on to the next
            // tab set: only the first match per set is applied.
            var input = document.getElementById(label.htmlFor);
            input.checked = true;
            continue nextSet;
          }
        }
      }
    }
  }
</script>
<script>
  // If the URL fragment names an element that has a `name`, set its `checked`
  // state to whether that name starts with "__tabbed_".
  var target = document.getElementById(location.hash.slice(1));
  if (target && target.name) {
    target.checked = target.name.startsWith("__tabbed_");
  }
</script>
</div>
<!-- "Back to top" button; rendered with the `hidden` attribute in the static markup. -->
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
  <!-- Decorative arrow: hidden from assistive technology because the visible text already names the button. -->
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
  Back to top
</button>
</main>
<!-- Page footer: links to the previous and next pages. -->
<footer class="md-footer">
  <!-- The landmark needs a human-readable name; the raw translation key
       "footer.title" was previously emitted as the label. -->
  <nav
    class="md-footer__inner md-grid"
    aria-label="Footer"
  >
    <a
      href="../embeddings/tei_endpoint_embed/"
      class="md-footer__link md-footer__link--prev"
      rel="prev"
    >
      <div class="md-footer__button md-icon">
        <!-- Decorative arrow; the link text below provides the accessible name. -->
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
      </div>
      <div class="md-footer__title">
        <div class="md-ellipsis">
          <span class="md-footer__direction">
            Previous
          </span>
          Tei Endpoint Embed
        </div>
      </div>
    </a>
    <a
      href="base/"
      class="md-footer__link md-footer__link--next"
      rel="next"
    >
      <div class="md-footer__title">
        <div class="md-ellipsis">
          <span class="md-footer__direction">
            Next
          </span>
          Base
        </div>
      </div>
      <div class="md-footer__button md-icon">
        <!-- Decorative arrow; the link text above provides the accessible name. -->
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
      </div>
    </a>
  </nav>
</footer>
</div>
<!-- Empty dialog container (data-md-component="dialog"). Presumably filled at runtime
     by the theme script, e.g. with the "Copied to clipboard" message from the page
     config; TODO confirm against the bundle. -->
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../..", "features": ["content.action.edit", "content.tabs.link", "content.code.annotate", "content.code.annotations", "content.code.copy", "navigation.tabs", "navigation.top", "navigation.instant", "navigation.indexes", "toc.follow", "search.share", "search.highlight", "search.suggest"], "search": "../../assets/javascripts/workers/search.6ce7567c.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
<!-- Classic scripts without async/defer, so they are fetched and executed in document
     order: bundle, timeago, timeago_mkdocs_material, then the pymdownx extra loader. -->
<script src="../../assets/javascripts/bundle.525ec568.min.js"></script>
<script src="../../js/timeago.min.js"></script>
<script src="../../js/timeago_mkdocs_material.js"></script>
<script src="../../assets/pymdownx-extras/extra-loader-MCFnu0Wd.js"></script>
</body>
</html>