Skip to content

Index

Base reader class.

BaseReader #

Bases: ABC

Utilities for loading data from a directory.

Source code in llama-index-core/llama_index/core/readers/base.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
class BaseReader(ABC):
    """Base class for readers that load data as ``Document`` objects.

    ``lazy_load_data`` is the primitive that subclasses are expected to
    override; the eager and async entry points below are layered on top of it.
    """

    def lazy_load_data(self, *args: Any, **load_kwargs: Any) -> Iterable[Document]:
        """Return the loaded documents as a lazily evaluated iterable.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        message = (
            f"{self.__class__.__name__} does not provide lazy_load_data method currently"
        )
        raise NotImplementedError(message)

    async def alazy_load_data(
        self, *args: Any, **load_kwargs: Any
    ) -> Iterable[Document]:
        """Async counterpart of ``lazy_load_data``.

        This default is not truly asynchronous: it delegates straight to the
        synchronous method. Subclasses can override it with a real async
        implementation.
        """
        documents = self.lazy_load_data(*args, **load_kwargs)
        return documents

    def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
        """Load all documents eagerly by exhausting ``lazy_load_data``."""
        lazy_documents = self.lazy_load_data(*args, **load_kwargs)
        return list(lazy_documents)

    async def aload_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
        """Async counterpart of ``load_data``.

        This default simply calls the synchronous ``load_data``.
        """
        documents = self.load_data(*args, **load_kwargs)
        return documents

    def load_langchain_documents(self, **load_kwargs: Any) -> List["LCDocument"]:
        """Load documents and convert each one to LangChain document format.

        Only keyword arguments are forwarded to ``load_data``.
        """
        return [
            document.to_langchain_format()
            for document in self.load_data(**load_kwargs)
        ]

lazy_load_data #

lazy_load_data(*args: Any, **load_kwargs: Any) -> Iterable[Document]

Load data from the input directory lazily.

Source code in llama-index-core/llama_index/core/readers/base.py
21
22
23
24
25
def lazy_load_data(self, *args: Any, **load_kwargs: Any) -> Iterable[Document]:
    """Return the loaded documents as a lazily evaluated iterable.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    message = (
        f"{self.__class__.__name__} does not provide lazy_load_data method currently"
    )
    raise NotImplementedError(message)

alazy_load_data async #

alazy_load_data(*args: Any, **load_kwargs: Any) -> Iterable[Document]

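Asynchronously load data from the input directory lazily. The base implementation simply delegates to the synchronous lazy_load_data; override it in subclasses for a real async implementation.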

Source code in llama-index-core/llama_index/core/readers/base.py
27
28
29
30
31
32
async def alazy_load_data(
    self, *args: Any, **load_kwargs: Any
) -> Iterable[Document]:
    """Async counterpart of ``lazy_load_data``.

    This default is not truly asynchronous: it delegates straight to the
    synchronous method. Subclasses can override it with a real async
    implementation.
    """
    documents = self.lazy_load_data(*args, **load_kwargs)
    return documents

load_data #

load_data(*args: Any, **load_kwargs: Any) -> List[Document]

Load data from the input directory.

Source code in llama-index-core/llama_index/core/readers/base.py
34
35
36
def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
    """Load all documents eagerly by exhausting ``lazy_load_data``."""
    lazy_documents = self.lazy_load_data(*args, **load_kwargs)
    return list(lazy_documents)

aload_data async #

aload_data(*args: Any, **load_kwargs: Any) -> List[Document]

Asynchronously load data from the input directory. The base implementation simply delegates to the synchronous load_data.

Source code in llama-index-core/llama_index/core/readers/base.py
38
39
40
async def aload_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
    """Async counterpart of ``load_data``.

    This default simply calls the synchronous ``load_data``.
    """
    documents = self.load_data(*args, **load_kwargs)
    return documents

load_langchain_documents #

load_langchain_documents(**load_kwargs: Any) -> List[Document]

Load data in LangChain document format.

Source code in llama-index-core/llama_index/core/readers/base.py
42
43
44
45
def load_langchain_documents(self, **load_kwargs: Any) -> List["LCDocument"]:
    """Load documents and convert each one to LangChain document format.

    Only keyword arguments are forwarded to ``load_data``.
    """
    return [
        document.to_langchain_format()
        for document in self.load_data(**load_kwargs)
    ]

BasePydanticReader #

Bases: BaseReader, BaseComponent

Serializable data loader with Pydantic.

Source code in llama-index-core/llama_index/core/readers/base.py
48
49
50
51
52
53
54
55
class BasePydanticReader(BaseReader, BaseComponent):
    """Serializable data loader with Pydantic."""

    # Pydantic model configuration: allow fields whose types are arbitrary
    # (non-Pydantic) classes.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Marks whether this reader pulls from a remote API rather than a local
    # file; defaults to False.
    is_remote: bool = Field(
        default=False,
        description="Whether the data is loaded from a remote API or a local file.",
    )