• __all__ = ['AudioLoader', 'BaseFileLoader', 'BaseLoader', 'BlobLoader', 'CsvLoader', 'EmailLoader', 'ImageLoader', 'JsonLoader', 'PdfLoader', 'SqlLoader', 'TextLoader', 'WebLoader'] (module attribute)

AudioLoader

Bases: BaseFileLoader[AudioArtifact]

Source Code in griptape/loaders/audio_loader.py
@define
class AudioLoader(BaseFileLoader[AudioArtifact]):
    """Loader that wraps raw audio bytes in an `AudioArtifact`."""

    def try_parse(self, data: bytes) -> AudioArtifact:
        """Builds an `AudioArtifact` from raw bytes, tagged with the detected file type.

        Args:
            data: The raw audio bytes.

        Returns:
            An `AudioArtifact` whose `format` is the extension reported by `filetype.guess`.

        Raises:
            ValueError: If `filetype.guess` returns None for the data.
        """
        detected = filetype.guess(data)
        if detected is not None:
            return AudioArtifact(data, format=detected.extension)

        raise ValueError("Could not determine the file type of the audio data")

try_parse(data)

Source Code in griptape/loaders/audio_loader.py
def try_parse(self, data: bytes) -> AudioArtifact:
    """Builds an `AudioArtifact` from raw bytes, tagged with the detected file type.

    Args:
        data: The raw audio bytes.

    Returns:
        An `AudioArtifact` whose `format` is the extension reported by `filetype.guess`.

    Raises:
        ValueError: If `filetype.guess` returns None for the data.
    """
    detected = filetype.guess(data)
    if detected is not None:
        return AudioArtifact(data, format=detected.extension)

    raise ValueError("Could not determine the file type of the audio data")

BaseFileLoader

Bases: BaseLoader[Union[str, PathLike], bytes, A], ABC

Source Code in griptape/loaders/base_file_loader.py
@define
class BaseFileLoader(BaseLoader[Union[str, PathLike], bytes, A], ABC):
    """Base class for loaders that read from and write to files through a file manager driver.

    Attributes:
        file_manager_driver: Driver used to load and save files; a `LocalFileManagerDriver` by default.
        encoding: Encoding used to turn loaded text back into bytes and assigned to saved artifacts.
    """

    file_manager_driver: BaseFileManagerDriver = field(
        default=Factory(lambda: LocalFileManagerDriver()),
        kw_only=True,
    )
    encoding: str = field(default="utf-8", kw_only=True)

    def fetch(self, source: str | PathLike) -> bytes:
        """Loads the file at `source` and returns its content as bytes.

        Args:
            source: Path of the file to load; it is passed to the driver as a string.

        Returns:
            The loaded value, encoded with `self.encoding` when the driver returned a string.
        """
        # TODO: This is silly. `load_file` decodes the bytes and then we immediately re-encode them.
        loaded = self.file_manager_driver.load_file(str(source)).value
        return loaded.encode(self.encoding) if isinstance(loaded, str) else loaded

    def save(self, destination: str | PathLike, artifact: A) -> None:
        """Writes the Artifact's bytes to `destination` through the file manager driver.

        Note that the artifact's `encoding` attribute is overwritten with this loader's encoding
        before it is serialized.
        """
        artifact.encoding = self.encoding
        target = str(destination)
        self.file_manager_driver.save_file(target, artifact.to_bytes())
  • encoding = field(default='utf-8', kw_only=True) class-attribute instance-attribute

  • file_manager_driver = field(default=Factory(lambda: LocalFileManagerDriver()), kw_only=True) class-attribute instance-attribute

fetch(source)

Source Code in griptape/loaders/base_file_loader.py
def fetch(self, source: str | PathLike) -> bytes:
    """Loads the file at `source` and returns its content as bytes.

    Args:
        source: Path of the file to load; it is passed to the driver as a string.

    Returns:
        The loaded value, encoded with `self.encoding` when the driver returned a string.
    """
    # TODO: This is silly. `load_file` decodes the bytes and then we immediately re-encode them.
    loaded = self.file_manager_driver.load_file(str(source)).value
    return loaded.encode(self.encoding) if isinstance(loaded, str) else loaded

save(destination, artifact)

Source Code in griptape/loaders/base_file_loader.py
def save(self, destination: str | PathLike, artifact: A) -> None:
    """Writes the Artifact's bytes to `destination` through the file manager driver.

    Note that the artifact's `encoding` attribute is overwritten with this loader's encoding
    before it is serialized.
    """
    artifact.encoding = self.encoding
    target = str(destination)
    self.file_manager_driver.save_file(target, artifact.to_bytes())

BaseLoader

Bases: FuturesExecutorMixin, ABC, Generic[S, F, A]

Attributes

| Name | Type | Description |
| --- | --- | --- |
| reference | Optional[Reference] | The optional Reference to set on the Artifact. |

Source Code in griptape/loaders/base_loader.py
@define
class BaseLoader(FuturesExecutorMixin, ABC, Generic[S, F, A]):
    """Base class for loaders that fetch data from a source and parse it into an Artifact.

    Attributes:
        reference: The optional `Reference` to set on the Artifact.
    """

    reference: Optional[Reference] = field(default=None, kw_only=True)

    def load(self, source: S) -> A:
        """Fetches the source and returns the Artifact parsed from the fetched data."""
        return self.parse(self.fetch(source))

    @abstractmethod
    def fetch(self, source: S) -> F:
        """Retrieves the raw data for the given source."""

    def parse(self, data: F) -> A:
        """Turns fetched data into an Artifact and sets this loader's reference on it."""
        parsed = self.try_parse(data)
        parsed.reference = self.reference
        return parsed

    def try_parse(self, data: F) -> A:
        """Converts fetched data into an Artifact; this base version always raises `NotImplementedError`."""
        # TODO: Mark as abstract method for griptape 2.0
        raise NotImplementedError

    def load_collection(
        self,
        sources: list[Any],
    ) -> Mapping[str, A]:
        """Loads several sources through the futures executor.

        Args:
            sources: The sources to load.

        Returns:
            A mapping from each source's key (see `to_key`) to its loaded Artifact.
        """
        # Key the sources before submitting anything: sources sharing a key collapse
        # into a single entry, so the same source is never loaded twice.
        unique_sources = {}
        for source in sources:
            unique_sources[self.to_key(source)] = source

        with self.create_futures_executor() as executor:
            pending = {
                key: executor.submit(with_contextvars(self.load), source)
                for key, source in unique_sources.items()
            }
            return execute_futures_dict(pending)

    def to_key(self, source: S) -> str:
        """Derives the collection key for a source by hashing it.

        Bytes are hashed directly; anything else is hashed via its string form.
        """
        if not isinstance(source, bytes):
            return str_to_hash(str(source))
        return bytes_to_hash(source)
  • reference = field(default=None, kw_only=True) class-attribute instance-attribute

fetch(source) abstractmethod

Source Code in griptape/loaders/base_loader.py
@abstractmethod
def fetch(self, source: S) -> F:
    """Fetches data from the source.

    Declared abstract: the body consists of this docstring only, so the actual
    retrieval logic is supplied by concrete loaders.

    Args:
        source: The source to fetch data from.

    Returns:
        The fetched data.
    """

load(source)

Source Code in griptape/loaders/base_loader.py
def load(self, source: S) -> A:
    """Fetches the source and returns the Artifact parsed from the fetched data."""
    return self.parse(self.fetch(source))

load_collection(sources)

Source Code in griptape/loaders/base_loader.py
def load_collection(
    self,
    sources: list[Any],
) -> Mapping[str, A]:
    """Loads several sources through the futures executor.

    Args:
        sources: The sources to load.

    Returns:
        A mapping from each source's key (see `to_key`) to its loaded Artifact.
    """
    # Key the sources before submitting anything: sources sharing a key collapse
    # into a single entry, so the same source is never loaded twice.
    unique_sources = {}
    for source in sources:
        unique_sources[self.to_key(source)] = source

    with self.create_futures_executor() as executor:
        pending = {
            key: executor.submit(with_contextvars(self.load), source)
            for key, source in unique_sources.items()
        }
        return execute_futures_dict(pending)

parse(data)

Source Code in griptape/loaders/base_loader.py
def parse(self, data: F) -> A:
    """Turns fetched data into an Artifact and sets this loader's reference on it."""
    parsed = self.try_parse(data)
    parsed.reference = self.reference
    return parsed

to_key(source)

Source Code in griptape/loaders/base_loader.py
def to_key(self, source: S) -> str:
    """Derives the collection key for a source by hashing it.

    Bytes are hashed directly; anything else is hashed via its string form.
    """
    if not isinstance(source, bytes):
        return str_to_hash(str(source))
    return bytes_to_hash(source)

try_parse(data)

Source Code in griptape/loaders/base_loader.py
def try_parse(self, data: F) -> A:
    """Parses the fetched data and returns an Artifact.

    This base implementation does no parsing; it unconditionally raises.

    Args:
        data: The fetched data to parse.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    # TODO: Mark as abstract method for griptape 2.0
    raise NotImplementedError

BlobLoader

Bases: BaseFileLoader[BlobArtifact]

Source Code in griptape/loaders/blob_loader.py
@define
class BlobLoader(BaseFileLoader[BlobArtifact]):
    """Loader that wraps raw bytes in a `BlobArtifact`."""

    def try_parse(self, data: bytes) -> BlobArtifact:
        """Wraps `data` in a `BlobArtifact`.

        The loader's encoding is passed to the artifact unless it is None, in which
        case the artifact is created without an explicit encoding.
        """
        if self.encoding is not None:
            return BlobArtifact(data, encoding=self.encoding)
        return BlobArtifact(data)

try_parse(data)

Source Code in griptape/loaders/blob_loader.py
def try_parse(self, data: bytes) -> BlobArtifact:
    """Wraps `data` in a `BlobArtifact`.

    The loader's encoding is passed to the artifact unless it is None, in which
    case the artifact is created without an explicit encoding.
    """
    if self.encoding is not None:
        return BlobArtifact(data, encoding=self.encoding)
    return BlobArtifact(data)

CsvLoader

Bases: BaseFileLoader[ListArtifact[TextArtifact]]

Source Code in griptape/loaders/csv_loader.py
@define
class CsvLoader(BaseFileLoader[ListArtifact[TextArtifact]]):
    """Loader that turns each row of CSV content into a `TextArtifact`.

    Attributes:
        delimiter: Field delimiter handed to `csv.DictReader`.
        encoding: Encoding used to decode the raw bytes.
        format_row: Callable rendering a row dict as text; by default one `key: value` line per column.
    """

    delimiter: str = field(default=",", kw_only=True)
    encoding: str = field(default="utf-8", kw_only=True)
    format_row: Callable[[dict], str] = field(
        default=lambda value: "\n".join(f"{key}: {val}" for key, val in value.items()), kw_only=True
    )

    def try_parse(self, data: bytes) -> ListArtifact[TextArtifact]:
        """Decodes `data`, reads it as CSV and returns one `TextArtifact` per row.

        Each artifact's `meta` carries the zero-based `row_num` of its row.
        """
        text = data.decode(self.encoding)
        rows = csv.DictReader(StringIO(text), delimiter=self.delimiter)

        artifacts = []
        for row_num, row in enumerate(rows):
            artifacts.append(TextArtifact(self.format_row(row), meta={"row_num": row_num}))

        return ListArtifact(artifacts)
  • delimiter = field(default=',', kw_only=True) class-attribute instance-attribute

  • encoding = field(default='utf-8', kw_only=True) class-attribute instance-attribute

  • format_row = field(default=lambda value: '\n'.join(f'{key}: {val}' for (key, val) in value.items()), kw_only=True) class-attribute instance-attribute

try_parse(data)

Source Code in griptape/loaders/csv_loader.py
def try_parse(self, data: bytes) -> ListArtifact[TextArtifact]:
    """Decodes `data`, reads it as CSV and returns one `TextArtifact` per row.

    Each artifact's `meta` carries the zero-based `row_num` of its row.
    """
    text = data.decode(self.encoding)
    rows = csv.DictReader(StringIO(text), delimiter=self.delimiter)

    artifacts = []
    for row_num, row in enumerate(rows):
        artifacts.append(TextArtifact(self.format_row(row), meta={"row_num": row_num}))

    return ListArtifact(artifacts)

EmailLoader

Bases: BaseLoader['EmailLoader.EmailQuery', list[bytes], ListArtifact]

Source Code in griptape/loaders/email_loader.py
@define
class EmailLoader(BaseLoader["EmailLoader.EmailQuery", list[bytes], ListArtifact]):  # pyright: ignore[reportGeneralTypeIssues]
    """Loads emails over IMAP (SSL) and parses their plain-text bodies into text artifacts.

    Attributes:
        imap_url: Host passed to `imaplib.IMAP4_SSL`.
        username: Username used to log in.
        password: Password used to log in.
    """

    @define(frozen=True)
    class EmailQuery:
        """An email retrieval query.

        Attributes:
            label: Label to retrieve emails from such as 'INBOX' or 'SENT'.
            key: Optional key for filtering such as 'FROM' or 'SUBJECT'.
            search_criteria: Optional search criteria to filter emails by key.
            max_count: Optional max email count.
        """

        # NOTE: `EmailLoader.fetch` unpacks this query positionally via `astuple`,
        # so the declaration order of these fields must not change.
        label: str = field(kw_only=True)
        key: Optional[str] = field(default=None, kw_only=True)
        search_criteria: Optional[str] = field(default=None, kw_only=True)
        max_count: Optional[int] = field(default=None, kw_only=True)

    imap_url: str = field(kw_only=True)
    username: str = field(kw_only=True)
    password: str = field(kw_only=True)

    def fetch(self, source: EmailLoader.EmailQuery) -> list[bytes]:
        """Fetches raw RFC822 messages from the mailbox described by `source`.

        When both `key` and `search_criteria` are set, only the messages returned by the
        IMAP SEARCH are fetched; otherwise every message in the mailbox is a candidate.
        Messages are fetched from the highest message number downwards, and at most
        `max_count` of them when `max_count` is set (a falsy `max_count` means no limit).

        Args:
            source: The query describing the mailbox, optional filter and optional limit.

        Returns:
            The raw bytes of each fetched message, highest message number first.

        Raises:
            Exception: If the mailbox cannot be selected or the message count cannot be determined.
        """
        label, key, search_criteria, max_count = astuple(source)

        mail_bytes = []
        with imaplib.IMAP4_SSL(self.imap_url) as client:
            client.login(self.username, self.password)

            mailbox = client.select(f'"{label}"', readonly=True)
            if mailbox[0] != "OK":
                raise Exception(mailbox[1][0].decode())  # pyright: ignore[reportOptionalMemberAccess] Unsure what mailbox[1][0] is, so leaving as-is

            if key and search_criteria:
                _, [message_numbers] = client.search(None, key, f'"{search_criteria}"')
                # SEARCH yields the message numbers of the matches. Those exact numbers
                # must be fetched: counting them and fetching 1..count would retrieve
                # unrelated messages whenever the matches are not the first ones.
                message_ids = sorted(int(number) for number in message_numbers.decode().split())
            elif len(mailbox) > 1 and mailbox[1] and mailbox[1][0] is not None:
                # Without a search, every message number from 1 to the mailbox size is a candidate.
                message_ids = range(1, int(mailbox[1][0]) + 1)
            else:
                raise Exception("unable to parse number of messages")

            if max_count:
                # Keep only the highest-numbered messages; a negative limit selects nothing.
                message_ids = message_ids[-max_count:] if max_count > 0 else []

            for message_id in reversed(message_ids):
                _result, data = client.fetch(str(message_id), "(RFC822)")

                if data is None or not data or data[0] is None:
                    continue

                mail_bytes.append(data[0][1])

            client.close()

        return mail_bytes

    def try_parse(self, data: list[bytes]) -> ListArtifact[TextArtifact]:
        """Parses raw messages and returns a `TextArtifact` for each one that has plain text.

        Args:
            data: Raw message bytes, one entry per email.

        Returns:
            A `ListArtifact` holding the newline-joined plain-text parts of each message;
            messages without plain text are skipped.
        """
        mailparser = import_optional_dependency("mailparser")
        artifacts = []
        for raw_message in data:
            message = mailparser.parse_from_bytes(raw_message)

            # Note: mailparser only populates the text_plain field
            # if the message content type is explicitly set to 'text/plain'.
            if message.text_plain:
                artifacts.append(TextArtifact("\n".join(message.text_plain)))

        return ListArtifact(artifacts)

    def _count_messages(self, message_numbers: bytes) -> int:
        """Counts the non-empty, space-separated tokens in `message_numbers`."""
        return len(list(filter(None, message_numbers.decode().split(" "))))
  • imap_url = field(kw_only=True) class-attribute instance-attribute

  • password = field(kw_only=True) class-attribute instance-attribute

  • username = field(kw_only=True) class-attribute instance-attribute

EmailQuery

An email retrieval query.

Attributes

| Name | Type | Description |
| --- | --- | --- |
| label | str | Label to retrieve emails from such as 'INBOX' or 'SENT'. |
| key | Optional[str] | Optional key for filtering such as 'FROM' or 'SUBJECT'. |
| search_criteria | Optional[str] | Optional search criteria to filter emails by key. |
| max_count | Optional[int] | Optional max email count. |

Source Code in griptape/loaders/email_loader.py
@define(frozen=True)
class EmailQuery:
    """An email retrieval query.

    Filtering is applied by `EmailLoader.fetch` only when both `key` and
    `search_criteria` are set; a falsy `max_count` means no limit.

    Attributes:
        label: Label to retrieve emails from such as 'INBOX' or 'SENT'.
        key: Optional key for filtering such as 'FROM' or 'SUBJECT'.
        search_criteria: Optional search criteria to filter emails by key.
        max_count: Optional max email count.
    """

    # NOTE: `EmailLoader.fetch` unpacks this query positionally via `astuple`,
    # so the declaration order of these fields must not change.
    label: str = field(kw_only=True)
    key: Optional[str] = field(default=None, kw_only=True)
    search_criteria: Optional[str] = field(default=None, kw_only=True)
    max_count: Optional[int] = field(default=None, kw_only=True)
  • key = field(default=None, kw_only=True) class-attribute instance-attribute

  • label = field(kw_only=True) class-attribute instance-attribute

  • max_count = field(default=None, kw_only=True) class-attribute instance-attribute

  • search_criteria = field(default=None, kw_only=True) class-attribute instance-attribute

_count_messages(message_numbers)

Source Code in griptape/loaders/email_loader.py
def _count_messages(self, message_numbers: bytes) -> int:
    return len(list(filter(None, message_numbers.decode().split(" "))))

fetch(source)

Source Code in griptape/loaders/email_loader.py
def fetch(self, source: EmailLoader.EmailQuery) -> list[bytes]:
    """Fetches raw RFC822 messages from the mailbox described by `source`.

    When both `key` and `search_criteria` are set, only the messages returned by the
    IMAP SEARCH are fetched; otherwise every message in the mailbox is a candidate.
    Messages are fetched from the highest message number downwards, and at most
    `max_count` of them when `max_count` is set (a falsy `max_count` means no limit).

    Args:
        source: The query describing the mailbox, optional filter and optional limit.

    Returns:
        The raw bytes of each fetched message, highest message number first.

    Raises:
        Exception: If the mailbox cannot be selected or the message count cannot be determined.
    """
    label, key, search_criteria, max_count = astuple(source)

    mail_bytes = []
    with imaplib.IMAP4_SSL(self.imap_url) as client:
        client.login(self.username, self.password)

        mailbox = client.select(f'"{label}"', readonly=True)
        if mailbox[0] != "OK":
            raise Exception(mailbox[1][0].decode())  # pyright: ignore[reportOptionalMemberAccess] Unsure what mailbox[1][0] is, so leaving as-is

        if key and search_criteria:
            _, [message_numbers] = client.search(None, key, f'"{search_criteria}"')
            # SEARCH yields the message numbers of the matches. Those exact numbers
            # must be fetched: counting them and fetching 1..count would retrieve
            # unrelated messages whenever the matches are not the first ones.
            message_ids = sorted(int(number) for number in message_numbers.decode().split())
        elif len(mailbox) > 1 and mailbox[1] and mailbox[1][0] is not None:
            # Without a search, every message number from 1 to the mailbox size is a candidate.
            message_ids = range(1, int(mailbox[1][0]) + 1)
        else:
            raise Exception("unable to parse number of messages")

        if max_count:
            # Keep only the highest-numbered messages; a negative limit selects nothing.
            message_ids = message_ids[-max_count:] if max_count > 0 else []

        for message_id in reversed(message_ids):
            _result, data = client.fetch(str(message_id), "(RFC822)")

            if data is None or not data or data[0] is None:
                continue

            mail_bytes.append(data[0][1])

        client.close()

    return mail_bytes

try_parse(data)

Source Code in griptape/loaders/email_loader.py
def try_parse(self, data: list[bytes]) -> ListArtifact[TextArtifact]:
    """Parses raw messages and returns a `TextArtifact` for each one that has plain text.

    Args:
        data: Raw message bytes, one entry per email.

    Returns:
        A `ListArtifact` holding the newline-joined plain-text parts of each message;
        messages without plain text are skipped.
    """
    mailparser = import_optional_dependency("mailparser")
    parsed_messages = (mailparser.parse_from_bytes(raw_message) for raw_message in data)

    # Note: mailparser only populates the text_plain field
    # if the message content type is explicitly set to 'text/plain'.
    return ListArtifact(
        [TextArtifact("\n".join(message.text_plain)) for message in parsed_messages if message.text_plain]
    )

ImageLoader

Bases: BaseFileLoader[ImageArtifact]

Attributes

| Name | Type | Description |
| --- | --- | --- |
| format | Optional[str] | If provided, attempts to ensure image artifacts are in this format when loaded. For example, when set to 'PNG', loading image.jpg will return an ImageArtifact containing the image bytes in PNG format. |

Source Code in griptape/loaders/image_loader.py
@define
class ImageLoader(BaseFileLoader[ImageArtifact]):
    """Loads images into image artifacts.

    Attributes:
        format: If provided, attempts to ensure image artifacts are in this format when loaded.
                For example, when set to 'PNG', loading image.jpg will return an ImageArtifact
                containing the image bytes in PNG format.
    """

    format: Optional[str] = field(default=None, kw_only=True)

    def try_parse(self, data: bytes) -> ImageArtifact:
        """Opens the image bytes with PIL and wraps them in an `ImageArtifact`.

        When `self.format` is set, the image is first re-saved in that format and the
        re-encoded bytes are used; otherwise the original bytes are kept as-is.
        """
        pil_image = import_optional_dependency("PIL.Image")
        image = pil_image.open(BytesIO(data))

        # No normalization requested: describe the original bytes.
        if self.format is None:
            return ImageArtifact(data, format=image.format.lower(), width=image.width, height=image.height)

        # Re-encode into the requested format, then re-open the result so the
        # reported format and dimensions describe the converted image.
        buffer = BytesIO()
        image.save(buffer, format=self.format)
        converted = pil_image.open(buffer)

        return ImageArtifact(
            buffer.getvalue(),
            format=converted.format.lower(),
            width=converted.width,
            height=converted.height,
        )
  • format = field(default=None, kw_only=True) class-attribute instance-attribute

try_parse(data)

Source Code in griptape/loaders/image_loader.py
def try_parse(self, data: bytes) -> ImageArtifact:
    """Opens the image bytes with PIL and wraps them in an `ImageArtifact`.

    When `self.format` is set, the image is first re-saved in that format and the
    re-encoded bytes are used; otherwise the original bytes are kept as-is.
    """
    pil_image = import_optional_dependency("PIL.Image")
    image = pil_image.open(BytesIO(data))

    # No normalization requested: describe the original bytes.
    if self.format is None:
        return ImageArtifact(data, format=image.format.lower(), width=image.width, height=image.height)

    # Re-encode into the requested format, then re-open the result so the
    # reported format and dimensions describe the converted image.
    buffer = BytesIO()
    image.save(buffer, format=self.format)
    converted = pil_image.open(buffer)

    return ImageArtifact(
        buffer.getvalue(),
        format=converted.format.lower(),
        width=converted.width,
        height=converted.height,
    )

JsonLoader

Bases: BaseFileLoader[JsonArtifact]

Source Code in griptape/loaders/json_loader.py
@define
class JsonLoader(BaseFileLoader[JsonArtifact]):
    """Loader that parses JSON bytes into a `JsonArtifact`."""

    def parse(self, data: bytes) -> JsonArtifact:
        """Parses JSON `data` into a `JsonArtifact` carrying the loader's encoding and reference.

        Args:
            data: The raw JSON document.

        Returns:
            A `JsonArtifact` built from the decoded JSON value.
        """
        artifact = JsonArtifact(json.loads(data), encoding=self.encoding)
        # This method overrides `BaseLoader.parse`, which is where the loader's
        # reference is normally set, so it has to be set here as well.
        artifact.reference = self.reference

        return artifact

parse(data)

Source Code in griptape/loaders/json_loader.py
def parse(self, data: bytes) -> JsonArtifact:
    """Parses JSON `data` into a `JsonArtifact` carrying the loader's encoding and reference.

    Args:
        data: The raw JSON document.

    Returns:
        A `JsonArtifact` built from the decoded JSON value.
    """
    artifact = JsonArtifact(json.loads(data), encoding=self.encoding)
    # This method overrides `BaseLoader.parse`, which is where the loader's
    # reference is normally set, so it has to be set here as well.
    artifact.reference = self.reference

    return artifact

PdfLoader

Bases: BaseFileLoader

Source Code in griptape/loaders/pdf_loader.py
@define
class PdfLoader(BaseFileLoader):
    """Loader that extracts the text of each PDF page into a `TextArtifact`."""

    def try_parse(
        self,
        data: bytes,
        *,
        password: Optional[str] = None,
    ) -> ListArtifact:
        """Reads the PDF bytes with pypdf and returns one `TextArtifact` per page.

        Args:
            data: The raw PDF bytes.
            password: Optional password handed to `pypdf.PdfReader`.

        Returns:
            A `ListArtifact` with the extracted text of every page, in page order.
        """
        pypdf = import_optional_dependency("pypdf")
        reader = pypdf.PdfReader(BytesIO(data), strict=True, password=password)

        pages = []
        for page in reader.pages:
            pages.append(TextArtifact(page.extract_text()))

        return ListArtifact(pages)

try_parse(data, *, password=None)

Source Code in griptape/loaders/pdf_loader.py
def try_parse(
    self,
    data: bytes,
    *,
    password: Optional[str] = None,
) -> ListArtifact:
    """Reads the PDF bytes with pypdf and returns one `TextArtifact` per page.

    Args:
        data: The raw PDF bytes.
        password: Optional password handed to `pypdf.PdfReader`.

    Returns:
        A `ListArtifact` with the extracted text of every page, in page order.
    """
    pypdf = import_optional_dependency("pypdf")
    reader = pypdf.PdfReader(BytesIO(data), strict=True, password=password)

    pages = []
    for page in reader.pages:
        pages.append(TextArtifact(page.extract_text()))

    return ListArtifact(pages)

SqlLoader

Bases: BaseLoader[str, list[RowResult], ListArtifact[TextArtifact]]

Source Code in griptape/loaders/sql_loader.py
@define
class SqlLoader(BaseLoader[str, list[BaseSqlDriver.RowResult], ListArtifact[TextArtifact]]):
    """Loader that runs a SQL query and turns each result row into a `TextArtifact`.

    Attributes:
        sql_driver: Driver used to execute the query.
        format_row: Callable rendering a row's cells as text; by default one `key: value` line per cell.
    """

    sql_driver: BaseSqlDriver = field(kw_only=True)
    format_row: Callable[[dict], str] = field(
        default=lambda value: "\n".join(f"{key}: {val}" for key, val in value.items()), kw_only=True
    )

    def fetch(self, source: str) -> list[BaseSqlDriver.RowResult]:
        """Executes `source` as a query and returns its rows, or an empty list when the driver returns nothing."""
        return self.sql_driver.execute_query(source) or []

    def parse(self, data: list[BaseSqlDriver.RowResult]) -> ListArtifact[TextArtifact]:
        """Formats each row's cells into a `TextArtifact` and collects them in a `ListArtifact`.

        Args:
            data: The rows returned by `fetch`.

        Returns:
            A `ListArtifact` with one `TextArtifact` per row, carrying the loader's reference.
        """
        artifact = ListArtifact([TextArtifact(self.format_row(row.cells)) for row in data])
        # This method overrides `BaseLoader.parse`, which is where the loader's
        # reference is normally set, so it has to be set here as well.
        artifact.reference = self.reference

        return artifact
  • format_row = field(default=lambda value: '\n'.join(f'{key}: {val}' for (key, val) in value.items()), kw_only=True) class-attribute instance-attribute

  • sql_driver = field(kw_only=True) class-attribute instance-attribute

fetch(source)

Source Code in griptape/loaders/sql_loader.py
def fetch(self, source: str) -> list[BaseSqlDriver.RowResult]:
    """Executes `source` as a query and returns its rows, or an empty list when the driver returns nothing."""
    rows = self.sql_driver.execute_query(source)
    return rows if rows else []

parse(data)

Source Code in griptape/loaders/sql_loader.py
def parse(self, data: list[BaseSqlDriver.RowResult]) -> ListArtifact[TextArtifact]:
    """Formats each row's cells into a `TextArtifact` and collects them in a `ListArtifact`.

    Args:
        data: The rows returned by `fetch`.

    Returns:
        A `ListArtifact` with one `TextArtifact` per row, carrying the loader's reference.
    """
    artifact = ListArtifact([TextArtifact(self.format_row(row.cells)) for row in data])
    # This method overrides `BaseLoader.parse`, which is where the loader's
    # reference is normally set, so it has to be set here as well.
    artifact.reference = self.reference

    return artifact

TextLoader

Bases: BaseFileLoader[TextArtifact]

Source Code in griptape/loaders/text_loader.py
@define
class TextLoader(BaseFileLoader[TextArtifact]):
    """Loader that wraps text content in a `TextArtifact`.

    Attributes:
        encoding: Encoding used to decode bytes and recorded on the artifact.
    """

    encoding: str = field(default="utf-8", kw_only=True)

    def try_parse(self, data: str | bytes) -> TextArtifact:
        """Returns a `TextArtifact` for `data`, decoding it first when it is bytes."""
        text = data if isinstance(data, str) else data.decode(self.encoding)
        return TextArtifact(text, encoding=self.encoding)
  • encoding = field(default='utf-8', kw_only=True) class-attribute instance-attribute

try_parse(data)

Source Code in griptape/loaders/text_loader.py
def try_parse(self, data: str | bytes) -> TextArtifact:
    """Returns a `TextArtifact` for `data`, decoding it first when it is bytes."""
    text = data if isinstance(data, str) else data.decode(self.encoding)
    return TextArtifact(text, encoding=self.encoding)

WebLoader

Bases: BaseLoader[str, str, TextArtifact]

Source Code in griptape/loaders/web_loader.py
@define
class WebLoader(BaseLoader[str, str, TextArtifact]):
    """Loader that delegates fetching a URL and extracting its page to a web scraper driver.

    Attributes:
        web_scraper_driver: Driver doing the work; a `TrafilaturaWebScraperDriver` by default.
    """

    web_scraper_driver: BaseWebScraperDriver = field(
        default=Factory(lambda: TrafilaturaWebScraperDriver()),
        kw_only=True,
    )

    def fetch(self, source: str) -> str:
        """Returns whatever the driver's `fetch_url` yields for `source`."""
        fetched = self.web_scraper_driver.fetch_url(source)
        return fetched

    def try_parse(self, data: str) -> TextArtifact:
        """Returns the result of the driver's `extract_page` for the fetched `data`."""
        page = self.web_scraper_driver.extract_page(data)
        return page
  • web_scraper_driver = field(default=Factory(lambda: TrafilaturaWebScraperDriver()), kw_only=True) class-attribute instance-attribute

fetch(source)

Source Code in griptape/loaders/web_loader.py
def fetch(self, source: str) -> str:
    """Returns whatever the driver's `fetch_url` yields for `source`."""
    fetched = self.web_scraper_driver.fetch_url(source)
    return fetched

try_parse(data)

Source Code in griptape/loaders/web_loader.py
def try_parse(self, data: str) -> TextArtifact:
    """Returns the result of the driver's `extract_page` for the fetched `data`."""
    page = self.web_scraper_driver.extract_page(data)
    return page

Could this page be better? Report a problem or suggest an addition!