Skip to content

Commit 51bf3f2

Browse files
committed
Add support for images
1 parent 304fb2d commit 51bf3f2

2 files changed

Lines changed: 11 additions & 3 deletions

File tree

app.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from langchain.schema import Document
55

66
from rabbithole import summarize_document
7-
from rabbithole.loader import load_file
7+
from rabbithole.loader import load_file, SUPPORTED_IMG_FILE_TYPES
88
from rabbithole.mp3 import SUPPORTED_AV_FILE_TYPES
99

1010
# Global variables
@@ -36,7 +36,8 @@ def run_summarization(document: list[Document], doc_name: str):
3636

3737
st.title("RabbitHole")
3838

39-
uploaded_files = st.file_uploader("Upload content", type=["docx", "pdf", "txt", *SUPPORTED_AV_FILE_TYPES],
39+
uploaded_files = st.file_uploader("Upload content",
40+
type=["docx", "pdf", "txt", *SUPPORTED_IMG_FILE_TYPES, *SUPPORTED_AV_FILE_TYPES],
4041
accept_multiple_files=True)
4142

4243
if st.button("Summarize"):

rabbithole/loader.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,16 @@
11
"""rabbithole.loader module"""
22
import tempfile
33

4-
from langchain.document_loaders import Docx2txtLoader, PyMuPDFLoader, TextLoader
4+
from langchain.document_loaders import Docx2txtLoader, PyMuPDFLoader, TextLoader, UnstructuredImageLoader
55
from langchain.schema import Document
66
from langchain.text_splitter import CharacterTextSplitter
77
from streamlit.runtime.uploaded_file_manager import UploadedFile
88

99
from rabbithole.mp3 import SUPPORTED_AV_FILE_TYPES, convert_to_mp3
1010
from rabbithole.transcribe import transcribe
1111

12+
SUPPORTED_IMG_FILE_TYPES = (".jpg", ".jpeg", ".png")
13+
1214

1315
def save_to_temp_file(file: UploadedFile) -> str:
1416
"""
@@ -46,6 +48,11 @@ def load_file(file: UploadedFile) -> list[Document]:
4648
temp_file = save_to_temp_file(file)
4749
return TextLoader(file_path=temp_file).load_and_split(text_splitter=text_splitter)
4850

51+
# Handle image files
52+
elif file.name.endswith(SUPPORTED_IMG_FILE_TYPES):
53+
temp_file = save_to_temp_file(file)
54+
return UnstructuredImageLoader(file_path=temp_file).load_and_split(text_splitter=text_splitter)
55+
4956
# Handle Audio and Video files
5057
elif file.name.endswith(SUPPORTED_AV_FILE_TYPES):
5158
temp_file = save_to_temp_file(file)

0 commit comments

Comments (0)