Skip to content

Commit 15799b5

Browse files
committed
Add keyword generation and improve UI
1 parent 1478b55 commit 15799b5

1 file changed

Lines changed: 59 additions & 11 deletions

File tree

app.py

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,16 @@
44
from langchain.schema import Document
55

66
from rabbithole import summarize_document
7+
from rabbithole.embedding import embed_document
8+
from rabbithole.keywords import get_document_keywords
79
from rabbithole.loader import load_file, SUPPORTED_IMG_FILE_TYPES
810
from rabbithole.mp3 import SUPPORTED_AV_FILE_TYPES
911

1012
# Global variables
11-
results = {}
13+
# Per-document results, each keyed by document name.
# Loaded Document objects for every uploaded file.
global_documents: dict[str, list[Document]] = {}
# Embeddings produced from each document's page contents.
global_embeddings: dict[str, list[list[float]]] = {}
# Keywords extracted from each document's embeddings.
global_keywords: dict = {}
# Summary generated for each document.
global_summaries: dict = {}
1217

1318

1419
def load_files_with_spinner(files: list) -> dict[str, list[Document]]:
@@ -26,12 +31,48 @@ def load_files_with_spinner(files: list) -> dict[str, list[Document]]:
2631
return documents
2732

2833

29-
def run_summarization(document: list[Document], doc_name: str):
30-
"""Execute the text summarization"""
31-
with st.spinner(f'Summarizing {doc_name}...'):
32-
summary = summarize_document(document[:2])
33-
results[doc_name] = summary
34-
st.write(f"'{doc_name}' Summary:\n{summary}")
34+
def embed_documents_with_spinner(documents: dict[str, list[Document]]) -> dict[str, list[list[float]]]:
    """
    Embed every document, showing a spinner while each one is processed.

    :param documents: Mapping of document name to the Document objects loaded for it.
    :return: Mapping of document name to the value ``embed_document`` returns
        for the ``page_content`` strings of that document.
    """
    embeddings_by_name = {}
    for name, docs in documents.items():
        with st.spinner(f'Embedding {name}...'):
            # Only the raw text of each Document is passed to the embedder
            page_texts = [doc.page_content for doc in docs]
            embeddings_by_name[name] = embed_document(page_texts)
    return embeddings_by_name
47+
48+
49+
def extract_keywords_with_spinner(embeddings: dict[str, list[list[float]]]) -> dict:
    """
    Extract keywords for every document, showing a spinner while each one is processed.

    :param embeddings: Mapping of document name to that document's embeddings.
    :return: Mapping of document name to the value ``get_document_keywords``
        returns for that document's embeddings.
    """
    keywords = {}
    for doc_name, doc_embeddings in embeddings.items():
        with st.spinner(f'Extracting keywords from {doc_name}...'):
            keywords[doc_name] = get_document_keywords(doc_embeddings)
    return keywords
62+
63+
64+
def generate_summary_with_spinner(documents: dict[str, list[Document]], max_documents: int = 2) -> dict:
    """
    Summarize every document, showing a spinner while each one is processed.

    :param documents: Mapping of document name to the Document objects loaded for it.
    :param max_documents: Number of leading Document objects of each entry that
        are passed to the summarizer. Defaults to 2, the previously hard-coded limit.
    :return: Mapping of document name to the value ``summarize_document``
        returns for that document.
    """
    summaries = {}
    for doc_name, doc_text in documents.items():
        with st.spinner(f'Summarizing {doc_name}...'):
            # Only the leading Documents are summarized; the rest are ignored
            summaries[doc_name] = summarize_document(doc_text[:max_documents])
    return summaries
3576

3677

3778
st.title("RabbitHole")
@@ -46,10 +87,17 @@ def run_summarization(document: list[Document], doc_name: str):
4687
st.stop()
4788

4889
# Load the text from the uploaded PDF files
49-
texts = load_files_with_spinner(uploaded_files)
90+
# Stage 1: load every uploaded file into Document objects
global_documents = load_files_with_spinner(uploaded_files)
# Stage 2: embed the page contents of each loaded document
global_embeddings = embed_documents_with_spinner(global_documents)
# Stage 3: derive keywords from the embeddings
global_keywords = extract_keywords_with_spinner(global_embeddings)
# Stage 4: summarize the loaded documents (independent of the embeddings)
global_summaries = generate_summary_with_spinner(global_documents)
5094

51-
# Run the summarization for each document
52-
for doc_name, doc_text in texts.items():
53-
run_summarization(doc_text, doc_name)
95+
# Render one section per document: its keywords followed by its summary
for doc_name in global_keywords:
    st.header(doc_name)
    st.subheader("Keywords")
    st.write(global_keywords[doc_name])
    st.subheader("Summary")
    st.write(global_summaries[doc_name])
54102

55103
st.success('Summarization completed.')

0 commit comments

Comments
 (0)