44from langchain .schema import Document
55
66from rabbithole import summarize_document
7+ from rabbithole .embedding import embed_document
8+ from rabbithole .keywords import get_document_keywords
79from rabbithole .loader import load_file , SUPPORTED_IMG_FILE_TYPES
810from rabbithole .mp3 import SUPPORTED_AV_FILE_TYPES
911
1012# Global variables
11- results = {}
13+ global_documents = {}
14+ global_embeddings = {}
15+ global_keywords = {}
16+ global_summaries = {}
1217
1318
1419def load_files_with_spinner (files : list ) -> dict [str , list [Document ]]:
@@ -26,12 +31,48 @@ def load_files_with_spinner(files: list) -> dict[str, list[Document]]:
2631 return documents
2732
2833
29- def run_summarization (document : list [Document ], doc_name : str ):
30- """Summarize the first two entries of `document`, store the result in the module-level `results` dict under `doc_name`, and write it to the page with `st.write` while a spinner is shown."""
31- with st .spinner (f'Summarizing { doc_name } ...' ):
32- summary = summarize_document (document [:2 ])
33- results [doc_name ] = summary
34- st .write (f"'{ doc_name } ' Summary:\n { summary } " )
def embed_documents_with_spinner(documents: dict[str, list[Document]]) -> dict[str, list[list[float]]]:
    """
    Embed every loaded document, showing a spinner while each one is processed.

    :param documents: Mapping of document name to the Document objects loaded for it.
    :return: Mapping of document name to the result of embed_document for the
        page contents of that document's Document objects.
    """
    embeddings_by_name = {}
    for doc_name, parts in documents.items():
        with st.spinner(f'Embedding { doc_name } ...'):
            # embed_document is given the raw text of each part, not the Document wrappers.
            texts = [part.page_content for part in parts]
            embeddings_by_name[doc_name] = embed_document(texts)
    return embeddings_by_name
47+
48+
def extract_keywords_with_spinner(embeddings: dict[str, list[list[float]]]):
    """
    Extract keywords for each document from its embeddings, showing a spinner per document.

    :param embeddings: Mapping of document name to that document's embeddings.
    :return: Mapping of document name to the result of get_document_keywords
        for that document's embeddings.
    """
    keywords_by_name = {}
    for doc_name, vectors in embeddings.items():
        with st.spinner(f'Extracting keywords from { doc_name } ...'):
            extracted = get_document_keywords(vectors)
            keywords_by_name[doc_name] = extracted
    return keywords_by_name
62+
63+
def generate_summary_with_spinner(
    documents: dict[str, list[Document]],
    *,
    max_documents: int = 2,
) -> dict[str, str]:
    """
    Summarize every loaded document, showing a spinner while each one is processed.

    Only the leading ``max_documents`` Document objects of each entry are passed
    to summarize_document; the default of 2 matches the previously hard-coded slice.

    :param documents: Mapping of document name to the Document objects loaded for it.
    :param max_documents: How many leading Document objects of each entry to summarize.
    :return: Mapping of document name to the result of summarize_document.
    """
    # NOTE(review): the return annotation assumes summarize_document returns text
    # (its result is rendered with st.write elsewhere) - confirm against rabbithole.
    summaries = {}
    for doc_name, doc_text in documents.items():
        with st.spinner(f'Summarizing { doc_name } ...'):
            summaries[doc_name] = summarize_document(doc_text[:max_documents])
    return summaries
3576
3677
3778st .title ("RabbitHole" )
@@ -46,10 +87,17 @@ def run_summarization(document: list[Document], doc_name: str):
4687 st .stop ()
4788
4889 # Load the text from the uploaded PDF files
49- texts = load_files_with_spinner (uploaded_files )
90+ global_documents = load_files_with_spinner (uploaded_files )
91+ global_embeddings = embed_documents_with_spinner (global_documents )
92+ global_keywords = extract_keywords_with_spinner (global_embeddings )
93+ global_summaries = generate_summary_with_spinner (global_documents )
5094
51- # Run the summarization for each document
52- for doc_name , doc_text in texts .items ():
53- run_summarization (doc_text , doc_name )
95+ # Display the keywords and summaries
96+ for doc_name , doc_keywords in global_keywords .items ():
97+ st .header (doc_name )
98+ st .subheader ("Keywords" )
99+ st .write (doc_keywords )
100+ st .subheader ("Summary" )
101+ st .write (global_summaries [doc_name ])
54102
55103 st .success ('Summarization completed.' )
0 commit comments