adding logs
Browse files- semantic.py +5 -0
semantic.py
CHANGED
|
@@ -90,13 +90,16 @@ class SemanticStoreFactory:
|
|
| 90 |
path.mkdir(parents=True, exist_ok=True)
|
| 91 |
_logger.info(f"Directory '{path}' created.")
|
| 92 |
|
|
|
|
| 93 |
documents = PyMuPDFLoader(META_10K_FILE_PATH).load()
|
| 94 |
semantic_chunker = SemanticChunker(
|
| 95 |
embeddings=embeddings,
|
| 96 |
breakpoint_threshold_type="percentile"
|
| 97 |
)
|
| 98 |
semantic_chunks = semantic_chunker.create_documents([d.page_content for d in documents])
|
|
|
|
| 99 |
if USE_MEMORY == True:
|
|
|
|
| 100 |
semantic_chunk_vectorstore = Qdrant.from_documents(
|
| 101 |
semantic_chunks,
|
| 102 |
embeddings,
|
|
@@ -104,6 +107,7 @@ class SemanticStoreFactory:
|
|
| 104 |
collection_name=META_SEMANTIC_COLLECTION,
|
| 105 |
force_recreate=True
|
| 106 |
)
|
|
|
|
| 107 |
else:
|
| 108 |
semantic_chunk_vectorstore = Qdrant.from_documents(
|
| 109 |
semantic_chunks,
|
|
@@ -112,6 +116,7 @@ class SemanticStoreFactory:
|
|
| 112 |
collection_name=META_SEMANTIC_COLLECTION,
|
| 113 |
force_recreate=True
|
| 114 |
)
|
|
|
|
| 115 |
|
| 116 |
return semantic_chunk_vectorstore
|
| 117 |
|
|
|
|
| 90 |
path.mkdir(parents=True, exist_ok=True)
|
| 91 |
_logger.info(f"Directory '{path}' created.")
|
| 92 |
|
| 93 |
+
_logger.info(f"loading {META_10K_FILE_PATH}")
|
| 94 |
documents = PyMuPDFLoader(META_10K_FILE_PATH).load()
|
| 95 |
semantic_chunker = SemanticChunker(
|
| 96 |
embeddings=embeddings,
|
| 97 |
breakpoint_threshold_type="percentile"
|
| 98 |
)
|
| 99 |
semantic_chunks = semantic_chunker.create_documents([d.page_content for d in documents])
|
| 100 |
+
_logger.info(f"created semantic_chunks: {len(semantic_chunks)}")
|
| 101 |
if USE_MEMORY == True:
|
| 102 |
+
_logger.info(f"\t==> creating memory vectorstore ...")
|
| 103 |
semantic_chunk_vectorstore = Qdrant.from_documents(
|
| 104 |
semantic_chunks,
|
| 105 |
embeddings,
|
|
|
|
| 107 |
collection_name=META_SEMANTIC_COLLECTION,
|
| 108 |
force_recreate=True
|
| 109 |
)
|
| 110 |
+
_logger.info(f"\t==> DONE")
|
| 111 |
else:
|
| 112 |
semantic_chunk_vectorstore = Qdrant.from_documents(
|
| 113 |
semantic_chunks,
|
|
|
|
| 116 |
collection_name=META_SEMANTIC_COLLECTION,
|
| 117 |
force_recreate=True
|
| 118 |
)
|
| 119 |
+
_logger.info(f"\t==> return vectorstore")
|
| 120 |
|
| 121 |
return semantic_chunk_vectorstore
|
| 122 |
|