Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions business_objects/data_block.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Dict, List, Optional, Any

from sqlalchemy import and_, func
from sqlalchemy.types import Text
from sqlalchemy.orm.attributes import flag_modified

Expand All @@ -26,15 +27,22 @@ def get_by_id(data_block_id: str) -> DataBlock:
return session.query(DataBlock).filter(DataBlock.id == data_block_id).first()


def get_all_by_project_id(
    org_id: str, project_id: str, only_executed: bool = False
) -> List[DataBlock]:
    """Return every DataBlock of a project within an organization.

    Args:
        org_id: Value matched against ``DataBlock.organization_id``.
        project_id: Value matched against ``DataBlock.project_id``.
        only_executed: When true, keep only rows whose ``sql_config`` has a
            ``query`` entry that is neither NULL nor blank after trimming.
            Defaults to ``False``, which returns all rows of the project.

    Returns:
        The matching rows as a list (empty when nothing matches).
    """
    stmt = session.query(DataBlock).filter(
        DataBlock.organization_id == org_id,
        DataBlock.project_id == project_id,
    )
    if only_executed:
        # `->>` is emitted verbatim as a SQL operator; on a JSON column it
        # yields the value under "query" as text.
        # NOTE(review): presumably a non-empty query text is what marks a
        # block as "executed" -- confirm against the callers.
        query_text = DataBlock.sql_config.op("->>")("query")
        stmt = stmt.filter(
            and_(
                query_text.isnot(None),
                func.trim(query_text) != "",
            )
        )
    return stmt.all()


def get_by_project_id_and_type(
Expand Down
13 changes: 8 additions & 5 deletions cognition_objects/markdown_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,17 @@ def update(


def delete(org_id: str, md_file_id: str, with_commit: bool = True) -> None:
    """Delete a markdown file and the ETL task it references.

    Args:
        org_id: Organization the markdown file (and its ETL task) must
            belong to.
        md_file_id: Id of the ``CognitionMarkdownFile`` row to delete.
        with_commit: Forwarded to ``general.flush_or_commit``.

    Nothing is raised when no matching markdown file exists; the deletes
    simply affect no rows.
    """
    md_file_query = session.query(CognitionMarkdownFile).filter(
        CognitionMarkdownFile.organization_id == org_id,
        CognitionMarkdownFile.id == md_file_id,
    )
    # Load the row first: its etl_task_id is needed to find the ETL task,
    # and a Query object itself has no such attribute.
    md_file = md_file_query.first()
    if md_file:
        session.query(EtlTask).filter(
            EtlTask.organization_id == org_id,
            EtlTask.id == md_file.etl_task_id,
        ).delete()
    # Bulk delete on the same filter; matches no rows when the lookup above
    # found nothing.
    md_file_query.delete()
    general.flush_or_commit(with_commit)


Expand Down
4 changes: 2 additions & 2 deletions enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,6 @@ class MacroExecutionLinkAction(Enum):
UPDATE = "UPDATE"



class FileCachingInitiator(Enum):
TMP_DOC_RETRIEVAL = "TMP_DOC_RETRIEVAL"
DATASET_MARKDOWN_FILE = "DATASET_MARKDOWN_FILE"
Expand Down Expand Up @@ -1327,7 +1326,8 @@ def from_transformers(
return cls.NO_TRANSFORMATION
if (
len(transformers) == 1
and transformers[0]["type"] == ETLTransformer.SUMMARIZE.value
and transformers[0].get("name", transformers[0].get("type"))
== ETLTransformer.SUMMARIZE.value
):
return cls.SUMMARIZE
return cls.COMMON_ETL
Expand Down
48 changes: 29 additions & 19 deletions etl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,15 +245,16 @@ def get_full_config_for_sharepoint_integration(
"task_type": enums.CognitionMarkdownFileState.SPLITTING.value,
"task_config": {
"use_cache": False,
"strategy": enums.ETLSplitStrategy.CHUNK.value,
"chunk_size": 1000,
"rows_per_section": rows_per_section,
# "keep_first_n": integration.config.get("split_kwargs", {}).get(
# "keep_first_n", 5
# ),
# "keep_last_n": integration.config.get("split_kwargs", {}).get(
# "keep_last_n", 1
# ),
"strategy": enums.ETLSplitStrategy.SHRINK.value,
"chunk_size": integration.config.get("split_kwargs", {}).get(
"chunk_size", 16384
),
"keep_first_n": integration.config.get("split_kwargs", {}).get(
"keep_first_n", 5
),
"keep_last_n": integration.config.get("split_kwargs", {}).get(
"keep_last_n", 1
),
},
},
{
Expand All @@ -274,6 +275,25 @@ def get_full_config_for_sharepoint_integration(
"system_prompt": None,
"user_prompt": None,
},
{
"enabled": True,
"name": enums.ETLTransformer.SUMMARIZE.value,
"system_prompt": None,
"user_prompt": " ".join(
(
"You are a helpful AI assistant that summarizes documents.",
"Your task is to provide a concise summary of the provided text.",
"You will be given a context, and you should summarize it in a clear and concise manner.",
"The summary should capture the main points and key information from the context.",
(
f"You are summarizing the list of file paths in folder `{record.parent_path}`."
if record.extension == "FOLDER"
else f"You are summarizing the file `{record.name}` in folder `{record.parent_path}`."
),
f"IT IS CRUCIAL THAT YOU ONLY ANSWER IN ISO-639-1:{integration.tokenizer[:2]}",
)
),
},
],
},
},
Expand All @@ -291,16 +311,6 @@ def get_full_config_for_sharepoint_integration(
},
},
},
# {
# "task_type": enums.CognitionMarkdownFileState.NOTIFYING.value,
# "task_config": {
# "integration": [
# {
# "integration_id": str(integration.id),
# }
# ]
# },
# },
]

return full_config
Expand Down