Skip to content

Commit 0a78b47

Browse files
authored
feat: update parquet file handling and documentation for GraphRAG v2.x.x (#40)
1 parent eb6ab21 commit 0a78b47

File tree

3 files changed

+57
-37
lines changed

3 files changed

+57
-37
lines changed

README.md

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,22 @@ The logic for creating relationships for text units, documents, communities, and
8585

8686
To load `.parquet` files automatically when the application starts, place your Parquet files in the `public/artifacts` directory. These files will be loaded into the application for visualization and data table display. Name the files according to the GraphRAG version that produced them:
8787

88-
- `public/artifacts/create_final_entities.parquet`
89-
- `public/artifacts/create_final_relationships.parquet`
90-
- `public/artifacts/create_final_documents.parquet`
91-
- `public/artifacts/create_final_text_units.parquet`
92-
- `public/artifacts/create_final_communities.parquet`
93-
- `public/artifacts/create_final_community_reports.parquet`
94-
- `public/artifacts/create_final_covariates.parquet`
88+
- GraphRAG v2.x.x
89+
- `public/artifacts/entities.parquet`
90+
- `public/artifacts/relationships.parquet`
91+
- `public/artifacts/documents.parquet`
92+
- `public/artifacts/text_units.parquet`
93+
- `public/artifacts/communities.parquet`
94+
- `public/artifacts/community_reports.parquet`
95+
- `public/artifacts/covariates.parquet`
96+
97+
- GraphRAG v1.x.x
98+
- `public/artifacts/create_final_entities.parquet`
99+
- `public/artifacts/create_final_relationships.parquet`
100+
- `public/artifacts/create_final_documents.parquet`
101+
- `public/artifacts/create_final_text_units.parquet`
102+
- `public/artifacts/create_final_communities.parquet`
103+
- `public/artifacts/create_final_community_reports.parquet`
104+
- `public/artifacts/create_final_covariates.parquet`
95105

96106
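If the files are placed in the `public/artifacts` folder, the app will automatically load and display them on startup. If both a `create_final_`-prefixed file and an unprefixed file exist for the same table, the `create_final_`-prefixed file is the one that is loaded.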

src/app/components/DataTableContainer.tsx

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,47 +114,47 @@ const DataTableContainer: React.FC<DataTableContainerProps> = ({
114114
{selectedTable === "entities" && (
115115
<>
116116
<Typography variant="h4" gutterBottom>
117-
Entities (create_final_entities.parquet)
117+
Entities (entities.parquet)
118118
</Typography>
119119
<DataTable columns={entityColumns} data={entities} />
120120
</>
121121
)}
122122
{selectedTable === "relationships" && (
123123
<>
124124
<Typography variant="h4" gutterBottom>
125-
Relationships (create_final_relationships.parquet)
125+
Relationships (relationships.parquet)
126126
</Typography>
127127
<DataTable columns={relationshipColumns} data={relationships} />
128128
</>
129129
)}
130130
{selectedTable === "documents" && (
131131
<>
132132
<Typography variant="h4" gutterBottom>
133-
Documents (create_final_documents.parquet)
133+
Documents (documents.parquet)
134134
</Typography>
135135
<DataTable columns={documentColumns} data={documents} />
136136
</>
137137
)}
138138
{selectedTable === "textunits" && (
139139
<>
140140
<Typography variant="h4" gutterBottom>
141-
TextUnits (create_final_text_units.parquet)
141+
TextUnits (text_units.parquet)
142142
</Typography>
143143
<DataTable columns={textUnitColumns} data={textunits} />
144144
</>
145145
)}
146146
{selectedTable === "communities" && (
147147
<>
148148
<Typography variant="h4" gutterBottom>
149-
Communities (create_final_communities.parquet)
149+
Communities (communities.parquet)
150150
</Typography>
151151
<DataTable columns={communityColumns} data={communities} />
152152
</>
153153
)}
154154
{selectedTable === "communityReports" && (
155155
<>
156156
<Typography variant="h4" gutterBottom>
157-
Community Reports (create_final_community_reports.parquet)
157+
Community Reports (community_reports.parquet)
158158
</Typography>
159159
<DataTable
160160
columns={communityReportColumns}
@@ -165,7 +165,7 @@ const DataTableContainer: React.FC<DataTableContainerProps> = ({
165165
{selectedTable === "covariates" && (
166166
<>
167167
<Typography variant="h4" gutterBottom>
168-
Covariates (create_final_covariates.parquet)
168+
Covariates (covariates.parquet)
169169
</Typography>
170170
<DataTable columns={covariateColumns} data={covariates} />
171171
</>

src/app/hooks/useFileHandler.ts

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,32 @@ import { CommunityReport } from "../models/community-report";
99
import { Covariate } from "../models/covariate";
1010
import { readParquetFile } from "../utils/parquet-utils";
1111

12-
// Paths to default files in the public folder
13-
const defaultFiles = [
14-
process.env.PUBLIC_URL + "/artifacts/create_final_entities.parquet",
15-
process.env.PUBLIC_URL + "/artifacts/create_final_relationships.parquet",
16-
process.env.PUBLIC_URL + "/artifacts/create_final_documents.parquet",
17-
process.env.PUBLIC_URL + "/artifacts/create_final_text_units.parquet",
18-
process.env.PUBLIC_URL + "/artifacts/create_final_communities.parquet",
19-
process.env.PUBLIC_URL + "/artifacts/create_final_community_reports.parquet",
20-
process.env.PUBLIC_URL + "/artifacts/create_final_covariates.parquet",
12+
const baseFileNames = [
13+
"entities.parquet",
14+
"relationships.parquet",
15+
"documents.parquet",
16+
"text_units.parquet",
17+
"communities.parquet",
18+
"community_reports.parquet",
19+
"covariates.parquet",
2120
];
2221

23-
const fileSchemas: { [key: string]: string } = {
24-
"create_final_entities.parquet": "entity",
25-
"create_final_relationships.parquet": "relationship",
26-
"create_final_text_units.parquet": "text_unit",
27-
"create_final_communities.parquet": "community",
28-
"create_final_community_reports.parquet": "community_report",
29-
"create_final_documents.parquet": "document",
30-
"create_final_covariates.parquet": "covariate",
22+
const baseMapping: { [key: string]: string } = {
23+
"entities.parquet": "entity",
24+
"relationships.parquet": "relationship",
25+
"documents.parquet": "document",
26+
"text_units.parquet": "text_unit",
27+
"communities.parquet": "community",
28+
"community_reports.parquet": "community_report",
29+
"covariates.parquet": "covariate",
3130
};
3231

32+
const fileSchemas: { [key: string]: string } = {};
33+
Object.entries(baseMapping).forEach(([key, value]) => {
34+
fileSchemas[key] = value;
35+
fileSchemas[`create_final_${key}`] = value;
36+
});
37+
3338
const useFileHandler = () => {
3439
const navigate = useNavigate();
3540
const [entities, setEntities] = useState<Entity[]>([]);
@@ -57,8 +62,8 @@ const useFileHandler = () => {
5762

5863
for (const file of files) {
5964
const fileName =
60-
typeof file === "string" ? file.split("/").pop()! : file.name;
61-
const schema = fileSchemas[fileName];
65+
typeof file === "string" ? file.split("/").pop()! : file.name;
66+
const schema = fileSchemas[fileName] || fileSchemas[`create_final_${fileName}`];
6267

6368
let data;
6469
if (typeof file === "string") {
@@ -144,12 +149,17 @@ const useFileHandler = () => {
144149
const loadDefaultFiles = async () => {
145150
const filesToLoad = [];
146151

147-
for (const file of defaultFiles) {
148-
const fileExists = await checkFileExists(file);
149-
if (fileExists) {
150-
filesToLoad.push(file); // Add to load queue if the file exists
152+
for (const baseName of baseFileNames) {
153+
const prefixedPath = process.env.PUBLIC_URL + `/artifacts/create_final_${baseName}`;
154+
const unprefixedPath = process.env.PUBLIC_URL + `/artifacts/${baseName}`;
155+
156+
if (await checkFileExists(prefixedPath)) {
157+
filesToLoad.push(prefixedPath);
158+
} else if (await checkFileExists(unprefixedPath)) {
159+
filesToLoad.push(unprefixedPath);
151160
}
152161
}
162+
153163
if (filesToLoad.length > 0) {
154164
await loadFiles(filesToLoad);
155165
navigate("/graph", { replace: true });

0 commit comments

Comments
 (0)