-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #95 from tjmlabs/improved-max-sim
Improved max sim
- Loading branch information
Showing 4 changed files with 63 additions and 5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Generated by Django 5.1.3 on 2024-11-18 02:51 | ||
|
||
from django.db import migrations | ||
|
||
|
||
# Shared definition of the ``max_sim(document, query)`` SQL function. The
# forward and reverse statements differ only in the per-pair similarity
# expression, which is substituted for ``{similarity}``.
#
# For every query vector the function takes the maximum similarity over all
# document vectors, then sums those maxima.
#
# The derived table in ``queries`` carries an explicit alias
# (``query_vectors``): PostgreSQL releases before 16 reject an un-aliased
# subquery in FROM ("subquery in FROM must have an alias").
_MAX_SIM_FUNCTION_TEMPLATE = """
CREATE OR REPLACE FUNCTION max_sim(document halfvec[], query halfvec[]) RETURNS double precision AS $$
    WITH queries AS (
        SELECT row_number() OVER () AS query_number, *
        FROM (SELECT unnest(query) AS query) AS query_vectors
    ),
    documents AS (
        SELECT unnest(document) AS document
    ),
    similarities AS (
        SELECT query_number, {similarity} AS similarity
        FROM queries CROSS JOIN documents
    ),
    max_similarities AS (
        SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number
    )
    SELECT SUM(max_similarity) FROM max_similarities;
$$ LANGUAGE SQL;
"""

# New scoring: pgvector documents ``<#>`` as the *negative* inner product,
# so it is multiplied by -1 to obtain the inner product itself.
_INNER_PRODUCT_SIMILARITY = "(document <#> query) * -1"

# Previous scoring: ``<=>`` is cosine distance, so ``1 - distance`` yields
# cosine similarity.
_COSINE_SIMILARITY = "1 - (document <=> query)"


class Migration(migrations.Migration):
    """Redefine ``max_sim`` to score vector pairs by inner product.

    Applying the migration replaces the function with the inner-product
    variant; reversing it restores the original cosine-distance variant.
    """

    dependencies = [
        ("api", "0023_remove_document_base64"),
    ]

    operations = [
        migrations.RunSQL(
            sql=_MAX_SIM_FUNCTION_TEMPLATE.format(
                similarity=_INNER_PRODUCT_SIMILARITY
            ),
            # Rollback to original function using cosine distance
            reverse_sql=_MAX_SIM_FUNCTION_TEMPLATE.format(
                similarity=_COSINE_SIMILARITY
            ),
        )
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Generated by Django 5.1.3 on 2024-11-19 13:15 | ||
|
||
from django.db import migrations | ||
|
||
|
||
class Migration(migrations.Migration):
    """Placeholder migration: depends on 0024 and performs no operations."""

    dependencies = [("api", "0024_update_max_sim_function")]

    # Intentionally empty: applying or reversing this migration changes nothing.
    operations = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters