@@ -617,26 +617,15 @@ def test_knowledge_table_embedding(
617617 row = rows .values [3 ]
618618 assert row ["Title" ] is None , row
619619 assert row ["Text" ] is None , row
620- # If embedding with invalid length is added, it will be coerced to None
621- # Original vector will be saved into state
622- response = add_table_rows (
623- client ,
624- table_type ,
625- table .id ,
626- [{"Title" : "test" , "Title Embed" : [1 , 2 , 3 ]}],
627- stream = stream ,
628- )
629- # We currently dont return anything if LLM is not called
630- assert len (response .rows ) == 0 if stream else 1
631- assert all (len (r .columns ) == 0 for r in response .rows )
632- # Check the vectors
633- rows = list_table_rows (client , table_type , table .id )
634- assert rows .total == 5
635- row = rows .values [- 1 ]
636- assert row ["Title" ] == "test" , f"{ row ['Title' ]= } "
637- assert row ["Title Embed" ] is None , f"{ row ['Title Embed' ]= } "
638- assert row ["Text" ] is None , f"{ row ['Title' ]= } "
639- assert_is_vector_or_none (row ["Text Embed" ], allow_none = False )
620+ # Embedding with invalid length will be rejected
621+ with pytest .raises (BadInputError , match = "Array input must have length 256" ):
622+ add_table_rows (
623+ client ,
624+ table_type ,
625+ table .id ,
626+ [{"Title" : "test" , "Title Embed" : [1 , 2 , 3 ]}],
627+ stream = stream ,
628+ )
640629
641630
642631@flaky (max_runs = 3 , min_passes = 1 )
@@ -1697,6 +1686,7 @@ def test_public_web_image(setup: ServingContext):
16971686 - As input to model
16981687 - Has valid raw and thumbnail URLs
16991688 - Reject private URLs
1689+ - Reject malformed URLs
17001690 - Empty input is OK
17011691 """
17021692 table_type = TableType .ACTION
@@ -1729,18 +1719,21 @@ def test_public_web_image(setup: ServingContext):
17291719 response = client .file .get_thumbnail_urls ([image_uri ])
17301720 assert isinstance (response , GetURLResponse )
17311721 assert response .urls [0 ] == image_uri
1732- # Private URLs should be rejected
1733- row = add_table_rows (
1734- data = [dict (image = "https://host.docker.internal:8080" )], ** kwargs
1735- ).rows [0 ]
1736- assert "cannot be opened" in row .columns ["ocr" ].content , row
1737- row = add_table_rows (data = [dict (image = "https://localhost" )], ** kwargs ).rows [0 ]
1738- assert "cannot be opened" in row .columns ["ocr" ].content , row
1739- row = add_table_rows (data = [dict (image = "https://192.168.0.1" )], ** kwargs ).rows [0 ]
1740- assert "cannot be opened" in row .columns ["ocr" ].content , row
17411722 # Empty is OK
17421723 row = add_table_rows (data = [dict ()], ** kwargs ).rows [0 ]
17431724 assert len (row .columns ["ocr" ].content ) > 0 , row
1725+ # Private URLs should be rejected
1726+ with pytest .raises (BadInputError , match = "URL .+ invalid" ):
1727+ add_table_rows (data = [dict (image = "https://host.docker.internal:8080" )], ** kwargs )
1728+ with pytest .raises (BadInputError , match = "URL .+ invalid" ):
1729+ add_table_rows (data = [dict (image = "https://localhost" )], ** kwargs )
1730+ with pytest .raises (BadInputError , match = "URL .+ invalid" ):
1731+ add_table_rows (data = [dict (image = "https://192.168.0.1" )], ** kwargs )
1732+ # Malformed URLs should be rejected
1733+ with pytest .raises (BadInputError , match = "URL .+ invalid" ):
1734+ add_table_rows (
1735+ data = [dict (image = '{"url": "https://host.docker.internal:8080"}' )], ** kwargs
1736+ )
17441737
17451738
17461739@pytest .mark .parametrize ("table_type" , TABLE_TYPES )
@@ -2130,20 +2123,15 @@ def test_update_row(
21302123
21312124 # Test updating data with wrong dtype
21322125 data = dict (ID = "2" , int = "str" , float = "str" , bool = "str" )
2133- response = client .table .update_table_rows (
2134- table_type ,
2135- MultiRowUpdateRequest (table_id = table .id , data = {row ["ID" ]: data }),
2136- )
2137- assert isinstance (response , OkResponse )
2138- _rows = list_table_rows (client , table_type , table .id )
2139- assert len (_rows .items ) == 1
2140- _row = _rows .values [0 ]
2141- t2 = datetime .fromisoformat (_row ["Updated at" ])
2142- assert _row ["int" ] is None
2143- assert _row ["float" ] is None
2144- assert _row ["bool" ] is None
2145- _assert_dict_equal (row , _row , exclude = ["Updated at" , "int" , "float" , "bool" ])
2146- assert t2 > t1
2126+ with pytest .raises (BadInputError ) as e :
2127+ client .table .update_table_rows (
2128+ table_type ,
2129+ MultiRowUpdateRequest (table_id = table .id , data = {row ["ID" ]: data }),
2130+ )
2131+ assert 'Column "int": Input should be a valid integer' in str (e .value )
2132+ assert 'Column "float": Input should be a valid number' in str (e .value )
2133+ assert 'Column "bool": Input should be a valid boolean' in str (e .value )
2134+ _assert_dict_equal (_row , list_table_rows (client , table_type , table .id ).values [0 ])
21472135
21482136 if table_type == TableType .KNOWLEDGE :
21492137 # Test updating embedding columns directly
@@ -2163,26 +2151,20 @@ def test_update_row(
21632151 _rows = list_table_rows (client , table_type , table .id )
21642152 assert len (_rows .items ) == 1
21652153 _row = _rows .values [0 ]
2166- t3 = datetime .fromisoformat (_row ["Updated at" ])
2154+ t2 = datetime .fromisoformat (_row ["Updated at" ])
21672155 assert sum (_row ["Title Embed" ]) == 0
21682156 assert sum (_row ["Text Embed" ]) == len (row ["Text Embed" ])
2169- assert t3 > t2
2157+ assert t2 > t1
21702158 # Test updating embedding columns with wrong length
2171- response = client .table .update_table_rows (
2172- table_type ,
2173- MultiRowUpdateRequest (
2174- table_id = table .id ,
2175- data = {row ["ID" ]: {"Title Embed" : [0 ], "Text Embed" : [0 ]}},
2176- ),
2177- )
2178- assert isinstance (response , OkResponse )
2179- _rows = list_table_rows (client , table_type , table .id )
2180- assert len (_rows .items ) == 1
2181- _row = _rows .values [0 ]
2182- t4 = datetime .fromisoformat (_row ["Updated at" ])
2183- assert _row ["Title Embed" ] is None
2184- assert _row ["Text Embed" ] is None
2185- assert t4 > t3
2159+ with pytest .raises (BadInputError , match = "Array input must have length 256" ):
2160+ client .table .update_table_rows (
2161+ table_type ,
2162+ MultiRowUpdateRequest (
2163+ table_id = table .id ,
2164+ data = {row ["ID" ]: {"Title Embed" : [0 ], "Text Embed" : [0 ]}},
2165+ ),
2166+ )
2167+ _assert_dict_equal (_row , list_table_rows (client , table_type , table .id ).values [0 ])
21862168
21872169
21882170@pytest .mark .parametrize ("stream" , ** STREAM_PARAMS )
0 commit comments