Add support for LightGlue validation in accuracy checker #3962

Merged
@@ -133,6 +133,7 @@
from .malware_classification import MalwareClassificationDatasetConverter
from .cvat_hands_and_palm import CVATPalmDetectionConverter, CVATHandLandmarkConverter
from .parti_prompts import PartiPromptsDatasetConverter
from .hpatches import HpatchesConverter

__all__ = [
    'BaseFormatConverter',
@@ -267,5 +268,6 @@
    'MalwareClassificationDatasetConverter',
    'CVATPalmDetectionConverter',
    'CVATHandLandmarkConverter',
    'PartiPromptsDatasetConverter',
    'HpatchesConverter'
]
@@ -0,0 +1,192 @@
"""
Copyright (c) 2024 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from pathlib import Path
import numpy as np
from .format_converter import DirectoryBasedAnnotationConverter, ConverterReturn
from ..config import NumberField, StringField
from ..representation import ImageFeatureAnnotation
from ..utils import UnsupportedPackage
from ..data_readers import AnnotationDataIdentifier
from ..progress_reporters import TQDMReporter


# Large images that were ignored in previous papers
# (spellings such as "i_contruction" and "v_astronautis" follow the dataset's
# own folder names)
ignored_scenes = (
    "i_contruction",
    "i_crownnight",
    "i_dc",
    "i_pencils",
    "i_whitebuilding",
    "v_artisans",
    "v_astronautis",
    "v_talent",
)
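# Each HPatches sequence folder holds six images (1.ppm..6.ppm) plus the
# ground-truth homographies H_1_2..H_1_6 mapping image 1 to images 2..6;
# "i_" sequences vary illumination, "v_" sequences vary viewpoint.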


class HpatchesConverter(DirectoryBasedAnnotationConverter):
    __provider__ = 'hpatches_with_kornia_feature'

    @classmethod
    def parameters(cls):
        params = super().parameters()
        params.update({
            'sequences_dir_name': StringField(
                optional=True, default='hpatches-sequences-release',
                description="Name of the dataset subfolder where the HPatches sequences are located."
            ),
            'max_num_keypoints': NumberField(
                optional=True, default=512, value_type=int, min_value=128, max_value=2048,
                description='Maximum number of image keypoints.'
            ),
            'image_side_size': NumberField(
                optional=True, default=480, value_type=int, min_value=128, max_value=2048,
                description='Target size of the image short side.'
            )
        })

        return params

    def configure(self):
        try:
            import torch  # pylint: disable=import-outside-toplevel
            self._torch = torch
        except ImportError as torch_import_error:
            UnsupportedPackage('torch', torch_import_error.msg).raise_error(self.__provider__)
        try:
            import kornia  # pylint: disable=import-outside-toplevel
            self._kornia = kornia
        except ImportError as kornia_import_error:
            UnsupportedPackage('kornia', kornia_import_error.msg).raise_error(self.__provider__)

        self.data_dir = self.get_value_from_config('data_dir')
        self.sequences_dir = self.get_value_from_config('sequences_dir_name')
        self.max_num_keypoints = self.get_value_from_config('max_num_keypoints')
        self.side_size = self.get_value_from_config('image_side_size')

    def _get_new_image_size(self, h: int, w: int):
        side_size = self.side_size
        aspect_ratio = w / h
        if aspect_ratio < 1.0:
            size = int(side_size / aspect_ratio), side_size
        else:
            size = side_size, int(side_size * aspect_ratio)
        return size
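    # Illustration: with side_size=480, an image of h=960, w=720 (aspect ratio
    # 0.75) maps to (640, 480), while an image of h=480, w=640 (aspect ratio
    # ~1.33) maps to (480, 640); the short side always becomes side_size.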


    def _get_image_data(self, path, image_size=None):
        img = self._kornia.io.load_image(path, self._kornia.io.ImageLoadType.RGB32, device='cpu')[None, ...]

        h, w = img.shape[-2:]
        size = self._get_new_image_size(h, w)
        if image_size and size != image_size:
            size = image_size
        img = self._kornia.geometry.transform.resize(
            img,
            size,
            side='short',
            antialias=True,
            align_corners=None,
            interpolation='bilinear',
        )
        # Per-axis (x, y) scale factors from the original to the resized image,
        # and the corresponding 3x3 homogeneous rescaling transform.
        scale = self._torch.Tensor([img.shape[-1] / w, img.shape[-2] / h]).to(img)
        T = np.diag([scale[0], scale[1], 1])

        data = {
            "scales": scale,
            "image_size": np.array(size[::-1]),
            "transform": T,
            "original_image_size": np.array([w, h]),
            "image": img
        }
        return data

    @staticmethod
    def _read_homography(path):
        """Parse an HPatches ground-truth homography file: a plain-text 3x3
        matrix with whitespace-separated values, one row per line."""
        with open(path, encoding="utf-8") as f:
            result = []
            for line in f.readlines():
                while "  " in line:  # Collapse double spaces
                    line = line.replace("  ", " ")
                line = line.replace(" \n", "").replace("\n", "")
                # Split and discard empty strings
                elements = list(filter(lambda s: s, line.split(" ")))
                if elements:
                    result.append(elements)
        return np.array(result).astype(float)

    def get_image_features(self, model, data):
        # DISK returns a list with one feature set per image; batch size is 1 here.
        with self._torch.inference_mode():
            return model(data["image"], self.max_num_keypoints, pad_if_not_divisible=True)[0]

    def convert(self, check_content=False, progress_callback=None, progress_interval=50, **kwargs):
        annotations = []
        items = []

        sequences_dir = Path(self.data_dir) / self.sequences_dir
        sequences = sorted(x.name for x in sequences_dir.iterdir())

        for seq in sequences:
            if seq in ignored_scenes:
                continue
            for i in range(2, 7):
                items.append((seq, i, seq[0] == "i"))

        disk_model = self._kornia.feature.DISK.from_pretrained("depth")

        num_iterations = len(items)
        progress_reporter = TQDMReporter(print_interval=progress_interval)
        progress_reporter.reset(num_iterations)

        for item_id, item in enumerate(items):
            seq, idx, _ = item

            # Image 1 is the reference of each sequence, so its features only
            # need to be extracted once per sequence (idx runs from 2 to 6).
            if idx == 2:
                img_path = sequences_dir / seq / "1.ppm"
                data0 = self._get_image_data(img_path)
                features0 = self.get_image_features(disk_model, data0)

            img_path = sequences_dir / seq / f"{idx}.ppm"
            data1 = self._get_image_data(img_path)
            features1 = self.get_image_features(disk_model, data1)

            # Rescale the ground-truth homography to the resized images:
            # T1 @ H @ inv(T0) maps resized image-0 coordinates to resized
            # image-1 coordinates.
            H = self._read_homography(sequences_dir / seq / f"H_1_{idx}")
            H = data1["transform"] @ H @ np.linalg.inv(data0["transform"])

            data = {
                "keypoints0": features0.keypoints.unsqueeze(0),
                "keypoints1": features1.keypoints.unsqueeze(0),
                "descriptors0": features0.descriptors.unsqueeze(0),
                "descriptors1": features1.descriptors.unsqueeze(0),
                "image_size0": data0["image_size"],
                "image_size1": data1["image_size"],
                "H_0to1": H
            }

            sequence = f"{seq}/{idx}"
            annotated_id = AnnotationDataIdentifier(sequence, data)
            annotation = ImageFeatureAnnotation(
                identifier=annotated_id,
                sequence=sequence
            )
            annotations.append(annotation)
            progress_reporter.update(item_id, 1)

        progress_reporter.finish()
        return ConverterReturn(annotations, None, None)
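
For orientation, a minimal sketch of driving the converter directly. This is an illustration, not part of the PR: it assumes the usual BaseFormatConverter(config) construction, that torch and kornia are installed, and a hypothetical datasets/hpatches root containing the extracted hpatches-sequences-release folder.

from accuracy_checker.annotation_converters import HpatchesConverter

converter = HpatchesConverter({
    'converter': 'hpatches_with_kornia_feature',
    'data_dir': 'datasets/hpatches',  # hypothetical path
    'max_num_keypoints': 512,
    'image_side_size': 480,
})
result = converter.convert()
# ConverterReturn carries the annotation list: one entry per (sequence, idx) pair.
print(len(result.annotations))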
@@ -38,7 +38,7 @@
    create_reader,
    REQUIRES_ANNOTATIONS
)
from .annotation_readers import AnnotationFeaturesReader, DiskImageFeaturesExtractor
from .binary_data_readers import PickleReader, ByteFileReader, LMDBReader
from .medical_imaging_readers import NiftiImageReader, DicomReader
from .audio_readers import WavReader, KaldiARKReader, FlacReader
@@ -78,6 +78,7 @@
    'NiftiImageReader',
    'TensorflowImageReader',
    'AnnotationFeaturesReader',
    'DiskImageFeaturesExtractor',
    'WavReader',
    'FlacReader',
    'DicomReader',
@@ -14,11 +14,10 @@
limitations under the License.
"""

from ..config import ListField, BoolField, ConfigError
from .data_reader import BaseReader, create_ann_identifier_key, AnnotationDataIdentifier
from ..utils import contains_all


class NCFDataReader(BaseReader):
    __provider__ = 'ncf_data_reader'

@@ -68,3 +67,38 @@ def _read_list(self, data_id):
    def reset(self):
        self.subset = range(len(self.data_source))
        self.counter = 0


class DiskImageFeaturesExtractor(BaseReader):
    __provider__ = 'disk_features_extractor'

    @classmethod
    def parameters(cls):
        parameters = super().parameters()
        parameters.update({'input_is_dict_type': BoolField(
            optional=True, default=True, description='Whether the model input is a dict.')})
        parameters.update({'output_is_dict_type': BoolField(
            optional=True, default=True, description='Whether the model output is a dict.')})
        return parameters

    def configure(self):
        self.input_as_dict_type = self.get_value_from_config('input_is_dict_type')
        self.output_is_dict_type = self.get_value_from_config('output_is_dict_type')

    def read(self, data_id):
        assert isinstance(data_id, AnnotationDataIdentifier)
        data = data_id.data_id

        required_keys = ["keypoints", "descriptors", "image_size", "oris"]

        # Regroup the per-image keys ("keypoints0", "keypoints1", ...) into two
        # per-view dicts, keeping only the keys present in the annotation data.
        view0 = {k: data[k + "0"] for k in required_keys if k + "0" in data}
        view1 = {k: data[k + "1"] for k in required_keys if k + "1" in data}

        return {"image0": view0, "image1": view1}

    def _read_list(self, data_id):
        return self.read(data_id)
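
For illustration, the regrouping read() performs on an annotation produced by HpatchesConverter (the names below are placeholders):

# data_id.data_id, as built by HpatchesConverter:
#   {"keypoints0": kp0, "descriptors0": d0, "image_size0": s0,
#    "keypoints1": kp1, "descriptors1": d1, "image_size1": s1, "H_0to1": H}
# read(data_id) returns the two views a matcher consumes:
#   {"image0": {"keypoints": kp0, "descriptors": d0, "image_size": s0},
#    "image1": {"keypoints": kp1, "descriptors": d1, "image_size": s1}}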
@@ -29,7 +29,7 @@
    BaseField, StringField, ConfigValidator, ConfigError, DictField, BoolField, PathField
)

# Readers listed here take their input from the annotation itself rather than
# from a separate data source.
REQUIRES_ANNOTATIONS = ['annotation_features_extractor', 'disk_features_extractor']
DOES_NOT_REQUIRED_DATA_SOURCE = REQUIRES_ANNOTATIONS + ['ncf_reader']
DATA_SOURCE_IS_FILE = ['opencv_capture']

@@ -39,6 +39,9 @@ def __init__(self, data, meta=None, identifier=''):
        self.identifier = identifier
        self.data = data
        self.metadata = meta or {}

        if self.metadata.get('input_is_dict_type'):
            # Dict-type inputs carry no single image, so skip the image_size
            # bookkeeping below.
            return
        if np.isscalar(data):
            self.metadata['image_size'] = 1
        elif isinstance(data, list) and np.isscalar(data[0]):
@@ -211,6 +214,7 @@ def __init__(self, data_source, config=None, postpone_data_source=False, **kwarg
        self.read_dispatcher.register(ParametricImageIdentifier, self._read_parametric_input)
        self.read_dispatcher.register(VideoFrameIdentifier, self._read_video_frame)
        self.multi_infer = False
        self.data_layout = None

        self.validate_config(config, data_source)
        self.configure()
@@ -318,8 +322,13 @@ def _read_video_frame(self, data_id):
        return self.read_dispatcher(data_id.frame)

    def read_item(self, data_id):
        # Propagate the dict-type flags from the reader config so downstream
        # components can bypass image-specific handling.
        meta = {
            'input_is_dict_type': self.config.get('input_is_dict_type', False),
            'output_is_dict_type': self.config.get('output_is_dict_type', False),
        }
        data_rep = DataRepresentation(
            self.read_dispatcher(data_id),
            meta=meta,
            identifier=data_id if not isinstance(data_id, ListIdentifier) else list(data_id.values)
        )
        if self.multi_infer:
10 changes: 10 additions & 0 deletions tools/accuracy_checker/accuracy_checker/launcher/input_feeder.py
@@ -484,6 +484,7 @@ def separate_data(data, num_splits):

        batch_size = len(meta)
        template_for_shapes = {}

        if meta[0].get('multi_infer', False):
            num_splits = calculate_num_splits(batch_data, batch_size)
            infers_data = [{} for _ in range(num_splits)]
@@ -504,6 +505,15 @@

        for layer_name, layer_data in batch_data.items():
            layout = self.layouts_mapping.get(layer_name)
            if meta[0].get('input_is_dict_type'):
                # Dict-type inputs skip the image layout/shape handling below;
                # they are only passed through the per-layer transform function.
                layer_data_preprocessed = self.input_transform_func(
                    layer_data, layer_name,
                    layout,
                    self.precision_mapping.get(layer_name), template
                )
                batch_data[layer_name] = layer_data_preprocessed
                continue

            if 'data_layout' in meta[0]:
                data_layout = LAYER_LAYOUT_TO_IMAGE_LAYOUT.get(meta[0]['data_layout'])
                if layout is None and len(self.default_layout) == len(data_layout):