[detect] 使用yolov5的flask接口

eternal-echo · eternal-echo · commit 8c3b0e589ed7 · 2023-05-06T11:09:18.000+08:00
diff --git a/detect/parts_detect.py b/detect/parts_detect.py
@@ -1,187 +1,31 @@
-import argparse
-import os
-import platform
-import sys
-from pathlib import Path
-
-import torch
-
-# YOLOv5 root directory
-ROOT = Path(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'yolov5')))
-if str(ROOT) not in sys.path:
-    sys.path.append(str(ROOT))  # add ROOT to PATH
-ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
-
-from yolov5.models.common import DetectMultiBackend
-from yolov5.utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
-from yolov5.utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
-from yolov5.utils.plots import Annotator, colors, save_one_box
-from yolov5.utils.torch_utils import select_device, smart_inference_mode
-
-@smart_inference_mode()
-def run(
-        weights=ROOT / 'yolov5s.pt',  # model path or triton URL
-        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
-        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
-        imgsz=(640, 640),  # inference size (height, width)
-        conf_thres=0.25,  # confidence threshold
-        iou_thres=0.45,  # NMS IOU threshold
-        max_det=1000,  # maximum detections per image
-        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
-        view_img=False,  # show results
-        save_txt=False,  # save results to *.txt
-        save_conf=False,  # save confidences in --save-txt labels
-        save_crop=False,  # save cropped prediction boxes
-        nosave=False,  # do not save images/videos
-        # 滤除暂时不分类的零件类别
-        classes=None,  # filter by class: --class 0, or --class 0 2 3
-        agnostic_nms=False,  # class-agnostic NMS
-        augment=False,  # augmented inference
-        visualize=False,  # visualize features
-        update=False,  # update all models
-        project=ROOT / 'runs/detect',  # save results to project/name
-        name='exp',  # save results to project/name
-        exist_ok=False,  # existing project/name ok, do not increment
-        line_thickness=3,  # bounding box thickness (pixels)
-        hide_labels=False,  # hide labels
-        hide_conf=False,  # hide confidences
-        half=False,  # use FP16 half-precision inference
-        dnn=False,  # use OpenCV DNN for ONNX inference
-        vid_stride=1,  # video frame-rate stride
-):
-    source = str(source)
-    save_img = not nosave and not source.endswith('.txt')  # save inference images
-    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
-    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
-    webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file)
-    screenshot = source.lower().startswith('screen')
-    if is_url and is_file:
-        source = check_file(source)  # download
-
-    # Directories
-    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
-    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
-
-    # Load model
-    device = select_device(device)
-    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
-    stride, names, pt = model.stride, model.names, model.pt
-    imgsz = check_img_size(imgsz, s=stride)  # check image size
-
-    # Dataloader
-    bs = 1  # batch_size
-    # 摄像头
-    if webcam:
-        view_img = check_imshow(warn=True)
-        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
-        bs = len(dataset)
-    elif screenshot:
-        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
-    else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
-    vid_path, vid_writer = [None] * bs, [None] * bs
-
-    # Run inference
-    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
-    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
-    for path, im, im0s, vid_cap, s in dataset:
-        with dt[0]:
-            im = torch.from_numpy(im).to(model.device)
-            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
-            im /= 255  # 0 - 255 to 0.0 - 1.0
-            if len(im.shape) == 3:
-                im = im[None]  # expand for batch dim
-
-        # Inference
-        with dt[1]:
-            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
-            pred = model(im, augment=augment, visualize=visualize)
-
-        # NMS
-        with dt[2]:
-            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
-
-        # Second-stage classifier (optional)
-        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
-
-        # Process predictions
-        for i, det in enumerate(pred):  # per image
-            seen += 1
-            if webcam:  # batch_size >= 1
-                p, im0, frame = path[i], im0s[i].copy(), dataset.count
-                s += f'{i}: '
-            else:
-                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
-
-            p = Path(p)  # to Path
-            save_path = str(save_dir / p.name)  # im.jpg
-            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
-            s += '%gx%g ' % im.shape[2:]  # print string
-            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
-            imc = im0.copy() if save_crop else im0  # for save_crop
-            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
-            if len(det):
-                # Rescale boxes from img_size to im0 size
-                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
-
-                # Print results
-                for c in det[:, 5].unique():
-                    n = (det[:, 5] == c).sum()  # detections per class
-                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
-
-                # Write results
-                for *xyxy, conf, cls in reversed(det):
-                    if save_txt:  # Write to file
-                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
-                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
-                        with open(f'{txt_path}.txt', 'a') as f:
-                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
-
-                    if save_img or save_crop or view_img:  # Add bbox to image
-                        c = int(cls)  # integer class
-                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
-                        annotator.box_label(xyxy, label, color=colors(c, True))
-                    if save_crop:
-                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
-
-            # Stream results
-            im0 = annotator.result()
-            if view_img:
-                if platform.system() == 'Linux' and p not in windows:
-                    windows.append(p)
-                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
-                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
-                cv2.imshow(str(p), im0)
-                cv2.waitKey(1)  # 1 millisecond
-
-            # Save results (image with detections)
-            if save_img:
-                if dataset.mode == 'image':
-                    cv2.imwrite(save_path, im0)
-                else:  # 'video' or 'stream'
-                    if vid_path[i] != save_path:  # new video
-                        vid_path[i] = save_path
-                        if isinstance(vid_writer[i], cv2.VideoWriter):
-                            vid_writer[i].release()  # release previous video writer
-                        if vid_cap:  # video
-                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
-                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-                        else:  # stream
-                            fps, w, h = 30, im0.shape[1], im0.shape[0]
-                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
-                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
-                    vid_writer[i].write(im0)
-
-        # Print time (inference-only)
-        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
-
-    # Print results
-    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
-    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
-    if save_txt or save_img:
-        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
-        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
-    if update:
-        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)
+"""Perform test request"""
+import cv2
+import requests
+
+class Detect:
+    def __init__(self):
+        self.detection_url = "http://localhost:5000/v1/object-detection/yolov5"
+
+    def detect(self, image):
+        # JPEG-encode the OpenCV image; the success flag returned by imencode is discarded
+        _, img_encoded = cv2.imencode(".jpg", image)
+        image_bytes = img_encoded.tobytes()
+
+        # POST the JPEG bytes as the "image" file field and parse the reply body as JSON
+        response = requests.post(self.detection_url, files={"image": image_bytes}).json()
+
+        return response
+        # NOTE(review): the commented-out sketch below sits after the return and never runs
+        # # Parse the response and return the results
+        # results = []
+        # for obj in response["predictions"]:
+        #     class_name = obj["label"]
+        #     confidence = obj["confidence"]
+        #     bbox = obj["bbox"]
+        #     x1, y1, x2, y2 = bbox
+        #     results.append({
+        #         "class": class_name,
+        #         "confidence": confidence,
+        #         "bbox": [x1, y1, x2, y2]
+        #     })
+        # return results
diff --git a/parts_sorting.py b/parts_sorting.py
@@ -8,6 +8,7 @@
 # from move.parts_moving import NozzleMoving, NozzleSetting, PartInfo, Point
 from detect.parts_segment import BackgroundModel, ConnectedComponents
 from detect.parts_tracker import Tracker, Point, Rectangle, TargetInfo, TargetTrack
+from detect.parts_detect import Detect
 
 class PartsSortingSystem:
     def __init__(self, camera: str, config_file, results_dir = 'run'):
@@ -87,6 +88,8 @@ def start(self):
             with open(os.path.join(self.config_dir, 'config.json'), 'w') as f:
                 json.dump(data, f, indent=4)
 
+        # 创建检测器对象并进行检测
+        self.detector = Detect()
         # 背景建模
         self.back_model = BackgroundModel(algo='MOG2', history=500, varThreshold=50, detectShadows=False)
         # 连通域分析
@@ -123,6 +126,11 @@ def start(self):
         # 候选目标列表，节点类型为TargetTrack
         self.candidates = list()
 
+        # 离开区域的y坐标
+        self.end_y = int(self.bbox_belt[3] / 15)
+        # 开始区域的y坐标
+        self.start_y = int(self.bbox_belt[3] * (1 - 1 / 3))
+
 
     def run(self):
         # 如果指定了保存路径，则创建VideoWriter对象保存连通域分析后的框选结果
@@ -194,14 +202,32 @@ def run(self):
                             break
 
                 # 如果不属于已有物体，并且在检测区域的下半部分，才加入候选目标队列
-                if (not matched) and y > belt.shape[0] / 3:
+                if (not matched) and y > self.start_y:
+                    # TODO: 每次添加都要显示log，观察是否有错误添加
                     self.candidates.append(TargetTrack(cur_info))
 
             # 过期检查
             for candidate in self.candidates:
-                if candidate.last.rect.y < 30:
-                    self.__logger.info("目标已经离开传送带，目标信息：{}".format(candidate.last))
+                if candidate.last.rect.y < self.end_y:
                     self.candidates.remove(candidate)
+                    # 识别目标
+                    # 截取的范围
+                    y1 = candidate.last.rect.y
+                    y2 = candidate.last.rect.y + candidate.last.rect.h
+                    x1 = candidate.last.rect.x
+                    x2 = candidate.last.rect.x + candidate.last.rect.w
+                    # src_obj = selected_belt[y1:y2, x1:x2]
+                    # # 将src_obj保存到缓存文件夹
+                    # cv2.imwrite(os.path.join(self.results_dir, "part{}.jpg".format(frame_id)), src_obj)
+
+                    # result = self.detector.detect(src_obj)
+                    result = self.detector.detect(belt)
+                    if result is not None:
+                        self.__logger.info("目标识别结果：{}".format(result))
+
+                    # bin_obj = pre_proc[y1:y2, x1:x2]
+                    # best_match, best_score, best_name = self.matcher.match(src_obj, bin_obj)
+                    # self.__logger.info("目标识别结果：{}({})， 相似度：{}".format(best_match, best_name, best_score))
                     continue
                 # 处理过期物体
                 if frame_id - candidate.last.timestamp > 3:
@@ -240,6 +266,10 @@ def run(self):
                     c_x, c_y = candidate.get_position()
                     v_x, v_y = candidate.get_velocity()
                     cv2.putText(candidates_belt, "({0:.2f}, {1:.2f}), ({2:.2f}, {3:.2f})".format(c_x, c_y, v_x, v_y), (candidate.last.rect.x, candidate.last.rect.y + candidate.last.rect.h), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 2)
+                # 绘制开始线
+                cv2.line(candidates_belt, (0, self.start_y), (candidates_belt.shape[1], self.start_y), (0, 0, 255), 2)
+                # 绘制结束线
+                cv2.line(candidates_belt, (0, self.end_y), (candidates_belt.shape[1], self.end_y), (0, 0, 255), 2)
                 cv2.namedWindow("Candidates Belt", cv2.WINDOW_NORMAL)
                 cv2.imshow("Candidates Belt", candidates_belt)
 
@@ -249,7 +279,7 @@ def run(self):
                 self.track_video.write(candidates_belt)
     
 
-            keyboard = cv2.waitKey(30)
+            keyboard = cv2.waitKey(1)
             if keyboard == 'q' or keyboard == 27:
                 break