掌握 ONNXRuntime YOLOv8-OBB，輕鬆實作旋轉目標檢測！

2024-04-02碼農

效果

YOLOv8 中 OBB（Oriented Bounding Box，旋轉邊界框）模型的引入是物體檢測的重要一步：對於有角度或旋轉的物體，它提高了檢測準確性，並減少了航空影像、文本檢測等各種應用中的背景雜訊。

模型資訊

Model Properties
-------------------------
date：2024-02-26T08:38:44.171849
description：Ultralytics YOLOv8s-obb model trained on runs/DOTAv1.0-ms.yaml
author：Ultralytics
task：obb
license：AGPL-3.0 https://ultralytics.com/license
version：8.1.18
stride：32
batch：1
imgsz：[640, 640]
names：{0: 'plane', 1: 'ship', 2: 'storage tank', 3: 'baseball diamond', 4: 'tennis court', 5: 'basketball court', 6: 'ground track field', 7: 'harbor', 8: 'bridge', 9: 'large vehicle', 10: 'small vehicle', 11: 'helicopter', 12: 'roundabout', 13: 'soccer ball field', 14: 'swimming pool'}
---------------------------------------------------------------

Inputs
-------------------------
name：images
tensor：Float[1, 3, 640, 640]
---------------------------------------------------------------

Outputs
-------------------------
name：output0
tensor：Float[1, 20, 8400]
（20 = 4 個邊界框參數 cx、cy、w、h + 15 個類別分數 + 1 個旋轉角度；8400 為候選框數量）
---------------------------------------------------------------

專案

程式碼

using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;
using OpenCvSharp.Dnn;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Windows.Forms;

namespace Onnx_Yolov8_Demo
{
    /// <summary>
    /// Demo form that runs a YOLOv8-OBB ONNX model on an image and draws the
    /// resulting rotated bounding boxes.
    /// button1 picks an image, button2 runs inference, pictureBox1 shows the source,
    /// pictureBox2 shows the annotated result and textBox1 shows the inference time.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Side length of the square network input (the model input is [1, 3, 640, 640]).
        const int InputSize = 640;

        // Minimum class score for a candidate to be kept (also passed to NMS).
        const float ScoreThreshold = 0.25f;

        // IoU threshold passed to CvDnn.NMSBoxes.
        const float NmsThreshold = 0.7f;

        // Output layout per candidate: cx, cy, w, h, <class scores...>, angle.
        const int BoxColumns = 4;
        const int AngleColumns = 1;

        // The original filter listed "*.tiff" twice; "*.tif" was evidently intended.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
        string image_path = "";
        string classer_path;

        DateTime dt1 = DateTime.Now;
        DateTime dt2 = DateTime.Now;
        string model_path;

        Mat image;
        Mat result_image;
        public string[] class_lables;

        SessionOptions options;
        InferenceSession onnx_session;
        Tensor<float> input_tensor;
        List<NamedOnnxValue> input_container;
        IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer;
        DisposableNamedOnnxValue[] results_onnxvalue;
        Tensor<float> result_tensors;

        /// <summary>
        /// Lets the user pick an image file and shows it in pictureBox1.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string chosen;
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }
                chosen = ofd.FileName;
            }

            LoadSourceImage(chosen);
            textBox1.Text = "";
            ReplacePicture(pictureBox2, null);
        }

        /// <summary>
        /// Runs the model on the current image and displays the annotated result
        /// together with the elapsed inference time.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (image_path == "")
            {
                return;
            }

            button2.Enabled = false;
            ReplacePicture(pictureBox2, null);
            textBox1.Text = "";
            Application.DoEvents();

            try
            {
                // Reload from disk so that every run starts from the unmodified source.
                if (image != null)
                {
                    image.Dispose();
                }
                image = new Mat(image_path);
                if (image.Empty())
                {
                    return;
                }

                float factor = FillInputTensor(image);

                int channels, anchors;
                float[] result_array = RunInference(out channels, out anchors);

                List<Rect2d> position_boxes = new List<Rect2d>();
                List<int> class_ids = new List<int>();
                List<float> confidences = new List<float>();
                List<float> rotations = new List<float>();

                // The raw output is [channels, anchors]; transpose so each row is one candidate.
                using (Mat raw = new Mat(channels, anchors, MatType.CV_32F, result_array))
                using (Mat predictions = raw.T())
                {
                    DecodeDetections(predictions, factor, position_boxes, class_ids, confidences, rotations);
                }

                // NMS on the axis-aligned boxes; skipped when there is nothing to suppress.
                int[] indexes = new int[0];
                if (position_boxes.Count > 0)
                {
                    CvDnn.NMSBoxes(position_boxes, confidences, ScoreThreshold, NmsThreshold, out indexes);
                }

                if (result_image != null)
                {
                    result_image.Dispose();
                }
                result_image = image.Clone();

                for (int i = 0; i < indexes.Length; i++)
                {
                    int index = indexes[i];
                    RotatedRect rotated = ToRotatedRect(position_boxes[index], rotations[index]);
                    DrawDetection(result_image, rotated, class_ids[index], confidences[index]);
                }

                ReplacePicture(pictureBox2, new Bitmap(result_image.ToMemoryStream()));
                textBox1.Text = "推理耗時:" + (dt2 - dt1).TotalMilliseconds + "ms";
            }
            finally
            {
                // Always re-enable the button, even when loading or inference throws.
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Creates the inference session, allocates the input tensor, reads the class
        /// labels and loads the bundled sample image when it is present.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            model_path = "model/yolov8s-obb.onnx";
            classer_path = "model/lable.txt";

            // Session options: info-level logging, CPU execution provider.
            options = new SessionOptions();
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            options.AppendExecutionProvider_CPU(0);

            // Load the ONNX model from model_path.
            onnx_session = new InferenceSession(model_path, options);

            // Input tensor, reused for every run.
            input_tensor = new DenseTensor<float>(new[] { 1, 3, InputSize, InputSize });

            // Input container, reused for every run (cleared before each use).
            input_container = new List<NamedOnnxValue>();

            // One label per line; ReadAllLines closes the file when done.
            class_lables = File.ReadAllLines(classer_path);

            // The sample image is optional: without it button2 is a no-op until a file is chosen.
            string sample = "test_img/1.png";
            if (File.Exists(sample))
            {
                LoadSourceImage(sample);
            }
        }

        /// <summary>
        /// Releases the native resources owned by the form when it closes.
        /// </summary>
        protected override void OnFormClosed(FormClosedEventArgs e)
        {
            if (result_infer != null)
            {
                result_infer.Dispose();
                result_infer = null;
            }
            if (onnx_session != null)
            {
                onnx_session.Dispose();
                onnx_session = null;
            }
            if (options != null)
            {
                options.Dispose();
                options = null;
            }
            if (image != null)
            {
                image.Dispose();
                image = null;
            }
            if (result_image != null)
            {
                result_image.Dispose();
                result_image = null;
            }

            base.OnFormClosed(e);
        }

        /// <summary>
        /// Loads <paramref name="path"/> as both the preview bitmap and the OpenCV
        /// source image, and records it as the current image path on success.
        /// </summary>
        private void LoadSourceImage(string path)
        {
            Bitmap preview = new Bitmap(path);
            Mat loaded;
            try
            {
                loaded = new Mat(path);
            }
            catch
            {
                preview.Dispose();
                throw;
            }

            image_path = path;
            ReplacePicture(pictureBox1, preview);

            if (image != null)
            {
                image.Dispose();
            }
            image = loaded;
        }

        /// <summary>
        /// Swaps the image shown by <paramref name="box"/> and disposes the previous one.
        /// </summary>
        private static void ReplacePicture(PictureBox box, System.Drawing.Image next)
        {
            System.Drawing.Image previous = box.Image;
            box.Image = next;
            if (previous != null)
            {
                previous.Dispose();
            }
        }

        /// <summary>
        /// Pads <paramref name="source"/> to a square (top-left aligned, black fill),
        /// converts it to RGB, resizes it to the network input size and writes the
        /// pixels, scaled to [0, 1], into <c>input_tensor</c> in CHW order.
        /// </summary>
        /// <returns>Scale factor that maps network coordinates back to source pixels.</returns>
        private float FillInputTensor(Mat source)
        {
            int side = Math.Max(source.Cols, source.Rows);

            using (Mat square = Mat.Zeros(new OpenCvSharp.Size(side, side), MatType.CV_8UC3))
            using (Mat rgb = new Mat())
            using (Mat resized = new Mat())
            {
                using (Mat target = new Mat(square, new Rect(0, 0, source.Cols, source.Rows)))
                {
                    source.CopyTo(target);
                }

                Cv2.CvtColor(square, rgb, ColorConversionCodes.BGR2RGB);
                Cv2.Resize(rgb, resized, new OpenCvSharp.Size(InputSize, InputSize));

                for (int y = 0; y < resized.Height; y++)
                {
                    for (int x = 0; x < resized.Width; x++)
                    {
                        Vec3b pixel = resized.At<Vec3b>(y, x);
                        input_tensor[0, 0, y, x] = pixel[0] / 255f;
                        input_tensor[0, 1, y, x] = pixel[1] / 255f;
                        input_tensor[0, 2, y, x] = pixel[2] / 255f;
                    }
                }
            }

            return (float)(side / (double)InputSize);
        }

        /// <summary>
        /// Feeds <c>input_tensor</c> to the session, records the timing in
        /// <c>dt1</c>/<c>dt2</c> and returns a managed copy of the first output.
        /// </summary>
        /// <param name="channels">Second dimension of the output tensor (values per candidate).</param>
        /// <param name="anchors">Third dimension of the output tensor (number of candidates).</param>
        /// <exception cref="InvalidOperationException">The output does not have the expected layout.</exception>
        private float[] RunInference(out int channels, out int anchors)
        {
            // The container is reused across runs; without Clear() a second run would
            // feed two inputs that are both named "images".
            input_container.Clear();
            input_container.Add(NamedOnnxValue.CreateFromTensor("images", input_tensor));

            dt1 = DateTime.Now;
            result_infer = onnx_session.Run(input_container);
            dt2 = DateTime.Now;

            try
            {
                results_onnxvalue = result_infer.ToArray();
                result_tensors = results_onnxvalue[0].AsTensor<float>();

                if (result_tensors.Rank != 3
                    || result_tensors.Dimensions[1] <= BoxColumns + AngleColumns)
                {
                    throw new InvalidOperationException("Unexpected output tensor layout for an OBB model.");
                }

                channels = result_tensors.Dimensions[1];
                anchors = result_tensors.Dimensions[2];

                // Copy to a managed array so the native buffers can be released right away.
                return result_tensors.ToArray();
            }
            finally
            {
                result_infer.Dispose();
                result_infer = null;
                results_onnxvalue = null;
                result_tensors = null;
            }
        }

        /// <summary>
        /// Scans every candidate row of <paramref name="predictions"/> and appends those
        /// whose best class score exceeds <see cref="ScoreThreshold"/> to the output lists.
        /// Boxes are stored axis-aligned (top-left + size) in source-image pixels.
        /// </summary>
        private static void DecodeDetections(
            Mat predictions,
            float factor,
            List<Rect2d> boxes,
            List<int> classIds,
            List<float> scores,
            List<float> angles)
        {
            int classCount = predictions.Cols - BoxColumns - AngleColumns;
            int angleColumn = predictions.Cols - 1;

            for (int i = 0; i < predictions.Rows; i++)
            {
                OpenCvSharp.Point maxClassIdPoint, minClassIdPoint;
                double maxScore, minScore;

                // Best class score of this candidate and the column it was found in.
                using (Mat classScores = new Mat(predictions, new Rect(BoxColumns, i, classCount, 1)))
                {
                    Cv2.MinMaxLoc(classScores, out minScore, out maxScore, out minClassIdPoint, out maxClassIdPoint);
                }

                if (maxScore <= ScoreThreshold)
                {
                    continue;
                }

                float cx = predictions.At<float>(i, 0);
                float cy = predictions.At<float>(i, 1);
                float ow = predictions.At<float>(i, 2);
                float oh = predictions.At<float>(i, 3);

                Rect2d box = new Rect2d();
                box.X = (cx - 0.5 * ow) * factor;
                box.Y = (cy - 0.5 * oh) * factor;
                box.Width = ow * factor;
                box.Height = oh * factor;

                boxes.Add(box);
                classIds.Add(maxClassIdPoint.X);
                scores.Add((float)maxScore);
                angles.Add(predictions.At<float>(i, angleColumn));
            }
        }

        /// <summary>
        /// Builds the rotated rectangle for a detection. The longer side is always used
        /// as the width; when the sides are swapped the angle is advanced by 90 degrees,
        /// and the angle is then wrapped with a modulo of PI before converting to degrees.
        /// </summary>
        /// <param name="box">Axis-aligned box (top-left + size) in source pixels.</param>
        /// <param name="angle">Rotation reported by the model, in radians.</param>
        private static RotatedRect ToRotatedRect(Rect2d box, float angle)
        {
            float w = (float)box.Width;
            float h = (float)box.Height;
            float centerX = (float)box.X + w / 2;
            float centerY = (float)box.Y + h / 2;

            bool widthIsLonger = w > h;
            float longSide = widthIsLonger ? w : h;
            float shortSide = widthIsLonger ? h : w;
            float wrapped = (float)((widthIsLonger ? angle : (float)(angle + Math.PI / 2)) % Math.PI);

            return new RotatedRect(
                new Point2f(centerX, centerY),
                new Size2f(longSide, shortSide),
                (float)(wrapped * 180.0 / Math.PI));
        }

        /// <summary>
        /// Draws the outline of <paramref name="rect"/> and a "label-score" caption
        /// anchored at its first corner.
        /// </summary>
        private void DrawDetection(Mat canvas, RotatedRect rect, int classId, float confidence)
        {
            Point2f[] points = rect.Points();
            for (int j = 0; j < 4; j++)
            {
                Cv2.Line(
                    canvas,
                    (OpenCvSharp.Point)points[j],
                    (OpenCvSharp.Point)points[(j + 1) % 4],
                    new Scalar(0, 255, 0),
                    2);
            }

            Cv2.PutText(
                canvas,
                LabelFor(classId) + "-" + confidence.ToString("0.00"),
                (OpenCvSharp.Point)points[0],
                HersheyFonts.HersheySimplex,
                0.8,
                new Scalar(0, 0, 255),
                2);
        }

        /// <summary>
        /// Returns the label for <paramref name="classId"/>, falling back to the numeric
        /// id when the label file has fewer entries than the model has classes.
        /// </summary>
        private string LabelFor(int classId)
        {
            if (class_lables != null && classId >= 0 && classId < class_lables.Length)
            {
                return class_lables[classId];
            }
            return classId.ToString();
        }
    }
}