diff --git a/.ci/skipped_notebooks.yml b/.ci/skipped_notebooks.yml index 9cf353e7230..edbda7364f0 100644 --- a/.ci/skipped_notebooks.yml +++ b/.ci/skipped_notebooks.yml @@ -577,4 +577,11 @@ - os: - macos-13 - ubuntu-22.04 - - windows-2022 \ No newline at end of file + - windows-2022 +- notebook: notebooks/3D-point-pillars/pointpillars.ipynb + skips: + - os: + - macos-13 + - windows-2022 + - python: + - "3.13" diff --git a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt index 2d90908412a..a3b78214c6a 100644 --- a/.ci/spellcheck/.pyspelling.wordlist.txt +++ b/.ci/spellcheck/.pyspelling.wordlist.txt @@ -1,40 +1,40 @@ 3D abstractive -accelerometers accelerometer +accelerometers acknowledgement ActivationMap activations +AdapterConfig adaptively adas ADE adversarially AE -AEs aeroplane +AEs affective AFM Agentic agentic ai -al -AdapterConfig AISE AISEClassification AISEDetection +al Alibaba ALiBi AlpacaEval aMUSEd analytics AnimateAnyone -AnimeGAN AnimateLCM +AnimeGAN Antelopev api APIs -Arcface Arcee +Arcface argmax artstation arxiv @@ -50,12 +50,12 @@ AudioForConditionalGeneration audioldm AudioLDM autoencoder +AutoEncoder +AutoEncoders autogenerated AutoModelForXxx autoregressive autoregressively -AutoEncoder -AutoEncoders AutoTokenizer AWQ awq @@ -79,7 +79,6 @@ bfloat BGE bge BGR -BMP Bicubic bicubic bilinear @@ -87,14 +86,15 @@ biometrics BiT bitext bitnet -blockwise BLACKBOX +blockwise +BMP boolean CatVTON CausVid -CER CentOS centric +CER CFG charlist charlists @@ -105,22 +105,23 @@ ChatGLM ChatGPT ChatTTS chinese -CIN -ckpt CHW Cifar +CIN cityscape Cityscapes Civitai CivitAI +ckpt classname ClassProbabilityMap CLI cli -ClipBasedHeadEstimator CLIP's +ClipBasedHeadEstimator CMs cmu +CNNs codebase codebook codebooks @@ -140,8 +141,8 @@ Contrastive contrastive controllability ControlNet -ControlNets controlnet +ControlNets ConvE conve ConvNet @@ -151,12 +152,13 @@ ConvNeXt ConvNeXts Convolutional convolutional +coors coreference CoSENT cpm cpp -CPUs cpu +CPUs CRNN CSV CTC @@ -165,25 +167,24 @@ CUDA CustomEncoderWav CVF CVPR -CNNs +DarkIR Databricks databricks dataloader -dataloaders DataLoader +dataloaders DataLoaders DataModule dataset datasets -DarkIR DaViT DCAE +DDColor +ddcolor +DDColor's DDIM DDPMs dDPO -DDColor's -DDColor -ddcolor de Deblur deblur @@ -192,43 +193,43 @@ DeblurGANv deblurred Deblurring deblurring -deconvolution -decidiffusion Deci +decidiffusion DeciDiffusion DeciDiffusion's +deconvolution deduplicated DeepEncoder DeepFloyd DeepLabV DeepSeek DeepStack +demucs denoise denoised denoises denoising denormalization denormalized -demucs depainting deployable DepthAnything +DepthAnythingV detections +detectron +Detectron detokenization detokenizer detokenizers Dettmers dev -detectron -Detectron -DepthAnythingV dGPU dGPUs DialoGPT diarization -digitalized Diffusers diffusers +digitalized dimensionality DINOv Distil @@ -238,9 +239,8 @@ distilbert distiluse DIT DiT -DiT’s -DiT’s DiTs +DiT’s DL DocLayNet docling @@ -250,9 +250,9 @@ DoclingDocument DoclingDocuments docstring DocTags +DocumentLoaders DocVQA docvqa -DocumentLoaders doi Dollár donut @@ -302,13 +302,13 @@ FastSAM FC feedforward FeedForward -FFN +FEIL FFmpeg -FireRedTTS +FFN FIL -FEIL finetuned finetuning +FireRedTTS FLAC FLD floyd @@ -333,18 +333,18 @@ gaussian Gb gcc GEC -GELU GELAN +GELU Gemma gemma genai genAI GenerationConfig GenerationMixin +GGUF Girshick Gitee GitHub -GGUF GLA GLM glm @@ -370,14 +370,14 @@ Gu GUIs Gutendex Hafner -HugginFaceH HalBench HandBrake -heatmap HC 
+heatmap HED HH hoc +HugginFaceH HuggingFace huggingfacehub Hunyuan @@ -388,19 +388,22 @@ HWC hyperparameters ICIP ICPR -IEMOCAP -iGPU identation IdentityNet +IEMOCAP +iGPU iGPUs Ilija +im ImageBind +imageLink imagenet Imagenet ImageNet Imagenette ImagePipeline ImageWoof +img ImgPipeline impactful IMU @@ -414,11 +417,10 @@ Inpaint inpaint inpainting InpaintingPipeline -InternVL InsightFace installable -InstantID instantiation +InstantID InstructGPT InstructPix intel @@ -426,27 +428,26 @@ interactable InternLM internlm InternVideo +InternVL Interpolative interpretable -invertible intervaling -im -imageLink -img +invertible io +IoU ip IPs ir IRs iteratively +Javascript JAX JAX's -Javascript JFLEG -JIT Jina jina jinaai +JIT Joao JS JSON @@ -456,9 +457,9 @@ JuggernautXL Jupyter Jupyter's JupyterLab +kagglehub Kaiming Kalman -kagglehub kbps KD KDE @@ -468,6 +469,7 @@ KerasHub keypoint keypoints KiTS +KITTI Kokoro Koltun Kondate @@ -491,14 +493,12 @@ LaBSE LADD Lai LAION +laion LangChain langchain Lasinger latents LaTeX -Lysak -LocalAI -lookahead LCMs LCMScheduler LDM @@ -510,6 +510,7 @@ LibriSpeech librispeech LibriTTS librosa +LiDAR Lim LinearCameraEmbedder linearized @@ -518,40 +519,41 @@ Liu LLama LLaMa LLaMA +LlamaIndex LLaSA Llasa -LlamaIndex LLaVA llava +LLIE llm LLM +LLMPipeline LLMs lm LM LMS -LLMPipeline loc +LocalAI logits LogSoftmax -LoRA -LoRAs -lora +LOL LOLBlur LOLv -LOL -LLIE +lookahead +LoRA +lora +LoRAs lraspp LRASPP +LSTM +LSTMs LTS LTX ltx -LSTM -LSTMs Luo LVLM Lysak Lyth -laion MaaS macOS Magika @@ -566,9 +568,9 @@ MaterialMLP MaterialNet mathbf mathpix -matplotlib MathVista MatMul +matplotlib MBs MCP md @@ -590,15 +592,15 @@ minicpm MiniCPM MiniLM mistralai +MLLM +MLLMs +MLP MLS MMB +MMDiT mms MMS -MLLM -MLLMs MMVLM -MLP -MMDiT MobileCLIP MobileLLaMA mobilenet @@ -606,8 +608,8 @@ MobileNet MobileNetV mobilevlm MobileVLM -modelled ModelBest +modelled Modelscope ModelScope modelscope @@ -622,11 +624,11 @@ mpnet mpt MPT MRL -MRPC mRoPE +MRPC msi -MTVQA mT +MTVQA multiarchitecture Multiclass multiclass @@ -636,8 +638,8 @@ Multilinguality multimodal Multimodality multinomial -MusicGen MuRAG +MusicGen Müller naflex Nakayosi @@ -656,6 +658,7 @@ NeurIPS NeuSpell NeXT ng +ngram NLP nlp NMS @@ -667,6 +670,10 @@ NONINFRINGEMENT nonlinearity Notus notus +npoints +npu +NPU +NPUs nsamples nsfw NSFW @@ -674,11 +681,7 @@ NuExtract num numpy NumPy -npu -NPU -NPUs NVLabs -ngram OASST OBB obb @@ -695,10 +698,10 @@ Omnimodal omnimodal OmniParser OMZ -OneFormer -oneformer oneAPI oneDNN +OneFormer +oneformer ONNX onnx ontologies @@ -709,6 +712,7 @@ OpenCL OpenCLIP OpenCLIP's OpenCV +openjourney OpenPose OpenShift OpenVINO @@ -719,22 +723,20 @@ OpenVINOMultiModal openvoice OpenVoice OpenVoiceBaseClass -openjourney opset optimizable Orca -otsl OSNet oss +otsl OTSL OuteTTS outpainting -OVC -overfitting -overlayed ov OV OVC +overfitting +overlayed OVModel OVModelForCausalLM OVModelForVisualCausalLM @@ -742,9 +744,9 @@ OVModelForXXX OVModelForXxx OVMS OVQwen -OVStableDiffusionPipeline -OVStableDiffusionInpaintPipeline OvStableDiffusionInpaintingPipeline +OVStableDiffusionInpaintPipeline +OVStableDiffusionPipeline PaddleClas PaddleGAN paddlegan @@ -763,12 +765,13 @@ parametrize Parler parsers patchifying -perceptron Patil +PCD PDFs pdftex PEFT perceiver +perceptron PerformanceMode performant PersonaGPT @@ -776,6 +779,9 @@ PGI PhotoMaker photorealism photorealistic +PII +PIL +Pillarization Piotr Pipelining PixArt @@ -784,13 +790,14 @@ PixelShuffleUpsampleNetwork pixelwise Pixtral pixtral -PII -PIL +PLR 
PNDM png Pointilism pointillistic PointNet +PointPillar +PointPillars pos Postfuse postprocess @@ -801,7 +808,6 @@ PowerShell PPYOLOv PR Prateek -PLR pre Precisions precomputed @@ -824,9 +830,9 @@ Pretraining pretraining processings promptable +prompthero proto protobuf -prompthero PRs psychoacoustics PTQ @@ -841,10 +847,9 @@ Pythia pytorch PyTorch PyTorchVideo -timm QFormer -Qianwen Qi +Qianwen QK QKV qrcode @@ -887,8 +892,8 @@ repo reproducibility rerank Rerank -reranking reranker +reranking resampler Resampler rescale @@ -897,8 +902,8 @@ Rescaling ResNet resnet RetinaFace -RetroMAE Retinex +RetroMAE RGB Riffusion riffusion @@ -922,8 +927,8 @@ sagittal SALICON Saliency saliency -SAMI sam +SAMI SavedModel scalability Scalable @@ -951,10 +956,10 @@ ShareGPT Shazeer Shutterstock siggraph -sigmoid SigLIP SigLip siglip +sigmoid SISR SL SlimOrca @@ -964,8 +969,9 @@ slowmo SML sml SmolDocling -SmolVLM SmolLM +SmolVLM +snshrivas softmax softvc SoftVC @@ -974,17 +980,17 @@ SOTA SoTA soundfile Sovits -sparsity Sparisty sparsified sparsify +sparsity spatio spatiotemporal spectrogram spectrograms SpeechBrain -SpeechBrain's speechbrain +SpeechBrain's SpeechPipeline SpeechT splitters @@ -996,14 +1002,14 @@ SRT SSD SSDLite sst +stabilityai StableCascade -StableDiffusionInpaintPipeline -StableDiffusionPipeline StableDiffusionImg StableDiffusionImg2ImgPipeline -stabilityai -STFT +StableDiffusionInpaintPipeline +StableDiffusionPipeline stateful +STFT streamable Struct struct @@ -1026,13 +1032,12 @@ surya svc SVD SVTR -Swin SwiGLU +Swin SwinV sym -snshrivas -TaskManager TartanAir +TaskManager tbb TensorBoard tensorflow @@ -1044,7 +1049,10 @@ th tikz timestep timesteps +timm TinyLlama +tinyllama +TinyLLama tinynas Tokenization tokenization @@ -1062,11 +1070,11 @@ toolkits torchaudio TorchDynamo torchdynamo +TorchFX TorchMetrics TorchScript torchvision TorchVision -TorchFX transformative TrigFlow triplane @@ -1074,32 +1082,30 @@ TriplaneLearnablePositionalEmbedding triplanes Tripo TripoSR -TTS Tsinghua TsinghuaNLP +TTS tunable tv TwoStreamInterleaveTransformer TypeScript -tinyllama -TinyLLama Udnie UHD UI -UIs UINT +UIs Ultralytics +UMD Uncheck unCLIP uncomment uncompressing -UMD +uncurated UNet -UNets Unet +UNets Unimodal unsqueeze -uncurated Uparrow uparrow upcast @@ -1122,8 +1128,8 @@ validator variational VCTK Vec -VectorStore vec +VectorStore VegaRT verovio videpth @@ -1138,41 +1144,46 @@ vits VITS vitt VL -VL’s vl +Vladlen vlm VLM -VLMs VLModel VLMPipeline +VLMs +VL’s VM -Vladlen VOC Vocoder vocoder vocoding +voxel +Voxelization +voxelization +voxelizing +voxels VQ VQA VQGAN VQVAE -waveform -waveforms Wav wav +waveform +waveforms WavLM WebGL WebUI WER +WhisperPipeline WHITEBOX -WIKISQL wikipedia +WIKISQL WikiTable Wikitext wikitext WIKITQ Wofk WTQ -WhisperPipeline wuerstchen WuerstchenDiffNeXt Würstchen @@ -1182,6 +1193,7 @@ XCodec Xeon xformers xl +xml xt xvector xvectors @@ -1197,8 +1209,8 @@ youri YouTube Zafrir ZavyChromaXL -Zongyuan ZeroScope zeroscope -Zhipu zh +Zhipu +Zongyuan diff --git a/notebooks/3D-point-pillars/README.md b/notebooks/3D-point-pillars/README.md new file mode 100644 index 00000000000..8d04067493e --- /dev/null +++ b/notebooks/3D-point-pillars/README.md @@ -0,0 +1,30 @@ +# **PointPillar for 3D object detection** + +PointPillar is a fast and efficient deep-learning architecture for 3D object detection from LiDAR point clouds, commonly used in autonomous driving. 
+
+Instead of operating directly on raw points or dense 3D voxels, PointPillar groups points into vertical columns ("pillars") and encodes per-pillar features. These pillar features are arranged into a pseudo-image that a 2D convolutional backbone can process. The pipeline is lightweight and well-suited for real-time inference.
+
+Core stages:
+- Voxelization / Pillarization: group points into pillars and compute per-pillar statistics.
+- Pillar feature encoding: a small network encodes the points in each pillar into a fixed-size feature vector.
+- Scatter to pseudo-image: place each pillar's feature into a 2D grid (pseudo-image) based on the pillar's X-Y location.
+- 2D backbone + neck: apply 2D convolutions to produce multi-scale feature maps.
+- Detection head: predict class scores, bounding box regressions, and directions on the pseudo-image.
+- Post-processing: decode boxes, apply non-maximum suppression (NMS), and output the final detections.
+
+In this tutorial we consider how to run PointPillars with OpenVINO.
+
+## Notebook contents
+The tutorial consists of the following steps:
+
+- Install requirements
+- Build extensions
+- Export the model to OpenVINO format
+- Run OpenVINO model inference
+
+## Installation instructions
+This is a self-contained example that relies solely on its own code.
+We recommend running the notebook in a virtual environment. You only need a Jupyter server to start. +For details, please refer to [Installation Guide](../../README.md). + + diff --git a/notebooks/3D-point-pillars/pointpillars.ipynb b/notebooks/3D-point-pillars/pointpillars.ipynb new file mode 100644 index 00000000000..9b06ceaed5b --- /dev/null +++ b/notebooks/3D-point-pillars/pointpillars.ipynb @@ -0,0 +1,540 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "892bd49e", + "metadata": {}, + "source": [ + "# **PointPillar for 3D object detection**\n", + "\n", + "PointPillar is a fast and efficient deep-learning architecture for 3D object detection from LiDAR point clouds, commonly used in autonomous driving.\n", + "\n", + "Instead of operating directly on raw points or dense 3D voxels, PointPillar groups points into vertical columns (\"pillars\") and encodes per-pillar features. \n", + "These pillar features are arranged into a pseudo-image that a 2D convolutional backbone can process. The pipeline is lightweight and well-suited for real-time inference.\n", + "\n", + "Core stages:\n", + "- Voxelization / Pillarization: group points into pillars and compute per-pillar statistics.\n", + "- Pillar feature encoding: a small network encodes points in each pillar into a fixed-size feature vector.\n", + "- Scatter to pseudo-image: place each pillar's feature into a 2D grid (pseudo-image) based on the pillar's X-Y location.\n", + "- 2D backbone + neck: apply 2D convolutions to produce multi-scale feature maps.\n", + "- Detection head: predict class scores, bounding box regressions, and directions on the pseudo-image.\n", + "- Post-processing: decode boxes, apply non-maximum suppression (NMS), and output final detections.\n", + "\n", + "In this tutorial we consider how to run PointPillars with OpenVINO.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "a84ce99b", + "metadata": {}, + "source": [ + "#### Table of contents:\n", + "1. [Prerequisites](#prerequisites)\n", + "2. [Install python packages](#install-python-packages)\n", + "3. [Build Extensions](#build-extensions)\n", + "4. [Exporting the model](#exporting-the-model)\n", + "5. [Inference with OpenVINO](#inference-with-openvino)\n", + "6. Utilities\n", + " * [KITTI bin to PCD](#kitti-bin-to-pcd)\n", + "### Installation Instructions\n", + "\n", + "This is a self-contained example that relies solely on its own code.\n", + "\n", + "We recommend running the notebook in a virtual environment. You only need a Jupyter server to start.\n", + "For details, please refer to [Installation Guide](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/README.md#-installation-guide)." 
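,
    "\n",
    "\n",
    "Before diving in, here is a minimal NumPy sketch of the signature 'scatter to pseudo-image' step described above (the function name, shapes, and coordinate layout are illustrative assumptions, not the notebook's actual implementation):\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def scatter_to_pseudo_image(pillar_features, coors, grid_y, grid_x):\n",
    "    # pillar_features: (P, C) per-pillar encodings; coors: (P, 4) as (batch, z, y, x)\n",
    "    canvas = np.zeros((pillar_features.shape[1], grid_y, grid_x), dtype=pillar_features.dtype)\n",
    "    # Place each pillar's feature vector at its (y, x) grid cell\n",
    "    canvas[:, coors[:, 2], coors[:, 3]] = pillar_features.T\n",
    "    return canvas  # a (C, H, W) pseudo-image a 2D CNN backbone can consume\n",
    "```"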
+ ] + }, + { + "cell_type": "markdown", + "id": "7a56c177", + "metadata": {}, + "source": [ + "#### **Prerequisites:**" + ] + }, + { + "cell_type": "markdown", + "id": "0766bf8e", + "metadata": {}, + "source": [ + "```bash\n", + "# Install the required packages specific to this notebook.\n", + "sudo apt update && sudo apt install -y \\\n", + " software-properties-common \\\n", + " build-essential \\\n", + " cmake \\\n", + " git \\\n", + " libx11-6 \\\n", + " libgl1\n", + "\n", + "# Install Intel GPU runtime for OpenCL for using Intel GPU device with OpenVINO\n", + "sudo add-apt-repository -y ppa:kobuk-team/intel-graphics\n", + "sudo apt update\n", + "sudo apt install -y --no-install-recommends \\\n", + " libze-intel-gpu1 \\\n", + " intel-opencl-icd\n", + "\n", + "# Create a python 3.10 environment, conda can be used to manage environments:\n", + "conda install python=3.10\n", + "conda create -n ovpp310 python=3.10\n", + "conda activate ovpp310\n", + "# conda deactivate\n", + "# conda env remove -n ovpp310\n", + "```\n", + "\n", + "**Python 3.10** is recommended to run this notebook. We also recommend running the notebook in a _virtual environment_. \n", + "You only need a Jupyter server to start and select python environment in the kernel. \n", + "For details, please refer to [Installation Guide](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/README.md#-installation-guide).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1106d2ec", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "# Fetch `notebook_utils` module\n", + "import requests\n", + "from pathlib import Path\n", + "\n", + "if not Path(\"notebook_utils.py\").exists():\n", + " r = requests.get(\n", + " url=\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py\",\n", + " )\n", + " open(\"notebook_utils.py\", \"w\").write(r.text)\n", + "\n", + "if not Path(\"cmd_helper.py\").exists():\n", + " r = requests.get(\n", + " url=\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py\",\n", + " )\n", + " open(\"cmd_helper.py\", \"w\").write(r.text)\n", + "\n", + "if not Path(\"pip_helper.py\").exists():\n", + " r = requests.get(\n", + " url=\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py\",\n", + " )\n", + " open(\"pip_helper.py\", \"w\").write(r.text)\n", + "\n", + "# Read more about telemetry collection at https://github.com/openvinotoolkit/openvino_notebooks?tab=readme-ov-file#-telemetry\n", + "from notebook_utils import collect_telemetry\n", + "\n", + "collect_telemetry(\"pointpillars.ipynb\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c246d5b", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "from cmd_helper import clone_repo\n", + "from pip_helper import pip_install\n", + "\n", + "repo_dir = Path(\"openvino_contrib\").resolve(strict=False)\n", + "pp_dir = Path(repo_dir, \"modules\", \"3d\", \"pointPillars\")\n", + "revision = \"962f5e1\"\n", + "clone_repo(\"https://github.com/openvinotoolkit/openvino_contrib.git\", revision)" + ] + }, + { + "cell_type": "markdown", + "id": "07229d50", + "metadata": {}, + "source": [ + "[back to top ⬆️](#Table-of-contents:)\n", + "\n", + "#### **Install python packages:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12350003", + "metadata": { + "vscode": { + 
"languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "# Install the required pip packages specific to this notebook\n", + "pip_install(\"-r\", str(pp_dir / \"requirements.txt\"), \"--extra-index-url\", \"https://download.pytorch.org/whl/cpu\")" + ] + }, + { + "cell_type": "markdown", + "id": "8ac1f4b8", + "metadata": {}, + "source": [ + "[back to top ⬆️](#Table-of-contents:)\n", + "\n", + "#### **Build Extensions:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14fe9f97", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import subprocess\n", + "import sys\n", + "\n", + "\n", + "def run(cmd, cwd=None, env=None):\n", + " print(\"Running:\", \" \".join(cmd), \"in\", cwd)\n", + " try:\n", + " subprocess.run(cmd, cwd=cwd, check=True, env=env)\n", + " except subprocess.CalledProcessError as e:\n", + " print(\"Command failed:\", e)\n", + " raise\n", + "\n", + "\n", + "# Build the openvino extension\n", + "run([\"rm\", \"-rf\", str(Path(pp_dir, \"ov_extensions\", \"build\"))])\n", + "run([\"bash\", \"build.sh\"], cwd=str(Path(pp_dir, \"ov_extensions\")))\n", + "\n", + "# Build the pytorch extensions (will be used only to export the model)\n", + "run([\"rm\", \"-rf\", str(Path(pp_dir, \"build\"))])\n", + "run([\"rm\", \"-rf\", str(Path(pp_dir, \"pointpillars\", \"ops\", \"*.so\"))])\n", + "env = os.environ.copy()\n", + "env[\"CPU_BUILD\"] = \"1\"\n", + "run([sys.executable, \"setup.py\", \"build_ext\", \"--inplace\"], cwd=str(pp_dir), env=env)" + ] + }, + { + "cell_type": "markdown", + "id": "f41acf68", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "source": [ + "[back to top ⬆️](#Table-of-contents:)\n", + "
\n", + "\n", + "
\n", + "\n", + "#### **Exporting the model:**\n", + "\n", + "In the model declaration, we can see it has five layers. \n", + "Out of these five layers, the first layer is the Pillar Layer where a pytorch extension (voxelization) is used. \n", + "For exporting the model to OpenVINO format, we skip this layer and export the remaining layers to OpenVINO format. \n", + "During inference, we run the Pillar Layer separately using the openvino extension and then pass the output to the OpenVINO model for further processing.\n", + "\n", + "```python\n", + "class NeuralNetworkPortion(nn.Module):\n", + " \"\"\"Neural network portion: PillarEncoder + Backbone + Neck + Head\"\"\"\n", + "\n", + " def __init__(self, model):\n", + " super().__init__()\n", + " self.pillar_encoder = model.pillar_encoder\n", + " self.backbone = model.backbone\n", + " self.neck = model.neck\n", + " self.head = model.head\n", + "\n", + " def forward(self, pillars, coors, npoints):\n", + " pillar_features = self.pillar_encoder(pillars, coors, npoints)\n", + " xs = self.backbone(pillar_features)\n", + " x = self.neck(xs)\n", + " cls_preds, box_preds, dir_cls_preds = self.head(x)\n", + " return cls_preds, box_preds, dir_cls_preds\n", + "```\n", + "\n", + "Getting the neural network portion of the PointPillars model:\n", + "\n", + "```python\n", + "full_model = PointPillars()\n", + "checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=False)\n", + "full_model.load_state_dict(checkpoint)\n", + "full_model.eval()\n", + "\n", + "nn_portion = NeuralNetworkPortion(full_model)\n", + "```\n", + "\n", + "Then defining dummy inputs for the model to export:\n", + "\n", + "```python\n", + "dummy_pillars = torch.randn(max_voxels, max_points, 4)\n", + "dummy_npoints = torch.randint(1, max_points, (max_voxels,)).long()\n", + "\n", + "vx, vy, vz = voxel_size[0], voxel_size[1], voxel_size[2]\n", + "x_l = int((point_cloud_range[3] - point_cloud_range[0]) / vx)\n", + "y_l = int((point_cloud_range[4] - point_cloud_range[1]) / vy)\n", + "z_l = int((point_cloud_range[5] - point_cloud_range[2]) / vz)\n", + "dummy_coors = torch.empty((max_voxels, 4), dtype=torch.long)\n", + "dummy_coors[:, 0] = 0 # batch index\n", + "if z_l > 0:\n", + " dummy_coors[:, 1] = torch.randint(0, z_l, (max_voxels,))\n", + "else:\n", + " dummy_coors[:, 1] = 0\n", + "dummy_coors[:, 2] = torch.randint(0, y_l, (max_voxels,))\n", + "dummy_coors[:, 3] = torch.randint(0, x_l, (max_voxels,))\n", + "```\n", + "\n", + "Finally, exporting the model to OpenVINO format:\n", + "\n", + "```python\n", + "with torch.no_grad():\n", + " traced_nn = torch.jit.trace(\n", + " nn_portion,\n", + " (dummy_pillars, dummy_coors, dummy_npoints),\n", + " check_trace=False,\n", + " strict=False\n", + " )\n", + " ov_nn_model = ov.convert_model(\n", + " traced_nn,\n", + " example_input=(dummy_pillars, dummy_coors, dummy_npoints),\n", + " input=[\n", + " ov.PartialShape([-1, max_points, 4]), # pillars\n", + " ov.PartialShape([-1, 4]), # coors\n", + " ov.PartialShape([-1]), # npoints\n", + " ]\n", + " )\n", + "```\n", + "\n", + "And saving the neural network model:\n", + "\n", + "```python\n", + "ov.save_model(ov_nn_model, nn_xml_path)\n", + "```\n", + "\n", + "The other preprocessing and post-processing steps (including voxelization, non-maximum suppression, etc.) are handled \n", + "separately using OpenVINO custom extensions. The details of the xml and other parameters are implemented in `create_pillar_layer_ir` and `create_postprocessing_ir` functions available in `export_ov_e2e.py`. 
\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81b436e5", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "# Export the PointPillars model to OpenVINO format\n", + "run(\n", + " [\n", + " sys.executable,\n", + " \"export_ov_e2e.py\",\n", + " \"--checkpoint\",\n", + " str(Path(pp_dir, \"pretrained\", \"epoch_160.pth\")),\n", + " \"--output\",\n", + " str(Path(pp_dir, \"pretrained\", \"pointpillars_ov\")),\n", + " ],\n", + " cwd=str(pp_dir),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7af815d9", + "metadata": {}, + "source": [ + "[back to top ⬆️](#Table-of-contents:)\n", + "#### **Select inference device**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a42be8c0", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "from notebook_utils import device_widget\n", + "\n", + "device = device_widget(default=\"CPU\", exclude=[\"NPU\"])\n", + "\n", + "device.value" + ] + }, + { + "cell_type": "markdown", + "id": "27b97dcf", + "metadata": {}, + "source": [ + "[back to top ⬆️](#Table-of-contents:)\n", + "\n", + "#### **Inference with OpenVINO:**\n", + "\n", + "For inference, implemented in `e2eOVInference.py`, first voxelizing the raw point cloud with a custom OpenVINO voxelization op, then running the compiled neural network IR on the resulting pillars/coors/npoints, and finally applying a custom OpenVINO post-processing op to decode boxes, apply NMS and output final bboxes, labels and scores.\n", + "\n", + "**Setup (`__init__`):**\n", + "\n", + "Load the JSON config produced by export_ov_e2e.py (paths to IRs and extension library) and create an OpenVINO Core instance.\n", + "Load the custom extension library and compile three models: voxelization (CPU), neural network (device, e.g. CPU/GPU), and post-processing (CPU).\n", + "\n", + "**Step 1 — Voxelization (custom OpenVINO op):**\n", + "\n", + "Call compiled voxel model with the raw points, a filtered point cloud as an N×4 float array (x, y, z, intensity).\n", + "Output: pillars (float tensor, each row holds the point for one pillar, zero‑padded to max_points), coors (int tensor, grid coordinates for each pillar), npoints (int tensor, number of valid points in each corresponding pillar).\n", + "\n", + "**Step 2 — Neural network inference:**\n", + "\n", + "Feed pillars, coors, npoints into the compiled NN IR.\n", + "Output: `cls_preds`, `box_preds`, `dir_cls_preds` (network predictions).\n", + "\n", + "**Step 3 — Post-processing (custom OpenVINO op):**\n", + "\n", + "Call compiled `postproc_model` with the network outputs.\n", + "Output: final bboxes, labels, and scores (already decoded, NMS applied, rotated IoU handled by the custom op).\n", + "Post-processing parameters (anchors, NMS thresholds, etc.) come from the config exported earlier." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4014604c", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "# Run inference with OpenVINO\n", + "run(\n", + " [\n", + " sys.executable,\n", + " \"e2eOVInference.py\",\n", + " \"--config\",\n", + " str(Path(pp_dir, \"pretrained\", \"pointpillars_ov_config.json\")),\n", + " \"--pc_path\",\n", + " str(Path(pp_dir, \"pointpillars\", \"dataset\", \"demo_data\", \"test\", \"000002.bin\")),\n", + " ],\n", + " cwd=str(pp_dir),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92500019", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "# Run test with OpenVINO for Kitti evaluation\n", + "run(\n", + " [\n", + " sys.executable,\n", + " \"test-e2eOV.py\",\n", + " \"--config\",\n", + " str(Path(pp_dir, \"pretrained\", \"pointpillars_ov_config.json\")),\n", + " \"--pc_path\",\n", + " str(Path(pp_dir, \"pointpillars\", \"dataset\", \"demo_data\", \"test\", \"000002.bin\")),\n", + " \"--device\",\n", + " device.value,\n", + " ],\n", + " cwd=str(pp_dir),\n", + ")\n", + "\n", + "# With nn in GPU\n", + "# run([sys.executable, \"test-e2eOV.py\", \"--config\", str(Path(pp_dir, \"pretrained\", \"pointpillars_ov_config.json\")),\n", + "# \"--pc_path\", str(Path(pp_dir, \"pointpillars\", \"dataset\", \"demo_data\", \"test\", \"000002.bin\")), \"--device\", \"GPU\"], cwd=str(pp_dir))" + ] + }, + { + "cell_type": "markdown", + "id": "2b4643da", + "metadata": {}, + "source": [ + "[back to top ⬆️](#Table-of-contents:)\n", + "\n", + "#### **KITTI bin to PCD:**\n", + "\n", + "To convert KITTI dataset files to PCD format, python scripts are provided in the `utils` folder of the PointPillars repository. \n", + "Below are example commands to convert a single `.bin` file and an entire KITTI split to PCD format." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e85b4558", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "# Convert single KITTI .bin file to .pcd\n", + "run([sys.executable, \"pointpillars/utils/convert_bin_to_pcd.py\", \"pointpillars/dataset/demo_data/test/000002.bin\", \"--overwrite\", \"--verify\"], cwd=str(pp_dir))\n", + "\n", + "# Convert entire KITTI val split to .pcd files\n", + "# python pointpillars/utils/convert_kitti_split_to_pcd.py --data-root --split val --overwrite --verify" + ] + }, + { + "cell_type": "markdown", + "id": "d54d54e4", + "metadata": {}, + "source": [ + "[back to top ⬆️](#Table-of-contents:)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".pp310venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.19" + }, + "openvino_notebooks": { + "imageUrl": "", + "tags": { + "categories": [ + "Model Demos" + ], + "libraries": [], + "other": [], + "tasks": [ + "Object Detection" + ] + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}