Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,7 @@ xnnpack_cc_library(
":microparams_h",
":operator_h",
":xnnpack_h",
"@FXdiv",
],
)

Expand Down
2 changes: 2 additions & 0 deletions BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,7 @@ xnnpack_source_set("scalar_microkernels") {
deps = [
":microkernel_defs",
":microkernel_headers",
"//third_party/fxdiv",
]
sources = ALL_SCALAR_MICROKERNEL_SRCS
}
Expand Down Expand Up @@ -965,6 +966,7 @@ xnnpack_source_set("xnnpack") {
":scalar_microkernels",
":subgraph",
":table",
"//third_party/fxdiv",
]
if (xnnpack_enable_arm_kleidiai) {
deps += [ "//third_party/kleidiai" ]
Expand Down
36 changes: 35 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,16 @@ IF(NOT XNNPACK_USE_SYSTEM_LIBS)
EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/cpuinfo-download")
SET(CPUINFO_SOURCE_DIR "${CMAKE_BINARY_DIR}/cpuinfo-source" CACHE STRING "cpuinfo source directory")
ENDIF()

IF(NOT DEFINED FXDIV_SOURCE_DIR)
  # No FXdiv checkout was supplied by the caller: fetch one at configure time.
  # cmake/DownloadFXdiv.cmake is instantiated as a standalone helper project,
  # which is then configured and built with the same generator as this build.
  MESSAGE(STATUS "Downloading FXdiv to ${CMAKE_BINARY_DIR}/FXdiv-source (define FXDIV_SOURCE_DIR to avoid it)")
  CONFIGURE_FILE(cmake/DownloadFXdiv.cmake "${CMAKE_BINARY_DIR}/FXdiv-download/CMakeLists.txt")
  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/FXdiv-download"
    RESULT_VARIABLE FXDIV_DOWNLOAD_CONFIGURE_RESULT)
  # Stop here with a clear message instead of failing later on a missing
  # source directory. A non-numeric result (process could not be started)
  # also compares unequal to 0 and is treated as a failure.
  IF(NOT FXDIV_DOWNLOAD_CONFIGURE_RESULT EQUAL 0)
    MESSAGE(FATAL_ERROR "Failed to configure the FXdiv download project (result: ${FXDIV_DOWNLOAD_CONFIGURE_RESULT}); define FXDIV_SOURCE_DIR to use an existing checkout")
  ENDIF()
  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/FXdiv-download"
    RESULT_VARIABLE FXDIV_DOWNLOAD_BUILD_RESULT)
  IF(NOT FXDIV_DOWNLOAD_BUILD_RESULT EQUAL 0)
    MESSAGE(FATAL_ERROR "Failed to download FXdiv (result: ${FXDIV_DOWNLOAD_BUILD_RESULT}); define FXDIV_SOURCE_DIR to use an existing checkout")
  ENDIF()
  # Cache the location so that later configure runs skip the download.
  SET(FXDIV_SOURCE_DIR "${CMAKE_BINARY_DIR}/FXdiv-source" CACHE STRING "FXdiv source directory")
ENDIF()

IF(NOT DEFINED PTHREADPOOL_SOURCE_DIR)
Expand Down Expand Up @@ -1300,7 +1309,32 @@ IF(NOT TARGET pthreadpool)
ENDIF()
TARGET_LINK_LIBRARIES(xnnpack-base INTERFACE pthreadpool)


# ---[ Configure FXdiv
# Provides the `fxdiv` target (unless a parent project already defined it) and
# links it privately into the XNNPACK targets that include <fxdiv.h>.
IF(NOT TARGET fxdiv)
  IF(NOT XNNPACK_USE_SYSTEM_LIBS)
    # Build the FXdiv sources as a subproject, without its tests and benchmarks.
    SET(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
    SET(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
    ADD_SUBDIRECTORY(
      "${FXDIV_SOURCE_DIR}"
      "${CMAKE_BINARY_DIR}/FXdiv")
  ELSE()
    # System-libraries mode: locate an installed fxdiv.h and wrap it in a target.
    FIND_FILE(FXDIV_HDR fxdiv.h PATH_SUFFIXES include PATHS "${FXDIV_SOURCE_DIR}")
    IF(NOT FXDIV_HDR)
      MESSAGE(FATAL_ERROR "Cannot find fxdiv")
    ENDIF()
    # Export the directory the header was actually found in. FIND_FILE may
    # resolve it outside FXDIV_SOURCE_DIR (which can be unset in this mode),
    # in which case "${FXDIV_SOURCE_DIR}/include" would point at the wrong place.
    GET_FILENAME_COMPONENT(XNNPACK_FXDIV_INCLUDE_DIR "${FXDIV_HDR}" DIRECTORY)
    ADD_LIBRARY(fxdiv STATIC "${FXDIV_HDR}")
    TARGET_INCLUDE_DIRECTORIES(fxdiv INTERFACE "${XNNPACK_FXDIV_INCLUDE_DIR}")
    # The target has no compilable sources, so the linker language must be set explicitly.
    SET_PROPERTY(TARGET fxdiv PROPERTY LINKER_LANGUAGE C)
  ENDIF()
ENDIF()
IF(XNNPACK_BUILD_ALL_MICROKERNELS)
  TARGET_LINK_LIBRARIES(xnnpack-microkernels-all PRIVATE fxdiv)
ENDIF()
TARGET_LINK_LIBRARIES(xnnpack-microkernels-prod PRIVATE fxdiv)
TARGET_LINK_LIBRARIES(xnnpack-indirection PRIVATE fxdiv)
IF(XNNPACK_BUILD_LIBRARY)
  TARGET_LINK_LIBRARIES(XNNPACK PRIVATE fxdiv)
ENDIF()

IF(XNNPACK_BUILD_LIBRARY)
INSTALL(TARGETS XNNPACK xnnpack-microkernels-prod
Expand Down
5 changes: 3 additions & 2 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,16 @@ http_archive(
urls = ["https://github.com/google/benchmark/archive/7da00e8f6763d6e8c284d172c9cfcc5ae0ce9b7a.zip"],
)
# LINT.ThenChange(cmake/DownloadGoogleBenchmark.cmake)

# LINT.IfChange(FXdiv)
# FXdiv library, used for repeated integer division by the same factor
http_archive(
name = "FXdiv",
sha256 = "ab7dfb08829bee33dca38405d647868fb214ac685e379ec7ef2bebcd234cd44d",
strip_prefix = "FXdiv-b408327ac2a15ec3e43352421954f5b1967701d1",
urls = ["https://github.com/Maratyszcza/FXdiv/archive/b408327ac2a15ec3e43352421954f5b1967701d1.zip"],
)


# LINT.ThenChange(cmake/DownloadFXdiv.cmake)

# LINT.IfChange(pthreadpool)
# pthreadpool library, used for parallelization
Expand Down
1 change: 1 addition & 0 deletions build_params.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ XNNPACK_PARAMS_FOR_ARCH = {
],
extra_deps = [
"//src/configs:config_hdrs",
"@FXdiv",
],
),

Expand Down
30 changes: 30 additions & 0 deletions cmake/DownloadFXdiv.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Standalone helper project whose only job is to fetch the FXdiv sources.
# NOTE: the top-level CMakeLists.txt instantiates this file with CONFIGURE_FILE,
# so variable references in it are expanded when the parent project is configured.
CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)

# NONE: no languages are enabled for this project.
PROJECT(fxdiv-download NONE)

# Set file timestamps to the time of extraction.
# The policy is only set when the running CMake version knows about it.
IF(POLICY CMP0135)
CMAKE_POLICY(SET CMP0135 NEW)
ENDIF()

# The archive URL and SHA256 below are pinned to a specific FXdiv commit. The
# LINT markers reference the FXdiv entry in MODULE.bazel; presumably the two
# must be kept in sync.
# All configure/build/install/test commands are empty strings: the archive is
# downloaded and unpacked into SOURCE_DIR, and nothing else is run.
# LINT.IfChange
INCLUDE(ExternalProject)
ExternalProject_Add(fxdiv
URL https://github.com/Maratyszcza/FXdiv/archive/b408327ac2a15ec3e43352421954f5b1967701d1.zip
URL_HASH SHA256=ab7dfb08829bee33dca38405d647868fb214ac685e379ec7ef2bebcd234cd44d
SOURCE_DIR "${CMAKE_BINARY_DIR}/FXdiv-source"
BINARY_DIR "${CMAKE_BINARY_DIR}/FXdiv"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
# LINT.ThenChange(../MODULE.bazel:FXdiv)
61 changes: 15 additions & 46 deletions src/indirection.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "src/xnnpack/indirection.h"

#include <assert.h>
#include <fxdiv.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
Expand Down Expand Up @@ -40,25 +41,17 @@ void xnn_indirection_init_conv2d(
size_t input_padding_top,
size_t input_padding_left)
{
assert(output_height != 0);
assert(output_width != 0);
const size_t output_size = output_height * output_width;
const size_t kernel_size = kernel_height * kernel_width;

size_t cur_oy = output_start / output_width;
size_t cur_ox = output_start % output_width;
const size_t clamp_oy = (output_size - 1) / output_width;
const size_t clamp_ox = (output_size - 1) % output_width;
const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width);

for (size_t output_tile_start = output_start; output_tile_start < output_end; output_tile_start += output_tile_size) {
size_t oy = cur_oy;
size_t ox = cur_ox;
for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {
const size_t output_index = output_tile_start + output_tile_offset;
const bool clamped = output_index >= output_size - 1;
const size_t output_x = clamped ? clamp_ox : ox;
const size_t output_y = clamped ? clamp_oy : oy;

const size_t output_index = min(output_tile_start + output_tile_offset, output_size - 1);
const struct fxdiv_result_size_t output_y_x = fxdiv_divide_size_t(output_index, output_width_divisor);
const size_t output_x = output_y_x.remainder;
const size_t output_y = output_y_x.quotient;
for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {
const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top;
if (input_y < input_height) {
Expand All @@ -81,17 +74,7 @@ void xnn_indirection_init_conv2d(
}
}
}

if (output_index < output_size - 1) {
ox++;
if (ox == output_width) {
ox = 0;
oy++;
}
}
}
cur_oy = oy;
cur_ox = ox;
}
}

Expand All @@ -118,26 +101,22 @@ void xnn_indirection_init_deconv2d(
const size_t tiled_output_size = round_up(output_size, output_tile_size);
const size_t kernel_size = kernel_height * kernel_width;

size_t cur_oy = 0;
size_t cur_ox = 0;
const size_t clamp_oy = (output_size - 1) / output_width;
const size_t clamp_ox = (output_size - 1) % output_width;
const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width);
const struct fxdiv_divisor_size_t stride_height_divisor = fxdiv_init_size_t(stride_height);
const struct fxdiv_divisor_size_t stride_width_divisor = fxdiv_init_size_t(stride_width);

for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {
size_t oy = cur_oy;
size_t ox = cur_ox;
for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {
const size_t output_index = output_tile_start + output_tile_offset;
const bool clamped = output_index >= output_size - 1;
const size_t output_x = clamped ? clamp_ox : ox;
const size_t output_y = clamped ? clamp_oy : oy;

const size_t output_index = min(output_tile_start + output_tile_offset, output_size - 1);
const struct fxdiv_result_size_t output_y_x = fxdiv_divide_size_t(output_index, output_width_divisor);
const size_t output_x = output_y_x.remainder;
const size_t output_y = output_y_x.quotient;
for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {
const size_t y = output_y + padding_top - kernel_y * dilation_height;
const size_t input_y = y / stride_height;
const size_t input_y = fxdiv_quotient_size_t(y, stride_height_divisor);
for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
const size_t x = output_x + padding_left - kernel_x * dilation_width;
const size_t input_x = x / stride_width;
const size_t input_x = fxdiv_quotient_size_t(x, stride_width_divisor);
const size_t kernel_index = kernel_y * kernel_width + kernel_x;
const size_t index = output_tile_start * kernel_size + kernel_index * output_tile_size + output_tile_offset;
if (input_y * stride_height == y && input_y < input_height && input_x * stride_width == x && input_x < input_width) {
Expand All @@ -147,17 +126,7 @@ void xnn_indirection_init_deconv2d(
}
}
}

if (output_index < output_size - 1) {
ox++;
if (ox == output_width) {
ox = 0;
oy++;
}
}
}
cur_oy = oy;
cur_ox = ox;
}
}

Expand Down
5 changes: 4 additions & 1 deletion src/u8-lut32norm/u8-lut32norm-scalar.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include <assert.h>

#include <fxdiv.h>

#include "src/xnnpack/lut.h"


Expand Down Expand Up @@ -37,11 +39,12 @@ void xnn_u8_lut32norm_ukernel__scalar(
const uint32_t vsum = compute_sum(n, x, t);
assert(vsum != 0);

struct fxdiv_divisor_uint32_t vsum_divisor = fxdiv_init_uint32_t(vsum);
const uint32_t vrounding = (vsum >> 1);
do {
const size_t vx = *x++;
const uint32_t vt = t[vx];
const uint32_t vq = ((vt << 8) + vrounding) / vsum;
const uint32_t vq = fxdiv_quotient_uint32_t((vt << 8) + vrounding, vsum_divisor);
const uint8_t vy = vq > 255 ? UINT8_C(255) : (uint8_t) vq;
*y++ = vy;
} while (--n != 0);
Expand Down
Loading