diff --git a/docs/guides/cinn/cinn_ir_cn.md b/docs/guides/cinn/cinn_ir_cn.md new file mode 100644 index 00000000000..cf0b6c9b90d --- /dev/null +++ b/docs/guides/cinn/cinn_ir_cn.md @@ -0,0 +1,510 @@ +# CINN 后端 Ir 数据结构概览 + +## 一、数据结构概况 + +Ir 着重学习两部分:数据和运算,例如 Datatype,Operation + +重点 Ir,层级自顶向下包括:Module、Block、ScheduleBlock、ScheduleBlockRealize、For、Load、Store、Var、\_Tensor\_ + +对于本文未梳理的 Ir,可以查阅 ir_printer.h 头文件,查看相应 Ir 的打印信息 + +## 二、继承关系梳理 + +本文采用 ADT(Algebraic Data Type,代数数据类型)对 CINN 后端 Ir 的继承关系进行梳理。 + +ADT 和继承的对应关系可以理解为: + +- ADT:IrNode = ExprNode | FunctionBase | IntrinsicOp 表示 IrNode 是由 ExprNode | FunctionBase | IntrinsicOp 组成的 sum type + +- 继承关系:IrNode 是基类,ExprNode | FunctionBase | IntrinsicOp 是 IrNode 的三个派生类 + +后续两个代码框分别总结了 IrNode 和 IrNodeRef 与其对应子类的派生关系: + +``` +IrNode = ExprNode + | FunctionBase + | IntrinsicOp + +ExprNode = _Buffer_ + | _BufferRange_ + | Cast + | Let + | Call + | _Var_ + | Reduce + | Select + | Load + | Store + | Alloc + | Free + | IfThenElse + | For + | PolyFor + | Ramp + | Broadcast + | Product + | Sum + | Block + | ScheduleBlock + | ScheduleBlockRealize + | _Module_ + | PrimitiveNode + | _Tensor_ + | IntImm + | UIntImm + | FloatImm + | StringImm + | UnaryOpNode + | BinaryOpNode + +FunctionBase = _Operation_ + +IntrinsicOp = BufferGetDataHandle + | BufferGetDataConstHandle + | PodValueToX + | BufferCreate + | GetAddr + | ArgsConstruct + | BuiltinIntrin + +UnaryOpNode = Minus | Not + +BinaryOpNode = Add + | Sub + | Mul + | Div + | Mod + | Min + | Max + | EQ + | NE + | LT + | LE + | GT + | GE + | And + | Or + | FracOp + +_Operation_ = PlaceholderOp + | CallOp + | PrecedingViewOp + | BufferShareOp + | ComputeOp +``` + +``` +IrNodeRef = Module + | Buffer + | BufferRange + | LoweredFunc + | Var + | Tensor + | Expr + | FunctionRef + +FunctionRef = Operation +``` + +## 三、子类 Ir 详解 + +### 3.1 Module + +Module 可以理解为一个编译的基本单元 + +``` +struct _Module_ : public ExprNode<_Module_> { + std::string name; + Target target; + // 【所有 Kernel 的输入输出,待确认】 + std::vector buffers; + // 【一个 function 对应一个 Loweredfunc,即一个 Kernel】 + std::vector functions; + // 【当前字段使用不多】 + std::vector submodules; +}; +``` + +### 3.2 Block、ScheduleBlock、ScheduleBlockRealizeIr + +**疑问:** Block、ScheduleBlock 和 ScheduleBlockRealize 的关系如何理解? + +**回答:** 最外层是 Block,Block 里面的每个 stmt 都是 ScheduleBlockRealize,也可以是深层的 For 循环。ScheduleBlockRealize 里面有 ScheduleBlock 字段。schedule 过程中修改的是 ScheduleBlockRealize 内的 iter_values。ScheduleBlockRealize 和 ScheduleBlock 一般是在 for 循环的最内层 + +``` + serial for (i_fused, 0, 16) + { + ScheduleBlock(var_36) + { + // 【i0_14 是用于 Load 和 Store 的下标】 + i0_14 = axis.bind(i_fused) + var_36[i0_14] = (var_5[i0_14] * factor_1[i0_14]) + } + } +``` +``` +struct Block : public ExprNode { + std::vector stmts; +}; +``` +``` +// ScheduleBlockRealize is used to execute ScheduleBlock with the binding +// iter_values +struct ScheduleBlockRealize : public ExprNode { + // values of the iter_vars + // 【i_fused】【iter_values 是循环变量 i_fused 的仿射函数】 + std::vector iter_values; + Expr schedule_block; +}; +``` +``` +// ScheduleBlock is the unit of schedule IR which represents tensor's +// computation +struct ScheduleBlock : public ExprNode { + // 【i0_14】 + std::vector iter_vars; + // BufferRange(s) which is read in this schedule block, it is used to + // analyze, not a real computation expression. Must be AST DFS order. + // 【var_5,factor_1】【读取的 buffer】 + std::vector read_buffers; + // BufferRange(s) which is written in this schedule block, it is used to + // analyze, not a real computation expression. Must be AST DFS order. + // 【var_36】【写入的 buffer】 + std::vector write_buffers; + // Additional attributes about this schedulable block, + // which take some auxiliary hints for future transformations. + std::map attrs; + std::string name; + // 【Store 节点,可以理解为等号,表达 var_36[i0_14] = (var_5[i0_14] * factor_1[i0_14]) 】 + Expr body; +}; +``` +### 3.3 For +``` +struct For : public ExprNode, public ForBase { + //! The loop variable. + Var loop_var; + //【循环下界】! The minimum value of the iteration. + Expr min; + //【循环上界】! The extent of the iteration. + Expr extent; + //【body 一定是个 Block{},Block 内可以有多条语句】 + Expr body; + + DeviceAPI device_api; + + LLVMForLoopMeta metadata; +}; + +struct ForBase { + private: + // 【串行并行向量化】 + ForType for_type_{ForType::Serial}; + VectorizeInfo vectorize_info_; + // 【为 CUDA 服务,bind 到 block 和 thread 的 idx】 + BindInfo bind_info_; +}; +``` +### 3.4 Load 和 Store + +注意:Load 里面实际上有两个成员变量:Expr tensor(来自 LoadStoreAddrMnger) 和 std::vector indices + +``` +/** + * Load the value from a buffer (as an array). + */ +struct Load : public ExprNode, public LoadStoreAddrMnger { + std::vector indices; +}; + +struct LoadStoreAddrMnger { + Expr tensor; // Should be a tensor or a scalar. + //! Tell whether the address is a tensor. + bool is_addr_tensor() const; + //! Tell whether the address is a scalar. + bool is_addr_scalar() const; +}; +``` +``` +/** + * Store a `value` to the buffer at a given `index`. + */ +struct Store : public ExprNode, public LoadStoreAddrMnger { + Expr value; + // 【下标】【indices.size() == len(tensor.shape)】 + std::vector indices; +}; +``` + +### 3.5 Tensor、Buffer 和 Var + +**疑问:** Tensor、Buffer 和 Var 的区别? + +**回答:** Tensor 像是 Buffer 的一个 View,例如多个 Tensor 可以指向同一个 Buffer。_Var_指迭代量,可以理解为标量的一个值 + +``` +class _Buffer_ : public ExprNode<_Buffer_> { + public: + //! The shape of the buffer. + std::vector shape; + //! The strides of each dimension. + // This can be empty, indicating that the array is contiguous. + std::vector strides; + //! The name of the buffer. + std::string name; + //! The storage scope of the buffer, empty if global. + std::string scope; + //! The offset in terms of number of dtype elements (including lanes). + Expr elem_offset; + //! Factor of elem_offset field. + // elem_offset is guaranteed to be multiple of offset_factor. + int offset_factor{0}; + //! The place the buffer locates. + Target target{UnkTarget()}; + //! Aignment requirement of data pointer in bytes. + mutable int data_alignment{0}; + //! The memory type of the buffer. + MemoryType memory_type{MemoryType::Heap}; + + //! The data type of the elements. + //! This is different from `type`, a buffer's type should always be + //! `cinn_buffer_t*`. + Type dtype; +}; +``` + +注意:_Tensor_里面包含成员变量 Buffer + +``` +/** + * _Tensor_ holds the content of a Tensor. + * + * NOTE(All) Some rules: + * + * 1. a _Tensor_ is a node in SSA, so every tensor's name should be unique, + * 2. never try to change a tensor's name, that will cause chaos. + */ +class _Tensor_ : public ExprNode<_Tensor_> { + public: + //! Shape of this tensor(buffer). + std::vector shape; + //! The domain of each axis(without reduce_axis) + // TODO(Superjomn) support ISL domain. + std::vector domain; + + std::vector reduce_axis; + //! The operation that generates Tensor. + FunctionRef operation; + //! Name of this tensor. + std::string name; + //! The bound buffer, for each tensor if it is not inline. + Buffer buffer; + //! Normal axis. + mutable std::vector axis_; + + std::vector new_indices{}; +}; +``` + +注意:_Var_只作为迭代量,可以理解为标量的一个值,类似于 int a = 1,其中_Var_.name = a,包含 is_reduce_axis 信息 + +``` +/** + * Variable used as iterator value or bound definition. + */ +struct _Var_ : public ExprNode<_Var_> { + std::string name; + // 【当 Var 作为下标去 Load 和 Store 时,当前下标是否为 Reduce 的】 + bool is_reduce_axis{false}; + //! Lower bound and upper bound of a axis. + // @{ + // 【迭代量的最小值】 + Expr lower_bound; + // 【迭代量的最大值】 + Expr upper_bound; + // @} + + // ! Extra tag of this variable/axis. + std::string tag; +}; +``` + +### 3.6 其他 Ir + +本节并未囊括后端所有 Ir,在学习的过程中随时更新,本文档共同建设。 + +``` +Cast() : ExprNode(1) {} +``` + +``` +struct Let : public ExprNode { + Expr symbol; + Expr body; +}; +``` + +注意:Call 有成员变量 FunctionRef + +``` +struct Call : public ExprNode { + explicit Call(Type t) : ExprNode(t) {} + + //! The name of the function/intrinsic. + std::string name; + //! The arguments. + std::vector read_args; + std::vector write_args; + //! the attribute of this CallNode. + std::map attrs; + //! Type of calls. + CallType call_type; + //! The function to be called. + FunctionRef func; + //! The output value index if func's value is a tuple. + int value_index{-1}; +}; +``` + +Reduce 中 init :加法的初始值是 0,而乘法的初始值是 1 + +``` +struct Reduce : public ExprNode { + enum ReduceType { + kSum = 0, + kSub, + kMul, + kDiv, + kMax, + kMin, + kAll, + kAny, + }; + + //! The initial value. + Expr init; + + // ! The body. + Expr body; + + utils::SmallVector reduce_axis; + + //! The type of the reduce operation. + ReduceType reduce_type; +}; +``` + +**疑问:** Alloc 中 condition 和 body 成员变量的语义? + +``` +/** + * Allocate a buffer with the given type and size. The buffer lives for at most + * the duration of the body statement, within which it is freed. + */ +struct Alloc : public ExprNode { + //! The destination of the allocation, this might be a buffer or a variable. + Expr destination; + //! Dimensions of this buffer (as a multi-dimensional array). + std::vector extents; + // NOTE the condition might be undefined, that means always true. + Expr condition; + // NOTE the body might be undefined, that means no specific logic other than + // default. + Expr body; +}; +``` + +**疑问:** 离开一个 ScheduleBlock 后,需要显示调用 Free,释放 Alloc 出来的结果吗? + +``` +/** + * Free the resources associated with the given buffer. + */ +struct Free : public ExprNode { + Expr destination; +}; +``` + +注意:IfThenElse 和 Select 的语义好像有重复? + +``` +struct Select : public ExprNode