-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtenpack.h
161 lines (145 loc) · 4.57 KB
/
tenpack.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/**
* @file tenpack.h
* @author Ashot Vardanian
* @version 0.1
* @date 2022-08-02
*
* @brief Micro-library exporting variable-length encoded data into regular Tensors,
* that Machine Learning libraries can accept. Can submit both singular media objects,
* and batches, as well as streams, decoding and reshaping frames along the way.
* TenPack has very little logic internally, and mostly just links codecs/libs together.
* Supports Apache Arrow inputs.
*
* @section Supported Formats
* Images: JPEG, PNG, GIF?
* Audios: WAV?
* Videos: MPEG4?
*
* @section Supported Transforms
* Images: resize, transpose.
* Audios: resize, fft.
* Videos: resize, transpose.
*/
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
/**
 * @brief Media formats known to the TenPack interface.
 *
 * Enumerator values are implicit and sequential, starting from zero, so
 * inserting or reordering entries changes the numeric value of every
 * enumerator that follows.
 *
 * The same-named typedef lets both C and C++ translation units spell the
 * type as plain `tenpack_format_t`; in C an enum tag alone is not a type name.
 */
typedef enum tenpack_format_t {
    // Images:
    tenpack_bmp_k,
    tenpack_gif_k,
    tenpack_jxr_k,
    tenpack_png_k,
    tenpack_psd_k,
    tenpack_dwg_k,
    tenpack_ico_k,
    tenpack_jpeg_k,
    tenpack_jpeg2000_k,
    // Audio:
    tenpack_wav_k,
    // Video:
    tenpack_avi_k,
    tenpack_mpeg4_k,
} tenpack_format_t;
/**
 * @brief Describes the shape of one exported media object.
 *
 * The same-named typedef lets both C and C++ translation units spell the
 * type as plain `tenpack_dimensions_t`; in C a struct tag alone is not a
 * type name.
 */
typedef struct tenpack_dimensions_t {
    // Width: a spatial dimension.
    size_t width;
    // Height: a spatial dimension.
    size_t height;
    // Number of channels.
    size_t channels;
    // Number of frames: a temporal dimension.
    size_t frames;
    // Size, in bytes, of every exported numerical value.
    size_t bytes_per_scalar;
} tenpack_dimensions_t;
// Read-only pointer to encoded input bytes supplied by the caller.
typedef const void *tenpack_input_t;
// Writable pointer to the memory that receives exported values.
typedef void *tenpack_output_t;
// Untyped context handle, passed by address to the library calls and
// handed to tenpack_context_free() for release.
typedef void *tenpack_ctx_t;
bool tenpack_context_free(tenpack_ctx_t);
/**
 * @brief Guesses the format of binary data just by comparing various binary signatures.
 *
 * @param[in]    data    Pointer to the start of binary media data.
 * @param[in]    len     Length of the binary blob.
 * @param[inout] format  Pointer, where the guess will be written.
 * @param[inout] context Pointer to the library context handle.
 *                       NOTE(review): how this call uses the context is not visible
 *                       from the header - confirm against the implementation.
 *
 * @return true  If the type was successfully guessed.
 * @return false If error occurred.
 */
bool tenpack_guess_format(
    tenpack_input_t data,
    size_t len,
    tenpack_format_t *format,
    tenpack_ctx_t *context);
/**
 * @brief Guesses the dimensions of binary media data whose format is already known.
 *
 * @param[in]    data    Pointer to the start of binary media data.
 * @param[in]    len     Length of the binary blob.
 * @param[in]    format  The format of data in `[data, data+len)`.
 * @param[inout] dims    Output dimensions of the media object.
 *                       > For JPEG and PNG, 3 dims: width, height, channels.
 *                       > For GIF, 3 dims: width, height, frames.
 *                       > For AVI, 4 dims: width, height, channels, frames.
 * @param[inout] context Pointer to the library context handle.
 *                       NOTE(review): how this call uses the context is not visible
 *                       from the header - confirm against the implementation.
 *
 * @return true  If the dimensions were successfully guessed.
 * @return false If error occurred.
 */
bool tenpack_guess_dimensions(
    tenpack_input_t data,
    size_t len,
    tenpack_format_t format,
    tenpack_dimensions_t *dims,
    tenpack_ctx_t *context);
/**
 * @brief Unpacks binary media data of a known format into the `output` buffer.
 *
 * @param[in]    data              Pointer to the start of binary media data.
 * @param[in]    len               Length of the binary blob.
 * @param[in]    format            The format of data in `[data, data+len)`.
 * @param[in]    output_dimensions Dimensions of the exported content.
 *                                 NOTE(review): presumably the shape `output` is
 *                                 expected to hold - confirm.
 * @param[out]   output            Destination for the exported values.
 *                                 NOTE(review): presumably caller-allocated and large
 *                                 enough for `output_dimensions` - confirm.
 * @param[inout] context           Pointer to the library context handle.
 *                                 NOTE(review): how this call uses the context is not
 *                                 visible from the header - confirm.
 *
 * @return true  If the data was successfully unpacked.
 * @return false If error occurred.
 */
bool tenpack_unpack(
    tenpack_input_t data,
    size_t len,
    tenpack_format_t format,
    tenpack_dimensions_t const *output_dimensions,
    tenpack_output_t output,
    tenpack_ctx_t *context);
/**
 * @brief Transposes the content from an Array-of-Structures order into the
 *        Structure-of-Arrays order that machine-learning libraries are more
 *        familiar with. The overall volume of content-populated memory stays
 *        the same, yet intermediate allocations may still take place for
 *        the `context`.
 */
bool tenpack_transpose(
    tenpack_dimensions_t const *dimensions,
    tenpack_output_t output,
    tenpack_ctx_t *context);
/**
 * @brief Takes an Apache Arrow binary "StringsArray" and runs the entire
 *        introspection and extraction pipeline on each of its members:
 *        > guess format,
 *        > guess dimensions,
 *        > unpack,
 *        > transpose.
 */
bool tenpack_export(
    tenpack_input_t input_tape_start,
    uint32_t const *input_tape_offsets,
    size_t count,
    tenpack_output_t output_tensor_start,
    tenpack_dimensions_t const *output_sample_dimensions,
    tenpack_ctx_t *context);
// tenpack_context_free() is declared once, right after the tenpack_ctx_t
// typedef earlier in this header; the redundant redeclaration that used to
// sit here was dropped.
#ifdef __cplusplus
} /* end extern "C" */
#endif