|
| 1 | +/* |
| 2 | +This file is part of GUFI, which is part of MarFS, which is released |
| 3 | +under the BSD license. |
| 4 | +
|
| 5 | +
|
| 6 | +Copyright (c) 2017, Los Alamos National Security (LANS), LLC |
| 7 | +All rights reserved. |
| 8 | +
|
| 9 | +Redistribution and use in source and binary forms, with or without modification, |
| 10 | +are permitted provided that the following conditions are met: |
| 11 | +
|
| 12 | +1. Redistributions of source code must retain the above copyright notice, this |
| 13 | +list of conditions and the following disclaimer. |
| 14 | +
|
| 15 | +2. Redistributions in binary form must reproduce the above copyright notice, |
| 16 | +this list of conditions and the following disclaimer in the documentation and/or |
| 17 | +other materials provided with the distribution. |
| 18 | +
|
| 19 | +3. Neither the name of the copyright holder nor the names of its contributors |
| 20 | +may be used to endorse or promote products derived from this software without |
| 21 | +specific prior written permission. |
| 22 | +
|
| 23 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| 24 | +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 25 | +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| 26 | +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, |
| 27 | +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| 28 | +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 29 | +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 30 | +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
| 31 | +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
| 32 | +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 33 | +
|
| 34 | +
|
| 35 | +From Los Alamos National Security, LLC: |
| 36 | +LA-CC-15-039 |
| 37 | +
|
| 38 | +Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved. |
| 39 | +Copyright 2017. Los Alamos National Security, LLC. This software was produced |
| 40 | +under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National |
| 41 | +Laboratory (LANL), which is operated by Los Alamos National Security, LLC for |
| 42 | +the U.S. Department of Energy. The U.S. Government has rights to use, |
| 43 | +reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS |
| 44 | +ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR |
| 45 | +ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is |
| 46 | +modified to produce derivative works, such modified software should be |
| 47 | +clearly marked, so as not to confuse it with the version available from |
| 48 | +LANL. |
| 49 | +
|
| 50 | +THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS |
| 51 | +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| 52 | +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 53 | +ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR |
| 54 | +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 55 | +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT |
| 56 | +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 57 | +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 58 | +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
| 59 | +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY |
| 60 | +OF SUCH DAMAGE. |
| 61 | +*/ |
| 62 | + |
| 63 | +/* |
| 64 | + * An example Lustre plugin for GUFI. |
| 65 | + * |
| 66 | + * I compiled this with: |
| 67 | + * |
| 68 | + * # set based on the lustre source location on your machine: |
| 69 | + * $ export LUSTRE_INCLUDE_DIR=/home/$USER/git/lustre-release/lustre/include |
| 70 | + * $ export LUSTRE_LIBRARY_DIR=/home/$USER/git/lustre-release/lustre/utils/.libs |
| 71 | + * |
| 72 | + * $ cd contrib |
| 73 | + * |
| 74 | + * $ gcc -g -O0 -c -fPIC -I../build/deps/sqlite3/include -I../include -I$LUSTRE_INCLUDE_DIR -I$LUSTRE_INCLUDE_DIR/uapi lustre_plugin.c |
| 75 | + * |
| 76 | + * $ gcc -shared -o liblustre_plugin.so lustre_plugin.o -L$LUSTRE_LIBRARY_DIR -llustreapi |
| 77 | + * |
| 78 | + * Note that this includes the sqlite3 header from the GUFI sources, but this should not |
| 79 | + * statically link sqlite3. Instead, it should dynamically link to the sqlite3 symbols in |
| 80 | + * the main GUFI binary when this code is dlopen()ed. |
| 81 | + * |
| 82 | + * I ran this with: |
| 83 | + * $ LD_LIBRARY_PATH=$LUSTRE_LIBRARY_DIR ./src/gufi_dir2index -U ../contrib/liblustre_plugin.so -n1 /mnt/lustre /tmp/gufi_index/ |
| 84 | + */ |
| 85 | + |
#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "plugin.h"
#include "sqlite3.h"
#include "lustre/lustreapi.h"
| 94 | + |
/*
 * Return a pointer to the last path component inside `path`.
 *
 * The result points into `path` itself; nothing is copied or modified. A
 * '/' that is the final character of the string does not start a new
 * component, so "dir/sub/" yields "sub/" and a path with no usable
 * separator yields `path` unchanged.
 */
static char *my_basename(char *path) {
    char *last = path;

    for (char *cursor = path; *cursor != '\0'; cursor++) {
        /* Only a separator that is followed by more text starts a component. */
        if (cursor[0] == '/' && cursor[1] != '\0') {
            last = cursor + 1;
        }
    }

    return last;
}
| 115 | + |
/*
 * Per-OST stripe counters. For every file processed, each stripe of that
 * file adds one to the counter of the OST the stripe lives on.
 */
struct stripe_tracker {
    /*
     * stripe_count[i] is the number of stripes seen so far on the OST with
     * index i.
     */
    uint64_t *stripe_count;
    /*
     * Number of elements allocated in stripe_count. The array has to grow
     * whenever an OST index at or beyond this size is encountered.
     */
    uint32_t array_size;
    /*
     * Highest OST index seen so far. This is usually smaller than
     * array_size, so tracking it separately lets the code that saves the
     * counters to the database skip the unused tail of stripe_count.
     */
    uint32_t max_ost_idx;
};
| 140 | + |
| 141 | +/* |
| 142 | + * Allocate and Initialize a new stripe_tracker. |
| 143 | + */ |
| 144 | +static struct stripe_tracker *new_stripe_tracker(void) { |
| 145 | + uint32_t initial_size = 64; |
| 146 | + |
| 147 | + struct stripe_tracker *new = malloc(sizeof *new); |
| 148 | + if (!new) { |
| 149 | + return NULL; |
| 150 | + } |
| 151 | + |
| 152 | + new->stripe_count = calloc(initial_size, sizeof(*new->stripe_count)); |
| 153 | + if (!new->stripe_count) { |
| 154 | + free(new); |
| 155 | + return NULL; |
| 156 | + } |
| 157 | + |
| 158 | + new->array_size = initial_size; |
| 159 | + |
| 160 | + new->max_ost_idx = 0; |
| 161 | + |
| 162 | + return new; |
| 163 | +} |
| 164 | + |
| 165 | +/* |
| 166 | + * Clean up a stripe_tracker, freeing its allocations. |
| 167 | + */ |
| 168 | +static void destroy_stripe_tracker(struct stripe_tracker *p) { |
| 169 | + if (p) { |
| 170 | + free(p->stripe_count); |
| 171 | + } |
| 172 | + |
| 173 | + free(p); |
| 174 | +} |
| 175 | + |
| 176 | +/* |
| 177 | + * If necessary, grow the stripe_count array in `s` to be large enough to |
| 178 | + * accomodate `ost_index`. |
| 179 | + * |
| 180 | + * Returns 0 if growing succeeded, or 1 if it failed. |
| 181 | + */ |
| 182 | +static int grow_stripe_tracker(struct stripe_tracker *s, uint32_t ost_index) { |
| 183 | + if (ost_index > s->max_ost_idx) { |
| 184 | + s->max_ost_idx = ost_index; |
| 185 | + } |
| 186 | + if (ost_index < s->array_size) { |
| 187 | + /* Nothing needs to be done: yay! */ |
| 188 | + return 0; |
| 189 | + } |
| 190 | + |
| 191 | + uint32_t new_size = s->array_size; |
| 192 | + |
| 193 | + while (ost_index >= new_size) { |
| 194 | + if (new_size >= UINT32_MAX / 2) { |
| 195 | + /* In case we somehow get a filesystem with an insane number of OSTs, |
| 196 | + * don't let that overflow and ruin our array: */ |
| 197 | + return 1; |
| 198 | + } |
| 199 | + new_size *= 2; |
| 200 | + } |
| 201 | + |
| 202 | + uint64_t *new_array = realloc(s->stripe_count, new_size); |
| 203 | + if (!new_array) { |
| 204 | + return 1; |
| 205 | + } |
| 206 | + |
| 207 | + /* The new space is not initialized, so do that now: */ |
| 208 | + memset(new_array + s->array_size, 0, new_size - s->array_size); |
| 209 | + |
| 210 | + s->stripe_count = new_array; |
| 211 | + s->array_size = new_size; |
| 212 | + |
| 213 | + return 0; |
| 214 | +} |
| 215 | + |
| 216 | +/* |
| 217 | + * Given the `stripe_array` which contains `stripe_count` stripes, increment |
| 218 | + * the count for each OST that the stripe lives on. |
| 219 | + */ |
| 220 | +static void track_file_stripes(struct stripe_tracker *s, |
| 221 | + struct lov_user_ost_data_v1 *stripe_array, |
| 222 | + uint16_t stripe_count) { |
| 223 | + for (int i = 0; i < stripe_count; i++) { |
| 224 | + uint32_t ost_index = stripe_array[i].l_ost_idx; |
| 225 | + if (grow_stripe_tracker(s, ost_index)) { |
| 226 | + /* Just give up if we couldn't grow the stripe array large enough :( */ |
| 227 | + fprintf(stderr, "lustre plugin: could not allocate memory, information may be incomplete"); |
| 228 | + return; |
| 229 | + } |
| 230 | + |
| 231 | + s->stripe_count[ost_index] += 1; |
| 232 | + } |
| 233 | +} |
| 234 | + |
| 235 | +/* |
| 236 | + * Set up initial state for tracking Lustre stripe info. |
| 237 | + */ |
| 238 | +void *db_init(sqlite3 *db) { |
| 239 | + struct stripe_tracker *state = new_stripe_tracker(); |
| 240 | + if (!state) { |
| 241 | + fprintf(stderr, "lustre plugin: could not allocate memory to track stripe info"); |
| 242 | + return NULL; |
| 243 | + } |
| 244 | + |
| 245 | + static const char text[] = "CREATE TABLE lustre_summary (ost_index INTEGER PRIMARY KEY, num_files INTEGER);"; |
| 246 | + char *error; |
| 247 | + |
| 248 | + int res = sqlite3_exec(db, text, NULL, NULL, &error); |
| 249 | + if (res != SQLITE_OK) { |
| 250 | + fprintf(stderr, "lustre plugin: db_init(): error executing statement: %d %s\n", res, error); |
| 251 | + } |
| 252 | + |
| 253 | + sqlite3_free(error); |
| 254 | + |
| 255 | + return state; |
| 256 | +} |
| 257 | + |
| 258 | +/* |
| 259 | + * Save stripe tracking info to the database and clean up state. |
| 260 | + */ |
| 261 | +void db_exit(sqlite3 *db, void *user_data) { |
| 262 | + struct stripe_tracker *state = (struct stripe_tracker *) user_data; |
| 263 | + |
| 264 | + for (uint32_t i = 0; i <= state->max_ost_idx; i++) { |
| 265 | + char *text = sqlite3_mprintf("INSERT INTO %s VALUES(%" PRIu32 ", %" PRIu64 ");", |
| 266 | + "lustre_summary", i, state->stripe_count[i]); |
| 267 | + char *error; |
| 268 | + |
| 269 | + int res = sqlite3_exec(db, text, NULL, NULL, &error); |
| 270 | + if (res != SQLITE_OK) { |
| 271 | + fprintf(stderr, "lustre plugin: db_exit(): error executing statement: %d %s\n", |
| 272 | + res, error); |
| 273 | + } |
| 274 | + |
| 275 | + sqlite3_free(text); |
| 276 | + sqlite3_free(error); |
| 277 | + } |
| 278 | + |
| 279 | + destroy_stripe_tracker(state); |
| 280 | +}; |
| 281 | + |
| 282 | +/* |
| 283 | + * This method of determining the maximum possible size of a `lov_user_md` was suggested by |
| 284 | + * man 3 llapi_file_get_stripe |
| 285 | + */ |
| 286 | +static const size_t v1_size = sizeof(struct lov_user_md_v1) + LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data_v1); |
| 287 | +static const size_t v3_size = sizeof(struct lov_user_md_v3) + LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data_v1); |
| 288 | +static const size_t lum_size = v1_size > v3_size ? v1_size : v3_size; |
| 289 | + |
| 290 | +static void *alloc_lum() { |
| 291 | + return calloc(1, lum_size); |
| 292 | +} |
| 293 | + |
| 294 | +void process_file(char *path, sqlite3 *db, void *user_data) { |
| 295 | + sqlite3_stmt *statement; |
| 296 | + |
| 297 | + struct lov_user_md *layout_info = alloc_lum(); |
| 298 | + |
| 299 | + int res = llapi_file_get_stripe(path, layout_info); |
| 300 | + |
| 301 | + if (res) { |
| 302 | + fprintf(stderr, "lustre plugin: error getting stripe info for %s: %s\n", |
| 303 | + path, strerror(errno)); |
| 304 | + free(layout_info); |
| 305 | + return; |
| 306 | + } |
| 307 | + |
| 308 | + char *text = sqlite3_mprintf("UPDATE entries SET ossint4 = %d where name = '%q';", |
| 309 | + layout_info->lmm_stripe_size, my_basename(path)); |
| 310 | + char *error; |
| 311 | + |
| 312 | + res = sqlite3_exec(db, text, NULL, NULL, &error); |
| 313 | + if (res != SQLITE_OK) { |
| 314 | + fprintf(stderr, "lustre plugin: process_file(): error executing statement: %d %s\n", |
| 315 | + res, error); |
| 316 | + goto out; |
| 317 | + } |
| 318 | + |
| 319 | + struct lov_user_ost_data_v1 *stripe_array; |
| 320 | + uint16_t stripe_count; |
| 321 | + |
| 322 | + if (layout_info->lmm_magic == LOV_USER_MAGIC_V1) { |
| 323 | + struct lov_user_md_v1 *v1= (struct lov_user_md_v1 *) layout_info; |
| 324 | + stripe_array = v1->lmm_objects; |
| 325 | + stripe_count = v1->lmm_stripe_count; |
| 326 | + } else if (layout_info->lmm_magic == LOV_USER_MAGIC_V3) { |
| 327 | + struct lov_user_md_v3 *v3= (struct lov_user_md_v3 *) layout_info; |
| 328 | + stripe_array = v3->lmm_objects; |
| 329 | + stripe_count = v3->lmm_stripe_count; |
| 330 | + } else { |
| 331 | + fprintf(stderr, "lustre plugin: unknown layout format on file %s: %d\n", path, layout_info->lmm_magic); |
| 332 | + goto out; |
| 333 | + } |
| 334 | + |
| 335 | + struct stripe_tracker *tracker = (struct stripe_tracker *) user_data; |
| 336 | + track_file_stripes(tracker, stripe_array, stripe_count); |
| 337 | + |
| 338 | + out: |
| 339 | + free(layout_info); |
| 340 | + sqlite3_free(error); |
| 341 | + sqlite3_free(text); |
| 342 | +} |
| 343 | + |
| 344 | +struct plugin_operations exported_operations = { |
| 345 | + .db_init = db_init, |
| 346 | + .process_dir = NULL, |
| 347 | + .process_file = process_file, |
| 348 | + .db_exit = db_exit, |
| 349 | +}; |
0 commit comments