-
Notifications
You must be signed in to change notification settings - Fork 514
BUDA: allow output files to be gzipped. #6876
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5684e1e
8f96ad1
4d377d5
54156a2
3ea0066
211f5a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -286,14 +286,20 @@ function get_outfile_phys_names($result) { | |||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||
| function get_outfile_log_names($result) { | ||||||||||||||||||||||||||||||||||||
| $names = []; | ||||||||||||||||||||||||||||||||||||
| $xml = "<a>".$result->xml_doc_in."</a>"; | ||||||||||||||||||||||||||||||||||||
| $r = simplexml_load_string($xml); | ||||||||||||||||||||||||||||||||||||
| if (!$r) return $names; | ||||||||||||||||||||||||||||||||||||
| if (!$r) { | ||||||||||||||||||||||||||||||||||||
| return [[],[]]; | ||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||
| $names = []; | ||||||||||||||||||||||||||||||||||||
| $gzip = []; | ||||||||||||||||||||||||||||||||||||
| foreach ($r->result->file_ref as $fr) { | ||||||||||||||||||||||||||||||||||||
| $names[] = (string)($fr->open_name); | ||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||
| return $names; | ||||||||||||||||||||||||||||||||||||
| foreach ($r->file_info as $fi) { | ||||||||||||||||||||||||||||||||||||
| $gzip[] = isset($fi->gzip_when_done); | ||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||
| } | |
| } | |
| // Ensure that the arrays have the same length so callers can safely | |
| // index them in parallel. If the XML is inconsistent, truncate both | |
| // arrays to the minimum length and log a warning. | |
| $names_count = count($names); | |
| $gzip_count = count($gzip); | |
| if ($names_count !== $gzip_count) { | |
| $min_count = min($names_count, $gzip_count); | |
| log_write("get_outfile_log_names: mismatched counts: names=$names_count, gzip=$gzip_count; truncating to $min_count"); | |
| if ($names_count > $min_count) { | |
| $names = array_slice($names, 0, $min_count); | |
| } | |
| if ($gzip_count > $min_count) { | |
| $gzip = array_slice($gzip, 0, $min_count); | |
| } | |
| } |
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
get_outfile_log_names() now returns a 2-tuple [$names, $gzip], but on XML parse failure it returns only $names. Callers destructuring the return (e.g., [$log_names, $gzip] = ...) will error. Return [$names, $gzip] consistently on all paths (including parse failure).
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indexing $gzip[$index] without checking it exists can raise notices and can incorrectly omit/append .gz when the gzip list length doesn’t match $log_names. Use an isset()/!empty() check (or default false) when reading $gzip[$index].
| return sprintf('../../results/%d/%s__file_%s%s', | |
| $wu->batch, $wu->name, $log_names[$index], | |
| $gzip[$index]?'.gz':'' | |
| $gzip_flag = !empty($gzip[$index]); | |
| return sprintf('../../results/%d/%s__file_%s%s', | |
| $wu->batch, $wu->name, $log_names[$index], | |
| $gzip_flag ? '.gz' : '' |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -67,7 +67,7 @@ function check_auth($auth, $batch) { | |
|
|
||
| function do_result_aux($result, $batch, $file_num=null) { | ||
| $phys_names = get_outfile_phys_names($result); | ||
| $log_names = get_outfile_log_names($result); | ||
| [$log_names,] = get_outfile_log_names($result); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P2: Destructuring a potentially empty array triggers an 'Undefined array key 0' warning and assigns Prompt for AI agents |
||
| if ($file_num !== null) { | ||
| $path = upload_path($phys_names[$file_num]); | ||
| do_download($path, | ||
|
|
@@ -138,7 +138,7 @@ function do_batch($batch_id, $auth) { | |
| foreach ($wus as $wu) { | ||
| $result = BoincResult::lookup_id($wu->canonical_resultid); | ||
| $phys_names = get_outfile_phys_names($result); | ||
| $log_names = get_outfile_log_names($result); | ||
| [$log_names,] = get_outfile_log_names($result); | ||
| if (count($phys_names) == 1) { | ||
| $cmd = sprintf('ln -s %s %s/%s__%s', | ||
| upload_path($phys_names[0]), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,19 +35,34 @@ | |
| require_once("../inc/util.inc"); | ||
| require_once("../inc/submit_util.inc"); | ||
|
|
||
| function check_auth($batch_id){ | ||
| $user = get_logged_in_user(); | ||
| $batch = BoincBatch::lookup_id($batch_id); | ||
| if (!$batch || $user->id != $batch->user_id) { | ||
| error_page('not owner'); | ||
| } | ||
| } | ||
|
|
||
| // show or download a single output file, | ||
| // identified by result ID and file index | ||
| // | ||
| function get_file() { | ||
| $result_id = get_int('result_id'); | ||
| $index = get_int('index'); | ||
| $result = BoincResult::lookup_id($result_id); | ||
| if (!$result) error_page('no result'); | ||
| if (!$result) { | ||
| error_page('no result'); | ||
| } | ||
| $wu = BoincWorkunit::lookup_id($result->workunitid); | ||
| if (!$wu) error_page('no workunit'); | ||
| $log_names = get_outfile_log_names($result); | ||
| if ($index >= count($log_names)) error_page('bad index'); | ||
| $path = assim_move_outfile_path($wu, $index, $log_names); | ||
| if (!$wu) { | ||
| error_page('no workunit'); | ||
| } | ||
| check_auth($wu->batch); | ||
| [$log_names, $gzip] = get_outfile_log_names($result); | ||
| if ($index >= count($log_names)) { | ||
| error_page('bad index'); | ||
| } | ||
| $path = assim_move_outfile_path($wu, $index, $log_names, $gzip); | ||
|
|
||
| if (get_str('download', true)) { | ||
| do_download($path); | ||
|
|
@@ -60,20 +75,51 @@ function get_file() { | |
|
|
||
| // download a zip of the given directory | ||
| // | ||
| function get_batch() { | ||
| function get_batch_zip() { | ||
| $batch_id = get_int('batch_id'); | ||
| check_auth($batch_id); | ||
| $dir = "../../results/$batch_id"; | ||
| if (!is_dir($dir)) die('no batch dir'); | ||
| if (!is_dir($dir)) { | ||
| die('no batch dir'); | ||
| } | ||
| $name = "batch_$batch_id.zip"; | ||
| $cmd = "cd $dir; rm -f $name; zip -q $name *"; | ||
| system($cmd); | ||
| $line = system($cmd, $ret); | ||
| if ($ret) { | ||
| error_page("Zip failed: $line"); | ||
| } | ||
| do_download("$dir/$name"); | ||
| unlink("$dir/$name"); | ||
|
||
| } | ||
|
|
||
| function get_batch_tar() { | ||
| $batch_id = get_int('batch_id'); | ||
| check_auth($batch_id); | ||
| $dir = "../../results/$batch_id"; | ||
| if (!is_dir($dir)) { | ||
| die('no batch dir'); | ||
| } | ||
| $name = "batch_$batch_id.tar"; | ||
| $cmd = "cd $dir; rm -f $name; tar -cf $name *"; | ||
| $line = system($cmd, $ret); | ||
| if ($ret) { | ||
| error_page("Tar failed: $line"); | ||
| } | ||
| do_download("$dir/$name"); | ||
| unlink("$dir/$name"); | ||
| } | ||
|
Comment on lines
+95
to
110
|
||
|
|
||
| $action = get_str('action'); | ||
| switch ($action) { | ||
| case 'get_file': get_file(); break; | ||
| case 'get_batch': get_batch(); break; | ||
| case 'get_file': | ||
| get_file(); | ||
| break; | ||
| case 'get_batch_zip': | ||
| get_batch_zip(); | ||
| break; | ||
| case 'get_batch_tar': | ||
| get_batch_tar(); | ||
| break; | ||
| } | ||
|
|
||
| ?> | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -638,6 +638,22 @@ function progress_bar($batch, $wus, $width) { | |||||||||||||||||||||||||||||
| return $x; | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| // see if the batch's output files are gzipped | ||||||||||||||||||||||||||||||
| // | ||||||||||||||||||||||||||||||
| function is_batch_gzipped($wus) { | ||||||||||||||||||||||||||||||
| foreach ($wus as $wu) { | ||||||||||||||||||||||||||||||
| if ($wu->canonical_resultid == 0) continue; | ||||||||||||||||||||||||||||||
| $result = BoincResult::lookup_id($wu->canonical_resultid); | ||||||||||||||||||||||||||||||
| if (!$result) return false; | ||||||||||||||||||||||||||||||
| [, $gzip] = get_outfile_log_names($result); | ||||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P1: Destructuring an empty array assigns null to $gzip, causing a TypeError in foreach. Prompt for AI agents
Suggested change
|
||||||||||||||||||||||||||||||
| foreach ($gzip as $flag) { | ||||||||||||||||||||||||||||||
| if ($flag) return true; | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| return false; | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| return false; | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
|
Comment on lines
+652
to
+656
|
||||||||||||||||||||||||||||||
| return false; | |
| } | |
| return false; | |
| } | |
| } | |
| return false; | |
| } |
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The function is_batch_gzipped has a logic bug. It returns false after checking only the first workunit with a canonical result, even if that workunit has no gzipped files. This means the function will always return false unless the very first workunit has gzipped output files.
The return false statement on line 652 should be moved outside the foreach loop to check all workunits before concluding there are no gzipped files. Alternatively, restructure the logic to check all files from the first valid workunit before returning.
| return false; | |
| } | |
| return false; | |
| } | |
| } | |
| return false; | |
| } |
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Duplicate echo statement. Line 717 already outputs a paragraph tag, making this redundant.
| echo "<p>"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P0: Leftover debugging code if (true) makes the else block unreachable and forces all downloads to tar.
Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/submit.php, line 720:
<comment>Leftover debugging code `if (true)` makes the else block unreachable and forces all downloads to tar.</comment>
<file context>
@@ -717,7 +717,8 @@ function handle_query_batch($user) {
echo "<p>";
if ($is_assim_move) {
- if (is_batch_gzipped($wus)) {
+ //if (is_batch_gzipped($wus)) {
+ if (true) {
$url = "get_output3.php?action=get_batch_tar&batch_id=$batch->id";
</file context>
| //if (is_batch_gzipped($wus)) { | |
| if (true) { | |
| if (is_batch_gzipped($wus)) { |
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The is_batch_gzipped function check is commented out and replaced with a hardcoded 'if (true)', meaning tar format will always be used regardless of whether files are actually gzipped. This appears to be test/debugging code that shouldn't be in production.
Either uncomment the is_batch_gzipped check or remove the conditional entirely if the intent is to always use tar.
| //if (is_batch_gzipped($wus)) { | |
| if (true) { | |
| if (is_batch_gzipped($wus)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P1: Destructuring an empty array assigns null to $log_names, causing a Fatal TypeError in count().
Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/submit.php, line 890:
<comment>Destructuring an empty array assigns null to $log_names, causing a Fatal TypeError in count().</comment>
<file context>
@@ -863,12 +887,12 @@ function handle_query_job($user) {
if ($is_assim_move) {
if ($result->id == $wu->canonical_resultid) {
- $log_names = get_outfile_log_names($result);
+ [$log_names, $gzip] = get_outfile_log_names($result);
$nfiles = count($log_names);
for ($i=0; $i<$nfiles; $i++) {
</file context>
| [$log_names, $gzip] = get_outfile_log_names($result); | |
| [$log_names, $gzip] = get_outfile_log_names($result) + [[], []]; |
cubic-dev-ai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P2: Avoid potential undefined offset warnings when accessing $gzip[$i].
Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/submit.php, line 901:
<comment>Avoid potential undefined offset warnings when accessing `$gzip[$i]`.</comment>
<file context>
@@ -896,8 +897,8 @@ function handle_query_job($user) {
- // don't show 'view' link if it's a .zip
- if (!strstr($name, '.zip')) {
+ // don't show 'view' link if it's zipped
+ if (!strstr($name, '.zip') && !$gzip[$i]) {
$y .= sprintf(
'<a href=get_output3.php?action=get_file&result_id=%d&index=%d>view</a> · ',
</file context>
| if (!strstr($name, '.zip') && !$gzip[$i]) { | |
| if (!strstr($name, '.zip') && empty($gzip[$i])) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P1: Destructuring an empty array assigns null to $log_names, causing a Fatal TypeError in count().
Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/submit.php, line 921:
<comment>Destructuring an empty array assigns null to $log_names, causing a Fatal TypeError in count().</comment>
<file context>
@@ -894,7 +918,7 @@ function handle_query_job($user) {
if ($result->server_state == RESULT_SERVER_STATE_OVER) {
$phys_names = get_outfile_phys_names($result);
- $log_names = get_outfile_log_names($result);
+ [$log_names,] = get_outfile_log_names($result);
$nfiles = count($log_names);
for ($i=0; $i<$nfiles; $i++) {
</file context>
| [$log_names,] = get_outfile_log_names($result); | |
| [$log_names,] = get_outfile_log_names($result) + [[], []]; |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -18,7 +18,17 @@ | |||||||||||||||||||||||||||||||||||||||||
| # in the 2nd case, write the error code | ||||||||||||||||||||||||||||||||||||||||||
| # to results/<batch_id>/<wu_name>_error | ||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||
| import sys, os | ||||||||||||||||||||||||||||||||||||||||||
| import sys, os, gzip | ||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||
| def is_gzip(path): | ||||||||||||||||||||||||||||||||||||||||||
| if os.path.getsize(path) == 0: | ||||||||||||||||||||||||||||||||||||||||||
| return False | ||||||||||||||||||||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||||||||||||||||||||
| with gzip.open(path, 'rb') as f: | ||||||||||||||||||||||||||||||||||||||||||
| f.read(1) | ||||||||||||||||||||||||||||||||||||||||||
| return True | ||||||||||||||||||||||||||||||||||||||||||
| except: | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
| except: | |
| except (OSError, gzip.BadGzipFile): |
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This helper prints debug messages on every file and uses a bare except, which can hide real errors and pollute assimilator output. Remove the prints and catch specific exceptions (e.g., OSError, gzip.BadGzipFile) or use a cheap header check (first two bytes 0x1f, 0x8b) to avoid relying on exceptions for control flow.
| if os.path.getsize(path) == 0: | |
| return False | |
| try: | |
| with gzip.open(path, 'rb') as f: | |
| f.read(1) | |
| print('is gzip') | |
| return True | |
| except: | |
| print('not gzip') | |
| return False | |
| # Return True if the file appears to be gzip-compressed, based on magic bytes. | |
| if os.path.getsize(path) == 0: | |
| return False | |
| try: | |
| with open(path, 'rb') as f: | |
| magic = f.read(2) | |
| except OSError: | |
| # If the file can't be read, treat it as non-gzip. | |
| return False | |
| return magic == b'\x1f\x8b' |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P2: The
get_outfile_log_namesfunction returns two separate arrays (namesandgzip), but the array counts may not match if there are fewerfile_infoelements thanfile_refelements. This can lead to undefined array key access when iterating overgzipusing the$iindex inassim_move_outfile_pathand calling functions.Prompt for AI agents