Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions html/inc/submit_util.inc
Original file line number Diff line number Diff line change
Expand Up @@ -286,14 +286,20 @@ function get_outfile_phys_names($result) {
}

Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: The get_outfile_log_names function returns two separate arrays (names and gzip), but the array counts may not match if there are fewer file_info elements than file_ref elements. This can lead to undefined array key access when iterating over gzip using the $i index in assim_move_outfile_path and calling functions.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/inc/submit_util.inc, line 290:

<comment>The `get_outfile_log_names` function returns two separate arrays (`names` and `gzip`), but the array counts may not match if there are fewer `file_info` elements than `file_ref` elements. This can lead to undefined array key access when iterating over `gzip` using the `$i` index in `assim_move_outfile_path` and calling functions.</comment>

<file context>
@@ -287,13 +287,17 @@ function get_outfile_phys_names($result) {
 
 function get_outfile_log_names($result) {
     $names = [];
+    $gzip = [];
     $xml = "<a>".$result->xml_doc_in."</a>";
     $r = simplexml_load_string($xml);
</file context>
Fix with Cubic

function get_outfile_log_names($result) {
$names = [];
$xml = "<a>".$result->xml_doc_in."</a>";
$r = simplexml_load_string($xml);
if (!$r) return $names;
if (!$r) {
return [[],[]];
}
$names = [];
$gzip = [];
foreach ($r->result->file_ref as $fr) {
$names[] = (string)($fr->open_name);
}
return $names;
foreach ($r->file_info as $fi) {
$gzip[] = isset($fi->gzip_when_done);
}
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function builds two arrays (names and gzip) by iterating over different XML elements (result->file_ref for names, file_info for gzip). While these should typically be in the same order and have the same count based on how templates are generated, there's no validation that the arrays have the same length before returning them.

If the XML structure is malformed or inconsistent, this could lead to array index mismatches when the caller uses the same index for both arrays (as seen in lines like 'assim_move_outfile_path($wu, $i, $log_names, $gzip)'). Consider adding a check to ensure count($names) == count($gzip) before returning, or document this assumption clearly.

Suggested change
}
}
// Ensure that the arrays have the same length so callers can safely
// index them in parallel. If the XML is inconsistent, truncate both
// arrays to the minimum length and log a warning.
$names_count = count($names);
$gzip_count = count($gzip);
if ($names_count !== $gzip_count) {
$min_count = min($names_count, $gzip_count);
log_write("get_outfile_log_names: mismatched counts: names=$names_count, gzip=$gzip_count; truncating to $min_count");
if ($names_count > $min_count) {
$names = array_slice($names, 0, $min_count);
}
if ($gzip_count > $min_count) {
$gzip = array_slice($gzip, 0, $min_count);
}
}

Copilot uses AI. Check for mistakes.
return [$names, $gzip];
}
Comment on lines 288 to 303
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_outfile_log_names() now returns a 2-tuple [$names, $gzip], but on XML parse failure it returns only $names. Callers destructuring the return (e.g., [$log_names, $gzip] = ...) will error. Return [$names, $gzip] consistently on all paths (including parse failure).

Copilot uses AI. Check for mistakes.

// get output file paths for non-assim-move apps
Expand All @@ -319,11 +325,12 @@ function get_outfile_paths($result) {
// sched/sample_assimilator.cpp
// and with tools/query_job
//
function assim_move_outfile_path($wu, $index, $log_names) {
function assim_move_outfile_path($wu, $index, $log_names, $gzip) {
if (!is_valid_filename($wu->name)) error_page("bad WU name");
if (!is_valid_filename($log_names[$index])) error_page("bad logical name");
return sprintf('../../results/%d/%s__file_%s',
$wu->batch, $wu->name, $log_names[$index]
return sprintf('../../results/%d/%s__file_%s%s',
$wu->batch, $wu->name, $log_names[$index],
$gzip[$index]?'.gz':''
Comment on lines +331 to +333
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indexing $gzip[$index] without checking it exists can raise notices and can incorrectly omit/append .gz when the gzip list length doesn’t match $log_names. Use an isset()/!empty() check (or default false) when reading $gzip[$index].

Suggested change
return sprintf('../../results/%d/%s__file_%s%s',
$wu->batch, $wu->name, $log_names[$index],
$gzip[$index]?'.gz':''
$gzip_flag = !empty($gzip[$index]);
return sprintf('../../results/%d/%s__file_%s%s',
$wu->batch, $wu->name, $log_names[$index],
$gzip_flag ? '.gz' : ''

Copilot uses AI. Check for mistakes.
);
}

Expand Down Expand Up @@ -522,21 +529,25 @@ function file_ref_in($fname) {
$fname
));
}
function file_info_out($i, $max_nbytes) {
function file_info_out($i, $max_nbytes, $gzip_output) {
if (!$max_nbytes) {
$max_nbytes = MEGA;
}
return sprintf(
$x = sprintf(
' <file_info>
<name><OUTFILE_%d/></name>
<generated_locally/>
<upload_when_present/>
<max_nbytes>%d</max_nbytes>
<url><UPLOAD_URL/></url>
</file_info>
',
$i, $max_nbytes
);
if ($gzip_output) {
$x .= " <gzip_when_done/>\n";
}
$x .= " </file_info>\n";
return $x;
}

function file_ref_out($i, $fname) {
Expand Down
20 changes: 17 additions & 3 deletions html/user/buda.php
Original file line number Diff line number Diff line change
Expand Up @@ -266,14 +266,16 @@ function create_templates($app, $desc, $dir) {
$x = "<output_template>\n";
$i = 0;
foreach ($desc->output_file_names as $fname) {
$x .= file_info_out($i++, $desc->max_nbytes_mb*MEGA);
$x .= file_info_out(
$i++, $desc->max_nbytes_mb*MEGA, $desc->gzip_output
);
}
$x .= " <result>\n";
$x .= " <result>\n";
$i = 0;
foreach ($desc->output_file_names as $fname) {
$x .= file_ref_out($i++, $fname);
}
$x .= " </result>\n</output_template>\n";
$x .= " </result>\n</output_template>\n";
file_put_contents("$dir/template_out", $x);
}

Expand Down Expand Up @@ -519,6 +521,13 @@ function app_form($desc=null) {
'max_nbytes_mb',
$desc->max_nbytes_mb
);
if (empty($desc->gzip_output)) {
$desc->gzip_output = false;
}
form_checkboxes(
'Gzip output files?',
[['gzip_output', '', $desc->gzip_output]]
);
form_input_text(
'Run at most this many total instances of each job',
'max_total',
Expand Down Expand Up @@ -637,6 +646,7 @@ function app_action($user) {
$desc->max_total = $max_total;
$desc->max_delay_days = $max_delay_days;
$desc->description = get_str('description');
$desc->gzip_output = get_str('gzip_output', true)?true:false;
$desc->sci_kw = array_map('intval', get_array('sci_kw'));
$desc->submitters = [];
$x = get_str('submitters');
Expand Down Expand Up @@ -715,6 +725,10 @@ function app_details($user) {
$desc->max_nbytes_mb
);
}
row2(
'Gzip output files?',
empty($desc->gzip_output)?'no':'yes'
);
if (!empty($desc->max_total)) {
row2('Max total instances per job:', $desc->max_total);
} else {
Expand Down
4 changes: 2 additions & 2 deletions html/user/get_output2.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ function check_auth($auth, $batch) {

function do_result_aux($result, $batch, $file_num=null) {
$phys_names = get_outfile_phys_names($result);
$log_names = get_outfile_log_names($result);
[$log_names,] = get_outfile_log_names($result);
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Destructuring a potentially empty array triggers an 'Undefined array key 0' warning and assigns null to $log_names.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/get_output2.php, line 70:

<comment>Destructuring a potentially empty array triggers an 'Undefined array key 0' warning and assigns `null` to `$log_names`.</comment>

<file context>
@@ -67,7 +67,7 @@ function check_auth($auth, $batch) {
 function do_result_aux($result, $batch, $file_num=null) {
     $phys_names = get_outfile_phys_names($result);
-    $log_names = get_outfile_log_names($result);
+    [$log_names,] = get_outfile_log_names($result);
     if ($file_num !== null) {
         $path = upload_path($phys_names[$file_num]);
</file context>
Fix with Cubic

if ($file_num !== null) {
$path = upload_path($phys_names[$file_num]);
do_download($path,
Expand Down Expand Up @@ -138,7 +138,7 @@ function do_batch($batch_id, $auth) {
foreach ($wus as $wu) {
$result = BoincResult::lookup_id($wu->canonical_resultid);
$phys_names = get_outfile_phys_names($result);
$log_names = get_outfile_log_names($result);
[$log_names,] = get_outfile_log_names($result);
if (count($phys_names) == 1) {
$cmd = sprintf('ln -s %s %s/%s__%s',
upload_path($phys_names[0]),
Expand Down
66 changes: 56 additions & 10 deletions html/user/get_output3.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,34 @@
require_once("../inc/util.inc");
require_once("../inc/submit_util.inc");

// Verify that the logged-in user owns the given batch.
// Calls error_page('not owner') if the batch lookup fails
// or if the batch's user_id differs from the user's id.
//
function check_auth($batch_id){
    $user = get_logged_in_user();
    $batch = BoincBatch::lookup_id($batch_id);
    // short-circuit: user_id is only read when the batch was found
    $is_owner = $batch && $user->id == $batch->user_id;
    if (!$is_owner) {
        error_page('not owner');
    }
}

// show or download a single output file,
// identified by result ID and file index
//
function get_file() {
$result_id = get_int('result_id');
$index = get_int('index');
$result = BoincResult::lookup_id($result_id);
if (!$result) error_page('no result');
if (!$result) {
error_page('no result');
}
$wu = BoincWorkunit::lookup_id($result->workunitid);
if (!$wu) error_page('no workunit');
$log_names = get_outfile_log_names($result);
if ($index >= count($log_names)) error_page('bad index');
$path = assim_move_outfile_path($wu, $index, $log_names);
if (!$wu) {
error_page('no workunit');
}
check_auth($wu->batch);
[$log_names, $gzip] = get_outfile_log_names($result);
if ($index >= count($log_names)) {
error_page('bad index');
}
$path = assim_move_outfile_path($wu, $index, $log_names, $gzip);

if (get_str('download', true)) {
do_download($path);
Expand All @@ -60,20 +75,51 @@ function get_file() {

// download a zip of the given directory
//
function get_batch() {
function get_batch_zip() {
$batch_id = get_int('batch_id');
check_auth($batch_id);
$dir = "../../results/$batch_id";
if (!is_dir($dir)) die('no batch dir');
if (!is_dir($dir)) {
die('no batch dir');
}
$name = "batch_$batch_id.zip";
$cmd = "cd $dir; rm -f $name; zip -q $name *";
system($cmd);
$line = system($cmd, $ret);
if ($ret) {
error_page("Zip failed: $line");
}
do_download("$dir/$name");
unlink("$dir/$name");
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The unlink() call doesn't check for errors or handle the case where deletion fails. If the file can't be deleted (e.g., due to permissions), it will accumulate in the results directory over time, potentially consuming disk space.

Consider adding error handling or at least logging if the deletion fails, e.g., if (!unlink("$dir/$name")) { error_log("Failed to delete temporary archive: $dir/$name"); }

Copilot uses AI. Check for mistakes.
}

// Download a tar archive of the given batch's results directory.
// The batch ID is read from the 'batch_id' request argument with get_int(),
// and check_auth() is called on it before the directory is touched.
// The archive is created inside the results directory,
// passed to do_download(), and then removed with unlink().
//
function get_batch_tar() {
    $batch_id = get_int('batch_id');
    check_auth($batch_id);
    $dir = "../../results/$batch_id";
    if (!is_dir($dir)) {
        die('no batch dir');
    }
    $name = "batch_$batch_id.tar";
    // Chain with '&&' rather than ';' so that if 'cd' fails,
    // rm and tar are not run in the script's current directory.
    // The stale archive (if any) is removed before the shell expands '*'
    // for the tar command, so the archive does not include itself.
    $cmd = "cd $dir && rm -f $name && tar -cf $name *";
    // $ret is the exit status of the first command that failed
    // (or of tar if all ran); nonzero means no usable archive.
    $line = system($cmd, $ret);
    if ($ret) {
        error_page("Tar failed: $line");
    }
    do_download("$dir/$name");
    // NOTE(review): this cleanup only runs if do_download() returns;
    // confirm it does not exit the script.
    unlink("$dir/$name");
}
Comment on lines +95 to 110
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using tar -cvf will write verbose output to stdout, which can corrupt the HTTP response and/or prevent do_download() from sending clean headers/body. Drop the -v (or redirect tar output), and also exclude the archive itself (or create the tar outside the target dir) to avoid including batch_$batch_id.tar in the archive.

Copilot uses AI. Check for mistakes.

$action = get_str('action');
switch ($action) {
case 'get_file': get_file(); break;
case 'get_batch': get_batch(); break;
case 'get_file':
get_file();
break;
case 'get_batch_zip':
get_batch_zip();
break;
case 'get_batch_tar':
get_batch_tar();
break;
}

?>
56 changes: 41 additions & 15 deletions html/user/submit.php
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,22 @@ function progress_bar($batch, $wus, $width) {
return $x;
}

// see if the batch's output files are gzipped
//
function is_batch_gzipped($wus) {
foreach ($wus as $wu) {
if ($wu->canonical_resultid == 0) continue;
$result = BoincResult::lookup_id($wu->canonical_resultid);
if (!$result) return false;
[, $gzip] = get_outfile_log_names($result);
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Destructuring an empty array assigns null to $gzip, causing a TypeError in foreach.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/submit.php, line 648:

<comment>Destructuring an empty array assigns null to $gzip, causing a TypeError in foreach.</comment>

<file context>
@@ -638,6 +638,22 @@ function progress_bar($batch, $wus, $width) {
+        if ($wu->canonical_resultid == 0) continue;
+        $result = BoincResult::lookup_id($wu->canonical_resultid);
+        if (!$result) return false;
+        [, $gzip] = get_outfile_log_names($result);
+        foreach ($gzip as $flag) {
+            if ($flag) return true;
</file context>
Suggested change
[, $gzip] = get_outfile_log_names($result);
[, $gzip] = get_outfile_log_names($result) + [[], []];
Fix with Cubic

foreach ($gzip as $flag) {
if ($flag) return true;
}
return false;
}
return false;
}

Comment on lines +652 to +656
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is_batch_gzipped() returns false after inspecting only the first eligible workunit (return false; at line 652 is inside the loop), so it can miss gzipped outputs in later WUs. Move the return false to after the outer foreach so the whole batch is checked.

Suggested change
return false;
}
return false;
}
}
return false;
}

Copilot uses AI. Check for mistakes.
Comment on lines +652 to +656
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function is_batch_gzipped has a logic bug. It returns false after checking only the first workunit with a canonical result, even if that workunit has no gzipped files. This means the function will always return false unless the very first workunit has gzipped output files.

The return false statement on line 652 should be moved outside the foreach loop to check all workunits before concluding there are no gzipped files. Alternatively, restructure the logic to check all files from the first valid workunit before returning.

Suggested change
return false;
}
return false;
}
}
return false;
}

Copilot uses AI. Check for mistakes.
// show the details of an existing batch.
// $user has access to abort/retire the batch
// and to get its output files
Expand All @@ -646,7 +662,9 @@ function handle_query_batch($user) {
$batch_id = get_int('batch_id');
$status = get_int('status', true);
$batch = BoincBatch::lookup_id($batch_id);
if (!$batch) error_page('no batch');
if (!$batch) {
error_page('no batch');
}
$app = BoincApp::lookup_id($batch->app_id);
$wus = BoincWorkunit::enum_fields(
'id, name, rsc_fpops_est, canonical_credit, canonical_resultid, error_mask',
Expand Down Expand Up @@ -696,16 +714,22 @@ function handle_query_batch($user) {
);
}
end_table();
echo "<p>";

echo "<p>";
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate echo statement. Line 717 already outputs a paragraph tag, making this redundant.

Suggested change
echo "<p>";

Copilot uses AI. Check for mistakes.
if ($is_assim_move) {
$url = "get_output3.php?action=get_batch&batch_id=$batch->id";
//if (is_batch_gzipped($wus)) {
if (true) {
Comment on lines +720 to +721
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P0: Leftover debugging code if (true) makes the else block unreachable and forces all downloads to tar.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/submit.php, line 720:

<comment>Leftover debugging code `if (true)` makes the else block unreachable and forces all downloads to tar.</comment>

<file context>
@@ -717,7 +717,8 @@ function handle_query_batch($user) {
     echo "<p>";
     if ($is_assim_move) {
-        if (is_batch_gzipped($wus)) {
+        //if (is_batch_gzipped($wus)) {
+        if (true) {
             $url = "get_output3.php?action=get_batch_tar&batch_id=$batch->id";
</file context>
Suggested change
//if (is_batch_gzipped($wus)) {
if (true) {
if (is_batch_gzipped($wus)) {
Fix with Cubic

Comment on lines +720 to +721
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The is_batch_gzipped function check is commented out and replaced with a hardcoded 'if (true)', meaning tar format will always be used regardless of whether files are actually gzipped. This appears to be test/debugging code that shouldn't be in production.

Either uncomment the is_batch_gzipped check or remove the conditional entirely if the intent is to always use tar.

Suggested change
//if (is_batch_gzipped($wus)) {
if (true) {
if (is_batch_gzipped($wus)) {

Copilot uses AI. Check for mistakes.
$url = "get_output3.php?action=get_batch_tar&batch_id=$batch->id";
show_button($url, "Get tarred output files");
} else {
$url = "get_output3.php?action=get_batch_zip&batch_id=$batch->id";
show_button($url, "Get zipped output files");
}
} else {
$url = "get_output2.php?cmd=batch&batch_id=$batch->id";
show_button($url, "Get zipped output files");
}
echo "<p>";
show_button($url, "Get zipped output files");
echo "<p>";
switch ($batch->state) {
case BATCH_STATE_IN_PROGRESS:
case BATCH_STATE_INIT:
Expand Down Expand Up @@ -861,19 +885,20 @@ function handle_query_job($user) {
time_str($result->received_time),
$result->priority
];
$files = [];
if ($is_assim_move) {
if ($result->id == $wu->canonical_resultid) {
$log_names = get_outfile_log_names($result);
[$log_names, $gzip] = get_outfile_log_names($result);
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Destructuring an empty array assigns null to $log_names, causing a Fatal TypeError in count().

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/submit.php, line 890:

<comment>Destructuring an empty array assigns null to $log_names, causing a Fatal TypeError in count().</comment>

<file context>
@@ -863,12 +887,12 @@ function handle_query_job($user) {
         if ($is_assim_move) {
             if ($result->id == $wu->canonical_resultid) {
-                $log_names = get_outfile_log_names($result);
+                [$log_names, $gzip] = get_outfile_log_names($result);
                 $nfiles = count($log_names);
                 for ($i=0; $i<$nfiles; $i++) {
</file context>
Suggested change
[$log_names, $gzip] = get_outfile_log_names($result);
[$log_names, $gzip] = get_outfile_log_names($result) + [[], []];
Fix with Cubic

$nfiles = count($log_names);
for ($i=0; $i<$nfiles; $i++) {
$name = $log_names[$i];
$path = assim_move_outfile_path($wu, $i, $log_names);
$path = assim_move_outfile_path($wu, $i, $log_names, $gzip);
if (file_exists($path)) {
$y = sprintf('%s (%s): ',
$name, size_string(filesize($path))
);
// don't show 'view' link if it's a .zip
if (!strstr($name, '.zip')) {
// don't show 'view' link if it's zipped
if (!strstr($name, '.zip') && !$gzip[$i]) {
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Avoid potential undefined offset warnings when accessing $gzip[$i].

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/submit.php, line 901:

<comment>Avoid potential undefined offset warnings when accessing `$gzip[$i]`.</comment>

<file context>
@@ -896,8 +897,8 @@ function handle_query_job($user) {
-                        // don't show 'view' link if it's a .zip
-                        if (!strstr($name, '.zip')) {
+                        // don't show 'view' link if it's zipped
+                        if (!strstr($name, '.zip') && !$gzip[$i]) {
                             $y .= sprintf(
                                 '<a href=get_output3.php?action=get_file&result_id=%d&index=%d>view</a> &middot; ',
</file context>
Suggested change
if (!strstr($name, '.zip') && !$gzip[$i]) {
if (!strstr($name, '.zip') && empty($gzip[$i])) {
Fix with Cubic

$y .= sprintf(
'<a href=get_output3.php?action=get_file&result_id=%d&index=%d>view</a> &middot; ',
$result->id, $i
Expand All @@ -886,15 +911,15 @@ function handle_query_job($user) {
} else {
$y = sprintf('%s: MISSING', $name);
}
$x[] = $y;
$files[] = $y;
}
} else {
$x[] = '---';
$files[] = '---';
}
} else {
if ($result->server_state == RESULT_SERVER_STATE_OVER) {
$phys_names = get_outfile_phys_names($result);
$log_names = get_outfile_log_names($result);
[$log_names,] = get_outfile_log_names($result);
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Destructuring an empty array assigns null to $log_names, causing a Fatal TypeError in count().

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At html/user/submit.php, line 921:

<comment>Destructuring an empty array assigns null to $log_names, causing a Fatal TypeError in count().</comment>

<file context>
@@ -894,7 +918,7 @@ function handle_query_job($user) {
             if ($result->server_state == RESULT_SERVER_STATE_OVER) {
                 $phys_names = get_outfile_phys_names($result);
-                $log_names = get_outfile_log_names($result);
+                [$log_names,] = get_outfile_log_names($result);
                 $nfiles = count($log_names);
                 for ($i=0; $i<$nfiles; $i++) {
</file context>
Suggested change
[$log_names,] = get_outfile_log_names($result);
[$log_names,] = get_outfile_log_names($result) + [[], []];
Fix with Cubic

$nfiles = count($log_names);
for ($i=0; $i<$nfiles; $i++) {
$path = dir_hier_path(
Expand All @@ -907,19 +932,20 @@ function handle_query_job($user) {
);
$s = stat($path);
$size = $s['size'];
$x[] = sprintf('<a href=%s>%s</a> (%s bytes)<br/>',
$files[] = sprintf('<a href=%s>%s</a> (%s bytes)<br/>',
$url,
$log_names[$i],
number_format($size)
);
} else {
$x[] = sprintf("file '%s' is missing", $log_names[$i]);
$files[] = sprintf("file '%s' is missing", $log_names[$i]);
}
}
} else {
$x[] = '---';
$files[] = '---';
}
}
$x[] = implode('<br>', $files);
row_array($x);
}
end_table();
Expand Down
17 changes: 14 additions & 3 deletions tools/sample_assimilate.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,17 @@
# in the 2nd case, write the error code
# to results/<batch_id>/<wu_name>_error

import sys, os
import sys, os, gzip

def is_gzip(path):
if os.path.getsize(path) == 0:
return False
try:
with gzip.open(path, 'rb') as f:
f.read(1)
return True
except:
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bare except clause catches all exceptions including SystemExit and KeyboardInterrupt. Use 'except Exception:' or specify the expected exception types (e.g., 'except (OSError, gzip.BadGzipFile):').

Suggested change
except:
except (OSError, gzip.BadGzipFile):

Copilot uses AI. Check for mistakes.
return False
Comment on lines +24 to +31
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This helper prints debug messages on every file and uses a bare except, which can hide real errors and pollute assimilator output. Remove the prints and catch specific exceptions (e.g., OSError, gzip.BadGzipFile) or use a cheap header check (first two bytes 0x1f, 0x8b) to avoid relying on exceptions for control flow.

Suggested change
if os.path.getsize(path) == 0:
return False
try:
with gzip.open(path, 'rb') as f:
f.read(1)
print('is gzip')
return True
except:
print('not gzip')
return False
# Return True if the file appears to be gzip-compressed, based on magic bytes.
if os.path.getsize(path) == 0:
return False
try:
with open(path, 'rb') as f:
magic = f.read(2)
except OSError:
# If the file can't be read, treat it as non-gzip.
return False
return magic == b'\x1f\x8b'

Copilot uses AI. Check for mistakes.

if sys.argv[1] == '--error':
error_code = sys.argv[2]
Expand All @@ -43,8 +53,9 @@
for i in range(nfiles):
outfile_path = sys.argv[2*i+3]
logical_name = sys.argv[2*i+4]
cmd = 'mv %s %s/%s__file_%s'%(
outfile_path, outdir, wu_name, logical_name
cmd = 'mv %s %s/%s__file_%s%s'%(
outfile_path, outdir, wu_name, logical_name,
'.gz' if is_gzip(outfile_path) else ''
)
if os.system(cmd):
#raise Exception('%s failed'%(cmd))
Expand Down