Skip to content

Commit 305efeb

Browse files
committed
Suggested solution for proper port of dcopy.f to ulmBLAS.
Bugfix/Optimization in ddot.c.
1 parent 7b70355 commit 305efeb

File tree

2 files changed

+63
-8
lines changed

2 files changed

+63
-8
lines changed

level1/dcopy.c

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,61 @@
11
/*
 * dcopy_ -- copy the elements of vector x into vector y.
 *
 * Fortran-callable entry point: every scalar argument is passed by
 * pointer and dereferenced once on entry.
 *
 *   _n     points to the number of elements to copy; when *_n <= 0 the
 *          routine returns without touching x or y
 *   x      source vector
 *   _incX  points to the stride (in elements) between consecutive
 *          source elements
 *   y      destination vector
 *   _incY  points to the stride (in elements) between consecutive
 *          destination elements
 *
 * With a negative stride the corresponding vector is walked backwards:
 * the first element accessed lies (n-1)*|inc| elements past the pointer
 * that was passed in, and the last one accessed is the pointer itself.
 */
void
dcopy_(const int    *_n,
       const double *x,
       const int    *_incX,
       double       *y,
       const int    *_incY)
{
//
//  Dereference scalar parameters
//
    int n    = *_n;
    int incX = *_incX;
    int incY = *_incY;

//
//  Local scalars
//
    int i, m;

//
//  Quick return if possible.  Negative lengths are rejected as well, so
//  the start-pointer adjustment below never runs with n-1 < 0 (which
//  would move x or y outside the caller's array).
//
    if (n<=0) {
        return;
    }

    if (incX==1 && incY==1) {
//
//      Code for both increments equal to 1: copy the n % 7 leading
//      elements one by one, then the rest in unrolled groups of seven.
//      Since n > 0 here, 0 <= m < 7 and n-m is a multiple of 7.
//
        m = n % 7;
        for (i=0; i<m; ++i) {
            y[i] = x[i];
        }
        for (i=m; i<n; i+=7) {
            y[i  ] = x[i  ];
            y[i+1] = x[i+1];
            y[i+2] = x[i+2];
            y[i+3] = x[i+3];
            y[i+4] = x[i+4];
            y[i+5] = x[i+5];
            y[i+6] = x[i+6];
        }
        return;
    }

//
//  Code for unequal increments or equal increments not equal to 1.
//  For a negative increment start at the far end of the vector so that
//  stepping by the increment walks back towards the passed-in pointer.
//
    if (incX<0) {
        x -= incX*(n-1);
    }
    if (incY<0) {
        y -= incY*(n-1);
    }
    for (i=0; i<n; ++i, x+=incX, y+=incY) {
        (*y) = (*x);
    }
}

level1/ddot.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,13 @@ ddot_(const int *_n,
3030
// Code for both increments equal to 1
3131
//
3232
m = n % 5;
33-
for (i=0; i<m; ++i) {
34-
result += x[i] * y[i];
35-
}
36-
if (n<5) {
37-
return result;
33+
if (m!=0) {
34+
for (i=0; i<m; ++i) {
35+
result += x[i] * y[i];
36+
}
37+
if (n<5) {
38+
return result;
39+
}
3840
}
3941
for (i=m; i<n; i+=5) {
4042
result += x[i ] * y[i ] ;

0 commit comments

Comments
 (0)