-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathGetField.sas
348 lines (319 loc) · 13.1 KB
/
GetField.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
%macro GetField(PorojectPath =,
ALS =,
DictBook =,
DictLink =,
BlkBook =,
NOLOCK =
);
*------------------- Define related file path --------------------;
filename DictBook "&PorojectPath/&DictBook..txt";
filename DictLink "&PorojectPath/&DictLink..txt";
filename BlkBook "&PorojectPath/&BlkBook..txt";
proc datasets memtype=data lib=work kill nolist;run;quit;
*------------------- Read ALS --------------------;
%macro import(out=,key=,keep=);
proc import datafile="&ALS..xlsx" out=&out. dbms=xlsx replace;
sheet = "&out.";
run;
%if &out = Forms %then %str(proc sort nodupkey;by OID;run;);
data &out.;
set &out.(&keep);
array test(*) _Char_;
do i = 1 to dim(test);
test(i) = compress(test(i),,'kw');
end;
%if &out = Fields %then %do; /* Change logline which not following general rule */
if formOID='CM_ONC_001' and fieldOID = "CMPRIOR_ADD" then IsLog='TRUE';
if formOID='CM_ONC_003' and fieldOID = "CMSUBSE_ADD" then IsLog='TRUE';
%end;
%if &out = DataDictionaryEntries %then %do;
ordinal_ = input(ordinal,best.);
length = lengthn(UserDataString);
drop ordinal;
%end;
run;
%mend;
/* Forms */
%import(out=Forms,keep=);
/* Fields */
%import(out=Fields,keep=);
/* DataDictionary */
%import(out=DataDictionaryEntries,keep=%str(keep=DataDictionaryName codeddata ordinal UserDataString));
proc sort data=DataDictionaryEntries;by DataDictionaryName descending length;run;
data DataDictionaryEntries_2;
set DataDictionaryEntries;
where ordinal_ <=40;
by DataDictionaryName descending length;
if first.DataDictionaryName;
run;
proc sort data=DataDictionaryEntries;by DataDictionaryName ordinal_;run;
data DataDictionaryEntries;
set DataDictionaryEntries;
by DataDictionaryName ordinal_;
if first.DataDictionaryName then n=.;
n+1;
ordinal = put(n,best. -l);
keep DataDictionaryName codeddata UserDataString ordinal;
run;
data DataDictionaryEntries_1;
set DataDictionaryEntries;
where input(ordinal,best.) > 40;
run;
*------------------- Define internal macros --------------------;
%macro update_form(in_data=, out_data=, check=); /* Macro to update form names as there are discrepancies between ALS and CRF Bookmarks */
%let n1 = ✓
%let n2 = %eval(&check+1);
proc sql;
create table check_forms as
select distinct DraftFormName as form length=200, OID as formOID, input(ordinal_,best.) as ordinal
from forms(where=(^missing(DraftFormName) and DraftFormActive='TRUE') rename=ordinal=ordinal_)
order by ordinal;
quit;
%put &sqlobs;
data _null_;
set &in_data nobs=n;
if _n_ = 1 then call symputx('InObs',cats(n));
run;
%if &sqlobs=&InObs %then %do;
proc sort data=&in_data;by page;run;
data &out_data;
set &in_data;
set check_forms(keep=form formOID ordinal);
run;
proc sort data=&out_data;by descending page form;run;
%end;
%else %do;
proc sort data=&in_data;by form;run;
proc sort data=check_forms;by form;run;
data check_✓
merge &in_data(in=a) check_forms(in=b);
by form;
if a and not b then do;
n&n1. +1;
call symputx("check&n1._"||cats(n&n1.),strip(form),'g');
call symputx("ck&n1",cats(n&n1.),'g');
end;
if not a and b then do;
n&n2. +1;
call symputx("check&n2._"||cats(n&n2),strip(form),'g');
call symputx("ck&n2",cats(n&n2.),'g');
end;
run;
%end;
%mend;
*------------------- Read bookmarks and update form names --------------------;
data RaveBook_1;
infile DictBook;
input;
length text $500;
if ^missing(compress(_infile_,,'kw')) and _n_ >1;
text =strip(_infile_);
form =compress(prxchange('s/(.+)\x9+\d+$/$1/io',-1,strip(text)),,'kw');
page =input(compress(prxchange('s/.+\x9+(\d+)$/$1/io',-1,strip(text)),,'kw'),best.);
keep form page;
run;
proc sort nodupkey;by descending page form;run;/* Remove duplicate bookmarks */
data Raveform_;
set RaveBook_1;
where not prxmatch('/^Annotations$/io',strip(form)) and ^missing(form);
run;
proc sort;by page form;run;
%update_form(in_data=Raveform_, out_data=Raveform, check=1);
%if %symexist(ck1) or %symexist(ck2) %then %RETURN ;
proc sql;
create table RaveBook_2 as
select coalescec(b.form,a.form) as form length=200, a.page, b.formOID, b.ordinal
from RaveBook_1 as a left join Raveform as b
on a.page =b.page
order by page desc, form;
quit;
data RaveBook_3; /* Add record for long-break forms */
set RaveBook_2;
by descending page form;
retain page_;
if prxmatch('/^Annotations$/io',strip(form)) then page_ = page;
if not prxmatch('/^Annotations$/io',strip(form)) then Output;
if not prxmatch('/^Annotations$/io',strip(form)) and page_ - page>1 then do;
do i = 1 to page_-page-1;
page=page+1;
Output;
end;
end;
keep page form;
run;
*------------------- Read links --------------------;
data RaveLink;
infile DictLink dlm=',';
input page i$ s$ o1 o2 n x1 x2 y2 y1;
length text $500;
text=strip(_infile_);
keep page x1 x2 y2 y1;
run;
*------------------- Join bookmark and links together --------------------;
proc sql;
create table rave as
select a.*,b.x1,b.x2,b.y1,b.y2
from RaveBook_3 as a inner join RaveLink as b
on a.page=b.page
order by page,y2,y1;
quit;
data BlkBook_1;
infile BlkBook;
input;
length text $500;
if ^missing(compress(_infile_,,'kw')) and _n_ >1;
text=strip(_infile_);
form =compress(prxchange('s/(.+)\x9+\d+$/$1/io',-1,strip(text)),,'kw');
page =input(compress(prxchange('s/.+\x9+(\d+)$/$1/io',-1,strip(text)),,'kw'),best.);
keep form page;
run;
proc sort nodupkey;by descending page form;run;/* Remove duplicate bookmarks */
%update_form(in_data=BlkBook_1, out_data=BlkBook_2, check=3);
%if %symexist(ck3) or %symexist(ck4) %then %RETURN ;
data BlkBook_3;
set BlkBook_2;
by descending page form;
page_next_ = lag(page);
run;
proc sql;
create table BlkBook_4 as
select a.*, coalesce(a.page_next_,b.page_max) as page_next
from BlkBook_3 as a , (select max(page) as page_max from RaveBook_1) as b
order by page;
quit;
*------------------- Update page in rave dataset --------------------;
proc sql;
create table rave_update as
select a.*,b.page as page_blk,b.page_next
from rave as a left join BlkBook_4 as b
on a.form=b.form
order by form ,page, x1, x2, y1 desc, y2 desc;
quit;
data coordinate(rename=page_new=page);
set rave_update;
by form page x1 x2 descending y1 descending y2;
retain page_diff;
if first.form then do;
page_diff = page_blk-page;
order = .;
end;
order+1;
if missing(page_next) then page_next = page_blk;
if page+page_diff< page_next then page_new = page+page_diff;
else page_new = page_next-1;
drop page;
run;
proc sort;by form order page x1 x2 descending y1 descending y2 ;run;
*------------------- Join ALS Fields and Forms with coordinate dataset above --------------------;
%macro joinALS(_NOLOCK=&NOLOCK);
*------------------- Tidy up --------------------;
proc sql;
create table als_1 as
select b.DraftFormName as form length=200 label='', input(b.Ordinal,best.) as form_seq, a.FieldOID, a.AnalyteName,
prxchange('s/^\d+$//o',-1,strip(a.DefaultValue)) as DefaultValue length=200,
case when ^missing(c.DataDictionaryName) then 'Y' else '' end as flag
,input(a.Ordinal,best.) as field_seq,a.VariableOID, a.DataFormat, a.ControlType, a.PreText, a.IsLog, a.formoid, a.DataDictionaryName, a.UnitDictionaryName, a.FixedUnit
from Fields(where=(/* ^missing(PreText) and */ DraftFieldActive ='TRUE' and PreText ne 'EXNOW' /* and PreText ne 'DUMMY' */
%if &_NOLOCK = Y %then and pretext ne "NOLOCK"; /* and VariableOID ne 'NOLOCK' */)) as a
inner join forms(where=(DraftFormActive='TRUE')) as b
on a.formoid=b.oid
left join (select distinct DataDictionaryName from DataDictionaryEntries_1) as c
on a.DataDictionaryName=c.DataDictionaryName
order by form, IsLog ,field_seq;
quit;
*------------------- Check dismatch between RAVE and Blank CRF --------------------;
data _null_; /* Check form dismatch */
set als_1;
by form;
where flag='Y';
if first.form then do;
n5 +1;
call symputx("check5_"||cats(n5),strip(form),'g');
call symputx("ck5",cats(n5),'g');
end;
run;
/* Check field possible dismatch */
proc sql;
create table check6 as
select distinct form
from als_1
group by FormOID
having count(distinct ISlog) >1
order by Form;
quit;
data check6;
set check6 end=last;
call symputx("check6_"||cats(_n_),strip(form),'g');
if last then call symputx("ck6",cats(_n_),'g');
run;
/* Output ALS */
data als;
set als_1 end=last nobs=nobs;
by form IsLog field_seq;
if not last then set als_1(firstobs=2 keep=form VariableOID PreText rename=(VariableOID=VariableOID_next PreText=PreText_next form=form_next));
if first.form then order=.;
if ^missing(VariableOID) then order+1;
if form =form_next and missing(VariableOID_next) then label_flag='Y';
if ^missing(VariableOID) then output;
run;
proc sort;by form order;run;
*------------------- Join field with postion dataset --------------------;
data fdf_adjust check7 check8;
retain form formOID Form_seq PreText fieldOID Field_seq page order x1 x2 y1 y2 DataFormat ControlType DataDictionaryName UnitDictionaryName IsLog;
merge als(in=a) coordinate(in=b);
by form order;
if a and not b then do;
output check7;
n7 +1;
call symputx("check7_"||cats(n7),strip(form),'g');
call symputx("ck7",cats(n7),'g');
end;
if b and not a then do;
output check8;
n8 +1;
call symputx("check8_"||cats(n8),strip(form),'g');
call symputx("ck8",cats(n8),'g');
end;
if b then output fdf_adjust;
keep form formOID Form_seq PreText fieldOID Field_seq page order x1 x2 y1 y2 DataFormat ControlType AnalyteName DefaultValue DataDictionaryName UnitDictionaryName IsLog label_flag FixedUnit;
run;
proc sort;by form page form x1 x2 descending y1 descending y2;run;
%mend;
%joinALS(_NOLOCK=&NOLOCK);
%if %symexist(ck7) or %symexist(ck8) %then %do;
%if %symexist(ck7) %then %symdel ck7;
%if %symexist(ck8) %then %symdel ck8;
%if &NOLOCK=Y %then %let NOLOCK=N;
%if &NOLOCK=N %then %let NOLOCK=Y;
%joinALS(_NOLOCK=&NOLOCK);
%end;
%if %symexist(ck7) or %symexist(ck8) %then %RETURN ;
data fdf_adjust2;
set fdf_adjust;
by form page x1 x2 descending y1 descending y2;
length comment background style page_ $200;
comment = strip(put(order,best. -l))||': '||strip(PreText);
background = "0.25 0.666656 0.333328";
style = 'font: italic bold Arial 6.0pt; text-align:left; color:#0000FF';
page_ = put(page-1,best. -l);
run;
data fdf_adjust2;
set fdf_adjust2;
by form page x1 x2 descending y1 descending y2;
width = GetWidth(strip(comment) , 'Arial','Bold Italic','10');
run;
data fdf_adjust2;
set fdf_adjust2;
by form page x1 x2 descending y1 descending y2;
length coordinate $200;
coordinate = catx(' ',put(x1,best. -l),put(y2-6,best. -l),put(x1+85,best. -l),put(y2,best. -l));
run;
%include "&program/MakeFDF.sas";
%MakeFDF(data = fdf_adjust2,
output = &PorojectPath/fdf_adjust.fdf,
page = page_,
background = background,
comment = comment,
style = style,
coordinate = coordinate);
%mend GetField;