-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathAviation.Classification.Utils.cls
143 lines (113 loc) · 5.38 KB
/
Aviation.Classification.Utils.cls
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/// This sample illustrates how to build and test Text Categorization models programmatically
/// Use or operation of this code is subject to acceptance of the license available in the code repository for this code.
Class Aviation.Classification.Utils
{
/// Demonstration method using the <class>%iKnow.Classification.IKnowBuilder</class> API to
/// create a simple classifier on the "Aviation Events demo" domain.<br/>
/// The training and test sets are selected with SQL filters on the record ID modulo 10, the
/// "HighestInjury" metadata field supplies the categories, and the resulting model is written
/// to the class Aviation.Classification.MyClassifier.<br/>
/// Returns the first error status encountered (exceptions are converted to a status).
/// When the domain is missing or has no sources, a hint is written to the current device and
/// $$$OK is returned.
ClassMethod CreateClassifier() As %Status
{
    set sc = $$$OK
    try {
        // The domain must exist and contain at least one source.
        // $select only evaluates the source count when a domain ID was found.
        set domainId = $system.iKnow.GetDomainId("Aviation Events demo")
        set sourceCount = $select(domainId: ##class(%iKnow.Queries.SourceAPI).GetCountByDomain(domainId), 1: 0)
        if 'sourceCount {
            write !,"Aviation events domain is not set up. Run ##class(Aviation.Utils).SetupStandalone() first",!
            quit
        }

        // Split the records into a training and a test set based on the record ID modulo 10.
        // Unlike a random filter, this split is reproducible between runs.
        set trainingQuery = "SELECT ':SQL:'||YEAR(EventDate)||':'||EventID AS ExternalID FROM Aviation.Event WHERE (ID # 10) <= 5"
        set testQuery = "SELECT ':SQL:'||YEAR(EventDate)||':'||EventID AS ExternalID FROM Aviation.Event WHERE (ID # 10) > 5"
        set trainingFilter = ##class(%iKnow.Filters.SqlFilter).%New(domainId, trainingQuery)
        set testFilter = ##class(%iKnow.Filters.SqlFilter).%New(domainId, testQuery)

        // Set up the builder and use the "HighestInjury" metadata field as the category.
        // Note: "HighestInjury" is not an ideal category, as for example a
        // "HighestInjury=Serious" record could well mention minor injuries too.
        set builder = ##class(%iKnow.Classification.IKnowBuilder).%New("Aviation Events demo", trainingFilter)
        set builder.TestSet = testFilter
        set sc = builder.%LoadMetadataCategories("HighestInjury")
        if $$$ISERR(sc) {
            quit
        }

        // Seed the model with a few terms
        set sc = builder.%PopulateTerms(50)
        if $$$ISERR(sc) {
            quit
        }
        set sc = builder.%AddEntity("crash site")
        if $$$ISERR(sc) {
            quit
        }
        set termQuery = "SELECT TOP 50 EntityValue AS Term FROM %iKnow_Objects.EntityUnique WHERE DomainID = "_domainId_" AND EntityValue LIKE '%injur%' ORDER BY CorpusSpread DESC"
        set sc = builder.%AddTermsFromSQL(termQuery)
        if $$$ISERR(sc) {
            quit
        }

        // Add more terms here as you see fit
        // ...

        // Generate the classifier class
        set sc = builder.%CreateClassifierClass("Aviation.Classification.MyClassifier", 1, 1, 1)
        if $$$ISERR(sc) {
            quit
        }
    } catch (err) {
        set sc = err.AsStatus()
    }
    quit sc
}
/// Code sample using the <class>%iKnow.Classification.Optimizer</class> class to increase the
/// accuracy of our classification model.<br/>
/// Restores a builder from the Aviation.Classification.MyClassifier class definition, offers the
/// optimizer a list of candidate entities retrieved through
/// <class>%iKnow.Queries.EntityAPI</class>.GetTopBM25(), runs the optimization and saves the
/// result back to the same classifier class.<br/>
/// Returns the first error status encountered (exceptions are converted to a status).
/// When the classifier class does not exist yet, a hint is written to the current device and
/// $$$OK is returned.
ClassMethod OptimizeClassifier() As %Status
{
    set tSC = $$$OK
    try {
        // Single source of truth for the classifier class name, so the class that is
        // loaded is guaranteed to be the class that is saved again.
        set tClassName = "Aviation.Classification.MyClassifier"

        if '$$$defClassDefined(tClassName) {
            w !,"Please run the ..CreateClassifier() method first",!
            quit
        }

        set tDomId = $system.iKnow.GetDomainId("Aviation Events demo")

        // Restore a builder object from our class definition
        set tSC = ##class(%iKnow.Classification.Builder).%LoadFromDefinition(tClassName, .tBuilder)
        quit:$$$ISERR(tSC)

        // Instantiate an optimizer object and hand it the restored builder
        set tOptimizer = ##class(%iKnow.Classification.Optimizer).%New()
        set tOptimizer.Builder = tBuilder

        // Load candidate entities
        // NOTE(review): the arguments 1, 200 presumably are page number and page size - confirm
        set tSC = ##class(%iKnow.Queries.EntityAPI).GetTopBM25(.tCandidates, tDomId, 1, 200)
        quit:$$$ISERR(tSC)

        // Skip the first 50, as we already added them in ..CreateClassifier()
        for i=1:1:50 { kill tCandidates(i) }

        // NOTE(review): the meaning of the second argument (2) is not visible here - confirm
        // against the Optimizer class reference
        set tSC = tOptimizer.LoadTermsArray(.tCandidates, 2)
        quit:$$$ISERR(tSC)

        // Fire off the optimization process, reporting progress to the current device
        set tOptimizer.Verbose = 1
        set tSC = tOptimizer.Optimize(20)
        quit:$$$ISERR(tSC)
        write !

        // Save the updated model back to the classifier class it was loaded from.
        // (Previously a misspelled class name was used here, so the optimized model ended up
        // in a different class than the one used by ..TestClassifier().)
        set tSC = tOptimizer.SaveClassifier(tClassName, 1)
        quit:$$$ISERR(tSC)

    } catch (ex) {
        set tSC = ex.AsStatus()
    }
    quit tSC
}
/// Code sample using the batch testing features offered by <class>%iKnow.Classification.Utils</class>.<br/>
/// Runs Aviation.Classification.MyClassifier against the test subset of the "Aviation Events demo"
/// domain (records whose ID modulo 10 is greater than 5), comparing predictions with the
/// "HighestInjury" metadata field, and writes the number of correct predictions plus the
/// aggregate precision, recall and F-measure figures to the current device.<br/>
/// The stored test results are dropped again before returning, also when an error occurred
/// after they were created.<br/>
/// Returns the first error status encountered (exceptions and SQL errors are converted to a
/// status). When the classifier class does not exist yet, a hint is written to the current
/// device and $$$OK is returned.
ClassMethod TestClassifier() As %Status
{
    // tResId is initialized up front so the cleanup code below can tell whether
    // a test run was actually created.
    set tSC = $$$OK, tResId = ""
    try {
        if '$$$defClassDefined("Aviation.Classification.MyClassifier") {
            w !,"Please run the ..CreateClassifier() method first",!
            quit
        }

        set tDomId = $system.iKnow.GetDomainId("Aviation Events demo")

        // Same test set selection as used in ..CreateClassifier()
        set tTestSet = ##class(%iKnow.Filters.SqlFilter).%New(tDomId, "SELECT ':SQL:'||YEAR(EventDate)||':'||EventID AS ExternalID FROM Aviation.Event WHERE (ID # 10) > 5")

        set tSC = ##class(%iKnow.Classification.Utils).%RunModelFromDomain(.tResId, "Aviation.Classification.MyClassifier", tDomId, "HighestInjury", tTestSet)
        quit:$$$ISERR(tSC)

        // Count the total number of test records and the number of correct predictions
        &SQL(SELECT COUNT(*), SUM(CASE WHEN ActualValue=PredictedValue THEN 1 ELSE 0 END)
                INTO :tTotal, :tRight
                FROM %DeepSee_PMML_Utils.TempResult
                WHERE TestId = :tResId)
        // A negative SQLCODE signals an SQL error; report it instead of printing empty counts
        if SQLCODE < 0 {
            throw ##class(%Exception.SQL).CreateFromSQLCODE(SQLCODE, $get(%msg))
        }
        write !,"We got ",tRight," records right out of ",tTotal,!

        // Retrieve precision and recall metrics for this test result
        set tSC = ##class(%DeepSee.PMML.Utils.TempResult).GetAggregatePrecisionMetrics(tResId,.tPrecision,.tMPrecision,.tRecall,.tMRecall,.tFMeasure,.tMFMeasure)
        quit:$$$ISERR(tSC)

        write !,"            micro-AVG   macro-AVG"
        write !,"Precision:  ",$fnumber(tMPrecision,"",2),"%      ",$fnumber(tPrecision,"",2),"%"
        write !,"Recall:     ",$fnumber(tMRecall,"",2),"%      ",$fnumber(tRecall,"",2),"%"
        write !,"F-measure:  ",$fnumber(tMFMeasure,"",2),"%      ",$fnumber(tFMeasure,"",2),"%",!

    } catch (ex) {
        set tSC = ex.AsStatus()
    }

    // Drop test results, regardless of whether reporting succeeded, so a failure
    // halfway through does not leave stored results behind.
    if tResId '= "" {
        try {
            do ##class(%DeepSee.PMML.Utils).%DropResults(tResId)
        } catch (ex) {
            // Do not let a cleanup failure mask an earlier error
            set:$$$ISOK(tSC) tSC = ex.AsStatus()
        }
    }
    quit tSC
}
}