1+ /**
2+ * Azure AI Foundry Agent Evaluation - Tutorial 1: Modern Workplace Assistant
3+ *
4+ * This evaluation system demonstrates enterprise AI quality assurance patterns:
5+ * - Business-focused evaluation scenarios
6+ * - Multi-source knowledge validation (SharePoint + MCP)
7+ * - Response quality assessment
8+ * - Source attribution verification
9+ * - Performance and reliability measurement
10+ *
11+ * Educational Focus:
12+ * - Shows how to evaluate enterprise AI systems
13+ * - Demonstrates quality metrics for business scenarios
14+ * - Provides foundation for governance and monitoring
15+ */
16+
17+ using Azure . AI . Projects ;
18+ using Azure . AI . Agents . Models ;
19+ using Azure . Identity ;
20+ using System ;
21+ using System . Collections . Generic ;
22+ using System . IO ;
23+ using System . Linq ;
24+ using System . Threading . Tasks ;
25+ using System . Text . Json ;
26+
27+ namespace ModernWorkplaceAssistant . Evaluation
28+ {
/// <summary>
/// Outcome of evaluating a single test question against the agent.
/// </summary>
public class EvaluationResult
{
    /// <summary>Text of the question that was evaluated.</summary>
    public string Question { get; set; }

    /// <summary>Answer text captured for the question; stays unset when no answer was captured.</summary>
    public string Answer { get; set; }

    /// <summary>Textual outcome of the evaluation for this question.</summary>
    public string Status { get; set; }

    /// <summary>Sources detected in the answer. Starts out as an empty list, never null by default.</summary>
    public List<string> Sources { get; set; } = new List<string>();

    /// <summary>Point in time associated with this evaluation.</summary>
    public DateTime Timestamp { get; set; }

    /// <summary>Measured response time, in milliseconds.</summary>
    public double ResponseTimeMs { get; set; }

    /// <summary>Source the answer was expected to draw on.</summary>
    public string ExpectedSource { get; set; }

    /// <summary>Whether the detected sources agree with <see cref="ExpectedSource"/>.</summary>
    public bool SourceMatch { get; set; }
}
40+
/// <summary>
/// One test case, deserialized from a single JSON line.
/// </summary>
/// <remarks>
/// The property names are deliberately lowercase / snake_case: they must match the
/// JSON keys exactly, because System.Text.Json binds names case-sensitively by default.
/// </remarks>
public class TestQuestion
{
    /// <summary>Question text (JSON key <c>question</c>).</summary>
    public string question { get; set; }

    /// <summary>Source the answer is expected to cite (JSON key <c>expected_source</c>).</summary>
    public string expected_source { get; set; }

    /// <summary>Category label of the test case (JSON key <c>category</c>).</summary>
    public string category { get; set; }
}
47+
/// <summary>
/// Runs the test questions from <c>questions.jsonl</c> against a temporary agent and
/// prints per-question results plus an overall summary to the console.
/// </summary>
public class AgentEvaluator
{
    // Markers whose (case-insensitive) presence in an answer counts as citing that source.
    private static readonly string[] SourceIndicators =
    {
        "SharePoint", "Microsoft Learn", "learn.microsoft.com", "documentation"
    };

    private readonly AIProjectClient projectClient;

    /// <summary>
    /// Loads settings from an optional <c>.env</c> file and creates the project client.
    /// </summary>
    /// <exception cref="InvalidOperationException">PROJECT_ENDPOINT is not configured.</exception>
    public AgentEvaluator()
    {
        LoadEnvironmentVariables();

        // Validate up front so a missing setting is reported by name rather than
        // as an ArgumentNullException from the Uri constructor.
        var endpoint = GetRequiredSetting("PROJECT_ENDPOINT");
        var credential = new DefaultAzureCredential();
        projectClient = new AIProjectClient(new Uri(endpoint), credential);
    }

    /// <summary>
    /// Reads an environment variable that must have a non-blank value.
    /// </summary>
    /// <param name="name">Name of the environment variable.</param>
    /// <returns>The trimmed value.</returns>
    /// <exception cref="InvalidOperationException">The variable is missing or blank.</exception>
    private static string GetRequiredSetting(string name)
    {
        var value = Environment.GetEnvironmentVariable(name);
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new InvalidOperationException(
                $"Required setting '{name}' is not set. Define it in the environment or in the .env file.");
        }

        return value.Trim();
    }

    /// <summary>
    /// Copies <c>KEY=VALUE</c> pairs from a <c>.env</c> file in the current directory into the
    /// process environment. Blank lines, comment lines (<c>#</c>) and lines without a key are ignored.
    /// </summary>
    private static void LoadEnvironmentVariables()
    {
        var envFile = Path.Combine(Directory.GetCurrentDirectory(), ".env");
        if (!File.Exists(envFile))
        {
            return;
        }

        foreach (var rawLine in File.ReadAllLines(envFile))
        {
            // Trim first so indented comments are recognised as comments.
            var line = rawLine.Trim();
            if (line.Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
            {
                continue;
            }

            // Split on the first '=' only; the value itself may contain '='.
            var separator = line.IndexOf('=');
            if (separator < 0)
            {
                continue;
            }

            var key = line.Substring(0, separator).Trim();
            if (key.Length == 0)
            {
                // SetEnvironmentVariable throws on an empty name; skip malformed lines instead.
                continue;
            }

            Environment.SetEnvironmentVariable(key, line.Substring(separator + 1).Trim());
        }
    }

    /// <summary>
    /// Loads the test questions, creates the evaluation agent, evaluates every question,
    /// prints a summary and deletes the agent again.
    /// </summary>
    /// <returns>One <see cref="EvaluationResult"/> per evaluated question, in input order.</returns>
    public async Task<List<EvaluationResult>> RunEvaluationAsync()
    {
        Console.WriteLine("🧪 Starting Modern Workplace Assistant Evaluation");
        Console.WriteLine("==================================================");

        var questions = await LoadTestQuestionsAsync();
        Console.WriteLine($"📝 Loaded {questions.Count} test questions");

        Console.WriteLine("🤖 Creating evaluation agent...");
        var agentConfig = await CreateWorkplaceAssistantAsync();

        var results = new List<EvaluationResult>();
        try
        {
            for (int i = 0; i < questions.Count; i++)
            {
                var question = questions[i];
                Console.WriteLine($"\n [{i + 1}/{questions.Count}] Testing: {question.category}");
                Console.WriteLine($"❓ Question: {question.question}");

                var result = await EvaluateQuestionAsync(agentConfig.Agent, question);
                results.Add(result);

                Console.WriteLine($"✅ Status: {result.Status}");
                Console.WriteLine($"⏱️ Response time: {result.ResponseTimeMs:F0}ms");
                Console.WriteLine($"📚 Sources found: {result.Sources.Count}");
                Console.WriteLine($"🎯 Expected source match: {(result.SourceMatch ? "✅" : "⚠️")}");

                if (result.Sources.Any())
                {
                    Console.WriteLine(" Sources:");
                    foreach (var source in result.Sources.Take(3))
                    {
                        Console.WriteLine($" - {source}");
                    }
                }
            }

            DisplayEvaluationSummary(results);
        }
        finally
        {
            // Always delete the agent, even when evaluation or the summary throws.
            await CleanupAgentAsync(agentConfig.Agent);
        }

        return results;
    }

    /// <summary>
    /// Reads <c>questions.jsonl</c> from the current directory, one JSON object per line.
    /// Lines that are blank, fail to parse, or carry no question text are skipped with a warning.
    /// </summary>
    /// <exception cref="FileNotFoundException">The questions file does not exist.</exception>
    private async Task<List<TestQuestion>> LoadTestQuestionsAsync()
    {
        var questionsFile = "questions.jsonl";
        if (!File.Exists(questionsFile))
        {
            throw new FileNotFoundException($"Test questions file not found: {questionsFile}");
        }

        var questions = new List<TestQuestion>();
        var lines = await File.ReadAllLinesAsync(questionsFile);

        foreach (var line in lines)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            TestQuestion question;
            try
            {
                question = JsonSerializer.Deserialize<TestQuestion>(line);
            }
            catch (JsonException ex)
            {
                Console.WriteLine($"⚠️ Failed to parse question: {line} - {ex.Message}");
                continue;
            }

            // Deserialize returns null for the JSON literal "null", and an object without a
            // "question" key yields a null question; neither can be evaluated.
            if (question == null || string.IsNullOrWhiteSpace(question.question))
            {
                Console.WriteLine($"⚠️ Skipping entry without question text: {line}");
                continue;
            }

            questions.Add(question);
        }

        return questions;
    }

    /// <summary>
    /// Creates the agent used for evaluation. The MCP tool is always attached; the SharePoint
    /// tool is attached only when its connection can be resolved.
    /// </summary>
    /// <returns>The created agent together with the tools it was configured with
    /// (<c>SharepointTool</c> is null when SharePoint is unavailable).</returns>
    /// <exception cref="InvalidOperationException">MODEL_DEPLOYMENT_NAME or MCP_SERVER_URL is not configured.</exception>
    private async Task<(Agent Agent, McpTool McpTool, SharepointTool SharepointTool)> CreateWorkplaceAssistantAsync()
    {
        // Fail fast on missing settings before any remote call is made.
        var modelDeploymentName = GetRequiredSetting("MODEL_DEPLOYMENT_NAME");
        var mcpServerUrl = GetRequiredSetting("MCP_SERVER_URL");

        var sharePointResourceName = Environment.GetEnvironmentVariable("SHAREPOINT_RESOURCE_NAME");
        SharepointTool sharePointTool = null;

        try
        {
            var sharePointConn = await projectClient.Connections.GetConnectionAsync(sharePointResourceName);
            sharePointTool = new SharepointTool(sharePointConn.Id);
        }
        catch (Exception ex)
        {
            // SharePoint is optional: continue with the MCP tool only.
            Console.WriteLine($"⚠️ SharePoint connection failed: {ex.Message}");
        }

        var mcpTool = new McpTool("microsoft_learn", mcpServerUrl);

        var instructions = sharePointTool != null
            ? "You are a Modern Workplace Assistant. Use SharePoint for company policies and Microsoft Learn for technical guidance. Always cite your sources."
            : "You are a Technical Assistant with Microsoft Learn access. Provide technical guidance and cite sources.";

        var tools = new List<ToolDefinition>();
        if (sharePointTool != null)
        {
            tools.AddRange(sharePointTool.Definitions);
        }
        tools.AddRange(mcpTool.Definitions);

        var agent = await projectClient.Agents.CreateAgentAsync(
            modelDeploymentName,
            name: "Evaluation Agent",
            instructions: instructions,
            tools: tools
        );

        return (agent, mcpTool, sharePointTool);
    }

    /// <summary>
    /// Sends one question to the agent on a fresh thread, polls the run until it leaves the
    /// queued/in-progress states, and records the answer, timing and source attribution.
    /// </summary>
    /// <param name="agent">Agent to run the question against.</param>
    /// <param name="question">Test case to evaluate.</param>
    /// <returns>The populated result. Failures are reported through <c>Status</c>, not thrown.</returns>
    private async Task<EvaluationResult> EvaluateQuestionAsync(Agent agent, TestQuestion question)
    {
        var result = new EvaluationResult
        {
            Question = question.question,
            ExpectedSource = question.expected_source,
            Timestamp = DateTime.UtcNow
        };

        // Stopwatch is monotonic; subtracting wall-clock timestamps is not.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        try
        {
            var thread = await projectClient.Agents.CreateThreadAsync();
            try
            {
                await projectClient.Agents.CreateMessageAsync(thread.Id, MessageRole.User, question.question);
                var run = await projectClient.Agents.CreateRunAsync(thread.Id, agent.Id);

                while (run.Status == RunStatus.InProgress || run.Status == RunStatus.Queued)
                {
                    await Task.Delay(1000);
                    run = await projectClient.Agents.GetRunAsync(thread.Id, run.Id);
                }

                result.ResponseTimeMs = stopwatch.Elapsed.TotalMilliseconds;

                if (run.Status == RunStatus.Completed)
                {
                    var messages = await projectClient.Agents.GetMessagesAsync(thread.Id);
                    var assistantMessage = messages.Value
                        .Where(m => m.Role == MessageRole.Assistant)
                        .OrderByDescending(m => m.CreatedAt)
                        .FirstOrDefault();

                    if (assistantMessage != null)
                    {
                        result.Answer = assistantMessage.Content.FirstOrDefault()?.Text ?? "";
                        result.Status = "Completed";

                        result.Sources = ExtractSourcesFromResponse(result.Answer);
                        result.SourceMatch = CheckSourceMatch(result.Sources, question.expected_source);
                    }
                    else
                    {
                        result.Status = "No response";
                    }
                }
                else
                {
                    result.Status = $"Failed: {run.Status}";
                }
            }
            finally
            {
                // Delete the thread on every path. Cleanup is best-effort so that a failed
                // delete cannot overwrite the outcome already recorded for the question.
                try
                {
                    await projectClient.Agents.DeleteThreadAsync(thread.Id);
                }
                catch (Exception cleanupEx)
                {
                    Console.WriteLine($"⚠️ Thread cleanup warning: {cleanupEx.Message}");
                }
            }
        }
        catch (Exception ex)
        {
            result.Status = $"Error: {ex.Message}";
        }

        return result;
    }

    /// <summary>
    /// Returns the known source indicators that occur (case-insensitively) in the response.
    /// </summary>
    /// <param name="response">Answer text; null or empty yields an empty list.</param>
    private List<string> ExtractSourcesFromResponse(string response)
    {
        if (string.IsNullOrEmpty(response))
        {
            return new List<string>();
        }

        return SourceIndicators
            .Where(indicator => response.Contains(indicator, StringComparison.OrdinalIgnoreCase))
            .ToList();
    }

    /// <summary>
    /// Checks whether any found source matches the expected one. A match is a case-insensitive
    /// substring relation in either direction. No expectation counts as a match.
    /// </summary>
    private bool CheckSourceMatch(List<string> foundSources, string expectedSource)
    {
        if (string.IsNullOrEmpty(expectedSource))
        {
            return true;
        }

        if (foundSources == null)
        {
            return false;
        }

        return foundSources.Any(source =>
            source.Contains(expectedSource, StringComparison.OrdinalIgnoreCase) ||
            expectedSource.Contains(source, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Percentage of <paramref name="part"/> in <paramref name="total"/>; 0 when total is 0.
    /// </summary>
    private static double Percentage(int part, int total)
    {
        return total == 0 ? 0.0 : 100.0 * part / total;
    }

    /// <summary>
    /// Prints success rate, average response time of completed runs, source attribution
    /// accuracy and the list of non-completed cases.
    /// </summary>
    private void DisplayEvaluationSummary(List<EvaluationResult> results)
    {
        Console.WriteLine("\n 📊 EVALUATION SUMMARY");
        Console.WriteLine("=====================");

        var total = results.Count;
        var completed = results.Where(r => r.Status == "Completed").ToList();
        var failed = results.Where(r => r.Status != "Completed").ToList();
        var sourceMatches = results.Count(r => r.SourceMatch);

        // Enumerable.Average throws on an empty sequence, so only call it when a run completed.
        var averageText = completed.Count > 0
            ? $"{completed.Average(r => r.ResponseTimeMs):F0}ms"
            : "n/a";

        Console.WriteLine($"✅ Successful responses: {completed.Count}/{total} ({Percentage(completed.Count, total):F1}%)");
        Console.WriteLine($"⏱️ Average response time: {averageText}");
        Console.WriteLine($"🎯 Source attribution accuracy: {sourceMatches}/{total} ({Percentage(sourceMatches, total):F1}%)");

        if (failed.Any())
        {
            Console.WriteLine("\n ⚠️ Failed Cases:");
            foreach (var fail in failed)
            {
                Console.WriteLine($" - {fail.Question}: {fail.Status}");
            }
        }
    }

    /// <summary>
    /// Deletes the evaluation agent. Failures are reported as a warning and not rethrown.
    /// </summary>
    private async Task CleanupAgentAsync(Agent agent)
    {
        if (agent == null)
        {
            return;
        }

        try
        {
            await projectClient.Agents.DeleteAgentAsync(agent.Id);
            Console.WriteLine("🧹 Cleanup completed");
        }
        catch (Exception ex)
        {
            Console.WriteLine($"⚠️ Cleanup warning: {ex.Message}");
        }
    }
}
334+
335+ // Entry point for evaluation
/// <summary>
/// Console entry point for the evaluation run.
/// </summary>
public class EvaluationProgram
{
    /// <summary>
    /// Runs the evaluation and reports how many test cases were processed.
    /// On any exception the message is written to standard error and the process exits with code 1.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    public static async Task Main(string[] args)
    {
        Console.WriteLine("Azure AI Foundry - Modern Workplace Assistant Evaluation");
        Console.WriteLine("========================================================");

        try
        {
            var evaluator = new AgentEvaluator();
            var results = await evaluator.RunEvaluationAsync();

            Console.WriteLine($"\n 🎉 Evaluation completed with {results.Count} test cases");
        }
        catch (Exception ex)
        {
            // Failures go to stderr so they stay visible when stdout is redirected.
            Console.Error.WriteLine($"❌ Evaluation failed: {ex.Message}");
            Environment.Exit(1);
        }
    }
}
357+ }
0 commit comments