11using System ;
22using System . Collections . Generic ;
3+ using System . Linq ;
34using System . Net ;
45using System . Net . Http ;
56using System . Text ;
67using System . Threading . Tasks ;
78using System . Xml ;
9+ using AngleSharp ;
10+ using AngleSharp . Dom ;
11+ using AngleSharp . XPath ;
812using ExcelDna . Integration ;
913using ExcelDna . Registration ;
10- using HtmlAgilityPack ;
1114
1215namespace ImportFunctions
1316{
1417 public static class Functions
1518 {
16- // We will be using the single HttpClient from multiple threads,
17- // which is OK as long as we're not changing the default request headers.
18- static readonly HttpClient _httpClient ;
19+ //// We will be using the single HttpClient from multiple threads,
20+ //// which is OK as long as we're not changing the default request headers.
21+ // static readonly HttpClient _httpClient;
1922
2023 static Functions ( )
2124 {
22- _httpClient = new HttpClient ( ) ;
25+ // _httpClient = new HttpClient();
2326 ServicePointManager . SecurityProtocol =
2427 SecurityProtocolType . Tls |
2528 SecurityProtocolType . Tls11 |
@@ -44,12 +47,22 @@ public static async Task<object> ImportXml(string url, string xpathQuery)
4447
4548 try
4649 {
47- var response = await _httpClient . GetStringAsync ( url ) ;
48- var doc = new HtmlDocument ( ) ;
49- doc . LoadHtml ( response ) ;
50+ IConfiguration config = Configuration . Default . WithDefaultLoader ( ) ;
51+ IBrowsingContext context = BrowsingContext . New ( config ) ;
52+ IDocument document = await context . OpenAsync ( url ) ;
5053
51- var node = doc . DocumentNode . SelectSingleNode ( xpathQuery ) ;
52- return node ? . InnerText ?? "Error: No data found for the given XPath query" ;
54+ var nodes = document . Body . SelectNodes ( xpathQuery ) ;
55+
56+ if ( nodes == null || nodes . Count == 0 )
57+ return "Error: No data found for the given XPath query" ;
58+
59+ // return a 2D object[,] array with a single column containing the TextContent of each matched node
60+ var resultArray = new object [ nodes . Count , 1 ] ;
61+ for ( int i = 0 ; i < nodes . Count ; i ++ )
62+ {
63+ resultArray [ i , 0 ] = nodes [ i ] . TextContent ;
64+ }
65+ return resultArray ;
5366 }
5467 catch ( HttpRequestException rex )
5568 {
@@ -88,38 +101,17 @@ public static async Task<object> ImportHtml(
88101
89102 try
90103 {
91- var response = await _httpClient . GetStringAsync ( url ) ;
92- var doc = new HtmlDocument ( ) ;
93- doc . LoadHtml ( response ) ;
104+ IConfiguration config = Configuration . Default . WithDefaultLoader ( ) ;
105+ IBrowsingContext context = BrowsingContext . New ( config ) ;
106+ IDocument document = await context . OpenAsync ( url ) ;
94107
108+ object result ;
95109 if ( dataType == "table" )
96- return ExtractTable ( doc , index ) ;
110+ result = ExtractTable ( document , index ) ;
97111 else
98- return ExtractList ( doc , index ) ;
99- }
100- catch ( HttpRequestException rex )
101- {
102- return $ "Error: Unable to fetch data from the URL - { rex . Message } ";
103- }
104- catch ( Exception ex )
105- {
106- return $ "Error: { ex . Message } ";
107- }
108- }
112+ result = ExtractList ( document , index ) ;
109113
110- [ ExcelFunction ( Description = "Imports data from a given URL" ) ]
111- public static async Task < object > HttpGet ( string url )
112- {
113- if ( string . IsNullOrWhiteSpace ( url ) )
114- {
115- return "Error: URL is required" ;
116- // return ExcelError.ExcelErrorValue;
117- }
118-
119- try
120- {
121- var response = await _httpClient . GetStringAsync ( url ) ;
122- return response ;
114+ return result ;
123115 }
124116 catch ( HttpRequestException rex )
125117 {
@@ -131,21 +123,45 @@ public static async Task<object> HttpGet(string url)
131123 }
132124 }
133125
134- static object ExtractTable ( HtmlDocument doc , int indexOneBased )
126+ //[ExcelFunction(Description = "Imports data from a given URL")]
127+ //public static async Task<object> HttpGet(string url)
128+ //{
129+ // if (string.IsNullOrWhiteSpace(url))
130+ // {
131+ // return "Error: URL is required";
132+ // // return ExcelError.ExcelErrorValue;
133+ // }
134+
135+ // try
136+ // {
137+ // var response = await _httpClient.GetStringAsync(url);
138+ // return response;
139+ // }
140+ // catch (HttpRequestException rex)
141+ // {
142+ // return $"Error: Unable to fetch data from the URL - {rex.Message}";
143+ // }
144+ // catch (Exception ex)
145+ // {
146+ // return $"Error: {ex.Message}";
147+ // }
148+ //}
149+
150+ static object ExtractTable ( IDocument document , int indexOneBased )
135151 {
136- var tables = doc . DocumentNode . SelectNodes ( "//table" ) ;
152+ var tables = document . Body . SelectNodes ( "//table" ) ;
137153 if ( tables == null || tables . Count < indexOneBased )
138154 return "Error: Table not found" ;
139155
140- var table = tables [ indexOneBased - 1 ] ;
156+ var table = ( IElement ) tables [ indexOneBased - 1 ] ;
141157
142158 var results = new List < List < string > > ( ) ;
143- foreach ( var row in table . SelectNodes ( ".//tr" ) )
159+ foreach ( var row in table . SelectNodes ( ".//tr" ) . Cast < IElement > ( ) )
144160 {
145161 var rowResult = new List < string > ( ) ;
146- foreach ( var cell in row . SelectNodes ( ".//th|.//td" ) )
162+ foreach ( var cell in row . SelectNodes ( ".//th|.//td" ) . Cast < IElement > ( ) )
147163 {
148- rowResult . Add ( cell . InnerText . Trim ( ) ) ;
164+ rowResult . Add ( cell . TextContent ) ;
149165 }
150166 results . Add ( rowResult ) ;
151167 }
@@ -162,21 +178,21 @@ static object ExtractTable(HtmlDocument doc, int indexOneBased)
162178 resultArray [ i , j ] = results [ i ] [ j ] ;
163179 }
164180 }
165- return results ;
181+ return resultArray ;
166182 }
167183
168- static object ExtractList ( HtmlDocument doc , int indexOneBased )
184+ static object ExtractList ( IDocument document , int indexOneBased )
169185 {
170- var lists = doc . DocumentNode . SelectNodes ( "//ul | //ol" ) ;
186+ var lists = document . Body . SelectNodes ( "//ul | //ol" ) ;
171187 if ( lists == null || lists . Count < indexOneBased )
172188 return "Error: List not found" ;
173189
174- var list = lists [ indexOneBased - 1 ] ;
190+ var list = ( IElement ) lists [ indexOneBased - 1 ] ;
175191
176192 var results = new List < string > ( ) ;
177193 foreach ( var item in list . SelectNodes ( ".//li" ) )
178194 {
179- results . Add ( item . InnerText . Trim ( ) ) ;
195+ results . Add ( item . TextContent ) ;
180196 }
181197
182198 // Convert results to a 2D object array with a single column
@@ -186,7 +202,7 @@ static object ExtractList(HtmlDocument doc, int indexOneBased)
186202 resultArray [ i , 0 ] = results [ i ] ;
187203 }
188204
189- return results ;
205+ return resultArray ;
190206 }
191207 }
192208}
0 commit comments