22using System . Diagnostics ;
33using System . Text . RegularExpressions ;
44using System . Globalization ;
5-
5+ using System . Net ;
66using OpenQA . Selenium ;
77using OpenQA . Selenium . Chrome ;
88using OpenQA . Selenium . Support . UI ;
@@ -56,10 +56,10 @@ public void Start(string domain, string timeStampFormat, bool split = false)
5656 Console . ReadKey ( ) ;
5757 }
5858
59- Scrape ( timeStampFormat , split ) ;
59+ Scrape ( timeStampFormat , domain , split ) ;
6060 }
6161
62- private void Scrape ( string timeStampFormat , bool split )
62+ private void Scrape ( string timeStampFormat , string domain , bool split )
6363 {
6464 if ( _wait is null )
6565 {
@@ -95,6 +95,14 @@ private void Scrape(string timeStampFormat, bool split)
9595 Directory . CreateDirectory ( Path . Combine ( AppDomain . CurrentDomain . BaseDirectory , exportName ) ) ;
9696 }
9797
98+ string imgDir = Path . Combine (
99+ AppDomain . CurrentDomain . BaseDirectory ,
100+ split ? $ "{ exportName } \\ images" : $ "images_{ currentExportDate } "
101+ ) ;
102+
103+ // create directory for exported images
104+ Directory . CreateDirectory ( imgDir ) ;
105+
98106 bool isFirst = true ; // this check is needed, because it usually opens first note automatically
99107
100108 while ( true )
@@ -116,7 +124,7 @@ private void Scrape(string timeStampFormat, bool split)
116124 {
117125 // if element is not the first one, use following element
118126 element = _wait . Until ( e => e . FindElement ( By . XPath ( @"//div[contains(@class, 'open')]/following::div" ) ) ) ;
119- }
127+ }
120128 else
121129 {
122130 element = _wait . Until ( e => e . FindElement ( By . XPath ( @"//div[contains(@class, 'open')]" ) ) ) ;
@@ -135,15 +143,15 @@ private void Scrape(string timeStampFormat, bool split)
135143 if ( createdString . ToLower ( ) . Contains ( "yesterday" ) )
136144 {
137145 createdDate = DateTime . Now . AddDays ( - 1 ) . Date ; // get yesterday's date
138- }
146+ }
139147 else if ( createdString . EndsWith ( "ago" ) )
140148 {
141149 createdDate = RelativeTimeParser . Parse ( createdString ) ;
142- }
150+ }
143151 else if ( SimplifiedDateParser . TryParseMdHm ( createdString , out DateTime parsedSimple ) )
144152 {
145153 createdDate = parsedSimple ;
146- }
154+ }
147155 else
148156 {
149157 createdDate = DateTime . Parse ( createdString , new CultureInfo ( "en-US" ) ) ;
@@ -152,12 +160,12 @@ private void Scrape(string timeStampFormat, bool split)
152160 try
153161 {
154162 innerWait . Until ( e => e . FindElements ( By . XPath ( @"//div[contains(@class, 'open')]/div[2][not(./i)]" ) ) . Count == 1 ) ;
155- }
163+ }
156164 catch
157165 {
158166 // found note that is not supported, log this fact and continue
159167 SaveToFile (
160- ! split ? fileName : $ "{ exportName } \\ { $ "note_{ createdDate . ToString ( timeStampFormat ) } "} .md ",
168+ ! split ? fileName : $ "{ exportName } \\ { $ "note_{ createdDate . ToString ( timeStampFormat ) } "} ",
161169 $ "** Unsupported note type (Mind-map or Sound note) (Created at: { createdDate : dd/MM/yyyy HH:mm} )**"
162170 ) ;
163171 ExecuteScroll ( notesList , element ) ;
@@ -167,14 +175,37 @@ private void Scrape(string timeStampFormat, bool split)
167175
168176 _wait . Until ( e => e . FindElement ( By . XPath ( @"//div[contains(@class, 'origin-title')]/div" ) ) . Displayed ) ;
169177
178+ var noteContainer = _wait . Until ( e => e . FindElement ( By . XPath ( @"//div[contains(@class, 'pm-container')]" ) ) ) ;
179+
170180 string title = _wait . Until ( e => e . FindElement ( By . XPath ( @"//div[contains(@class, 'origin-title')]/div" ) ) ) . Text ;
171- string value = _wait . Until ( e => e . FindElement ( By . XPath ( @"//div[contains(@class, 'pm-container')]" ) ) ) . Text ;
181+ string value = noteContainer . Text ;
172182
173183 SaveToFile (
174- ! split ? fileName : $ "{ exportName } \\ { $ "note_{ createdDate . ToString ( timeStampFormat ) } "} .md ",
184+ ! split ? fileName : $ "{ exportName } \\ { $ "note_{ createdDate . ToString ( timeStampFormat ) } "} ",
175185 value ,
176186 title
177187 ) ;
188+
189+ var embeddedImages = noteContainer . FindElements ( By . XPath ( @".//div[contains(@class, 'image-view')]/img" ) ) ;
190+
191+ if ( embeddedImages . Count != 0 )
192+ {
193+ var cookies = _driver . Manage ( ) . Cookies . AllCookies ;
194+
195+ // IWebElement because non nullish type is needed (force typing)
196+ foreach ( var t in embeddedImages . Select ( ( item , idx ) => ( idx , ( IWebElement ) item ) ) )
197+ {
198+ int idx = t . idx ;
199+ IWebElement item = t . Item2 ;
200+
201+ var imgSrc = item . GetAttribute ( "src" ) ;
202+ string imgName = $ "note_img_{ idx } _{ createdDate . ToString ( timeStampFormat ) } .png";
203+ string imgPath = Path . Combine ( imgDir , imgName ) ;
204+
205+ SaveImage ( imgPath , imgSrc , domain , cookies ) ;
206+ }
207+ }
208+
178209 ExecuteScroll ( notesList , element ) ;
179210 currentNote ++ ;
180211 }
@@ -187,7 +218,7 @@ private void Scrape(string timeStampFormat, bool split)
187218 if ( split )
188219 {
189220 Console . WriteLine ( $ "Successfully exported notes to { exportName . Pastel ( Color . WhiteSmoke ) } directory\n ". Pastel ( Color . LimeGreen ) ) ;
190- }
221+ }
191222 else
192223 {
193224 Console . WriteLine ( $ "Successfully exported notes to { fileName . Pastel ( Color . WhiteSmoke ) } \n ". Pastel ( Color . LimeGreen ) ) ;
@@ -219,6 +250,40 @@ private static void SaveToFile(string fileName, string content, string? title =
219250 sw . WriteLine ( content ) ;
220251 }
221252
/// <summary>
/// Downloads one embedded note image and writes it to <paramref name="path"/>.
/// </summary>
/// <remarks>
/// Best-effort: every failure (missing or invalid <c>src</c>, HTTP error, I/O error) is
/// reported on the console and the method returns normally, so a single broken image
/// does not abort the export. An existing file at <paramref name="path"/> is never overwritten.
/// </remarks>
/// <param name="path">Destination file path. If a file already exists there, nothing is downloaded.</param>
/// <param name="src">
/// Value of the image element's <c>src</c> attribute. An absolute http(s) URL is used as-is;
/// anything else is resolved against <c>https://{domain}</c>.
/// </param>
/// <param name="domain">
/// Host used as the base for a relative <paramref name="src"/>
/// (assumed to be a bare host name without scheme — confirm against the caller).
/// </param>
/// <param name="cookies">Browser cookies that are forwarded with the download request.</param>
private static void SaveImage(string path, string? src, string domain, IEnumerable<OpenQA.Selenium.Cookie> cookies)
{
    if (File.Exists(path))
    {
        return;
    }

    string errorTag = "[ERROR]".Pastel(Color.Red);

    // Work out the address to request. The scheme check matters because on Unix a
    // rooted path such as "/img/1.png" parses as an absolute file:// URI.
    Uri? imageUri = null;

    if (!string.IsNullOrWhiteSpace(src))
    {
        if (Uri.TryCreate(src, UriKind.Absolute, out Uri? absolute)
            && (absolute.Scheme == Uri.UriSchemeHttp || absolute.Scheme == Uri.UriSchemeHttps))
        {
            imageUri = absolute;
        }
        else if (Uri.TryCreate($"https://{domain}", UriKind.Absolute, out Uri? baseUri))
        {
            Uri.TryCreate(baseUri, src, out imageUri);
        }
    }

    if (imageUri is null)
    {
        Console.WriteLine($"{errorTag} Couldn't fetch image: no usable source address.");
        return;
    }

    var handler = new HttpClientHandler
    {
        CookieContainer = new CookieContainer()
    };

    foreach (var cookie in cookies)
    {
        try
        {
            handler.CookieContainer.Add(
                new System.Net.Cookie(cookie.Name, cookie.Value, cookie.Path, cookie.Domain)
            );
        }
        catch (CookieException)
        {
            // Deliberately skipped: a browser cookie that System.Net rejects (e.g. reserved
            // characters in its value) must not prevent the download from being attempted.
        }
        catch (ArgumentException)
        {
            // Deliberately skipped: CookieContainer refuses cookies without a domain.
        }
    }

    // HttpClient owns the handler and disposes it together with itself.
    using var client = new HttpClient(handler);

    try
    {
        // GetAwaiter().GetResult() surfaces the real exception instead of an AggregateException.
        byte[] imageBytes = client.GetByteArrayAsync(imageUri).GetAwaiter().GetResult();
        File.WriteAllBytes(path, imageBytes);
    }
    catch (Exception ex)
    {
        Console.WriteLine($"{errorTag} Couldn't fetch image. ({imageUri}: {ex.Message})");
    }
}
286+
222287 private void ExecuteScroll ( IWebElement notesList , IWebElement currentElement )
223288 {
224289 ( ( IJavaScriptExecutor ) _driver ) . ExecuteScript ( "arguments[0].scrollBy(0, arguments[1]);" , notesList , currentElement . Size . Height ) ;
0 commit comments