Skip to content

Commit b74f572

Browse files
authored
fix: optimized image download (#40)
* fix: add additional checks for image fetching * add tags for release * feat: add image export disable flag in cmd * add help message and proper documentation * fix: infinite loop when fetching images from note container * chore: update packages
1 parent e87c9aa commit b74f572

7 files changed

Lines changed: 193 additions & 84 deletions

File tree

README.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,17 @@ Download [latest release](https://github.com/nogiszd/xiaomi-note-exporter/releas
3333
```
3434
Options:
3535
-h, --help
36-
Show this help message.
36+
Show this help message and exit
3737
3838
-d, --domain <domain> (default: us.i.mi.com)
3939
Mi Notes domain that you were redirected to.
4040
4141
-s, --split <timestamp> (default: dd-MM-yyyy_HH-mm-ss)
4242
Split notes into separate files with provided timestamp format. Must be compatible with:
4343
https://learn.microsoft.com/en-us/dotnet/standard/base-types/custom-date-and-time-format-strings
44+
45+
-di, --disable-images
46+
Disable default image export behavior, images will not be downloaded from the notes.
4447
```
4548

4649
## 🗒️ Note splitting
@@ -51,6 +54,12 @@ With `-s` flag you can enable note splitting - which exports notes to separate d
5154

5255
This lets the user supply a specific timestamp format - **but it must be compatible with the [.NET specification](https://learn.microsoft.com/en-us/dotnet/standard/base-types/custom-date-and-time-format-strings)**.
5356

57+
## 🖼️ Image export
58+
59+
Since `v1.7.0` this app exports images present in notes by default.
60+
61+
You can disable this feature with the `-di` flag in the CLI arguments.
62+
5463
---
5564

5665
## 🔧 How about maintenance?

xiaomiNoteExporter/ConsoleHelp.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ internal class ConsoleHelp(Version? version)
1212
$"{"Xiaomi Note Exporter".Pastel(Color.FromArgb(252, 106, 0))} {version.GetVersionString()}\n",
1313
$"Usage: xiaomiNoteExporter.exe {"[options]".Pastel(Color.DimGray)}\n",
1414
"Options:",
15-
" -h, --help\t\tShow this help message and exit\n",
15+
" -h, --help\n\tShow this help message and exit\n",
1616
$" -d, --domain <domain> {"(default: us.i.mi.com)".Pastel(Color.DimGray)}\n\tMi Notes domain that you were redirected to.\n",
17-
$" -s, --split <timestamp> {"(default: dd-MM-yyyy_HH-mm-ss)".Pastel(Color.DimGray)}\n\tSplit notes into separate files with provided timestamp format. Must be compatible with:\n\thttps://learn.microsoft.com/en-us/dotnet/standard/base-types/custom-date-and-time-format-strings"
17+
$" -s, --split <timestamp> {"(default: dd-MM-yyyy_HH-mm-ss)".Pastel(Color.DimGray)}\n\tSplit notes into separate files with provided timestamp format. Must be compatible with:\n\thttps://learn.microsoft.com/en-us/dotnet/standard/base-types/custom-date-and-time-format-strings\n",
18+
$" -di, --disable-images\n\tDisable default image export behavior, images will not be downloaded from the notes.\n",
1819
};
1920

2021
public void Print()
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
using OpenQA.Selenium;
2+
using OpenQA.Selenium.Support.UI;
3+
4+
namespace xiaomiNoteExporter;
5+
6+
public static class DriverHelpers
7+
{
8+
/// <summary>
/// Searches <paramref name="scope"/> for elements matching <c>.image-view img</c>
/// while the driver's implicit wait is temporarily set to zero.
/// </summary>
/// <param name="scope">
/// Element to search within. It is cast to <c>IWrapsDriver</c> to reach the owning driver,
/// so an element that does not implement that interface makes the cast fail.
/// </param>
/// <returns>The matched elements, or an empty array when nothing matched.</returns>
public static IReadOnlyCollection<IWebElement> TryFindImages(IWebElement scope)
9+
{
10+
// temporarily set implicit wait to zero
// NOTE(review): presumably so that FindElements returns right away when a note has no images - confirm
11+
var driver = ((IWrapsDriver)scope).WrappedDriver;
12+
var timeouts = driver.Manage().Timeouts();
13+
var originalImplicitWait = timeouts.ImplicitWait;
14+
timeouts.ImplicitWait = TimeSpan.Zero;
15+
16+
try
17+
{
18+
var found = scope.FindElements(By.CssSelector(".image-view img"));
19+
// hand back a shared empty array instead of the empty lookup result
return found.Count > 0 ? found : Array.Empty<IWebElement>();
20+
}
21+
finally
22+
{
23+
// restore original implicit wait timeout (runs even if the lookup throws)
24+
timeouts.ImplicitWait = originalImplicitWait;
25+
}
26+
}
27+
28+
/// <summary>
/// Waits, one element at a time, until every image in <paramref name="imgs"/>
/// passes the <c>IsRealImageLoaded</c> check.
/// </summary>
/// <param name="driver">Driver used to build each wait and passed on to the load check.</param>
/// <param name="imgs">Image elements to wait for; an empty collection results in no waiting.</param>
/// <param name="periodPerItem">Timeout given to the wait created for each individual image.</param>
public static void WaitUntilImagesAreRealAndLoaded(IWebDriver driver, IReadOnlyCollection<IWebElement> imgs, TimeSpan periodPerItem)
29+
{
30+
foreach (var img in imgs)
31+
{
32+
// a fresh wait per image, so each one gets the full periodPerItem timeout
var w = new WebDriverWait(driver, periodPerItem);
33+
// stale or missing elements during polling are ignored rather than aborting the wait
w.IgnoreExceptionTypes(typeof(StaleElementReferenceException), typeof(NoSuchElementException));
34+
// NOTE(review): Until presumably throws once the timeout elapses - confirm the caller handles that
w.Until(d => IsRealImageLoaded(d, img));
35+
}
36+
}
37+
38+
/// <summary>
/// Reads the source URL of an image element through JavaScript, preferring
/// <c>currentSrc</c> over <c>src</c>.
/// </summary>
/// <param name="driver">Driver that is cast to <c>IJavaScriptExecutor</c> to run the script.</param>
/// <param name="img">Image element whose source is read.</param>
/// <returns>
/// The script result, or the element's <c>src</c> attribute if running the script threw;
/// an empty string when either value is null.
/// </returns>
public static string GetCurrentSrc(IWebDriver driver, IWebElement img)
39+
{
40+
try
41+
{
42+
var js = (IJavaScriptExecutor)driver;
43+
// the script itself falls back from currentSrc to src to an empty string
var src = (string?)js.ExecuteScript("return arguments[0].currentSrc || arguments[0].src || '';", img);
44+
return src ?? string.Empty;
45+
}
46+
catch
47+
{
48+
// any failure above (cast or script execution) falls back to the plain attribute
return img.GetAttribute("src") ?? string.Empty;
49+
}
50+
}
51+
52+
/// <summary>
/// Checks whether an image element has a usable source and has finished loading.
/// </summary>
/// <param name="driver">Driver that is cast to <c>IJavaScriptExecutor</c> to run the checks.</param>
/// <param name="imgEl">Image element to inspect.</param>
/// <returns>
/// <c>true</c> when the source is non-blank, does not start with <c>data:image/svg+xml</c>,
/// and the element reports <c>complete === true</c> with a <c>naturalWidth</c> above zero;
/// <c>false</c> otherwise, including when any exception is thrown.
/// </returns>
private static bool IsRealImageLoaded(IWebDriver driver, IWebElement imgEl)
53+
{
54+
try
55+
{
56+
var js = (IJavaScriptExecutor)driver;
57+
58+
var src = (string?)js.ExecuteScript("return arguments[0].currentSrc || arguments[0].src || '';", imgEl) ?? "";
59+
if (string.IsNullOrWhiteSpace(src)) return false;
60+
61+
// NOTE(review): inline SVG data URIs are presumably placeholders shown before the real image arrives - confirm
if (src.StartsWith("data:image/svg+xml", StringComparison.OrdinalIgnoreCase)) return false;
62+
63+
var complete = (bool)(js.ExecuteScript("return arguments[0].complete === true;", imgEl) ?? false);
64+
var hasSize = (bool)(js.ExecuteScript("return (arguments[0].naturalWidth||0) > 0;", imgEl) ?? false);
65+
66+
return complete && hasSize;
67+
}
68+
catch
69+
{
70+
// every exception is reported as "not loaded"
return false;
71+
}
72+
}
73+
}

xiaomiNoteExporter/Program.cs

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ class Program
1717
static bool _shouldSplit = false;
1818
static string _timestampFormat = "dd-MM-yyyy_HH-mm-ss";
1919

20+
static bool _disableImages = false;
21+
2022
readonly static Driver _driver = new(Array.Empty<string>());
2123
static ChromeDriver? driver;
2224

@@ -52,14 +54,14 @@ public static void Main(string[] args)
5254
$"{"Xiaomi Note Exporter".Pastel(Color.FromArgb(252, 106, 0))} - Export your notes to {"Markdown".Pastel(Color.SkyBlue)}!\n"
5355
);
5456

55-
string? domain = _shouldAskForDomain
57+
string? domain = _shouldAskForDomain
5658
? new Prompt(
5759
$"{"[OPTIONAL]".Pastel(Color.DimGray)} Input Mi Notes domain that you were redirected to (default \"{defaultDomain}\"):",
5860
defaultDomain
59-
).Ask()
61+
).Ask()
6062
: defaultDomain;
6163

62-
new Scraper(driver, ShutdownHandler_Handler).Start(domain, _timestampFormat, _shouldSplit);
64+
new Scraper(driver, ShutdownHandler_Handler).Start(domain, _timestampFormat, _shouldSplit, !_disableImages);
6365
}
6466

6567
private static void ShowHelp() => new ConsoleHelp(appVersion).Print();
@@ -72,55 +74,56 @@ private static void ParseArgs(string[] args)
7274

7375
if (arg.Includes("-d", "--domain"))
7476
{
75-
if (i + 1 < args.Length)
77+
if (TryGetArgValue(args, i, out var domain) && !string.IsNullOrEmpty(domain))
7678
{
77-
string domain = args[i + 1];
78-
79-
if (!string.IsNullOrEmpty(domain))
80-
{
81-
defaultDomain = domain; // set global domain to the provided value
82-
_shouldAskForDomain = false; // shouldn't ask for domain, since it was provided as argument
83-
}
84-
else
85-
{
86-
Console.WriteLine($"{"[ERROR]".Pastel(Color.Red)} Provided domain is invalid.");
87-
Environment.Exit(1);
88-
}
79+
defaultDomain = domain; // set global domain to the provided value
80+
_shouldAskForDomain = false; // shouldn't ask for domain, since it was provided as argument
8981

9082
i++; // skip next argument - this was a value
91-
}
83+
}
9284
else
9385
{
9486
Console.WriteLine($"{"[ERROR]".Pastel(Color.Red)} Domain address is required with domain flag.");
9587
Environment.Exit(1);
9688
}
97-
}
89+
}
9890
else if (arg.Includes("-s", "--split"))
9991
{
100-
if (i + 1 < args.Length)
92+
if (TryGetArgValue(args, i, out var timestampFormat) && !string.IsNullOrEmpty(timestampFormat))
10193
{
102-
string timestampFormat = args[i + 1];
103-
104-
if (!string.IsNullOrEmpty(timestampFormat))
94+
try
10595
{
106-
try
107-
{
108-
DateTime.Now.ToString(timestampFormat);
109-
}
110-
catch (FormatException)
111-
{
112-
Console.WriteLine($"{"[ERROR]".Pastel(Color.Red)} Invalid timestamp format.");
113-
Environment.Exit(1);
114-
}
115-
116-
_timestampFormat = timestampFormat;
96+
DateTime.Now.ToString(timestampFormat);
11797
}
98+
catch (FormatException)
99+
{
100+
Console.WriteLine($"{"[ERROR]".Pastel(Color.Red)} Invalid timestamp format.");
101+
Environment.Exit(1);
102+
}
103+
104+
_timestampFormat = timestampFormat;
118105

119106
i++; // skip next argument - this was a value
120107
}
121108

122109
_shouldSplit = true; // if flag is present, split is enabled (even if no value is provided)
123110
}
111+
else if (arg.Includes("-di", "--disable-images"))
112+
{
113+
_disableImages = true;
114+
}
115+
}
116+
}
117+
118+
/// <summary>
/// Tries to read the argument that directly follows position <paramref name="index"/>.
/// </summary>
/// <param name="args">Command-line arguments.</param>
/// <param name="index">Position of the flag whose value is requested.</param>
/// <param name="value">
/// Set to <c>args[index + 1]</c> when that position exists; otherwise an empty string.
/// </param>
/// <returns><c>true</c> if a following argument exists; otherwise <c>false</c>.</returns>
private static bool TryGetArgValue(string[] args, int index, out string value)
119+
{
120+
if (index + 1 < args.Length)
121+
{
122+
// no check on the content here - the next argument is returned as-is
value = args[index + 1];
123+
return true;
124124
}
125+
126+
value = string.Empty;
127+
return false;
125128
}
126129
}

xiaomiNoteExporter/Scraper.cs

Lines changed: 64 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public partial class Scraper(ChromeDriver driver, Action shutdownHandler)
3030
/// <param name="domain">Domain address to be visited by <c>ChromeDriver</c>.</param>
3131
/// <param name="timeStampFormat">Format of the timestamp for file (or directory) name.</param>
3232
/// <param name="split">If <c>true</c> then notes will be split as separate files.</param>
33-
public void Start(string domain, string timeStampFormat, bool split = false)
33+
public void Start(string domain, string timeStampFormat, bool split = false, bool exportImages = true)
3434
{
3535
_wait = _driver.GetWait(TimeSpan.FromSeconds(10));
3636

@@ -56,10 +56,10 @@ public void Start(string domain, string timeStampFormat, bool split = false)
5656
Console.ReadKey();
5757
}
5858

59-
Scrape(timeStampFormat, domain, split);
59+
Scrape(timeStampFormat, domain, split, exportImages);
6060
}
6161

62-
private void Scrape(string timeStampFormat, string domain, bool split)
62+
private void Scrape(string timeStampFormat, string domain, bool split, bool exportImages)
6363
{
6464
if (_wait is null)
6565
{
@@ -138,28 +138,7 @@ private void Scrape(string timeStampFormat, string domain, bool split)
138138
string createdString = element.FindElement(By.XPath(@".//div[2]/div[1]")).Text;
139139

140140
// creation date (calculated from retrieved text)
141-
DateTime createdDate;
142-
143-
if (createdString.ToLower().Contains("now"))
144-
{
145-
createdDate = DateTime.Now; // get current date
146-
}
147-
else if (createdString.ToLower().Contains("yesterday"))
148-
{
149-
createdDate = DateTime.Now.AddDays(-1).Date; // get yesterday's date
150-
}
151-
else if (createdString.EndsWith("ago"))
152-
{
153-
createdDate = RelativeTimeParser.Parse(createdString);
154-
}
155-
else if (SimplifiedDateParser.TryParseMdHm(createdString, out DateTime parsedSimple))
156-
{
157-
createdDate = parsedSimple;
158-
}
159-
else
160-
{
161-
createdDate = DateTime.Parse(createdString, new CultureInfo("en-US"));
162-
}
141+
GetCreatedDate(createdString, out DateTime createdDate);
163142

164143
try
165144
{
@@ -190,23 +169,45 @@ private void Scrape(string timeStampFormat, string domain, bool split)
190169
title
191170
);
192171

193-
var embeddedImages = noteContainer.FindElements(By.XPath(@".//div[contains(@class, 'image-view')]/img"));
172+
if (!exportImages)
173+
{
174+
// skip image export if user chose so
175+
ExecuteScroll(notesList, element);
176+
currentNote++;
177+
continue;
178+
}
179+
180+
var initialImgs = DriverHelpers.TryFindImages(noteContainer);
194181

195-
if (embeddedImages.Count != 0)
182+
if (initialImgs.Count > 0)
196183
{
197-
var cookies = _driver.Manage().Cookies.AllCookies;
184+
DriverHelpers.WaitUntilImagesAreRealAndLoaded(_driver, initialImgs, TimeSpan.FromSeconds(3));
198185

199-
// IWebElement because non nullish type is needed (force typing)
200-
foreach (var t in embeddedImages.Select((item, idx) => (idx, (IWebElement)item)))
186+
var embeddedImages = noteContainer.FindElements(By.CssSelector(".image-view img"));
187+
188+
if (embeddedImages.Count != 0)
201189
{
202-
int idx = t.idx;
203-
IWebElement item = t.Item2;
190+
var cookies = _driver.Manage().Cookies.AllCookies;
191+
192+
// IWebElement because non nullish type is needed (force typing)
193+
foreach (var t in embeddedImages.Select((item, idx) => (idx, (IWebElement)item)))
194+
{
195+
int idx = t.idx;
196+
IWebElement item = t.Item2;
197+
198+
var imgSrc = DriverHelpers.GetCurrentSrc(_driver, item);
204199

205-
var imgSrc = item.GetAttribute("src");
206-
string imgName = $"note_img_{idx}_{createdDate.ToString(timeStampFormat)}.png";
207-
string imgPath = Path.Combine(imgDir, imgName);
200+
if (string.IsNullOrWhiteSpace(imgSrc) || imgSrc.Contains("data:"))
201+
{
202+
// skip base64 images and empty sources
203+
continue;
204+
}
208205

209-
SaveImage(imgPath, imgSrc, domain, cookies);
206+
string imgName = $"note_img_{idx}_{createdDate.ToString(timeStampFormat)}.png";
207+
string imgPath = Path.Combine(imgDir, imgName);
208+
209+
SaveImage(imgPath, imgSrc, cookies);
210+
}
210211
}
211212
}
212213

@@ -254,7 +255,7 @@ private static void SaveToFile(string fileName, string content, string? title =
254255
sw.WriteLine(content);
255256
}
256257

257-
private static void SaveImage(string path, string? src, string domain, IEnumerable<OpenQA.Selenium.Cookie> cookies)
258+
private static void SaveImage(string path, string? src, IEnumerable<OpenQA.Selenium.Cookie> cookies)
258259
{
259260
if (File.Exists(path))
260261
{
@@ -266,8 +267,6 @@ private static void SaveImage(string path, string? src, string domain, IEnumerab
266267
CookieContainer = new CookieContainer()
267268
};
268269

269-
var uri = new Uri($"https://{domain}{src}");
270-
271270
foreach (var cookie in cookies)
272271
{
273272
handler.CookieContainer.Add(
@@ -282,9 +281,33 @@ private static void SaveImage(string path, string? src, string domain, IEnumerab
282281
byte[] imageBytes = client.GetByteArrayAsync(src).Result;
283282
File.WriteAllBytes(path, imageBytes);
284283
}
285-
catch (Exception)
284+
catch (Exception e)
285+
{
286+
Console.WriteLine($"\n{"[ERROR]".Pastel(Color.Red)} Couldn't fetch image.\nError: {e.Message}");
287+
}
288+
}
289+
290+
private static void GetCreatedDate(string createdString, out DateTime createdDate)
291+
{
292+
if (createdString.ToLower().Contains("now"))
293+
{
294+
createdDate = DateTime.Now; // get current date
295+
}
296+
else if (createdString.ToLower().Contains("yesterday"))
297+
{
298+
createdDate = DateTime.Now.AddDays(-1).Date; // get yesterday's date
299+
}
300+
else if (createdString.EndsWith("ago"))
301+
{
302+
createdDate = RelativeTimeParser.Parse(createdString);
303+
}
304+
else if (SimplifiedDateParser.TryParseMdHm(createdString, out DateTime parsedSimple))
305+
{
306+
createdDate = parsedSimple;
307+
}
308+
else
286309
{
287-
Console.WriteLine($"{"[ERROR]".Pastel(Color.Red)} Couldn't fetch image.");
310+
createdDate = DateTime.Parse(createdString, new CultureInfo("en-US"));
288311
}
289312
}
290313

0 commit comments

Comments
 (0)