Added file encoding auto-detect on open.

Attempts to identify the file's encoding when opening it, falling back to UTF-8 as the default. Detection uses the BOM if one is present; otherwise it applies heuristics (tuned for English/European text) to guess the encoding.
2012-02-03 21:04:10 +00:00 · 2012-02-03 21:04:10 +00:00 · 79a1478b0d
commit 79a1478b0d
parent 89ae4a7910
2 changed files with 7 additions and 2 deletions
--- a/trunk/Editor/SyntaxRichTextBox.cs
+++ b/trunk/Editor/SyntaxRichTextBox.cs
@ -54,6 +54,9 @@ using System.Drawing.Printing;
 using System.Text.RegularExpressions;
 using System.Runtime.InteropServices;
 using System.Diagnostics;
+
+using LSLEditor.Helpers;
+
 namespace LSLEditor
 {
 	public delegate void IsDirtyHandler(object sender, EventArgs e);
@ -2223,7 +2226,9 @@ namespace LSLEditor
 			{
 				if (File.Exists(path))
 				{
-					StreamReader sr = new StreamReader(path, Encoding.UTF8);
+					// TODO needs to be refactored to read the file in once and pass the byte array to be checked.
+					Encoding fileEncoding = TextFileEncodingDetector.DetectTextFileEncoding(path, Encoding.UTF8);
+					StreamReader sr = new StreamReader(path, fileEncoding);
 					this.Text = sr.ReadToEnd();
 					sr.Close();
 				}
--- a/trunk/Helpers/TextFileEncodingDetector.cs
+++ b/trunk/Helpers/TextFileEncodingDetector.cs
@ -23,7 +23,7 @@ namespace LSLEditor.Helpers
 		 *      Windows-1252 (in .Net, also incorrectly called "ASCII") encodings, we use a
 		 *      heuristic - so the more of the file we can sample the better the guess. If you
 		 *      are going to read the whole file into memory at some point, then best to pass
-		 *      in the whole byte byte array directly. Otherwise, decide how to trade off
+		 *      in the whole byte array directly. Otherwise, decide how to trade off
 		 *      reliability against performance / memory usage.
 		 *
 		 *  - The UTF-8 detection heuristic only works for western text, as it relies on