Added file encoding auto-detect on open.

When a file is opened, the editor now attempts to identify its encoding, falling back to UTF-8 as the default.
Detection uses the BOM if one is present; otherwise it relies on heuristics (tuned for English/European text) to determine the encoding.
This commit is contained in:
niel-archer 2012-02-03 21:04:10 +00:00
parent e1105d3b5a
commit ac53f6c0b6
2 changed files with 7 additions and 2 deletions

View file

@ -54,6 +54,9 @@ using System.Drawing.Printing;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using System.Diagnostics; using System.Diagnostics;
using LSLEditor.Helpers;
namespace LSLEditor namespace LSLEditor
{ {
public delegate void IsDirtyHandler(object sender, EventArgs e); public delegate void IsDirtyHandler(object sender, EventArgs e);
@ -2223,7 +2226,9 @@ namespace LSLEditor
{ {
if (File.Exists(path)) if (File.Exists(path))
{ {
StreamReader sr = new StreamReader(path, Encoding.UTF8); // TODO needs to be refactored to read the file in once and pass the byte array to be checked.
Encoding fileEncoding = TextFileEncodingDetector.DetectTextFileEncoding(path, Encoding.UTF8);
StreamReader sr = new StreamReader(path, fileEncoding);
this.Text = sr.ReadToEnd(); this.Text = sr.ReadToEnd();
sr.Close(); sr.Close();
} }

View file

@ -23,7 +23,7 @@ namespace LSLEditor.Helpers
* Windows-1252 (in .Net, also incorrectly called "ASCII") encodings, we use a * Windows-1252 (in .Net, also incorrectly called "ASCII") encodings, we use a
* heuristic - so the more of the file we can sample the better the guess. If you * heuristic - so the more of the file we can sample the better the guess. If you
* are going to read the whole file into memory at some point, then best to pass * are going to read the whole file into memory at some point, then best to pass
* in the whole byte byte array directly. Otherwise, decide how to trade off * in the whole byte array directly. Otherwise, decide how to trade off
* reliability against performance / memory usage. * reliability against performance / memory usage.
* *
* - The UTF-8 detection heuristic only works for western text, as it relies on * - The UTF-8 detection heuristic only works for western text, as it relies on