Added file encoding auto-detect on open.

Attempts to identify a file's encoding when it is opened, falling back to UTF-8 as the default.
Detection uses the BOM if one is present; otherwise it applies heuristics (tuned for English/European text) to determine the encoding.
This commit is contained in:
niel-archer 2012-02-03 21:04:10 +00:00
parent 89ae4a7910
commit 79a1478b0d
2 changed files with 7 additions and 2 deletions

View file

@ -54,6 +54,9 @@ using System.Drawing.Printing;
using System.Text.RegularExpressions;
using System.Runtime.InteropServices;
using System.Diagnostics;
using LSLEditor.Helpers;
namespace LSLEditor
{
public delegate void IsDirtyHandler(object sender, EventArgs e);
@ -2223,7 +2226,9 @@ namespace LSLEditor
{
if (File.Exists(path))
{
- StreamReader sr = new StreamReader(path, Encoding.UTF8);
+ // TODO needs to be refactored to read the file in once and pass the byte array to be checked.
+ Encoding fileEncoding = TextFileEncodingDetector.DetectTextFileEncoding(path, Encoding.UTF8);
+ StreamReader sr = new StreamReader(path, fileEncoding);
this.Text = sr.ReadToEnd();
sr.Close();
}

View file

@ -23,7 +23,7 @@ namespace LSLEditor.Helpers
* Windows-1252 (in .Net, also incorrectly called "ASCII") encodings, we use a
* heuristic - so the more of the file we can sample the better the guess. If you
* are going to read the whole file into memory at some point, then best to pass
- * in the whole byte byte array directly. Otherwise, decide how to trade off
+ * in the whole byte array directly. Otherwise, decide how to trade off
* reliability against performance / memory usage.
*
* - The UTF-8 detection heuristic only works for western text, as it relies on