Added file encoding auto-detect on open.
Attempts to identify the file's encoding when a file is opened, falling back to UTF-8 as the default. Detection uses the BOM if one is present; otherwise it applies heuristics (tuned for English/European text) to determine the encoding.
This commit is contained in:
parent
89ae4a7910
commit
79a1478b0d
2 changed files with 7 additions and 2 deletions
|
@ -54,6 +54,9 @@ using System.Drawing.Printing;
|
|||
using System.Text.RegularExpressions;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Diagnostics;
|
||||
|
||||
using LSLEditor.Helpers;
|
||||
|
||||
namespace LSLEditor
|
||||
{
|
||||
public delegate void IsDirtyHandler(object sender, EventArgs e);
|
||||
|
@ -2223,7 +2226,9 @@ namespace LSLEditor
|
|||
{
|
||||
if (File.Exists(path))
|
||||
{
|
||||
StreamReader sr = new StreamReader(path, Encoding.UTF8);
|
||||
// TODO needs to be refactored to read the file in once and pass the byte array to be checked.
|
||||
Encoding fileEncoding = TextFileEncodingDetector.DetectTextFileEncoding(path, Encoding.UTF8);
|
||||
StreamReader sr = new StreamReader(path, fileEncoding);
|
||||
this.Text = sr.ReadToEnd();
|
||||
sr.Close();
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ namespace LSLEditor.Helpers
|
|||
* Windows-1252 (in .Net, also incorrectly called "ASCII") encodings, we use a
|
||||
* heuristic - so the more of the file we can sample the better the guess. If you
|
||||
* are going to read the whole file into memory at some point, then best to pass
|
||||
* in the whole byte byte array directly. Otherwise, decide how to trade off
|
||||
* in the whole byte array directly. Otherwise, decide how to trade off
|
||||
* reliability against performance / memory usage.
|
||||
*
|
||||
* - The UTF-8 detection heuristic only works for western text, as it relies on
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue