Added file encoding auto-detect on open.
Attempts to identify the file's encoding when a file is opened, falling back to UTF-8 as the default. Detection uses the BOM if one is present; otherwise it applies heuristics (tuned for English/European text) to determine the encoding.
This commit is contained in:
parent
e1105d3b5a
commit
ac53f6c0b6
2 changed files with 7 additions and 2 deletions
|
@ -54,6 +54,9 @@ using System.Drawing.Printing;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
|
|
||||||
|
using LSLEditor.Helpers;
|
||||||
|
|
||||||
namespace LSLEditor
|
namespace LSLEditor
|
||||||
{
|
{
|
||||||
public delegate void IsDirtyHandler(object sender, EventArgs e);
|
public delegate void IsDirtyHandler(object sender, EventArgs e);
|
||||||
|
@ -2223,7 +2226,9 @@ namespace LSLEditor
|
||||||
{
|
{
|
||||||
if (File.Exists(path))
|
if (File.Exists(path))
|
||||||
{
|
{
|
||||||
StreamReader sr = new StreamReader(path, Encoding.UTF8);
|
// TODO needs to be refactored to read the file in once and pass the byte array to be checked.
|
||||||
|
Encoding fileEncoding = TextFileEncodingDetector.DetectTextFileEncoding(path, Encoding.UTF8);
|
||||||
|
StreamReader sr = new StreamReader(path, fileEncoding);
|
||||||
this.Text = sr.ReadToEnd();
|
this.Text = sr.ReadToEnd();
|
||||||
sr.Close();
|
sr.Close();
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ namespace LSLEditor.Helpers
|
||||||
* Windows-1252 (in .Net, also incorrectly called "ASCII") encodings, we use a
|
* Windows-1252 (in .Net, also incorrectly called "ASCII") encodings, we use a
|
||||||
* heuristic - so the more of the file we can sample the better the guess. If you
|
* heuristic - so the more of the file we can sample the better the guess. If you
|
||||||
* are going to read the whole file into memory at some point, then best to pass
|
* are going to read the whole file into memory at some point, then best to pass
|
||||||
* in the whole byte byte array directly. Otherwise, decide how to trade off
|
* in the whole byte array directly. Otherwise, decide how to trade off
|
||||||
* reliability against performance / memory usage.
|
* reliability against performance / memory usage.
|
||||||
*
|
*
|
||||||
* - The UTF-8 detection heuristic only works for western text, as it relies on
|
* - The UTF-8 detection heuristic only works for western text, as it relies on
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue