2009-11-03 08:48:49 +00:00
|
|
|
using System;
|
2009-03-08 00:46:58 +00:00
|
|
|
using System.IO;
|
2009-08-17 12:28:22 +00:00
|
|
|
using System.Text.RegularExpressions;
|
2009-03-08 00:46:58 +00:00
|
|
|
using System.Xml;
|
|
|
|
using System.Xml.Xsl;
|
|
|
|
|
|
|
|
namespace Bind
|
|
|
|
{
|
|
|
|
/// <summary>
/// Turns a documentation XML file into text: MathML blocks are stripped from the raw
/// source (optionally replaced by the equation text found in their "eqn" comment) and
/// the result is run through an XSL transform. The output of the most recently
/// processed file is cached.
/// </summary>
class DocProcessor
{
    // Matches one complete <mml:math ...> ... </mml:math> element, lazily, i.e. up to
    // the first closing tag that follows the opening tag.
    static readonly Regex remove_mathml = new Regex(@"<(mml:math)[^>]*?>(?:.|\n)*?</\s*\1\s*>",
        RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);

    // NOTE: these two are static, so they are shared by every DocProcessor instance and
    // are (re)configured each time a constructor runs.
    static readonly XslCompiledTransform xslt = new XslCompiledTransform();
    static readonly XmlReaderSettings settings = new XmlReaderSettings();

    // Marker that introduces the plain-text equation inside a MathML block, and the
    // sequence that terminates it (as in "<!-- eqn: ... :-->").
    const string EquationMarker = "eqn";
    const string EquationTerminator = ":-->";

    // Cached result of the last ProcessFile call and the path it belongs to.
    string Text;
    string LastFile;

    /// <summary>
    /// Loads the XSL stylesheet used by <see cref="ProcessFile"/> and configures the
    /// shared reader settings (DTDs allowed, no external resolver).
    /// </summary>
    /// <param name="transform_file">Path of the XSL stylesheet to load.</param>
    public DocProcessor(string transform_file)
    {
        xslt.Load(transform_file);
        settings.ProhibitDtd = false;
        settings.XmlResolver = null;
    }

    // Strips MathML tags from the source and replaces the equations with the content
    // found in the <!-- eqn: :--> comments in the docs.
    // Todo: Some simple MathML tags do not include comments, find a solution.
    // Todo: Some files include more than 1 function - find a way to map these extra functions.
    /// <summary>
    /// Reads <paramref name="file"/>, strips its MathML blocks and applies the XSL transform.
    /// </summary>
    /// <param name="file">Path of the documentation XML file to process.</param>
    /// <returns>
    /// The transformed text with trailing '\n' characters removed, or
    /// <see cref="String.Empty"/> when the stripped document is not well-formed XML.
    /// Calling again with the same path returns the cached result.
    /// </returns>
    public string ProcessFile(string file)
    {
        if (LastFile == file)
        {
            return Text;
        }

        // Work on locals and commit to the cache only once a result exists, so that an
        // exception (e.g. from File.ReadAllText) cannot leave LastFile pointing at a
        // file whose text was never produced.
        string source = StripMathML(File.ReadAllText(file));
        string result;

        try
        {
            // The pure XmlReader is ~20x faster than the XmlTextReader.
            using (StringReader input = new StringReader(source))
            using (XmlReader doc = XmlReader.Create(input, settings))
            using (StringWriter sw = new StringWriter())
            {
                xslt.Transform(doc, null, sw);
                result = sw.ToString().TrimEnd('\n');
            }
        }
        catch (XmlException e)
        {
            Console.WriteLine(e.ToString());
            // Report which file failed; the reader object itself carries no useful text.
            Console.WriteLine(file);
            // Cache the failure result too, so repeated calls stay consistent.
            result = String.Empty;
        }

        LastFile = file;
        Text = result;
        return result;
    }

    // Removes every MathML block from the text, inserting the block's equation text
    // (wrapped in CDATA) in its place when the block carries one.
    static string StripMathML(string text)
    {
        Match m = remove_mathml.Match(text);
        while (m.Success)
        {
            string removed = m.Value;
            text = text.Remove(m.Index, m.Length);

            string equation = ExtractEquation(removed);
            if (equation != null)
            {
                text = text.Insert(m.Index, "<![CDATA[" + equation + "]]>");
            }

            // Re-scan from the start: the text has changed since the last match.
            m = remove_mathml.Match(text);
        }

        return text;
    }

    // Returns the text between "eqn" + one separator character and the next ":-->",
    // or null when the block has no properly terminated equation.
    static string ExtractEquation(string mathml)
    {
        int marker = mathml.IndexOf(EquationMarker, StringComparison.Ordinal);
        if (marker <= 0)
        {
            return null;
        }

        // Skip the marker and the single character that follows it (the ':' of "eqn:").
        int start = marker + EquationMarker.Length + 1;
        int end = mathml.IndexOf(EquationTerminator, marker + EquationMarker.Length, StringComparison.Ordinal);
        if (end < start)
        {
            // Terminator missing (end == -1) or nothing between marker and terminator.
            return null;
        }

        return mathml.Substring(start, end - start);
    }
}
|
|
|
|
}
|