452 lines
20 KiB
C#
452 lines
20 KiB
C#
using System.IO;
|
|
using System.Xml;
|
|
using System.Xml.Schema;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace XSDVisualiser.Core;
|
|
|
|
/// <summary>
|
|
/// Validates an XML document against a compiled XSD schema set and a specific global element (node).
|
|
/// </summary>
|
|
public static class XmlValidator
|
|
{
|
|
/// <summary>
/// Compiles the schema stored at <paramref name="xsdPath"/> and validates the XML document at
/// <paramref name="xmlPath"/> against the requested global element.
/// </summary>
/// <param name="xsdPath">Path (or URI) of the XSD file to load and compile.</param>
/// <param name="elementName">Local name of the global element to validate against.</param>
/// <param name="elementNamespace">Namespace of the element; <c>null</c> is treated as the empty namespace for the schema lookup.</param>
/// <param name="xmlPath">Path (or URI) of the XML document to validate.</param>
/// <returns>The errors and warnings collected by the schema-set overload of this method.</returns>
public static XmlValidationResult ValidateAgainstElement(string xsdPath, string elementName, string? elementNamespace, string xmlPath)
    => ValidateAgainstElement(BuildSchemaSet(xsdPath), elementName, elementNamespace, xmlPath);
|
|
|
|
/// <summary>
/// Validates the XML document at <paramref name="xmlPath"/> against the global element
/// <paramref name="elementName"/> of an already built schema set.
/// </summary>
/// <remarks>
/// When the document root is the requested element the whole document is validated. Otherwise the first
/// matching element in the document is validated as a fragment and a warning records that a subtree was used.
/// If the element is not declared under the requested namespace, the namespace is inferred from same-named
/// global elements (preferring the namespace of the XML root), or a synthetic element is added for a matching
/// global type; every such adjustment is reported as a warning. Synthesis adds a schema to
/// <paramref name="schemas"/>, so the caller's set may be modified.
/// </remarks>
/// <param name="schemas">Schema set used for validation.</param>
/// <param name="elementName">Local name of the global element to validate against.</param>
/// <param name="elementNamespace">Namespace of the element; <c>null</c> is treated as the empty namespace for the schema lookup.</param>
/// <param name="xmlPath">Path (or URI) of the XML document to validate.</param>
/// <returns>The collected issues. <see cref="XmlException"/> failures are reported as errors instead of being thrown.</returns>
public static XmlValidationResult ValidateAgainstElement(XmlSchemaSet schemas, string elementName, string? elementNamespace, string xmlPath)
{
    var result = new XmlValidationResult();

    // Probe the XML root first: its namespace is a hint for resolving the schema element. If the quick probe
    // fails, the diagnostic probe either records why or (unexpectedly) succeeds, in which case validation
    // continues instead of returning an empty, seemingly valid result.
    var rootInfo = TryReadRoot(xmlPath) ?? ProbeRootWithDiagnostics(xmlPath, result);
    if (rootInfo is null)
    {
        return result;
    }

    var (rootLocal, rootNs) = rootInfo.Value;
    elementNamespace = ResolveElementNamespace(schemas, elementName, elementNamespace, rootNs, result);

    void Handler(object? sender, ValidationEventArgs e)
    {
        if (e.Severity == XmlSeverityType.Warning)
        {
            result.AddWarning(e.Message, e.Exception?.LineNumber, e.Exception?.LinePosition);
        }
        else
        {
            result.AddError(e.Message, e.Exception?.LineNumber, e.Exception?.LinePosition);
        }
    }

    var matchesRoot = string.Equals(rootLocal, elementName, StringComparison.Ordinal)
        && string.Equals(rootNs ?? string.Empty, elementNamespace ?? string.Empty, StringComparison.Ordinal);

    if (matchesRoot)
    {
        ValidateWholeDocument(xmlPath, CreateValidatingSettings(schemas, ConformanceLevel.Auto, Handler), result);
    }
    else
    {
        // Root does not match the selected schema element: validate only the first matching subtree.
        // This enables validating an XML file towards a selected node from the XSD.
        ValidateFirstMatchingFragment(
            xmlPath, elementName, elementNamespace, rootLocal, rootNs,
            CreateValidatingSettings(schemas, ConformanceLevel.Fragment, Handler), result);
    }

    return result;
}

// Re-parses the document after TryReadRoot failed so the reason can be reported; returns the root if one is found after all.
private static (string localName, string nsUri)? ProbeRootWithDiagnostics(string xmlPath, XmlValidationResult result)
{
    try
    {
        using var probe = XmlReader.Create(xmlPath, new XmlReaderSettings
        {
            DtdProcessing = DtdProcessing.Ignore,
            IgnoreWhitespace = true,
            IgnoreComments = true,
            CloseInput = true,
            ConformanceLevel = ConformanceLevel.Document
        });

        while (probe.Read())
        {
            if (probe.NodeType == XmlNodeType.Element && probe.Depth == 0)
            {
                // Should not happen since TryReadRoot failed, but just in case.
                return (probe.LocalName, probe.NamespaceURI);
            }
        }

        result.AddError("XML appears to be empty or does not contain a document root element.");
    }
    catch (XmlException xe)
    {
        result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition);
        TryAddEncodingDiagnostics(xmlPath, xe, result);
    }

    return null;
}

// Returns the namespace to validate under, inferring it (with warnings) when the requested element is not declared.
private static string? ResolveElementNamespace(XmlSchemaSet schemas, string elementName, string? elementNamespace, string rootNs, XmlValidationResult result)
{
    var qname = new XmlQualifiedName(elementName, elementNamespace ?? string.Empty);
    if (schemas.GlobalElements[qname] is XmlSchemaElement)
    {
        return elementNamespace;
    }

    // Try to find candidates with the same local name across namespaces.
    var candidates = schemas.GlobalElements.Names
        .Cast<XmlQualifiedName>()
        .Where(n => string.Equals(n.Name, elementName, StringComparison.Ordinal))
        .Distinct()
        .ToList();

    if (candidates.Count == 1)
    {
        // Keep the name that was asked for so the warning reports it, not the replacement.
        var requested = qname;
        elementNamespace = candidates[0].Namespace;
        qname = new XmlQualifiedName(elementName, elementNamespace ?? string.Empty);
        result.AddWarning($"Element '{{{requested.Namespace}}}{requested.Name}' was not found with the provided namespace. Using detected namespace '{candidates[0].Namespace}'.");
    }
    else if (candidates.Count > 1)
    {
        // Prefer a candidate matching the XML root namespace, if any.
        var preferred = candidates.FirstOrDefault(c => string.Equals(c.Namespace ?? string.Empty, rootNs ?? string.Empty, StringComparison.Ordinal));
        if (preferred != null)
        {
            elementNamespace = preferred.Namespace;
            qname = new XmlQualifiedName(elementName, elementNamespace ?? string.Empty);
            result.AddWarning($"Element namespace adjusted to match XML root namespace: '{{{preferred.Namespace}}}{preferred.Name}'.");
        }
        else
        {
            var list = string.Join(", ", candidates.Select(c => $"'{{{c.Namespace}}}{c.Name}'"));
            result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not found in the compiled schema set. Candidates by name: {list}. Proceeding with best-effort validation.");
        }
    }
    else
    {
        // No candidates at all; attempt to locate a global type and synthesize a matching element for validation.
        var namespaceBefore = qname.Namespace;
        TryAddSyntheticElementForMatchingType(schemas, ref qname, result);

        // The helper may re-home qname into the matched type's namespace; root matching and the fragment
        // lookup below must then use that namespace as well.
        if (!string.Equals(qname.Namespace, namespaceBefore, StringComparison.Ordinal))
        {
            elementNamespace = qname.Namespace;
        }
    }

    // After any adjustments/synthesis, re-check presence.
    if (schemas.GlobalElements[qname] is not XmlSchemaElement)
    {
        // Still not found; continue and let the validator report more actionable errors.
        result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not found in the compiled schema set. Proceeding with best-effort validation.");
    }

    return elementNamespace;
}

// Builds schema-validating reader settings that route validation events to the supplied handler.
private static XmlReaderSettings CreateValidatingSettings(XmlSchemaSet schemas, ConformanceLevel conformanceLevel, ValidationEventHandler onValidationEvent)
{
    var settings = new XmlReaderSettings
    {
        DtdProcessing = DtdProcessing.Ignore,
        ValidationType = ValidationType.Schema,
        Schemas = schemas,
        CloseInput = true,
        ConformanceLevel = conformanceLevel,
        ValidationFlags = XmlSchemaValidationFlags.ReportValidationWarnings | XmlSchemaValidationFlags.ProcessIdentityConstraints
    };
    settings.ValidationEventHandler += onValidationEvent;
    return settings;
}

// Reads the entire document through a validating reader; parse failures are recorded as errors.
private static void ValidateWholeDocument(string xmlPath, XmlReaderSettings settings, XmlValidationResult result)
{
    using var reader = XmlReader.Create(xmlPath, settings);
    try
    {
        while (reader.Read())
        {
            // Just advance to trigger validation callbacks.
        }
    }
    catch (XmlException xe)
    {
        result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition);
        TryAddEncodingDiagnostics(xmlPath, xe, result);
    }
}

// Locates the first element with the requested name and validates only that subtree.
private static void ValidateFirstMatchingFragment(string xmlPath, string elementName, string? elementNamespace, string rootLocal, string rootNs, XmlReaderSettings fragmentSettings, XmlValidationResult result)
{
    try
    {
        var (elementNode, loadError) = FindFirstElementNode(xmlPath, elementName, elementNamespace);
        if (loadError is { } failure)
        {
            result.AddError(failure.Message, failure.LineNumber, failure.LinePosition);
            return;
        }

        if (elementNode is null)
        {
            // Retry without a namespace constraint (null is passed), in case the provided namespace was incorrect or omitted.
            elementNode = FindFirstElementNode(xmlPath, elementName, null).Node;
            if (elementNode is null)
            {
                result.AddError($"Could not find any element '{{{elementNamespace}}}{elementName}' in the XML document to validate against.");
                return;
            }

            result.AddWarning($"Could not find element '{{{elementNamespace}}}{elementName}' with the specified namespace; validating first occurrence by local name only.");
        }

        // Inform as a warning that a subtree is validated instead of the document root.
        result.AddWarning($"Validating against the first occurrence of '{{{elementNamespace}}}{elementName}' found in the document (root is '{{{rootNs}}}{rootLocal}').");

        using var nodeReader = new XmlNodeReader(elementNode);
        using var validatingReader = XmlReader.Create(nodeReader, fragmentSettings);
        while (validatingReader.Read())
        {
            // Advance to trigger validation callbacks for the subtree.
        }
    }
    catch (XmlException xe)
    {
        result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition);
        TryAddEncodingDiagnostics(xmlPath, xe, result);
    }
}
|
|
|
|
/// <summary>
/// Reads the XSD at <paramref name="xsdPath"/> and returns a compiled schema set containing it.
/// </summary>
/// <param name="xsdPath">Path (or URI) of the schema file.</param>
/// <returns>A compiled <see cref="XmlSchemaSet"/> with UPA checking enabled.</returns>
private static XmlSchemaSet BuildSchemaSet(string xsdPath)
{
    var compilation = new XmlSchemaCompilationSettings { EnableUpaCheck = true };
    var schemaSet = new XmlSchemaSet
    {
        // A URL resolver is configured on the set so schema locations referenced by the XSD can be resolved.
        XmlResolver = new XmlUrlResolver(),
        CompilationSettings = compilation
    };

    var readerSettings = new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore };
    using var xsdReader = XmlReader.Create(xsdPath, readerSettings);

    // No validation handler is supplied to Read (null is passed).
    if (XmlSchema.Read(xsdReader, null) is { } parsed)
    {
        schemaSet.Add(parsed);
    }

    schemaSet.Compile();
    return schemaSet;
}
|
|
|
|
/// <summary>
/// Reads just far enough into the document to return the local name and namespace of its root element.
/// </summary>
/// <param name="xmlPath">Path (or URI) of the XML document.</param>
/// <returns>
/// The root element's local name and namespace URI, or <c>null</c> when the document has no root element
/// or is not well-formed up to that point.
/// </returns>
private static (string localName, string nsUri)? TryReadRoot(string xmlPath)
{
    var settings = new XmlReaderSettings
    {
        DtdProcessing = DtdProcessing.Ignore,
        IgnoreWhitespace = true,
        IgnoreComments = true,
        CloseInput = true,
        ConformanceLevel = ConformanceLevel.Document
    };

    using var reader = XmlReader.Create(xmlPath, settings);
    try
    {
        while (reader.Read())
        {
            if (reader.NodeType == XmlNodeType.Element && reader.Depth == 0)
            {
                return (reader.LocalName, reader.NamespaceURI);
            }
        }
    }
    catch (XmlException)
    {
        // Deliberately ignored: the caller re-parses and reports the XmlException with line information.
        // Only XmlException is swallowed; other failures (I/O, access) are not parse errors and propagate.
    }

    return null;
}
|
|
|
|
/// <summary>
/// Loads the document into an <see cref="XmlDocument"/> and returns the first element, in document order,
/// that has the requested local name.
/// </summary>
/// <param name="xmlPath">Path (or URI) of the XML document.</param>
/// <param name="elementName">Local name to look for (ordinal comparison).</param>
/// <param name="elementNamespace">
/// Namespace the element must be in. When <c>null</c>, an element in no namespace is preferred, and if there
/// is none the first element with the local name in any namespace is returned.
/// </param>
/// <returns>
/// <c>Node</c> is the match or <c>null</c> when nothing matches; <c>LoadError</c> is set (and <c>Node</c> is
/// <c>null</c>) when loading the document raised an <see cref="XmlException"/>.
/// </returns>
private static (XmlElement? Node, (string Message, int LineNumber, int LinePosition)? LoadError) FindFirstElementNode(string xmlPath, string elementName, string? elementNamespace)
{
    try
    {
        // Dispose the reader so the underlying file handle is released as soon as the document is loaded.
        using var xr = XmlReader.Create(xmlPath, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });
        var doc = new XmlDocument { PreserveWhitespace = false };
        doc.Load(xr);

        // "//*" yields every element in document order, the same order as a depth-first walk.
        var sameLocalName = doc.SelectNodes("//*")!
            .OfType<XmlElement>()
            .Where(el => string.Equals(el.LocalName, elementName, StringComparison.Ordinal))
            .ToList();

        var wantedNamespace = elementNamespace ?? string.Empty;
        var match = sameLocalName.FirstOrDefault(el => string.Equals(el.NamespaceURI ?? string.Empty, wantedNamespace, StringComparison.Ordinal));

        // A null namespace means "not specified": fall back to a local-name-only match so that callers
        // retrying without a namespace can actually find namespaced elements.
        if (match is null && elementNamespace is null)
        {
            match = sameLocalName.FirstOrDefault();
        }

        return (match, null);
    }
    catch (XmlException xe)
    {
        return (null, ($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition));
    }
}
|
|
|
|
/// <summary>
/// When no global element matches <paramref name="qname"/>, looks for a global type to stand in for it and
/// adds a one-element schema declaring that element to <paramref name="schemas"/>.
/// </summary>
/// <remarks>
/// Type lookup order: exact qualified name; same local name in any namespace; <c>&lt;name&gt;Type</c> in the
/// requested namespace; <c>&lt;name&gt;Type</c> in any namespace. When the type is found in another namespace,
/// <paramref name="qname"/> is replaced so the element lives in the type's namespace. Nothing happens when
/// no type matches.
/// </remarks>
/// <param name="schemas">Schema set to search; it is extended and recompiled on success.</param>
/// <param name="qname">Requested element name; may be reassigned to the namespace of the matched type.</param>
/// <param name="result">Receives a warning describing the synthesis, or the failure if the schema set rejects it.</param>
private static void TryAddSyntheticElementForMatchingType(XmlSchemaSet schemas, ref XmlQualifiedName qname, XmlValidationResult result)
{
    try
    {
        // Copies are needed because a ref parameter cannot be captured by the lambdas below.
        var localName = qname.Name;
        var requestedNs = qname.Namespace ?? string.Empty;
        var typeNames = schemas.GlobalTypes.Names.Cast<XmlQualifiedName>().ToList();

        XmlQualifiedName? matchedTypeQName;
        if (schemas.GlobalTypes[qname] is not null)
        {
            // Exact QName match.
            matchedTypeQName = qname;
        }
        else if (typeNames.FirstOrDefault(n => string.Equals(n.Name, localName, StringComparison.Ordinal)) is { } sameLocalName)
        {
            // 1) Same local name in another namespace: align the element namespace with the type's.
            matchedTypeQName = sameLocalName;
            qname = new XmlQualifiedName(localName, sameLocalName.Namespace ?? string.Empty);
        }
        else
        {
            // 2) Heuristic: common suffix pattern "<ElementName>Type", preferring the requested namespace.
            var candidateLocal = localName + "Type";
            matchedTypeQName = typeNames.FirstOrDefault(n =>
                string.Equals(n.Name, candidateLocal, StringComparison.Ordinal)
                && string.Equals(n.Namespace ?? string.Empty, requestedNs, StringComparison.Ordinal));

            if (matchedTypeQName is null)
            {
                // Fallback to any namespace, again aligning the element namespace with the type's.
                matchedTypeQName = typeNames.FirstOrDefault(n => string.Equals(n.Name, candidateLocal, StringComparison.Ordinal));
                if (matchedTypeQName is not null)
                {
                    qname = new XmlQualifiedName(localName, matchedTypeQName.Namespace ?? string.Empty);
                }
            }
        }

        if (matchedTypeQName is null)
        {
            return;
        }

        // Create a minimal schema that declares the missing element pointing to the matched global type.
        // A schema without a namespace must leave TargetNamespace unset: an empty-string targetNamespace is
        // not a valid value, so "" is mapped to null here.
        var synthetic = new XmlSchema
        {
            TargetNamespace = string.IsNullOrEmpty(qname.Namespace) ? null : qname.Namespace
        };
        synthetic.Items.Add(new XmlSchemaElement
        {
            Name = qname.Name,
            SchemaTypeName = matchedTypeQName
        });

        schemas.Add(synthetic);
        schemas.Compile();

        var typeDesc = $"{{{matchedTypeQName.Namespace}}}{matchedTypeQName.Name}";
        string hint;
        if (string.Equals(matchedTypeQName.Name, qname.Name, StringComparison.Ordinal))
        {
            hint = "exact type name match";
        }
        else if (string.Equals(matchedTypeQName.Name, qname.Name + "Type", StringComparison.Ordinal))
        {
            hint = "matched by '<ElementName>Type' heuristic";
        }
        else
        {
            hint = "matched by best-effort lookup";
        }

        result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not declared, but a global type {typeDesc} exists ({hint}). Added a synthetic element for validation.");
    }
    catch (XmlSchemaException xse)
    {
        result.AddWarning($"Failed to add synthetic element for '{{{qname.Namespace}}}{qname.Name}': {xse.Message}", xse.LineNumber, xse.LinePosition);
    }
}
|
|
|
|
/// <summary>
/// Adds a warning that explains a likely encoding cause of a parse failure, based on the file's byte order
/// mark and the encoding named in its XML declaration.
/// </summary>
/// <param name="xmlPath">Path of the XML file that failed to parse.</param>
/// <param name="xe">The parse exception; its message is checked for the "no byte order mark" text.</param>
/// <param name="result">Receives at most one encoding warning.</param>
private static void TryAddEncodingDiagnostics(string xmlPath, XmlException xe, XmlValidationResult result)
{
    try
    {
        var bom = DetectBom(xmlPath);
        var declaredEncoding = ReadDeclaredEncoding(xmlPath);
        var hasBom = !string.IsNullOrEmpty(bom.BomId);

        var declaresUtf16WithoutBom = declaredEncoding != null
            && declaredEncoding.StartsWith("utf-16", StringComparison.OrdinalIgnoreCase)
            && !hasBom;
        var parserReportedMissingBom = xe.Message.Contains("There is no Unicode byte order mark", StringComparison.OrdinalIgnoreCase);

        if (declaresUtf16WithoutBom || parserReportedMissingBom)
        {
            var actual = hasBom ? bom.Friendly : "none";
            var decl = declaredEncoding ?? "unspecified";
            var hint = "The XML declares UTF-16 but the file does not have a UTF-16 BOM. Either save the file as UTF-16 LE with BOM, or change the XML declaration to encoding=\"utf-8\" and save as UTF-8.";
            result.AddWarning($"Encoding diagnostic: Declared encoding='{decl}'; BOM detected={actual}. {hint}");
            return;
        }

        // Otherwise only report when a BOM is present and contradicts the declaration.
        var contradictsDeclaration = hasBom
            && declaredEncoding != null
            && !IsBomCompatibleWithDeclared(bom.BomId, declaredEncoding);
        if (contradictsDeclaration)
        {
            result.AddWarning($"Encoding diagnostic: Declared encoding='{declaredEncoding}', but BOM indicates '{bom.Friendly}'. Align the XML declaration with the actual file encoding.");
        }
    }
    catch
    {
        // Best effort only: a failure while gathering diagnostics must not mask the primary parsing error.
    }
}
|
|
|
|
/// <summary>
/// Inspects the first bytes of a file for a Unicode byte order mark.
/// </summary>
/// <param name="path">File to inspect.</param>
/// <returns>
/// An identifier (<c>utf-8</c>, <c>utf-16-le</c>, <c>utf-16-be</c>, <c>utf-32-le</c>, <c>utf-32-be</c>) with a
/// display name, or <c>(null, "none")</c> when no known BOM is present.
/// </returns>
private static (string? BomId, string Friendly) DetectBom(string path)
{
    Span<byte> buffer = stackalloc byte[4];
    int count;
    using (var stream = File.OpenRead(path))
    {
        count = stream.Read(buffer);
    }

    ReadOnlySpan<byte> head = buffer.Slice(0, count);

    // Order matters: the 4-byte UTF-32 LE mark begins with the 2-byte UTF-16 LE mark, so longer marks go first.
    var knownMarks = new (byte[] Prefix, string Id, string Friendly)[]
    {
        (new byte[] { 0x00, 0x00, 0xFE, 0xFF }, "utf-32-be", "UTF-32 BE BOM"),
        (new byte[] { 0xFF, 0xFE, 0x00, 0x00 }, "utf-32-le", "UTF-32 LE BOM"),
        (new byte[] { 0xEF, 0xBB, 0xBF }, "utf-8", "UTF-8 BOM"),
        (new byte[] { 0xFE, 0xFF }, "utf-16-be", "UTF-16 BE BOM"),
        (new byte[] { 0xFF, 0xFE }, "utf-16-le", "UTF-16 LE BOM"),
    };

    foreach (var (prefix, id, friendly) in knownMarks)
    {
        // StartsWith is false when fewer bytes were read than the mark is long.
        if (head.StartsWith(new ReadOnlySpan<byte>(prefix)))
        {
            return (id, friendly);
        }
    }

    return (null, "none");
}
|
|
|
|
/// <summary>
/// Extracts the <c>encoding</c> pseudo-attribute from the XML declaration found in the first 1024 bytes of a file.
/// </summary>
/// <remarks>
/// The bytes are decoded as UTF-8 and NUL characters are dropped before matching, so an ASCII-range declaration
/// is also recognised when the file is stored as UTF-16 or UTF-32 (where each character is padded with zero bytes).
/// </remarks>
/// <param name="path">File to inspect.</param>
/// <returns>The declared encoding name, trimmed, or <c>null</c> when no declaration with an encoding is found.</returns>
private static string? ReadDeclaredEncoding(string path)
{
    const int MaxProbeBytes = 1024;

    var buffer = new byte[MaxProbeBytes];
    var filled = 0;
    using (var fs = File.OpenRead(path))
    {
        // Read may return fewer bytes than requested; keep reading until the buffer is full or the file ends.
        int n;
        while (filled < buffer.Length && (n = fs.Read(buffer, filled, buffer.Length - filled)) > 0)
        {
            filled += n;
        }
    }

    // Decode only what was actually read, then strip the zero padding of wide encodings.
    var sample = Encoding.UTF8.GetString(buffer, 0, filled).Replace("\0", string.Empty);

    var m = Regex.Match(sample, "<\\?xml\\s+version\\s*=\\s*['\"][^'\"]+['\"][^>]*encoding\\s*=\\s*['\"]([^'\"]+)['\"][^>]*\\?>", RegexOptions.IgnoreCase);
    return m.Success ? m.Groups[1].Value.Trim() : null;
}
|
|
|
|
/// <summary>
/// Tells whether a detected byte order mark agrees with the encoding named in the XML declaration.
/// </summary>
/// <param name="bomId">BOM identifier as produced by <c>DetectBom</c>; <c>null</c> or empty means no BOM.</param>
/// <param name="declared">Declared encoding name; compared case-insensitively.</param>
/// <returns><c>true</c> when there is no BOM or the declaration names the BOM's encoding; otherwise <c>false</c>.</returns>
private static bool IsBomCompatibleWithDeclared(string? bomId, string declared)
{
    // Without a BOM there is nothing the declaration could contradict.
    if (string.IsNullOrEmpty(bomId))
    {
        return true;
    }

    var name = declared.ToLowerInvariant();
    return bomId switch
    {
        "utf-8" => name == "utf-8",
        "utf-16-le" => name is "utf-16" or "utf-16le",
        "utf-16-be" => name is "utf-16" or "utf-16be",
        "utf-32-le" => name is "utf-32" or "utf-32le",
        "utf-32-be" => name is "utf-32" or "utf-32be",
        _ => false,
    };
}
|
|
}
|
|
|
|
/// <summary>
/// Accumulates the errors and warnings produced while validating one XML document.
/// </summary>
public sealed class XmlValidationResult
{
    private readonly List<XmlValidationIssue> _issues = new();

    /// <summary><c>true</c> when no issue with error severity has been recorded (warnings do not count).</summary>
    public bool IsValid => !_issues.Exists(issue => issue.Severity == XmlSeverityType.Error);

    /// <summary>All recorded issues, in the order they were added.</summary>
    public IReadOnlyList<XmlValidationIssue> Issues => _issues;

    /// <summary>The recorded issues with error severity.</summary>
    public IEnumerable<XmlValidationIssue> Errors => OfSeverity(XmlSeverityType.Error);

    /// <summary>The recorded issues with warning severity.</summary>
    public IEnumerable<XmlValidationIssue> Warnings => OfSeverity(XmlSeverityType.Warning);

    /// <summary>Records an error; a missing line or position is stored as 0.</summary>
    internal void AddError(string message, int? line = null, int? position = null)
    {
        Record(XmlSeverityType.Error, message, line, position);
    }

    /// <summary>Records a warning; a missing line or position is stored as 0.</summary>
    internal void AddWarning(string message, int? line = null, int? position = null)
    {
        Record(XmlSeverityType.Warning, message, line, position);
    }

    // Lazily filters the issue list, so the sequence reflects issues added after it was obtained.
    private IEnumerable<XmlValidationIssue> OfSeverity(XmlSeverityType severity)
    {
        return _issues.Where(issue => issue.Severity == severity);
    }

    private void Record(XmlSeverityType severity, string message, int? line, int? position)
    {
        _issues.Add(new XmlValidationIssue(severity, message, line.GetValueOrDefault(), position.GetValueOrDefault()));
    }
}
|
|
|
|
/// <summary>
/// A single validation finding.
/// </summary>
/// <param name="Severity">Whether the finding is an error or a warning.</param>
/// <param name="Message">Human-readable description of the finding.</param>
/// <param name="LineNumber">Line the finding refers to; <c>XmlValidationResult</c> stores 0 when no line was supplied.</param>
/// <param name="LinePosition">Position within the line; <c>XmlValidationResult</c> stores 0 when no position was supplied.</param>
public sealed record XmlValidationIssue(XmlSeverityType Severity, string Message, int LineNumber, int LinePosition);
|
|
|
|
|