XSDVisualizer/XSDVisualiser/Utils/XmlValidation.cs

492 lines
22 KiB
C#

using System.IO;
using System.Xml;
using System.Xml.Schema;
using System.Text;
using System.Text.RegularExpressions;
namespace XSDVisualiser.Core;
/// <summary>
/// Validates an XML document against a compiled XSD schema set and a specific global element (node).
/// </summary>
public static class XmlValidator
{
/// <summary>
/// Builds a compiled schema set from the XSD at <paramref name="xsdPath"/> and validates the XML file
/// against the named global element by delegating to the <see cref="XmlSchemaSet"/>-based overload.
/// </summary>
/// <param name="xsdPath">Path to the XSD file containing the target element/type definitions.</param>
/// <param name="elementName">The local name of the global element to validate against.</param>
/// <param name="elementNamespace">The namespace URI of the element; may be null to auto-detect.</param>
/// <param name="xmlPath">Path to the XML file to validate.</param>
/// <returns>Aggregated validation result with errors/warnings and diagnostics.</returns>
public static XmlValidationResult ValidateAgainstElement(string xsdPath, string elementName, string? elementNamespace, string xmlPath)
    => ValidateAgainstElement(BuildSchemaSet(xsdPath), elementName, elementNamespace, xmlPath);
/// <summary>
/// Validates an XML document against a specific global element within an already built <see cref="XmlSchemaSet"/>.
/// </summary>
/// <remarks>
/// When the requested element is not declared in <paramref name="schemas"/>, the namespace is inferred from
/// same-named global elements (preferring the XML root's namespace), or a synthetic element is added to the
/// schema set for a matching global type; every adjustment is reported as a warning. When the XML root is not
/// the requested element, the first matching element in the document is validated as a fragment instead.
/// </remarks>
/// <param name="schemas">Compiled schema set to use for validation.</param>
/// <param name="elementName">The local name of the global element to validate against.</param>
/// <param name="elementNamespace">The namespace URI of the element; may be null to auto-detect.</param>
/// <param name="xmlPath">Path to the XML file to validate.</param>
/// <returns>Aggregated validation result with errors/warnings and diagnostics.</returns>
public static XmlValidationResult ValidateAgainstElement(XmlSchemaSet schemas, string elementName, string? elementNamespace, string xmlPath)
{
    var result = new XmlValidationResult();

    // Probe XML root element first, we may use its namespace as a hint
    (string localName, string nsUri)? rootInfo = TryReadRoot(xmlPath);
    if (rootInfo is null)
    {
        // TryReadRoot swallows parse failures; parse again to capture the XmlException details.
        try
        {
            using var probe = XmlReader.Create(xmlPath, new XmlReaderSettings
            {
                DtdProcessing = DtdProcessing.Ignore,
                IgnoreWhitespace = true,
                IgnoreComments = true,
                CloseInput = true,
                ConformanceLevel = ConformanceLevel.Document
            });
            while (probe.Read())
            {
                if (probe.NodeType == XmlNodeType.Element && probe.Depth == 0)
                {
                    // Should not happen since TryReadRoot failed, but just in case
                    rootInfo = (probe.LocalName, probe.NamespaceURI);
                    break;
                }
            }
        }
        catch (XmlException xe)
        {
            result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition);
            TryAddEncodingDiagnostics(xmlPath, xe, result);
            return result;
        }

        // Only give up when the second attempt did not find a root either; if it did, fall through and
        // validate normally instead of returning an empty (and therefore "valid") result.
        if (rootInfo is null)
        {
            result.AddError("XML appears to be empty or does not contain a document root element.");
            return result;
        }
    }

    var (rootLocal, rootNs) = rootInfo.Value;

    // Try to ensure the requested element exists in the schema set; if not, try to infer the correct namespace instead of failing hard.
    var qname = new XmlQualifiedName(elementName, elementNamespace ?? string.Empty);
    if (schemas.GlobalElements[qname] is not XmlSchemaElement)
    {
        // Remember what the caller asked for so warnings can name it after qname has been adjusted.
        var requested = qname;

        // Try to find candidates with the same local name across namespaces
        var candidates = schemas.GlobalElements.Names.Cast<XmlQualifiedName>().Where(n => string.Equals(n.Name, elementName, StringComparison.Ordinal)).Distinct().ToList();
        if (candidates.Count == 1)
        {
            elementNamespace = candidates[0].Namespace;
            qname = new XmlQualifiedName(elementName, elementNamespace ?? string.Empty);
            result.AddWarning($"Element '{{{requested.Namespace}}}{requested.Name}' was not found with the provided namespace. Using detected namespace '{candidates[0].Namespace}'.");
        }
        else if (candidates.Count > 1)
        {
            // Prefer a candidate matching the XML root namespace if any
            var preferred = candidates.FirstOrDefault(c => string.Equals(c.Namespace ?? string.Empty, rootNs ?? string.Empty, StringComparison.Ordinal));
            if (preferred != null)
            {
                elementNamespace = preferred.Namespace;
                qname = new XmlQualifiedName(elementName, elementNamespace ?? string.Empty);
                result.AddWarning($"Element namespace adjusted to match XML root namespace: '{{{preferred.Namespace}}}{preferred.Name}'.");
            }
            else
            {
                var list = string.Join(", ", candidates.Select(c => $"'{{{c.Namespace}}}{c.Name}'"));
                result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not found in the compiled schema set. Candidates by name: {list}. Proceeding with best-effort validation.");
            }
        }
        else
        {
            // No candidates at all; attempt to locate a global type with the same QName and synthesize a matching element for validation.
            TryAddSyntheticElementForMatchingType(schemas, ref qname, result);

            // The helper may have moved qname into the matched type's namespace; keep the namespace used
            // for root matching and subtree lookup in sync with it.
            if (!string.Equals(qname.Namespace, elementNamespace ?? string.Empty, StringComparison.Ordinal))
                elementNamespace = qname.Namespace;
        }

        // After any adjustments/synthesis, re-check presence
        if (schemas.GlobalElements[qname] is not XmlSchemaElement)
        {
            // Still not found; continue and let the validator report more actionable errors.
            result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not found in the compiled schema set. Proceeding with best-effort validation.");
        }
    }

    var matchesRoot = string.Equals(rootLocal, elementName, StringComparison.Ordinal) && string.Equals(rootNs ?? string.Empty, elementNamespace ?? string.Empty, StringComparison.Ordinal);
    var settings = new XmlReaderSettings
    {
        DtdProcessing = DtdProcessing.Ignore,
        ValidationType = ValidationType.Schema,
        Schemas = schemas,
        CloseInput = true,
        ConformanceLevel = ConformanceLevel.Auto
    };
    settings.ValidationFlags = XmlSchemaValidationFlags.ReportValidationWarnings | XmlSchemaValidationFlags.ProcessIdentityConstraints;

    // Collects schema validation events into the result instead of letting the reader throw.
    void Handler(object? sender, ValidationEventArgs e)
    {
        if (e.Severity == XmlSeverityType.Warning)
            result.AddWarning(e.Message, e.Exception?.LineNumber, e.Exception?.LinePosition);
        else
            result.AddError(e.Message, e.Exception?.LineNumber, e.Exception?.LinePosition);
    }
    settings.ValidationEventHandler += Handler;

    if (matchesRoot)
    {
        using var reader = XmlReader.Create(xmlPath, settings);
        try
        {
            while (reader.Read())
            {
                // just advance to trigger validation callbacks
            }
        }
        catch (XmlException xe)
        {
            result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition);
            TryAddEncodingDiagnostics(xmlPath, xe, result);
        }
        return result;
    }

    // Root does not match the selected schema element. Try to locate the first matching subtree and validate only that fragment.
    // This enables validating an XML file towards a selected node from the XSD.
    var fragmentSettings = new XmlReaderSettings
    {
        DtdProcessing = DtdProcessing.Ignore,
        ValidationType = ValidationType.Schema,
        Schemas = schemas,
        CloseInput = true,
        ConformanceLevel = ConformanceLevel.Fragment
    };
    fragmentSettings.ValidationFlags = settings.ValidationFlags;
    fragmentSettings.ValidationEventHandler += Handler;
    try
    {
        var (elementNode, loadError) = FindFirstElementNode(xmlPath, elementName, elementNamespace);
        if (loadError is not null)
        {
            result.AddError(loadError.Value.Message, loadError.Value.LineNumber, loadError.Value.LinePosition);
            return result;
        }
        if (elementNode is null)
        {
            // Try again ignoring namespace, in case the provided namespace was incorrect or omitted.
            // FindFirstElementNode treats a null namespace as "no namespace", so a dedicated
            // local-name-only search is needed here.
            var retry = FindFirstElementByLocalName(xmlPath, elementName);
            if (retry is not null)
            {
                result.AddWarning($"Could not find element '{{{elementNamespace}}}{elementName}' with the specified namespace; validating first occurrence by local name only.");
                elementNode = retry;
            }
            else
            {
                result.AddError($"Could not find any element '{{{elementNamespace}}}{elementName}' in the XML document to validate against.");
                return result;
            }
        }

        // Inform as a warning that we validate a subtree instead of the document root.
        // The namespace is taken from the node actually found so the message stays accurate after a retry.
        result.AddWarning($"Validating against the first occurrence of '{{{elementNode.NamespaceURI}}}{elementName}' found in the document (root is '{{{rootNs}}}{rootLocal}').");
        using var nodeReader = new XmlNodeReader(elementNode);
        using var validatingReader = XmlReader.Create(nodeReader, fragmentSettings);
        while (validatingReader.Read())
        {
            // advance to trigger validation callbacks for the subtree
        }
    }
    catch (XmlException xe)
    {
        result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition);
        TryAddEncodingDiagnostics(xmlPath, xe, result);
    }
    return result;
}

/// <summary>
/// Loads the XML file and returns the first element, in document order, whose local name equals
/// <paramref name="elementName"/>, regardless of its namespace; null when there is none.
/// </summary>
private static XmlElement? FindFirstElementByLocalName(string xmlPath, string elementName)
{
    using var reader = XmlReader.Create(xmlPath, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });
    var doc = new XmlDocument { PreserveWhitespace = false };
    doc.Load(reader);

    // "*" selects every element; comparing LocalName ourselves ignores both prefix and namespace.
    foreach (XmlNode node in doc.GetElementsByTagName("*"))
    {
        if (node is XmlElement element && string.Equals(element.LocalName, elementName, StringComparison.Ordinal))
            return element;
    }
    return null;
}
/// <summary>
/// Reads the XSD at <paramref name="xsdPath"/>, adds it to a new schema set and compiles the set.
/// </summary>
/// <param name="xsdPath">Path to the XSD file to load.</param>
/// <returns>The compiled schema set.</returns>
private static XmlSchemaSet BuildSchemaSet(string xsdPath)
{
    var schemaSet = new XmlSchemaSet();
    schemaSet.XmlResolver = new XmlUrlResolver();
    schemaSet.CompilationSettings = new XmlSchemaCompilationSettings { EnableUpaCheck = true };

    var readerSettings = new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore };
    using var xsdReader = XmlReader.Create(xsdPath, readerSettings);

    // No validation handler is passed to Read.
    if (XmlSchema.Read(xsdReader, null) is { } parsed)
    {
        schemaSet.Add(parsed);
    }

    schemaSet.Compile();
    return schemaSet;
}
/// <summary>
/// Reads forward through the XML file until the depth-0 element is reached and returns its local name
/// and namespace URI. Returns null when reading throws or no such element is encountered.
/// </summary>
/// <param name="xmlPath">Path to the XML file to probe.</param>
private static (string localName, string nsUri)? TryReadRoot(string xmlPath)
{
    var probeSettings = new XmlReaderSettings
    {
        DtdProcessing = DtdProcessing.Ignore,
        IgnoreWhitespace = true,
        IgnoreComments = true,
        CloseInput = true,
        ConformanceLevel = ConformanceLevel.Document
    };
    using var xml = XmlReader.Create(xmlPath, probeSettings);
    try
    {
        while (xml.Read())
        {
            bool isRootElement = xml.NodeType == XmlNodeType.Element && xml.Depth == 0;
            if (!isRootElement)
            {
                continue;
            }
            return (xml.LocalName, xml.NamespaceURI);
        }
        return null;
    }
    catch
    {
        // ignored; higher level will report XmlException separately
        return null;
    }
}
/// <summary>
/// Loads the XML file into an <see cref="XmlDocument"/> and returns the first element, in depth-first
/// document order, whose local name and namespace URI both match.
/// </summary>
/// <param name="xmlPath">Path to the XML file to load.</param>
/// <param name="elementName">Local name to match (ordinal comparison).</param>
/// <param name="elementNamespace">Namespace URI to match; null is treated as the empty namespace.</param>
/// <returns>
/// The matching element (or null when none matches) with a null <c>LoadError</c>; or a null node plus the
/// parse error message and position when the document cannot be parsed.
/// </returns>
private static (XmlElement? Node, (string Message, int LineNumber, int LinePosition)? LoadError) FindFirstElementNode(string xmlPath, string elementName, string? elementNamespace)
{
    try
    {
        var doc = new XmlDocument { PreserveWhitespace = false };

        // Dispose the reader as soon as the DOM is built so the underlying file handle is released.
        using (var xr = XmlReader.Create(xmlPath, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore }))
        {
            doc.Load(xr);
        }

        return (Traverse(doc, elementName, elementNamespace), null);
    }
    catch (XmlException xe)
    {
        return (null, ($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition));
    }

    // Pre-order search: the node itself is tested before its children.
    static XmlElement? Traverse(XmlNode node, string name, string? ns)
    {
        if (node is XmlElement el
            && string.Equals(el.LocalName, name, StringComparison.Ordinal)
            && string.Equals(el.NamespaceURI ?? string.Empty, ns ?? string.Empty, StringComparison.Ordinal))
        {
            return el;
        }

        foreach (XmlNode child in node.ChildNodes)
        {
            var found = Traverse(child, name, ns);
            if (found != null) return found;
        }
        return null;
    }
}
/// <summary>
/// Looks for a global type that plausibly corresponds to the missing element <paramref name="qname"/> and,
/// if one is found, adds a one-element synthetic schema to <paramref name="schemas"/> and recompiles the set.
/// </summary>
/// <remarks>
/// Lookup order: exact QName; same local name in any namespace; "&lt;name&gt;Type" in the requested
/// namespace; "&lt;name&gt;Type" in any namespace. When the match lives in another namespace,
/// <paramref name="qname"/> is rewritten to that namespace. The schema set passed in is modified.
/// </remarks>
/// <param name="schemas">Schema set to search and extend.</param>
/// <param name="qname">Requested element name; may be replaced with a namespace-adjusted name.</param>
/// <param name="result">Receives a warning describing the synthesis or its failure.</param>
private static void TryAddSyntheticElementForMatchingType(XmlSchemaSet schemas, ref XmlQualifiedName qname, XmlValidationResult result)
{
    XmlSchema? added = null;
    try
    {
        XmlQualifiedName? matchedTypeQName = null;

        // Try exact QName match first
        if (schemas.GlobalTypes[qname] != null)
        {
            matchedTypeQName = qname;
        }
        else
        {
            var localName = qname.Name;
            var typeNames = schemas.GlobalTypes.Names.Cast<XmlQualifiedName>().ToList();

            // Fallback 1: exact local name across namespaces
            var byName = typeNames.FirstOrDefault(n => string.Equals(n.Name, localName, StringComparison.Ordinal));
            if (byName != null)
            {
                matchedTypeQName = byName;
                // Align element namespace to the found type's namespace
                qname = new XmlQualifiedName(localName, byName.Namespace ?? string.Empty);
            }
            else
            {
                // Fallback 2: common suffix pattern "<ElementName>Type", preferring the requested namespace
                var candidateLocal = localName + "Type";
                var requestedNs = qname.Namespace ?? string.Empty;
                var sameNsCandidate = typeNames.FirstOrDefault(n =>
                    string.Equals(n.Name, candidateLocal, StringComparison.Ordinal)
                    && string.Equals(n.Namespace ?? string.Empty, requestedNs, StringComparison.Ordinal));
                if (sameNsCandidate != null)
                {
                    matchedTypeQName = sameNsCandidate;
                }
                else
                {
                    var anyNsCandidate = typeNames.FirstOrDefault(n => string.Equals(n.Name, candidateLocal, StringComparison.Ordinal));
                    if (anyNsCandidate != null)
                    {
                        matchedTypeQName = anyNsCandidate;
                        // Align the element namespace to the found type's namespace
                        qname = new XmlQualifiedName(localName, anyNsCandidate.Namespace ?? string.Empty);
                    }
                }
            }
        }

        if (matchedTypeQName == null)
            return;

        // Create a minimal schema that declares the missing element pointing to the matched global type.
        // A schema without a namespace must leave TargetNamespace unset: an empty string is rejected
        // as an invalid targetNamespace when the schema is processed.
        var synthetic = new XmlSchema
        {
            TargetNamespace = string.IsNullOrEmpty(qname.Namespace) ? null : qname.Namespace
        };
        var el = new XmlSchemaElement
        {
            Name = qname.Name,
            SchemaTypeName = matchedTypeQName
        };
        synthetic.Items.Add(el);
        added = schemas.Add(synthetic);
        schemas.Compile();

        var typeDesc = $"{{{matchedTypeQName.Namespace}}}{matchedTypeQName.Name}";
        string hint;
        if (string.Equals(matchedTypeQName.Name, qname.Name, StringComparison.Ordinal))
            hint = "exact type name match";
        else if (string.Equals(matchedTypeQName.Name, qname.Name + "Type", StringComparison.Ordinal))
            hint = "matched by '<ElementName>Type' heuristic";
        else
            hint = "matched by best-effort lookup";
        result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not declared, but a global type {typeDesc} exists ({hint}). Added a synthetic element for validation.");
    }
    catch (XmlSchemaException xse)
    {
        // Do not leave a schema that failed to compile inside the caller's set.
        if (added is not null)
        {
            try
            {
                schemas.Remove(added);
                schemas.Compile();
            }
            catch (XmlSchemaException)
            {
                // Best effort: the original failure is reported below.
            }
        }
        result.AddWarning($"Failed to add synthetic element for '{{{qname.Namespace}}}{qname.Name}': {xse.Message}", xse.LineNumber, xse.LinePosition);
    }
}
/// <summary>
/// Adds an encoding-related warning to <paramref name="result"/> when the file's byte order mark and its
/// declared encoding look inconsistent. Any failure while gathering the diagnostics is suppressed so the
/// primary parsing error is not masked.
/// </summary>
/// <param name="xmlPath">Path of the XML file that failed to parse.</param>
/// <param name="xe">The parse exception; its message is checked for the missing-BOM text.</param>
/// <param name="result">Result that receives the diagnostic warning.</param>
private static void TryAddEncodingDiagnostics(string xmlPath, XmlException xe, XmlValidationResult result)
{
    try
    {
        var bom = DetectBom(xmlPath);
        string? declaredEncoding = ReadDeclaredEncoding(xmlPath);
        bool hasBom = !string.IsNullOrEmpty(bom.BomId);

        bool declaresUtf16WithoutBom = declaredEncoding != null
            && declaredEncoding.StartsWith("utf-16", StringComparison.OrdinalIgnoreCase)
            && !hasBom;
        bool parserReportedMissingBom = xe.Message.Contains("There is no Unicode byte order mark", StringComparison.OrdinalIgnoreCase);

        if (!declaresUtf16WithoutBom && !parserReportedMissingBom)
        {
            // Not the UTF-16 case: only report a plain BOM/declaration mismatch.
            if (hasBom && declaredEncoding != null && !IsBomCompatibleWithDeclared(bom.BomId, declaredEncoding))
            {
                result.AddWarning($"Encoding diagnostic: Declared encoding='{declaredEncoding}', but BOM indicates '{bom.Friendly}'. Align the XML declaration with the actual file encoding.");
            }
            return;
        }

        string detected = hasBom ? bom.Friendly : "none";
        string shownDeclared = declaredEncoding ?? "unspecified";
        string advice = "The XML declares UTF-16 but the file does not have a UTF-16 BOM. Either save the file as UTF-16 LE with BOM, or change the XML declaration to encoding=\"utf-8\" and save as UTF-8.";
        result.AddWarning($"Encoding diagnostic: Declared encoding='{shownDeclared}'; BOM detected={detected}. {advice}");
    }
    catch
    {
        // Swallow any diagnostics errors to avoid masking the primary parsing error
    }
}
/// <summary>
/// Reads up to the first four bytes of the file and matches them against the known byte order marks.
/// </summary>
/// <param name="path">Path of the file to inspect.</param>
/// <returns>
/// An identifier and display name for the detected BOM, or <c>(null, "none")</c> when no known BOM is present.
/// </returns>
private static (string? BomId, string Friendly) DetectBom(string path)
{
    // Order matters: the four-byte UTF-32 marks are tested before the two-byte UTF-16 marks,
    // because the UTF-32 LE mark begins with the UTF-16 LE mark.
    (byte[] Signature, string Id, string Label)[] signatures =
    {
        (new byte[] { 0x00, 0x00, 0xFE, 0xFF }, "utf-32-be", "UTF-32 BE BOM"),
        (new byte[] { 0xFF, 0xFE, 0x00, 0x00 }, "utf-32-le", "UTF-32 LE BOM"),
        (new byte[] { 0xEF, 0xBB, 0xBF }, "utf-8", "UTF-8 BOM"),
        (new byte[] { 0xFE, 0xFF }, "utf-16-be", "UTF-16 BE BOM"),
        (new byte[] { 0xFF, 0xFE }, "utf-16-le", "UTF-16 LE BOM"),
    };

    Span<byte> header = stackalloc byte[4];
    int available;
    using (var file = File.OpenRead(path))
    {
        available = file.Read(header);
    }

    foreach (var (signature, id, label) in signatures)
    {
        if (available >= signature.Length && header.Slice(0, signature.Length).SequenceEqual(signature))
        {
            return (id, label);
        }
    }
    return (null, "none");
}
/// <summary>
/// Extracts the <c>encoding</c> pseudo-attribute from the XML declaration found in the first 1024 bytes
/// of the file.
/// </summary>
/// <remarks>
/// The sample is decoded according to its byte order mark or, without one, according to the byte pattern
/// of a leading "&lt;" / "&lt;?", so declarations in UTF-16 and UTF-32 files are readable as well.
/// Anything else is decoded as UTF-8.
/// </remarks>
/// <param name="path">Path of the file to inspect.</param>
/// <returns>The trimmed declared encoding name, or null when no declaration with an encoding is found.</returns>
private static string? ReadDeclaredEncoding(string path)
{
    const int MaxSampleBytes = 1024;
    byte[] bytes;
    int count = 0;
    using (var fs = File.OpenRead(path))
    {
        bytes = new byte[(int)Math.Min(MaxSampleBytes, fs.Length)];

        // A single Read call may return fewer bytes than requested; keep reading until the sample is full.
        int n;
        while (count < bytes.Length && (n = fs.Read(bytes, count, bytes.Length - count)) > 0)
        {
            count += n;
        }
    }

    var (encoding, preambleLength) = SniffSampleEncoding(bytes, count);
    var sample = encoding.GetString(bytes, preambleLength, count - preambleLength);
    var m = Regex.Match(sample, "<\\?xml\\s+version\\s*=\\s*['\"][^'\"]+['\"][^>]*encoding\\s*=\\s*['\"]([^'\"]+)['\"][^>]*\\?>", RegexOptions.IgnoreCase);
    return m.Success ? m.Groups[1].Value.Trim() : null;
}

/// <summary>
/// Chooses the encoding used to decode the sampled bytes, and the number of BOM bytes to skip.
/// </summary>
private static (Encoding Encoding, int PreambleLength) SniffSampleEncoding(byte[] bytes, int count)
{
    bool StartsWith(params byte[] signature)
    {
        if (count < signature.Length) return false;
        for (int i = 0; i < signature.Length; i++)
        {
            if (bytes[i] != signature[i]) return false;
        }
        return true;
    }

    // Byte order marks; the four-byte UTF-32 marks are tested before the two-byte UTF-16 marks.
    if (StartsWith(0x00, 0x00, 0xFE, 0xFF)) return (new UTF32Encoding(bigEndian: true, byteOrderMark: false), 4);
    if (StartsWith(0xFF, 0xFE, 0x00, 0x00)) return (Encoding.UTF32, 4);
    if (StartsWith(0xEF, 0xBB, 0xBF)) return (Encoding.UTF8, 3);
    if (StartsWith(0xFE, 0xFF)) return (Encoding.BigEndianUnicode, 2);
    if (StartsWith(0xFF, 0xFE)) return (Encoding.Unicode, 2);

    // No BOM: recognise "<" (UTF-32) or "<?" (UTF-16) encoded with wide code units.
    if (StartsWith(0x00, 0x00, 0x00, 0x3C)) return (new UTF32Encoding(bigEndian: true, byteOrderMark: false), 0);
    if (StartsWith(0x3C, 0x00, 0x00, 0x00)) return (Encoding.UTF32, 0);
    if (StartsWith(0x00, 0x3C, 0x00, 0x3F)) return (Encoding.BigEndianUnicode, 0);
    if (StartsWith(0x3C, 0x00, 0x3F, 0x00)) return (Encoding.Unicode, 0);

    return (Encoding.UTF8, 0);
}
/// <summary>
/// Tells whether the declared encoding name is one of the names accepted for the detected BOM.
/// A missing BOM identifier is always treated as compatible.
/// </summary>
/// <param name="bomId">BOM identifier such as "utf-8" or "utf-16-le"; null or empty when there is no BOM.</param>
/// <param name="declared">Encoding name from the XML declaration; compared case-insensitively.</param>
private static bool IsBomCompatibleWithDeclared(string? bomId, string declared)
{
    if (string.IsNullOrEmpty(bomId))
    {
        return true;
    }

    var normalized = declared.ToLowerInvariant();
    return bomId switch
    {
        "utf-8" => normalized == "utf-8",
        "utf-16-le" => normalized is "utf-16" or "utf-16le",
        "utf-16-be" => normalized is "utf-16" or "utf-16be",
        "utf-32-le" => normalized is "utf-32" or "utf-32le",
        "utf-32-be" => normalized is "utf-32" or "utf-32be",
        _ => false
    };
}
}
/// <summary>
/// Collects the issues produced by one validation run and exposes them by severity.
/// </summary>
public sealed class XmlValidationResult
{
    private readonly List<XmlValidationIssue> _issues = new();

    /// <summary>
    /// True if no validation errors have been recorded.
    /// </summary>
    public bool IsValid => !_issues.Exists(issue => issue.Severity == XmlSeverityType.Error);

    /// <summary>
    /// All recorded validation issues (errors and warnings) in the order they were added.
    /// </summary>
    public IReadOnlyList<XmlValidationIssue> Issues => _issues;

    /// <summary>
    /// All recorded validation errors.
    /// </summary>
    public IEnumerable<XmlValidationIssue> Errors => WithSeverity(XmlSeverityType.Error);

    /// <summary>
    /// All recorded validation warnings.
    /// </summary>
    public IEnumerable<XmlValidationIssue> Warnings => WithSeverity(XmlSeverityType.Warning);

    /// <summary>Records an error; a missing line or position is stored as 0.</summary>
    internal void AddError(string message, int? line = null, int? position = null)
    {
        Record(XmlSeverityType.Error, message, line, position);
    }

    /// <summary>Records a warning; a missing line or position is stored as 0.</summary>
    internal void AddWarning(string message, int? line = null, int? position = null)
    {
        Record(XmlSeverityType.Warning, message, line, position);
    }

    // Lazily filters the recorded issues by severity.
    private IEnumerable<XmlValidationIssue> WithSeverity(XmlSeverityType severity) =>
        _issues.Where(issue => issue.Severity == severity);

    // Appends one issue to the list.
    private void Record(XmlSeverityType severity, string message, int? line, int? position) =>
        _issues.Add(new XmlValidationIssue(severity, message, line.GetValueOrDefault(), position.GetValueOrDefault()));
}
/// <summary>
/// Represents a single validation issue (error or warning) together with its location in the XML.
/// </summary>
/// <param name="Severity">Issue severity (Error or Warning).</param>
/// <param name="Message">Human-readable description of the issue.</param>
/// <param name="LineNumber">Line number in the XML where the issue occurred; <see cref="XmlValidationResult"/> stores 0 when no line is supplied.</param>
/// <param name="LinePosition">Column position in the XML where the issue occurred; <see cref="XmlValidationResult"/> stores 0 when no position is supplied.</param>
public sealed record XmlValidationIssue(XmlSeverityType Severity, string Message, int LineNumber, int LinePosition);