using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Schema;

namespace XSDVisualiser.Core;

/// <summary>
/// Validates an XML document against a compiled XSD schema set and a specific global element (node).
/// </summary>
public static class XmlValidator
{
    /// <summary>
    /// Matches an XML declaration that carries an <c>encoding</c> pseudo-attribute; group 1 is the encoding name.
    /// </summary>
    private static readonly Regex DeclaredEncodingPattern = new(
        "<\\?xml\\s+version\\s*=\\s*['\"][^'\"]+['\"][^>]*encoding\\s*=\\s*['\"]([^'\"]+)['\"][^>]*\\?>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Validates an XML document against the global element specified by name and optional namespace
    /// from the XSD located at <paramref name="xsdPath"/>.
    /// </summary>
    /// <param name="xsdPath">Path to the XSD file containing the target element/type definitions.</param>
    /// <param name="elementName">The local name of the global element to validate against.</param>
    /// <param name="elementNamespace">The namespace URI of the element; may be null to auto-detect.</param>
    /// <param name="xmlPath">Path to the XML file to validate.</param>
    /// <returns>Aggregated validation result with errors/warnings and diagnostics.</returns>
    public static XmlValidationResult ValidateAgainstElement(string xsdPath, string elementName, string? elementNamespace, string xmlPath)
    {
        var set = BuildSchemaSet(xsdPath);
        return ValidateAgainstElement(set, elementName, elementNamespace, xmlPath);
    }

    /// <summary>
    /// Validates an XML document against a specific global element within an already built <see cref="XmlSchemaSet"/>.
    /// </summary>
    /// <remarks>
    /// If the XML root is the requested element, the whole document is validated. Otherwise the first
    /// matching element found in the document is validated as a subtree, and a warning records that.
    /// When the element is not declared but a matching global type exists, a synthetic element declaration
    /// is added to <paramref name="schemas"/>, i.e. the caller's schema set may be modified.
    /// </remarks>
    /// <param name="schemas">Compiled schema set to use for validation.</param>
    /// <param name="elementName">The local name of the global element to validate against.</param>
    /// <param name="elementNamespace">The namespace URI of the element; may be null to auto-detect.</param>
    /// <param name="xmlPath">Path to the XML file to validate.</param>
    /// <returns>Aggregated validation result with errors/warnings and diagnostics.</returns>
    public static XmlValidationResult ValidateAgainstElement(XmlSchemaSet schemas, string elementName, string? elementNamespace, string xmlPath)
    {
        var result = new XmlValidationResult();

        // Probe the XML root element first; its namespace is used as a hint when resolving the schema element.
        var rootInfo = TryReadRoot(xmlPath, result);
        if (rootInfo is null)
        {
            return result;
        }

        var (rootLocal, rootNs) = rootInfo.Value;

        // Resolve the element against the schema set. From here on the resolved name is authoritative, so
        // namespace corrections (including the synthetic-element case) apply to root matching and subtree lookup.
        var qname = ResolveTargetElement(schemas, elementName, elementNamespace, rootNs, result);
        var targetNs = qname.Namespace;

        var matchesRoot = string.Equals(rootLocal, elementName, StringComparison.Ordinal)
            && string.Equals(rootNs, targetNs, StringComparison.Ordinal);

        // ConformanceLevel.Auto is used for both paths: it is valid for reading from a file and it is the
        // level that is always accepted when wrapping an existing reader (the subtree path wraps an XmlNodeReader).
        var settings = new XmlReaderSettings
        {
            DtdProcessing = DtdProcessing.Ignore,
            ValidationType = ValidationType.Schema,
            Schemas = schemas,
            CloseInput = true,
            ConformanceLevel = ConformanceLevel.Auto,
            ValidationFlags = XmlSchemaValidationFlags.ReportValidationWarnings | XmlSchemaValidationFlags.ProcessIdentityConstraints
        };

        void Handler(object? sender, ValidationEventArgs e)
        {
            if (e.Severity == XmlSeverityType.Warning)
            {
                result.AddWarning(e.Message, e.Exception?.LineNumber, e.Exception?.LinePosition);
            }
            else
            {
                result.AddError(e.Message, e.Exception?.LineNumber, e.Exception?.LinePosition);
            }
        }

        settings.ValidationEventHandler += Handler;

        try
        {
            if (matchesRoot)
            {
                using var reader = XmlReader.Create(xmlPath, settings);
                ReadToEnd(reader);
                return result;
            }

            // Root does not match the selected schema element. Locate the first matching subtree and validate
            // only that fragment. This enables validating an XML file towards a selected node from the XSD.
            var (document, loadError) = TryLoadDocument(xmlPath);
            if (document is null)
            {
                if (loadError is { } err)
                {
                    result.AddError(err.Message, err.LineNumber, err.LinePosition);
                }

                return result;
            }

            var elementNode = FindFirstElement(document, elementName, targetNs);
            if (elementNode is null)
            {
                // Try again ignoring the namespace, in case the provided namespace was incorrect or omitted.
                elementNode = FindFirstElement(document, elementName, null);
                if (elementNode is null)
                {
                    result.AddError($"Could not find any element '{{{targetNs}}}{elementName}' in the XML document to validate against.");
                    return result;
                }

                result.AddWarning($"Could not find element '{{{targetNs}}}{elementName}' with the specified namespace; validating first occurrence by local name only.");
            }

            // Inform as a warning that we validate a subtree instead of the document root.
            result.AddWarning($"Validating against the first occurrence of '{{{targetNs}}}{elementName}' found in the document (root is '{{{rootNs}}}{rootLocal}').");

            using var nodeReader = new XmlNodeReader(elementNode);
            using var validatingReader = XmlReader.Create(nodeReader, settings);
            ReadToEnd(validatingReader);
        }
        catch (XmlException xe)
        {
            AddParseError(result, xmlPath, xe);
        }

        return result;
    }

    /// <summary>
    /// Advances <paramref name="reader"/> to the end of its input so that validation callbacks fire.
    /// </summary>
    private static void ReadToEnd(XmlReader reader)
    {
        while (reader.Read())
        {
            // just advance to trigger validation callbacks
        }
    }

    /// <summary>
    /// Records an XML parsing error with its location, followed by any encoding diagnostics for the file.
    /// </summary>
    private static void AddParseError(XmlValidationResult result, string xmlPath, XmlException xe)
    {
        result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition);
        TryAddEncodingDiagnostics(xmlPath, xe, result);
    }

    /// <summary>
    /// Finds the schema element to validate against, correcting the namespace where it can be inferred.
    /// </summary>
    /// <remarks>
    /// Resolution order when the requested name is not a global element: a single global element with the
    /// same local name; among several, the one in the XML root's namespace; with none, a synthetic element
    /// for a matching global type. Every adjustment is reported as a warning on <paramref name="result"/>.
    /// </remarks>
    /// <returns>The qualified name to validate against; it may still be absent from the schema set.</returns>
    private static XmlQualifiedName ResolveTargetElement(XmlSchemaSet schemas, string elementName, string? elementNamespace, string rootNs, XmlValidationResult result)
    {
        var qname = new XmlQualifiedName(elementName, elementNamespace ?? string.Empty);
        if (schemas.GlobalElements[qname] is XmlSchemaElement)
        {
            return qname;
        }

        // Try to find candidates with the same local name across namespaces.
        var candidates = schemas.GlobalElements.Names
            .Cast<XmlQualifiedName>()
            .Where(n => string.Equals(n.Name, elementName, StringComparison.Ordinal))
            .Distinct()
            .ToList();

        if (candidates.Count == 1)
        {
            // Keep the requested name so the warning reports what was asked for, not what was detected.
            var requested = qname;
            qname = new XmlQualifiedName(elementName, candidates[0].Namespace ?? string.Empty);
            result.AddWarning($"Element '{{{requested.Namespace}}}{requested.Name}' was not found with the provided namespace. Using detected namespace '{qname.Namespace}'.");
        }
        else if (candidates.Count > 1)
        {
            // Prefer a candidate matching the XML root namespace, if any.
            var preferred = candidates.FirstOrDefault(c => string.Equals(c.Namespace ?? string.Empty, rootNs ?? string.Empty, StringComparison.Ordinal));
            if (preferred is not null)
            {
                qname = new XmlQualifiedName(elementName, preferred.Namespace ?? string.Empty);
                result.AddWarning($"Element namespace adjusted to match XML root namespace: '{{{preferred.Namespace}}}{preferred.Name}'.");
            }
            else
            {
                var list = string.Join(", ", candidates.Select(c => $"'{{{c.Namespace}}}{c.Name}'"));
                result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not found in the compiled schema set. Candidates by name: {list}. Proceeding with best-effort validation.");
            }
        }
        else
        {
            // No candidates at all; attempt to locate a global type and synthesize a matching element.
            TryAddSyntheticElementForMatchingType(schemas, ref qname, result);
        }

        // After any adjustments/synthesis, re-check presence.
        if (schemas.GlobalElements[qname] is not XmlSchemaElement)
        {
            // Still not found; continue and let the validator report more actionable errors.
            result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not found in the compiled schema set. Proceeding with best-effort validation.");
        }

        return qname;
    }

    /// <summary>
    /// Reads the XSD at <paramref name="xsdPath"/> into a new schema set and compiles it.
    /// </summary>
    /// <remarks>
    /// The set uses an <see cref="XmlUrlResolver"/>.
    /// NOTE(review): presumably so that import/include locations in the XSD are followed; confirm this is
    /// acceptable if XSD files can come from untrusted sources.
    /// </remarks>
    private static XmlSchemaSet BuildSchemaSet(string xsdPath)
    {
        var set = new XmlSchemaSet
        {
            XmlResolver = new XmlUrlResolver(),
            CompilationSettings = new XmlSchemaCompilationSettings { EnableUpaCheck = true }
        };

        using var reader = XmlReader.Create(xsdPath, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });
        var schema = XmlSchema.Read(reader, null);
        if (schema != null)
        {
            set.Add(schema);
        }

        set.Compile();
        return set;
    }

    /// <summary>
    /// Reads the local name and namespace of the document root element.
    /// </summary>
    /// <returns>
    /// The root's name and namespace, or null when the document cannot be parsed or has no root element.
    /// In the null case an error (plus encoding diagnostics for parse failures) is added to <paramref name="result"/>.
    /// </returns>
    private static (string localName, string nsUri)? TryReadRoot(string xmlPath, XmlValidationResult result)
    {
        var probeSettings = new XmlReaderSettings
        {
            DtdProcessing = DtdProcessing.Ignore,
            IgnoreWhitespace = true,
            IgnoreComments = true,
            CloseInput = true,
            ConformanceLevel = ConformanceLevel.Document
        };

        try
        {
            using var reader = XmlReader.Create(xmlPath, probeSettings);
            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element && reader.Depth == 0)
                {
                    return (reader.LocalName, reader.NamespaceURI);
                }
            }

            result.AddError("XML appears to be empty or does not contain a document root element.");
        }
        catch (XmlException xe)
        {
            AddParseError(result, xmlPath, xe);
        }

        return null;
    }

    /// <summary>
    /// Loads the XML file into an <see cref="XmlDocument"/>.
    /// </summary>
    /// <returns>The document on success; otherwise a null document and the parse error with its location.</returns>
    private static (XmlDocument? Document, (string Message, int LineNumber, int LinePosition)? LoadError) TryLoadDocument(string xmlPath)
    {
        try
        {
            // Dispose the reader so the file handle is released as soon as loading finishes.
            using var xr = XmlReader.Create(xmlPath, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });
            var doc = new XmlDocument { PreserveWhitespace = false };
            doc.Load(xr);
            return (doc, null);
        }
        catch (XmlException xe)
        {
            return (null, ($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition));
        }
    }

    /// <summary>
    /// Depth-first search, in document order, for the first element with the given local name.
    /// </summary>
    /// <param name="node">Node to start from; it is itself considered a candidate.</param>
    /// <param name="name">Local name to match (ordinal comparison).</param>
    /// <param name="ns">Namespace URI to match; an empty string means no namespace, null matches any namespace.</param>
    /// <returns>The first matching element, or null when there is none.</returns>
    private static XmlElement? FindFirstElement(XmlNode node, string name, string? ns)
    {
        if (node is XmlElement el
            && string.Equals(el.LocalName, name, StringComparison.Ordinal)
            && (ns is null || string.Equals(el.NamespaceURI ?? string.Empty, ns, StringComparison.Ordinal)))
        {
            return el;
        }

        foreach (XmlNode child in node.ChildNodes)
        {
            var found = FindFirstElement(child, name, ns);
            if (found is not null)
            {
                return found;
            }
        }

        return null;
    }

    /// <summary>
    /// Declares a synthetic global element for <paramref name="qname"/> when no such element exists but a
    /// matching global type does, so that validation can proceed against that type.
    /// </summary>
    /// <remarks>
    /// On success the synthetic schema is added to <paramref name="schemas"/>, the set is recompiled and a
    /// warning is recorded. <paramref name="qname"/> is re-aligned to the matched type's namespace. If the
    /// set fails to compile, the synthetic schema is removed again and a warning is recorded instead.
    /// </remarks>
    private static void TryAddSyntheticElementForMatchingType(XmlSchemaSet schemas, ref XmlQualifiedName qname, XmlValidationResult result)
    {
        XmlSchema? synthetic = null;
        try
        {
            var matchedTypeQName = FindMatchingGlobalType(schemas, ref qname);
            if (matchedTypeQName is null)
            {
                return;
            }

            // Create a minimal schema that declares the missing element pointing to the matched global type.
            // A schema without a namespace is expressed with a null TargetNamespace, not an empty string.
            synthetic = new XmlSchema
            {
                TargetNamespace = string.IsNullOrEmpty(qname.Namespace) ? null : qname.Namespace
            };
            synthetic.Items.Add(new XmlSchemaElement { Name = qname.Name, SchemaTypeName = matchedTypeQName });
            schemas.Add(synthetic);
            schemas.Compile();

            var typeDesc = $"{{{matchedTypeQName.Namespace}}}{matchedTypeQName.Name}";
            string hint;
            if (string.Equals(matchedTypeQName.Name, qname.Name, StringComparison.Ordinal))
            {
                hint = "exact type name match";
            }
            else if (string.Equals(matchedTypeQName.Name, qname.Name + "Type", StringComparison.Ordinal))
            {
                hint = "matched by 'Type' heuristic";
            }
            else
            {
                hint = "matched by best-effort lookup";
            }

            result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not declared, but a global type {typeDesc} exists ({hint}). Added a synthetic element for validation.");
        }
        catch (XmlSchemaException xse)
        {
            // Roll the synthetic schema back so the caller's schema set is not left in a state that cannot compile.
            if (synthetic is not null)
            {
                try
                {
                    if (schemas.Contains(synthetic))
                    {
                        schemas.Remove(synthetic);
                        schemas.Compile();
                    }
                }
                catch (XmlSchemaException)
                {
                    // Best effort only; the original failure is reported below.
                }
            }

            result.AddWarning($"Failed to add synthetic element for '{{{qname.Namespace}}}{qname.Name}': {xse.Message}", xse.LineNumber, xse.LinePosition);
        }
    }

    /// <summary>
    /// Looks for a global type that plausibly corresponds to the element <paramref name="qname"/>.
    /// </summary>
    /// <remarks>
    /// Tried in order: the exact qualified name; the same local name in any namespace; local name plus the
    /// suffix "Type" in the requested namespace; local name plus "Type" in any namespace. When a match is
    /// found in another namespace, <paramref name="qname"/> is moved to that namespace.
    /// </remarks>
    /// <returns>The qualified name of the matched type, or null when nothing matches.</returns>
    private static XmlQualifiedName? FindMatchingGlobalType(XmlSchemaSet schemas, ref XmlQualifiedName qname)
    {
        // Try exact QName match first.
        if (schemas.GlobalTypes[qname] is not null)
        {
            return qname;
        }

        var localName = qname.Name;
        var requestedNs = qname.Namespace ?? string.Empty;
        var typeNames = schemas.GlobalTypes.Names.Cast<XmlQualifiedName>().ToList();

        // 1) Exact local name across namespaces.
        var match = typeNames.FirstOrDefault(n => string.Equals(n.Name, localName, StringComparison.Ordinal));

        if (match is null)
        {
            // 2) Heuristic: common suffix pattern "Type", preferring the requested namespace.
            var candidateLocal = localName + "Type";
            match = typeNames.FirstOrDefault(n => string.Equals(n.Name, candidateLocal, StringComparison.Ordinal)
                                                  && string.Equals(n.Namespace ?? string.Empty, requestedNs, StringComparison.Ordinal))
                    ?? typeNames.FirstOrDefault(n => string.Equals(n.Name, candidateLocal, StringComparison.Ordinal));
        }

        if (match is null)
        {
            return null;
        }

        // Align the element namespace to the found type's namespace.
        qname = new XmlQualifiedName(localName, match.Namespace ?? string.Empty);
        return match;
    }

    /// <summary>
    /// Adds a warning when the file's byte order mark and its declared encoding look inconsistent.
    /// </summary>
    /// <remarks>
    /// Purely diagnostic: any failure while inspecting the file is swallowed so that it cannot mask the
    /// primary parsing error <paramref name="xe"/>.
    /// </remarks>
    private static void TryAddEncodingDiagnostics(string xmlPath, XmlException xe, XmlValidationResult result)
    {
        try
        {
            var (bomId, bomName) = DetectBom(xmlPath);
            var declared = ReadDeclaredEncoding(xmlPath);

            bool missingBomButUtf16Declared = declared != null
                && declared.StartsWith("utf-16", StringComparison.OrdinalIgnoreCase)
                && string.IsNullOrEmpty(bomId);
            bool explicitNoBomError = xe.Message.Contains("There is no Unicode byte order mark", StringComparison.OrdinalIgnoreCase);

            if (missingBomButUtf16Declared || explicitNoBomError)
            {
                var actual = string.IsNullOrEmpty(bomId) ? "none" : bomName;
                var decl = declared ?? "unspecified";
                var hint = "The XML declares UTF-16 but the file does not have a UTF-16 BOM. Either save the file as UTF-16 LE with BOM, or change the XML declaration to encoding=\"utf-8\" and save as UTF-8.";
                result.AddWarning($"Encoding diagnostic: Declared encoding='{decl}'; BOM detected={actual}. {hint}");
                return;
            }

            if (!string.IsNullOrEmpty(bomId) && declared != null && !IsBomCompatibleWithDeclared(bomId, declared))
            {
                result.AddWarning($"Encoding diagnostic: Declared encoding='{declared}', but BOM indicates '{bomName}'. Align the XML declaration with the actual file encoding.");
            }
        }
        catch
        {
            // Swallow any diagnostics errors to avoid masking the primary parsing error.
        }
    }

    /// <summary>
    /// Inspects the first bytes of the file for a Unicode byte order mark.
    /// </summary>
    /// <returns>An identifier and a display name for the detected BOM; <c>(null, "none")</c> when there is none.</returns>
    private static (string? BomId, string Friendly) DetectBom(string path)
    {
        using var fs = File.OpenRead(path);
        Span<byte> buf = stackalloc byte[4];
        int read = fs.Read(buf);

        // The 4-byte UTF-32 marks are checked first because UTF-32 LE starts with the UTF-16 LE mark.
        if (read >= 4)
        {
            if (buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0xFE && buf[3] == 0xFF)
            {
                return ("utf-32-be", "UTF-32 BE BOM");
            }

            if (buf[0] == 0xFF && buf[1] == 0xFE && buf[2] == 0x00 && buf[3] == 0x00)
            {
                return ("utf-32-le", "UTF-32 LE BOM");
            }
        }

        if (read >= 3 && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF)
        {
            return ("utf-8", "UTF-8 BOM");
        }

        if (read >= 2)
        {
            if (buf[0] == 0xFE && buf[1] == 0xFF)
            {
                return ("utf-16-be", "UTF-16 BE BOM");
            }

            if (buf[0] == 0xFF && buf[1] == 0xFE)
            {
                return ("utf-16-le", "UTF-16 LE BOM");
            }
        }

        return (null, "none");
    }

    /// <summary>
    /// Extracts the encoding named in the XML declaration, looking only at the first 1024 bytes of the file.
    /// </summary>
    /// <remarks>
    /// The sample is decoded as UTF-8, so a declaration stored in a multi-byte encoding such as UTF-16
    /// will not be recognised.
    /// </remarks>
    /// <returns>The trimmed encoding name, or null when no declaration with an encoding is found.</returns>
    private static string? ReadDeclaredEncoding(string path)
    {
        byte[] bytes;
        int read;
        using (var fs = File.OpenRead(path))
        {
            var len = (int)Math.Min(1024, fs.Length);
            bytes = new byte[len];
            read = fs.Read(bytes, 0, len);
        }

        // Decode only the bytes actually read; Read may return fewer than requested.
        var sample = Encoding.UTF8.GetString(bytes, 0, read);
        var m = DeclaredEncodingPattern.Match(sample);
        return m.Success ? m.Groups[1].Value.Trim() : null;
    }

    /// <summary>
    /// Tells whether a detected BOM is consistent with the encoding named in the XML declaration.
    /// </summary>
    /// <param name="bomId">BOM identifier as returned by <see cref="DetectBom"/>; null or empty counts as compatible.</param>
    /// <param name="declared">Encoding name from the XML declaration (compared case-insensitively).</param>
    private static bool IsBomCompatibleWithDeclared(string? bomId, string declared)
    {
        if (string.IsNullOrEmpty(bomId))
        {
            return true;
        }

        var d = declared.ToLowerInvariant();
        return bomId switch
        {
            "utf-8" => d == "utf-8",
            "utf-16-le" => d is "utf-16" or "utf-16le",
            "utf-16-be" => d is "utf-16" or "utf-16be",
            "utf-32-le" => d is "utf-32" or "utf-32le",
            "utf-32-be" => d is "utf-32" or "utf-32be",
            _ => false
        };
    }
}

/// <summary>
/// Aggregates XML validation outcomes, including errors, warnings, and overall validity.
/// </summary>
public sealed class XmlValidationResult
{
    private readonly List<XmlValidationIssue> _issues = new();

    /// <summary>
    /// True if no validation errors have been recorded.
    /// </summary>
    public bool IsValid => _issues.TrueForAll(i => i.Severity != XmlSeverityType.Error);

    /// <summary>
    /// All recorded validation issues (errors and warnings) in chronological order.
    /// </summary>
    public IReadOnlyList<XmlValidationIssue> Issues => _issues;

    /// <summary>
    /// All recorded validation errors.
    /// </summary>
    public IEnumerable<XmlValidationIssue> Errors => _issues.Where(i => i.Severity == XmlSeverityType.Error);

    /// <summary>
    /// All recorded validation warnings.
    /// </summary>
    public IEnumerable<XmlValidationIssue> Warnings => _issues.Where(i => i.Severity == XmlSeverityType.Warning);

    /// <summary>
    /// Records an error; a missing line or position is stored as 0.
    /// </summary>
    internal void AddError(string message, int? line = null, int? position = null) =>
        _issues.Add(new XmlValidationIssue(XmlSeverityType.Error, message, line ?? 0, position ?? 0));

    /// <summary>
    /// Records a warning; a missing line or position is stored as 0.
    /// </summary>
    internal void AddWarning(string message, int? line = null, int? position = null) =>
        _issues.Add(new XmlValidationIssue(XmlSeverityType.Warning, message, line ?? 0, position ?? 0));
}

/// <summary>
/// Represents a single validation issue (error or warning) with optional location information.
/// </summary>
/// <param name="Severity">Issue severity (Error or Warning).</param>
/// <param name="Message">Human-readable description of the issue.</param>
/// <param name="LineNumber">Line number in the XML where the issue occurred, if available; 0 otherwise.</param>
/// <param name="LinePosition">Column position in the XML where the issue occurred, if available; 0 otherwise.</param>
public sealed record XmlValidationIssue(XmlSeverityType Severity, string Message, int LineNumber, int LinePosition);