diff --git a/XSDVisualiser.Desktop/Views/LeftTreeView.axaml.cs b/XSDVisualiser.Desktop/Views/LeftTreeView.axaml.cs index 78a65ee..9d40d29 100644 --- a/XSDVisualiser.Desktop/Views/LeftTreeView.axaml.cs +++ b/XSDVisualiser.Desktop/Views/LeftTreeView.axaml.cs @@ -321,8 +321,7 @@ public partial class LeftTreeView : UserControl try { - var xml = BuildXmlString(node); - await File.WriteAllTextAsync(localPath!, xml, Encoding.UTF8); + await WriteXmlToFileAsync(node, localPath!); await ShowTextDialogAsync("Export XML", $"Exported subtree to:\n{localPath}"); } catch (Exception ex) @@ -351,6 +350,24 @@ public partial class LeftTreeView : UserControl return sb.ToString(); } + private static async Task WriteXmlToFileAsync(SchemaNode root, string path) + { + var settings = new XmlWriterSettings + { + Indent = true, + OmitXmlDeclaration = false, + Encoding = new UTF8Encoding(false), + NewLineOnAttributes = false + }; + + await using var fs = File.Create(path); + await using var writer = XmlWriter.Create(fs, settings); + await writer.WriteStartDocumentAsync(); + WriteElementRecursive(writer, root); + await writer.WriteEndDocumentAsync(); + await writer.FlushAsync(); + } + private static void WriteElementRecursive(XmlWriter writer, SchemaNode node) { var localName = string.IsNullOrWhiteSpace(node.Name) ? "Element" : node.Name!; diff --git a/XSDVisualiser/Rescources/ObjektType3.xml b/XSDVisualiser/Rescources/ObjektType3.xml new file mode 100644 index 0000000..2c9151b --- /dev/null +++ b/XSDVisualiser/Rescources/ObjektType3.xml @@ -0,0 +1,2 @@ + +JOURNALPOST_NOT_OK diff --git a/XSDVisualiser/Utils/XmlValidation.cs b/XSDVisualiser/Utils/XmlValidation.cs index 4b09ee1..84aa382 100644 --- a/XSDVisualiser/Utils/XmlValidation.cs +++ b/XSDVisualiser/Utils/XmlValidation.cs @@ -1,5 +1,8 @@ +using System.IO; using System.Xml; using System.Xml.Schema; +using System.Text; +using System.Text.RegularExpressions; namespace XSDVisualiser.Core; @@ -22,7 +25,35 @@ public static class XmlValidator (string localName, string nsUri)? rootInfo = TryReadRoot(xmlPath); if (rootInfo is null) { - result.AddError("Unable to read XML root element."); + // Provide a more informative error by attempting to parse and capture XmlException details + try + { + using var probe = XmlReader.Create(xmlPath, new XmlReaderSettings + { + DtdProcessing = DtdProcessing.Ignore, + IgnoreWhitespace = true, + IgnoreComments = true, + CloseInput = true, + ConformanceLevel = ConformanceLevel.Document + }); + while (probe.Read()) + { + if (probe.NodeType == XmlNodeType.Element && probe.Depth == 0) + { + // Should not happen since TryReadRoot failed, but just in case + rootInfo = (probe.LocalName, probe.NamespaceURI); + break; + } + } + + if (rootInfo is null) + result.AddError("XML appears to be empty or does not contain a document root element."); + } + catch (XmlException xe) + { + result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition); + TryAddEncodingDiagnostics(xmlPath, xe, result); + } return result; } @@ -58,7 +89,14 @@ public static class XmlValidator } else { - // No candidates at all; continue and let the validator report more actionable errors. + // No candidates at all; attempt to locate a global type with the same QName and synthesize a matching element for validation. + TryAddSyntheticElementForMatchingType(schemas, ref qname, result); + } + + // After any adjustments/synthesis, re-check presence + if (schemas.GlobalElements[qname] is not XmlSchemaElement) + { + // Still not found; continue and let the validator report more actionable errors. result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not found in the compiled schema set. Proceeding with best-effort validation."); } } @@ -98,6 +136,7 @@ public static class XmlValidator catch (XmlException xe) { result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition); + TryAddEncodingDiagnostics(xmlPath, xe, result); } return result; } @@ -154,6 +193,7 @@ public static class XmlValidator catch (XmlException xe) { result.AddError($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition); + TryAddEncodingDiagnostics(xmlPath, xe, result); } return result; @@ -178,7 +218,7 @@ public static class XmlValidator private static (string localName, string nsUri)? TryReadRoot(string xmlPath) { - using var reader = XmlReader.Create(xmlPath, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore, IgnoreWhitespace = true, IgnoreComments = true }); + using var reader = XmlReader.Create(xmlPath, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore, IgnoreWhitespace = true, IgnoreComments = true, CloseInput = true, ConformanceLevel = ConformanceLevel.Document }); try { while (reader.Read()) @@ -228,6 +268,164 @@ public static class XmlValidator return (null, ($"XML parsing error: {xe.Message}", xe.LineNumber, xe.LinePosition)); } } + + private static void TryAddSyntheticElementForMatchingType(XmlSchemaSet schemas, ref XmlQualifiedName qname, XmlValidationResult result) + { + try + { + // Try exact QName match first + var typeObj = schemas.GlobalTypes[qname]; + XmlQualifiedName? matchedTypeQName = null; + if (typeObj != null) + { + matchedTypeQName = qname; + } + else + { + // Fallbacks: + // 1) Find by exact local name across namespaces + var localName = qname.Name; + var byName = schemas.GlobalTypes.Names.Cast() + .FirstOrDefault(n => string.Equals(n.Name, localName, StringComparison.Ordinal)); + if (byName != null) + { + matchedTypeQName = byName; + // Align element namespace to the found type's namespace + qname = new XmlQualifiedName(localName, byName.Namespace ?? string.Empty); + } + else + { + // 2) Heuristic: try common suffix pattern "Type" + var candidateLocal = localName + "Type"; + var requestedNs = qname.Namespace ?? string.Empty; + // Prefer same namespace if available + var sameNsCandidate = schemas.GlobalTypes.Names.Cast() + .FirstOrDefault(n => string.Equals(n.Name, candidateLocal, StringComparison.Ordinal) && string.Equals(n.Namespace ?? string.Empty, requestedNs, StringComparison.Ordinal)); + if (sameNsCandidate != null) + { + matchedTypeQName = sameNsCandidate; + } + else + { + // Fallback to any namespace + var anyNsCandidate = schemas.GlobalTypes.Names.Cast() + .FirstOrDefault(n => string.Equals(n.Name, candidateLocal, StringComparison.Ordinal)); + if (anyNsCandidate != null) + { + matchedTypeQName = anyNsCandidate; + // Align the element namespace to the found type's namespace + qname = new XmlQualifiedName(localName, anyNsCandidate.Namespace ?? string.Empty); + } + } + } + } + + if (matchedTypeQName == null) + return; + + // Create a minimal schema that declares the missing element pointing to the matched global type + var synthetic = new XmlSchema { TargetNamespace = qname.Namespace }; + var el = new XmlSchemaElement + { + Name = qname.Name, + SchemaTypeName = matchedTypeQName + }; + synthetic.Items.Add(el); + + schemas.Add(synthetic); + schemas.Compile(); + + var typeDesc = $"{{{matchedTypeQName.Namespace}}}{matchedTypeQName.Name}"; + string hint; + if (string.Equals(matchedTypeQName.Name, qname.Name, StringComparison.Ordinal)) + hint = "exact type name match"; + else if (string.Equals(matchedTypeQName.Name, qname.Name + "Type", StringComparison.Ordinal)) + hint = "matched by 'Type' heuristic"; + else + hint = "matched by best-effort lookup"; + + result.AddWarning($"Element '{{{qname.Namespace}}}{qname.Name}' was not declared, but a global type {typeDesc} exists ({hint}). Added a synthetic element for validation."); + } + catch (XmlSchemaException xse) + { + result.AddWarning($"Failed to add synthetic element for '{{{qname.Namespace}}}{qname.Name}': {xse.Message}", xse.LineNumber, xse.LinePosition); + } + } + + private static void TryAddEncodingDiagnostics(string xmlPath, XmlException xe, XmlValidationResult result) + { + try + { + var (bomId, bomName) = DetectBom(xmlPath); + var declared = ReadDeclaredEncoding(xmlPath); + + bool missingBomButUtf16Declared = (declared != null && declared.StartsWith("utf-16", StringComparison.OrdinalIgnoreCase) && string.IsNullOrEmpty(bomId)); + bool explicitNoBomError = xe.Message.Contains("There is no Unicode byte order mark", StringComparison.OrdinalIgnoreCase); + + if (missingBomButUtf16Declared || explicitNoBomError) + { + var actual = string.IsNullOrEmpty(bomId) ? "none" : bomName; + var decl = declared ?? "unspecified"; + var hint = "The XML declares UTF-16 but the file does not have a UTF-16 BOM. Either save the file as UTF-16 LE with BOM, or change the XML declaration to encoding=\"utf-8\" and save as UTF-8."; + result.AddWarning($"Encoding diagnostic: Declared encoding='{decl}'; BOM detected={actual}. {hint}"); + return; + } + + if (!string.IsNullOrEmpty(bomId) && declared != null && !IsBomCompatibleWithDeclared(bomId, declared)) + { + result.AddWarning($"Encoding diagnostic: Declared encoding='{declared}', but BOM indicates '{bomName}'. Align the XML declaration with the actual file encoding."); + } + } + catch + { + // Swallow any diagnostics errors to avoid masking the primary parsing error + } + } + + private static (string? BomId, string Friendly) DetectBom(string path) + { + using var fs = File.OpenRead(path); + Span buf = stackalloc byte[4]; + int read = fs.Read(buf); + if (read >= 4) + { + if (buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0xFE && buf[3] == 0xFF) return ("utf-32-be", "UTF-32 BE BOM"); + if (buf[0] == 0xFF && buf[1] == 0xFE && buf[2] == 0x00 && buf[3] == 0x00) return ("utf-32-le", "UTF-32 LE BOM"); + } + if (read >= 3 && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) return ("utf-8", "UTF-8 BOM"); + if (read >= 2) + { + if (buf[0] == 0xFE && buf[1] == 0xFF) return ("utf-16-be", "UTF-16 BE BOM"); + if (buf[0] == 0xFF && buf[1] == 0xFE) return ("utf-16-le", "UTF-16 LE BOM"); + } + return (null, "none"); + } + + private static string? ReadDeclaredEncoding(string path) + { + byte[] bytes; + using (var fs = File.OpenRead(path)) + { + var len = (int)Math.Min(1024, fs.Length); + bytes = new byte[len]; + _ = fs.Read(bytes, 0, len); + } + var sample = Encoding.UTF8.GetString(bytes); + var m = Regex.Match(sample, "<\\?xml\\s+version\\s*=\\s*['\"][^'\"]+['\"][^>]*encoding\\s*=\\s*['\"]([^'\"]+)['\"][^>]*\\?>", RegexOptions.IgnoreCase); + if (m.Success) return m.Groups[1].Value.Trim(); + return null; + } + + private static bool IsBomCompatibleWithDeclared(string? bomId, string declared) + { + if (string.IsNullOrEmpty(bomId)) return true; + var d = declared.ToLowerInvariant(); + return (bomId == "utf-8" && d == "utf-8") + || (bomId == "utf-16-le" && (d == "utf-16" || d == "utf-16le")) + || (bomId == "utf-16-be" && (d == "utf-16" || d == "utf-16be")) + || (bomId == "utf-32-le" && (d == "utf-32" || d == "utf-32le")) + || (bomId == "utf-32-be" && (d == "utf-32" || d == "utf-32be")); + } } public sealed class XmlValidationResult @@ -249,3 +447,5 @@ public sealed class XmlValidationResult } public sealed record XmlValidationIssue(XmlSeverityType Severity, string Message, int LineNumber, int LinePosition); + +