XSDVisualizer/XSDVisualiser/Parsing/XsdSchemaParser.cs
2025-10-18 16:30:38 +02:00

352 lines
14 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Xml;
using System.Xml.Schema;
using XSDVisualiser.Models;
namespace XSDVisualiser.Parsing
{
public class XsdSchemaParser
{
private readonly XmlSchemaSet _set = new();
/// <summary>
/// Reads the XSD document at <paramref name="xsdPath"/>, adds it to the schema set, compiles the set and
/// builds one root node per global element found in any schema of the set.
/// </summary>
/// <remarks>
/// The schema set is an instance field: schemas added by earlier calls on the same parser instance stay in
/// the set, so their global elements are returned again by later calls.
/// </remarks>
/// <param name="xsdPath">Location of the schema document, handed to <c>XmlReader.Create</c>.</param>
/// <returns>A model carrying the target namespace of the document read and its root element nodes.</returns>
/// <exception cref="XmlSchemaException">The document could not be read as an XML schema.</exception>
public SchemaModel Parse(string xsdPath)
{
    _set.XmlResolver = new XmlUrlResolver();
    using var reader = XmlReader.Create(xsdPath, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });

    // When a handler is supplied, XmlSchema.Read reports a parse failure to it and returns null.
    // The handler used here ignores the event, so fail with a descriptive error instead of letting
    // the null reach _set.Add and surface as an unrelated ArgumentNullException.
    var schema = XmlSchema.Read(reader, ValidationCallback)
        ?? throw new XmlSchemaException($"Could not read an XML schema from '{xsdPath}'.");

    _set.Add(schema);
    _set.CompilationSettings = new XmlSchemaCompilationSettings { EnableUpaCheck = true };
    _set.Compile();

    var model = new SchemaModel
    {
        TargetNamespace = schema.TargetNamespace
    };

    // Every schema in the set contributes its global elements, not only the document just read.
    var globalElements = _set.Schemas().Cast<XmlSchema>()
        .SelectMany(s => s.Elements.Values.Cast<XmlSchemaElement>());
    foreach (XmlSchemaElement globalEl in globalElements)
    {
        model.RootElements.Add(BuildNodeForElement(globalEl, parentContentModel: null));
    }

    return model;
}
/// <summary>
/// Validation event handler passed to <c>XmlSchema.Read</c> by <c>Parse</c>; it deliberately does nothing.
/// </summary>
/// <param name="sender">Source of the validation event; unused.</param>
/// <param name="e">Details of the validation event; unused.</param>
private void ValidationCallback(object? sender, ValidationEventArgs e)
{
// Intentionally empty: reported problems are ignored rather than thrown, so parsing stays best-effort.
// To surface them while debugging, write e.Severity and e.Message to a log or to stderr here.
}
/// <summary>
/// Complex types whose content is currently being expanded on the active call chain.
/// Used to stop expansion when a type (directly or indirectly) contains an element of its own type.
/// </summary>
private readonly HashSet<XmlSchemaComplexType> _typesBeingExpanded = new();

/// <summary>
/// Builds the node for a single element declaration: name, namespace, nillability, cardinality and,
/// when the element's type can be resolved, its type information, attributes, children or constraints.
/// </summary>
/// <param name="element">The element declaration or reference to describe.</param>
/// <param name="parentContentModel">
/// Content model label of the enclosing group, or null for a global element. It is stored on the node and
/// later replaced when the element's own type defines a content model.
/// </param>
/// <returns>The populated node. A recursive occurrence of a complex type is returned without children.</returns>
private SchemaNode BuildNodeForElement(XmlSchemaElement element, string? parentContentModel)
{
    var node = new SchemaNode
    {
        Name = element.Name ?? element.RefName.Name,
        Namespace = (element.QualifiedName.IsEmpty ? element.RefName : element.QualifiedName).Namespace,
        IsNillable = element.IsNillable,
        Cardinality = new Occurs
        {
            Min = element.MinOccurs,
            Max = element.MaxOccurs,
            MaxIsUnbounded = element.MaxOccursString == "unbounded"
        },
        ContentModel = parentContentModel
    };

    var type = ResolveElementType(element);
    if (type is null)
    {
        return node;
    }

    node.TypeName = GetQualifiedTypeName(type);
    if (type.Datatype != null)
    {
        node.BuiltInType = type.Datatype.TypeCode.ToString();
    }

    switch (type)
    {
        case XmlSchemaComplexType ct:
            // Recursive schemas (a type containing an element of the same type) would otherwise
            // recurse without bound. Expand a given complex type only once per call chain; the
            // nested occurrence keeps its type name but gets no attributes or children.
            if (_typesBeingExpanded.Add(ct))
            {
                try
                {
                    HandleComplexType(node, ct);
                }
                finally
                {
                    _typesBeingExpanded.Remove(ct);
                }
            }
            break;
        case XmlSchemaSimpleType st:
            node.ContentModel = "simple";
            node.Constraints = ExtractConstraints(st);
            break;
    }

    return node;
}
/// <summary>
/// Returns a display name for a schema type: its own qualified name when it has one, otherwise the
/// qualified name of its base type, otherwise its unqualified <c>Name</c>.
/// </summary>
/// <param name="type">The type to name; may be null (for example when a type has no resolved base type).</param>
/// <returns>The chosen name, or null when <paramref name="type"/> is null or no name is available.</returns>
private static string? GetQualifiedTypeName(XmlSchemaType? type)
{
    // Callers pass BaseXmlSchemaType straight through, and that property can be null.
    if (type is null)
    {
        return null;
    }

    if (!type.QualifiedName.IsEmpty)
    {
        return type.QualifiedName.ToString();
    }

    var baseType = type.BaseXmlSchemaType;
    return baseType is not null && !baseType.QualifiedName.IsEmpty
        ? baseType.QualifiedName.ToString()
        : type.Name;
}
/// <summary>
/// Finds the schema type of an element, preferring the compiled type, then a lookup of the declared
/// type name among the set's global types, then the inline type definition.
/// </summary>
/// <param name="el">The element whose type is wanted.</param>
/// <returns>
/// The resolved type, or null when none is available. A declared type name that is not found among the
/// global types yields null; the inline definition is not consulted in that case.
/// </returns>
private XmlSchemaType? ResolveElementType(XmlSchemaElement el)
{
    var compiledType = el.ElementSchemaType;
    if (compiledType is not null)
    {
        return compiledType;
    }

    var declaredName = el.SchemaTypeName;
    return declaredName.IsEmpty
        ? el.SchemaType
        : _set.GlobalTypes[declaredName] as XmlSchemaType;
}
/// <summary>
/// Fills a node from a complex type: its attributes, and either the child elements of its content
/// particle or, for simple content, the constraints of the simple base type.
/// </summary>
/// <param name="node">The node to populate.</param>
/// <param name="ct">The complex type describing the node's content.</param>
private void HandleComplexType(SchemaNode node, XmlSchemaComplexType ct)
{
    // Attributes. Remember which ones were added so the simple-content extension below does not
    // add the same attribute a second time (AttributeUses already covers the extension's attributes
    // once the schema is compiled).
    var seenAttributes = new HashSet<XmlQualifiedName>();
    foreach (XmlSchemaAttribute attr in ct.AttributeUses.Values.OfType<XmlSchemaAttribute>())
    {
        seenAttributes.Add(GetAttributeKey(attr));
        node.Attributes.Add(ExtractAttribute(attr));
    }

    // Content model
    if (ct.ContentTypeParticle is XmlSchemaGroupBase group)
    {
        node.ContentModel = DescribeGroup(group);
        AddGroupItems(node, group);
    }
    else if (ct.ContentType == XmlSchemaContentType.TextOnly && ct.ContentModel is XmlSchemaSimpleContent simpleContent)
    {
        node.ContentModel = "simple";
        ApplySimpleContent(node, simpleContent, seenAttributes);
    }
}

/// <summary>Maps a model group to its content model label.</summary>
private static string DescribeGroup(XmlSchemaGroupBase group) => group switch
{
    XmlSchemaSequence => "sequence",
    XmlSchemaChoice => "choice",
    XmlSchemaAll => "all",
    _ => "group"
};

/// <summary>
/// Adds the items of a model group to <paramref name="parent"/>: elements become child nodes, nested
/// groups become synthetic "(group)" nodes that are expanded recursively. Other particles are skipped.
/// </summary>
private void AddGroupItems(SchemaNode parent, XmlSchemaGroupBase group)
{
    var contentModel = DescribeGroup(group);
    foreach (var item in group.Items)
    {
        switch (item)
        {
            case XmlSchemaElement childEl:
                parent.Children.Add(BuildNodeForElement(childEl, contentModel));
                break;
            case XmlSchemaGroupBase nestedGroup:
                // Represent the nested group as a synthetic node so its own kind and
                // cardinality are preserved, then expand it to any depth.
                var synthetic = new SchemaNode
                {
                    Name = "(group)",
                    Namespace = parent.Namespace,
                    ContentModel = DescribeGroup(nestedGroup),
                    Cardinality = new Occurs
                    {
                        Min = nestedGroup.MinOccurs,
                        Max = nestedGroup.MaxOccurs,
                        MaxIsUnbounded = nestedGroup.MaxOccursString == "unbounded"
                    }
                };
                AddGroupItems(synthetic, nestedGroup);
                parent.Children.Add(synthetic);
                break;
            // Skip other particles for now
        }
    }
}

/// <summary>
/// Applies a simple-content extension or restriction to the node: constraints and type names from the
/// simple base type, plus any extension attributes not already present on the node.
/// </summary>
private void ApplySimpleContent(SchemaNode node, XmlSchemaSimpleContent simpleContent, HashSet<XmlQualifiedName> seenAttributes)
{
    if (simpleContent.Content is XmlSchemaSimpleContentExtension ext)
    {
        if (ResolveType(ext.BaseTypeName) is XmlSchemaSimpleType extensionBase)
        {
            node.Constraints = ExtractConstraints(extensionBase);
            node.TypeName ??= GetQualifiedTypeName(extensionBase);
            node.BuiltInType ??= extensionBase.Datatype?.TypeCode.ToString();
        }

        foreach (XmlSchemaAttribute attr in ext.Attributes.OfType<XmlSchemaAttribute>())
        {
            // Only add attributes that the AttributeUses pass did not already contribute.
            if (seenAttributes.Add(GetAttributeKey(attr)))
            {
                node.Attributes.Add(ExtractAttribute(attr));
            }
        }
    }
    else if (simpleContent.Content is XmlSchemaSimpleContentRestriction res)
    {
        if (ResolveType(res.BaseTypeName) is XmlSchemaSimpleType restrictionBase)
        {
            var cons = ExtractConstraints(restrictionBase);
            if (cons is not null)
            {
                MergeFacets(cons, res.Facets);
            }

            node.Constraints = cons;
            node.TypeName ??= GetQualifiedTypeName(restrictionBase);
            node.BuiltInType ??= restrictionBase.Datatype?.TypeCode.ToString();
        }
    }
}

/// <summary>
/// Identity used to detect duplicate attributes: the qualified name, else the referenced name, else
/// the bare declared name.
/// </summary>
private static XmlQualifiedName GetAttributeKey(XmlSchemaAttribute attr)
{
    if (!attr.QualifiedName.IsEmpty)
    {
        return attr.QualifiedName;
    }

    if (!attr.RefName.IsEmpty)
    {
        return attr.RefName;
    }

    return new XmlQualifiedName(attr.Name ?? string.Empty);
}
/// <summary>Looks up a type by qualified name among the schema set's global types.</summary>
/// <param name="qname">The name to look up.</param>
/// <returns>The matching type, or null when the name is empty or no such global type exists.</returns>
private XmlSchemaType? ResolveType(XmlQualifiedName qname) =>
    qname.IsEmpty ? null : _set.GlobalTypes[qname] as XmlSchemaType;
/// <summary>
/// Describes an attribute declaration: name, namespace, use and, when its simple type can be
/// determined, the type name, built-in type code and constraints.
/// </summary>
/// <param name="attr">The attribute declaration or reference.</param>
/// <returns>The populated attribute description; type fields stay unset when no simple type is found.</returns>
private AttributeInfo ExtractAttribute(XmlSchemaAttribute attr)
{
    var identity = attr.QualifiedName.IsEmpty ? attr.RefName : attr.QualifiedName;
    var info = new AttributeInfo
    {
        Name = attr.Name ?? attr.RefName.Name,
        Namespace = identity.Namespace,
        Use = attr.Use.ToString()
    };

    // Preference order: compiled type, then the declared type name, then the inline definition.
    // A declared name that does not resolve to a simple type leaves the type unknown.
    XmlSchemaSimpleType? simpleType = attr.AttributeSchemaType;
    if (simpleType is null)
    {
        simpleType = attr.SchemaTypeName.IsEmpty
            ? attr.SchemaType
            : ResolveType(attr.SchemaTypeName) as XmlSchemaSimpleType;
    }

    if (simpleType is null)
    {
        return info;
    }

    info.TypeName = GetQualifiedTypeName(simpleType);
    info.BuiltInType = simpleType.Datatype?.TypeCode.ToString();
    info.Constraints = ExtractConstraints(simpleType);
    return info;
}
/// <summary>
/// Collects the constraints of a simple type: the facets of a restriction, or for list and union
/// types a "(list)" / "(union)" marker plus the merged constraints of the item or member types.
/// </summary>
/// <param name="st">The simple type to inspect.</param>
/// <returns>The collected constraints; the visible code paths always return a non-null set.</returns>
private ConstraintSet? ExtractConstraints(XmlSchemaSimpleType st)
{
    var cons = new ConstraintSet
    {
        // The base type can be null; only ask for its name when there is one.
        BaseTypeName = st.BaseXmlSchemaType is { } baseSchemaType ? GetQualifiedTypeName(baseSchemaType) : null
    };

    switch (st.Content)
    {
        case XmlSchemaSimpleTypeRestriction restr:
            MergeFacets(cons, restr.Facets);
            break;

        case XmlSchemaSimpleTypeList list:
            cons.Patterns.Add("(list)");
            // Prefer the compiled item type, which covers both a named itemType and an inline
            // simpleType child; fall back to the inline definition and then to a lookup by name.
            var itemType = list.BaseItemType
                ?? list.ItemType
                ?? (ResolveType(list.ItemTypeName) as XmlSchemaSimpleType);
            if (itemType is not null)
            {
                Merge(cons, ExtractConstraints(itemType));
            }
            break;

        case XmlSchemaSimpleTypeUnion union:
            cons.Patterns.Add("(union)");
            // BaseMemberTypes can be null, so treat that as "no members".
            foreach (var memberType in union.BaseMemberTypes ?? Array.Empty<XmlSchemaSimpleType>())
            {
                if (memberType is not null)
                {
                    Merge(cons, ExtractConstraints(memberType));
                }
            }
            break;
    }

    return cons;
}
/// <summary>
/// Copies constraints from <paramref name="source"/> into <paramref name="target"/> without
/// overwriting anything the target already has: enumeration values and patterns are added only if
/// absent, and each numeric or length bound is taken only when the target has not set it.
/// </summary>
/// <param name="target">The set that receives the constraints.</param>
/// <param name="source">The set to copy from; null is a no-op.</param>
private static void Merge(ConstraintSet target, ConstraintSet? source)
{
    if (source is null)
    {
        return;
    }

    foreach (var enumeration in source.Enumerations)
    {
        if (!target.Enumerations.Contains(enumeration))
        {
            target.Enumerations.Add(enumeration);
        }
    }

    foreach (var pattern in source.Patterns)
    {
        if (!target.Patterns.Contains(pattern))
        {
            target.Patterns.Add(pattern);
        }
    }

    if (source.Numeric is { } sourceNumeric)
    {
        var targetNumeric = target.Numeric ??= new NumericBounds();
        targetNumeric.MinInclusive ??= sourceNumeric.MinInclusive;
        targetNumeric.MaxInclusive ??= sourceNumeric.MaxInclusive;
        targetNumeric.MinExclusive ??= sourceNumeric.MinExclusive;
        targetNumeric.MaxExclusive ??= sourceNumeric.MaxExclusive;
    }

    if (source.Length is { } sourceLength)
    {
        var targetLength = target.Length ??= new LengthBounds();

        if (sourceLength.LengthSpecified && !targetLength.LengthSpecified)
        {
            targetLength.Length = sourceLength.Length;
            targetLength.LengthSpecified = true;
        }

        if (sourceLength.MinLengthSpecified && !targetLength.MinLengthSpecified)
        {
            targetLength.MinLength = sourceLength.MinLength;
            targetLength.MinLengthSpecified = true;
        }

        if (sourceLength.MaxLengthSpecified && !targetLength.MaxLengthSpecified)
        {
            targetLength.MaxLength = sourceLength.MaxLength;
            targetLength.MaxLengthSpecified = true;
        }
    }
}
/// <summary>
/// Records the recognised facets of a restriction on a constraint set: enumeration values, patterns,
/// the four numeric bounds and the three length bounds. Unrecognised facets are ignored.
/// </summary>
/// <param name="cons">The constraint set to update; facet values overwrite bounds already present.</param>
/// <param name="facets">The facets to read.</param>
private static void MergeFacets(ConstraintSet cons, XmlSchemaObjectCollection facets)
{
    foreach (var facet in facets)
    {
        if (facet is XmlSchemaEnumerationFacet enumeration)
        {
            cons.Enumerations.Add(enumeration.Value);
        }
        else if (facet is XmlSchemaPatternFacet pattern)
        {
            cons.Patterns.Add(pattern.Value);
        }
        else if (facet is XmlSchemaMinInclusiveFacet lowerInclusive)
        {
            var numeric = cons.Numeric ??= new NumericBounds();
            numeric.MinInclusive = lowerInclusive.Value;
        }
        else if (facet is XmlSchemaMaxInclusiveFacet upperInclusive)
        {
            var numeric = cons.Numeric ??= new NumericBounds();
            numeric.MaxInclusive = upperInclusive.Value;
        }
        else if (facet is XmlSchemaMinExclusiveFacet lowerExclusive)
        {
            var numeric = cons.Numeric ??= new NumericBounds();
            numeric.MinExclusive = lowerExclusive.Value;
        }
        else if (facet is XmlSchemaMaxExclusiveFacet upperExclusive)
        {
            var numeric = cons.Numeric ??= new NumericBounds();
            numeric.MaxExclusive = upperExclusive.Value;
        }
        else if (facet is XmlSchemaLengthFacet exactLength)
        {
            // The bounds object is created even when the value does not parse as an int.
            var bounds = cons.Length ??= new LengthBounds();
            if (int.TryParse(exactLength.Value, out var exact))
            {
                bounds.Length = exact;
                bounds.LengthSpecified = true;
            }
        }
        else if (facet is XmlSchemaMinLengthFacet shortest)
        {
            var bounds = cons.Length ??= new LengthBounds();
            if (int.TryParse(shortest.Value, out var minimum))
            {
                bounds.MinLength = minimum;
                bounds.MinLengthSpecified = true;
            }
        }
        else if (facet is XmlSchemaMaxLengthFacet longest)
        {
            var bounds = cons.Length ??= new LengthBounds();
            if (int.TryParse(longest.Value, out var maximum))
            {
                bounds.MaxLength = maximum;
                bounds.MaxLengthSpecified = true;
            }
        }
    }
}
}
}