using System.Text;
using System.Xml;
using System.Xml.Schema;
namespace XSDVisualiser.Core;
/// <summary>
/// Parses XML Schema (XSD) into a simplified model for visualization and tooling.
/// </summary>
public class XsdSchemaParser
{
    // Schema set shared by all lookups. It lives on the instance, so every call to Parse
    // adds another schema to the same set.
    private readonly XmlSchemaSet _set = new();

    // Complex types currently being expanded on the active BuildNodeForElement path.
    // Used to stop expansion when a type (directly or indirectly) contains itself.
    private readonly HashSet<XmlSchemaComplexType> _typesInProgress = new();

    /// <summary>
    /// Reads and compiles the provided XSD file into a schema set, then produces a simplified schema model.
    /// </summary>
    /// <remarks>
    /// The schema set is kept on the instance, so calling this method again on the same instance
    /// adds the new schema to the previously loaded ones, and the returned model then contains
    /// the global elements of every schema in the set.
    /// </remarks>
    /// <param name="xsdPath">Path to the XSD file.</param>
    /// <returns>A populated <see cref="SchemaModel"/> representing global elements and their structures.</returns>
    public SchemaModel Parse(string xsdPath)
    {
        _set.XmlResolver = new XmlUrlResolver();

        using var reader = XmlReader.Create(xsdPath, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });
        var schema = XmlSchema.Read(reader, ValidationCallback);

        _set.Add(schema!);
        _set.CompilationSettings = new XmlSchemaCompilationSettings { EnableUpaCheck = true };
        _set.Compile();

        var model = new SchemaModel
        {
            TargetNamespace = schema!.TargetNamespace
        };

        // Every global element of every schema in the set becomes a root node.
        var globalElements = _set.Schemas()
            .Cast<XmlSchema>()
            .SelectMany(s => s.Elements.Values.Cast<XmlSchemaElement>());

        foreach (var globalEl in globalElements)
        {
            model.RootElements.Add(BuildNodeForElement(globalEl, null));
        }

        return model;
    }

    /// <summary>
    /// Validation handler passed to <see cref="XmlSchema.Read(XmlReader, ValidationEventHandler)"/>.
    /// </summary>
    private void ValidationCallback(object? sender, ValidationEventArgs e)
    {
        // Intentionally no-op: validation events raised while reading the schema are ignored.
        // NOTE(review): this handler is only passed to XmlSchema.Read; it is not registered on
        // the schema set, so errors raised by Add/Compile are not routed here — confirm whether
        // that is the intended behavior.
    }

    /// <summary>
    /// Builds the model node for a single element declaration, including its type information,
    /// documentation, attributes and child content.
    /// </summary>
    /// <param name="element">The element declaration (or element reference) to describe.</param>
    /// <param name="parentContentModel">
    /// Content model name of the enclosing group; used as the node's initial content model.
    /// </param>
    /// <returns>The populated node.</returns>
    private SchemaNode BuildNodeForElement(XmlSchemaElement element, string? parentContentModel)
    {
        var node = new SchemaNode
        {
            Name = element.Name ?? element.RefName.Name,
            Namespace = (element.QualifiedName.IsEmpty ? element.RefName : element.QualifiedName).Namespace,
            IsNillable = element.IsNillable,
            Cardinality = CreateOccurs(element),
            ContentModel = parentContentModel
        };

        // Prefer element-level documentation
        node.Documentation = ExtractDocumentation(element);

        var type = ResolveElementType(element);
        if (type == null)
        {
            return node;
        }

        node.TypeName = GetQualifiedTypeName(type);
        if (type.Datatype != null)
        {
            node.BuiltInType = type.Datatype.TypeCode.ToString();
        }

        // Fallback to type-level documentation if none on element
        if (string.IsNullOrWhiteSpace(node.Documentation))
        {
            node.Documentation = ExtractDocumentation(type);
        }

        switch (type)
        {
            case XmlSchemaComplexType ct:
                // A type that is already being expanded further up the call chain is
                // self-referential; expanding it again would never terminate, so such
                // nodes keep their type name but get no attributes or children.
                if (_typesInProgress.Add(ct))
                {
                    try
                    {
                        HandleComplexType(node, ct);
                    }
                    finally
                    {
                        _typesInProgress.Remove(ct);
                    }
                }

                break;
            case XmlSchemaSimpleType st:
                node.ContentModel = "simple";
                node.Constraints = ExtractConstraints(st);
                break;
        }

        return node;
    }

    /// <summary>
    /// Creates the cardinality descriptor for a particle from its occurrence settings.
    /// </summary>
    private static Occurs CreateOccurs(XmlSchemaParticle particle)
    {
        return new Occurs
        {
            Min = particle.MinOccurs,
            Max = particle.MaxOccurs,
            MaxIsUnbounded = particle.MaxOccursString == "unbounded"
        };
    }

    /// <summary>
    /// Returns a display name for a type: its own qualified name when it has one, otherwise
    /// the qualified name of its base type, otherwise its plain name.
    /// </summary>
    private static string? GetQualifiedTypeName(XmlSchemaType type)
    {
        if (!type.QualifiedName.IsEmpty)
        {
            return type.QualifiedName.ToString();
        }

        return type.BaseXmlSchemaType is { QualifiedName.IsEmpty: false }
            ? type.BaseXmlSchemaType.QualifiedName.ToString()
            : type.Name;
    }

    /// <summary>
    /// Finds the type of an element: the compiled type if present, otherwise the named global
    /// type, otherwise the inline type declaration.
    /// </summary>
    private XmlSchemaType? ResolveElementType(XmlSchemaElement el)
    {
        if (el.ElementSchemaType != null)
        {
            return el.ElementSchemaType;
        }

        if (!el.SchemaTypeName.IsEmpty)
        {
            return ResolveType(el.SchemaTypeName);
        }

        return el.SchemaType;
    }

    /// <summary>
    /// Populates a node from a complex type: its attributes (de-duplicated by qualified name)
    /// and either its child particles or its simple-content details.
    /// </summary>
    private void HandleComplexType(SchemaNode node, XmlSchemaComplexType ct)
    {
        // Collect attributes (ensure uniqueness)
        var seenAttrKeys = new HashSet<string>(StringComparer.Ordinal);

        // 1) Compiled attribute uses (includes inherited/group attributes after Compile)
        AddAttributes(node, seenAttrKeys, ct.AttributeUses.Values.OfType<XmlSchemaAttribute>());

        // 2) Uncompiled attributes directly on the type (fallback)
        AddAttributes(node, seenAttrKeys, ct.Attributes.OfType<XmlSchemaAttribute>());

        // 3) Attributes from complexContent extension/restriction
        if (ct.ContentModel is XmlSchemaComplexContent cc)
        {
            switch (cc.Content)
            {
                case XmlSchemaComplexContentExtension cext:
                    AddAttributes(node, seenAttrKeys, cext.Attributes.OfType<XmlSchemaAttribute>());
                    break;
                case XmlSchemaComplexContentRestriction cres:
                    AddAttributes(node, seenAttrKeys, cres.Attributes.OfType<XmlSchemaAttribute>());
                    break;
            }
        }

        // Content model
        if (ct.ContentTypeParticle is XmlSchemaGroupBase group)
        {
            var content = GetGroupKind(group);
            node.ContentModel = content;
            AddGroupChildren(node, group, content);
        }
        else if (ct is
                 {
                     ContentType: XmlSchemaContentType.TextOnly, ContentModel: XmlSchemaSimpleContent simpleContent
                 })
        {
            node.ContentModel = "simple";
            switch (simpleContent.Content)
            {
                case XmlSchemaSimpleContentExtension ext:
                {
                    if (ResolveType(ext.BaseTypeName) is XmlSchemaSimpleType st)
                    {
                        node.Constraints = ExtractConstraints(st);
                        node.TypeName ??= GetQualifiedTypeName(st);
                        node.BuiltInType ??= st.Datatype?.TypeCode.ToString();
                    }

                    AddAttributes(node, seenAttrKeys, ext.Attributes.OfType<XmlSchemaAttribute>());
                    break;
                }
                case XmlSchemaSimpleContentRestriction res:
                {
                    // NOTE(review): facets are only captured when the base resolves to a simple
                    // type; a restriction whose base is a complex type contributes nothing here.
                    if (ResolveType(res.BaseTypeName) is XmlSchemaSimpleType st)
                    {
                        var cons = ExtractConstraints(st);
                        MergeFacets(cons, res.Facets);
                        node.Constraints = cons;
                        node.TypeName ??= GetQualifiedTypeName(st);
                        node.BuiltInType ??= st.Datatype?.TypeCode.ToString();
                    }

                    break;
                }
            }
        }
    }

    /// <summary>
    /// Adds each attribute to the node unless an attribute with the same qualified name
    /// has already been recorded in <paramref name="seenAttrKeys"/>.
    /// </summary>
    private void AddAttributes(SchemaNode node, HashSet<string> seenAttrKeys, IEnumerable<XmlSchemaAttribute> attributes)
    {
        foreach (var attr in attributes)
        {
            var qn = attr.QualifiedName.IsEmpty ? attr.RefName : attr.QualifiedName;
            if (seenAttrKeys.Add(qn.ToString()))
            {
                node.Attributes.Add(ExtractAttribute(attr));
            }
        }
    }

    /// <summary>
    /// Maps a model group to the content model name used in the output.
    /// </summary>
    private static string GetGroupKind(XmlSchemaGroupBase group)
    {
        return group switch
        {
            XmlSchemaSequence => "sequence",
            XmlSchemaChoice => "choice",
            XmlSchemaAll => "all",
            _ => "group"
        };
    }

    /// <summary>
    /// Adds the items of a model group as children of <paramref name="parent"/>. Elements become
    /// element nodes; nested groups become synthetic "(group)" nodes that are expanded the same
    /// way, to any depth. Other particle kinds are skipped.
    /// </summary>
    private void AddGroupChildren(SchemaNode parent, XmlSchemaGroupBase group, string contentModel)
    {
        foreach (var item in group.Items)
        {
            switch (item)
            {
                case XmlSchemaElement childEl:
                    parent.Children.Add(BuildNodeForElement(childEl, contentModel));
                    break;
                case XmlSchemaGroupBase nestedGroup:
                {
                    // Represent the nested group with a synthetic node
                    var nestedKind = GetGroupKind(nestedGroup);
                    var synthetic = new SchemaNode
                    {
                        Name = "(group)",
                        Namespace = parent.Namespace,
                        ContentModel = nestedKind,
                        Cardinality = CreateOccurs(nestedGroup)
                    };
                    AddGroupChildren(synthetic, nestedGroup, nestedKind);
                    parent.Children.Add(synthetic);
                    break;
                }
                // Skip other particles for now
            }
        }
    }

    /// <summary>
    /// Looks up a global type by qualified name in the schema set.
    /// </summary>
    /// <returns>The type, or <c>null</c> when the name is empty or no such global type exists.</returns>
    private XmlSchemaType? ResolveType(XmlQualifiedName qname)
    {
        if (qname.IsEmpty)
        {
            return null;
        }

        return _set.GlobalTypes[qname] as XmlSchemaType;
    }

    /// <summary>
    /// Builds the attribute description, including type name, built-in type and constraints
    /// when the attribute's simple type can be determined.
    /// </summary>
    private AttributeInfo ExtractAttribute(XmlSchemaAttribute attr)
    {
        var info = new AttributeInfo
        {
            Name = attr.Name ?? attr.RefName.Name,
            Namespace = (attr.QualifiedName.IsEmpty ? attr.RefName : attr.QualifiedName).Namespace,
            Use = attr.Use.ToString()
        };

        // Compiled type first, then the named global type, then the inline declaration.
        XmlSchemaSimpleType? st = null;
        if (attr.AttributeSchemaType != null)
        {
            st = attr.AttributeSchemaType;
        }
        else if (!attr.SchemaTypeName.IsEmpty)
        {
            st = ResolveType(attr.SchemaTypeName) as XmlSchemaSimpleType;
        }
        else if (attr.SchemaType != null)
        {
            st = attr.SchemaType;
        }

        if (st == null)
        {
            return info;
        }

        info.TypeName = GetQualifiedTypeName(st);
        info.BuiltInType = st.Datatype?.TypeCode.ToString();
        info.Constraints = ExtractConstraints(st);
        return info;
    }

    /// <summary>
    /// Collects the facets of a simple type. Restrictions contribute their own facets; lists
    /// contribute the constraints of a named item type; unions contribute the constraints of
    /// each member type.
    /// </summary>
    private ConstraintSet? ExtractConstraints(XmlSchemaSimpleType st)
    {
        // The base type can be absent, so it must not be dereferenced blindly.
        var baseSchemaType = st.BaseXmlSchemaType;
        var cons = new ConstraintSet
        {
            BaseTypeName = baseSchemaType is null ? null : GetQualifiedTypeName(baseSchemaType)
        };

        switch (st.Content)
        {
            case XmlSchemaSimpleTypeRestriction restr:
                MergeFacets(cons, restr.Facets);
                break;
            case XmlSchemaSimpleTypeList list:
            {
                if (!list.ItemTypeName.IsEmpty && ResolveType(list.ItemTypeName) is XmlSchemaSimpleType itemSt)
                {
                    Merge(cons, ExtractConstraints(itemSt));
                }

                break;
            }
            case XmlSchemaSimpleTypeUnion union:
            {
                // BaseMemberTypes can be null, so fall back to an empty array.
                foreach (var memberType in union.BaseMemberTypes ?? Array.Empty<XmlSchemaSimpleType>())
                {
                    if (memberType is not { } mst)
                    {
                        continue;
                    }

                    Merge(cons, ExtractConstraints(mst));
                }

                break;
            }
        }

        return cons;
    }

    /// <summary>
    /// Copies the entries of <paramref name="source"/> into <paramref name="target"/>, skipping
    /// entries whose name and value are already present.
    /// </summary>
    private static void Merge(ConstraintSet target, ConstraintSet? source)
    {
        // Merge generic constraints (name + value de-duplication)
        if (source?.Constraints == null)
        {
            return;
        }

        foreach (var sc in source.Constraints)
        {
            var exists = target.Constraints.Any(tc =>
                string.Equals(tc.Name, sc.Name, StringComparison.Ordinal) &&
                string.Equals(tc.Value, sc.Value, StringComparison.Ordinal));
            if (!exists)
            {
                target.Constraints.Add(new ConstraintEntry { Name = sc.Name, Value = sc.Value });
            }
        }
    }

    /// <summary>
    /// Adds one entry per facet (name derived from the facet class, value from the facet) to
    /// <paramref name="cons"/>, skipping facets with an empty name or value and duplicates.
    /// </summary>
    private static void MergeFacets(ConstraintSet cons, XmlSchemaObjectCollection facets)
    {
        foreach (var f in facets)
        {
            // Capture all constraints generically for dynamic display
            if (f is not XmlSchemaFacet baseFacet)
            {
                continue;
            }

            var name = GetFacetName(f);
            var value = baseFacet.Value;
            if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(value))
            {
                continue;
            }

            var exists = cons.Constraints.Any(entry =>
                string.Equals(entry.Name, name, StringComparison.Ordinal) &&
                string.Equals(entry.Value, value, StringComparison.Ordinal));
            if (!exists)
            {
                cons.Constraints.Add(new ConstraintEntry { Name = name, Value = value });
            }
        }
    }

    /// <summary>
    /// Derives a facet name from the facet's class name by removing the "XmlSchema" prefix and
    /// the "Facet" suffix and lower-casing the first character
    /// (e.g. XmlSchemaMinInclusiveFacet becomes "minInclusive").
    /// </summary>
    private static string GetFacetName(XmlSchemaObject facet)
    {
        const string prefix = "XmlSchema";
        const string suffix = "Facet";

        var typeName = facet.GetType().Name; // e.g., XmlSchemaMinInclusiveFacet
        if (typeName.StartsWith(prefix, StringComparison.Ordinal))
        {
            typeName = typeName.Substring(prefix.Length);
        }

        if (typeName.EndsWith(suffix, StringComparison.Ordinal))
        {
            typeName = typeName.Substring(0, typeName.Length - suffix.Length);
        }

        if (typeName.Length == 0)
        {
            return typeName;
        }

        return char.ToLowerInvariant(typeName[0]) + typeName.Substring(1);
    }

    /// <summary>
    /// Concatenates the text of all documentation items in the annotation of
    /// <paramref name="annotated"/>, separating items with a blank line.
    /// </summary>
    /// <returns>The documentation text, or <c>null</c> when there is none.</returns>
    private static string? ExtractDocumentation(XmlSchemaAnnotated? annotated)
    {
        if (annotated?.Annotation == null)
        {
            return null;
        }

        var sb = new StringBuilder();
        foreach (var item in annotated.Annotation.Items)
        {
            if (item is not XmlSchemaDocumentation doc)
            {
                continue;
            }

            // Documentation without markup (e.g. only a source URI) is skipped; we only render text.
            if (doc.Markup is not { Length: > 0 } markup)
            {
                continue;
            }

            var pieceBuilder = new StringBuilder();
            foreach (var markupNode in markup)
            {
                try
                {
                    var text = markupNode?.InnerText;
                    if (!string.IsNullOrWhiteSpace(text))
                    {
                        pieceBuilder.Append(text);
                    }
                }
                catch
                {
                    // ignore malformed nodes (deliberate best-effort)
                }
            }

            var piece = pieceBuilder.ToString().Trim();
            if (string.IsNullOrWhiteSpace(piece))
            {
                continue;
            }

            if (sb.Length > 0)
            {
                sb.AppendLine().AppendLine();
            }

            sb.Append(piece);
        }

        return sb.Length == 0 ? null : sb.ToString();
    }
}