diff --git a/ICSharpCode.AvalonEdit/ICSharpCode.AvalonEdit.csproj b/ICSharpCode.AvalonEdit/ICSharpCode.AvalonEdit.csproj index 8a73964..cbbf511 100644 --- a/ICSharpCode.AvalonEdit/ICSharpCode.AvalonEdit.csproj +++ b/ICSharpCode.AvalonEdit/ICSharpCode.AvalonEdit.csproj @@ -315,9 +315,28 @@ - + + + + + + + + + + + + + + - + + + + + + + diff --git a/ICSharpCode.AvalonEdit/XmlParser/AbstractXmlVisitor.cs b/ICSharpCode.AvalonEdit/XmlParser/AbstractXmlVisitor.cs new file mode 100644 index 0000000..12682e6 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/AbstractXmlVisitor.cs @@ -0,0 +1,47 @@ +// +// +// +// +// $Revision$ +// +using System; +using System.Text; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Derive from this class to create visitor for the XML tree + /// + public abstract class AbstractXmlVisitor : IXmlVisitor + { + /// Visit RawDocument + public virtual void VisitDocument(RawDocument document) + { + foreach(RawObject child in document.Children) child.AcceptVisitor(this); + } + + /// Visit RawElement + public virtual void VisitElement(RawElement element) + { + foreach(RawObject child in element.Children) child.AcceptVisitor(this); + } + + /// Visit RawTag + public virtual void VisitTag(RawTag tag) + { + foreach(RawObject child in tag.Children) child.AcceptVisitor(this); + } + + /// Visit RawAttribute + public virtual void VisitAttribute(RawAttribute attribute) + { + + } + + /// Visit RawText + public virtual void VisitText(RawText text) + { + + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/AttributeCollection.cs b/ICSharpCode.AvalonEdit/XmlParser/AttributeCollection.cs new file mode 100644 index 0000000..f43fda4 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/AttributeCollection.cs @@ -0,0 +1,104 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Specailized attribute collection with attribute name caching + /// + public class AttributeCollection: FilteredCollection> + { + /// Wrap the given collection. Non-attributes are filtered + public AttributeCollection(ChildrenCollection source): base(source) {} + + /// Wrap the given collection. Non-attributes are filtered. Items not matching the condition are filtered. + public AttributeCollection(ChildrenCollection source, Predicate condition): base(source, condition) {} + + Dictionary> hashtable = new Dictionary>(); + + void AddToHashtable(RawAttribute attr) + { + string localName = attr.LocalName; + if (!hashtable.ContainsKey(localName)) { + hashtable[localName] = new List(1); + } + hashtable[localName].Add(attr); + } + + void RemoveFromHashtable(RawAttribute attr) + { + string localName = attr.LocalName; + hashtable[localName].Remove(attr); + } + + static List NoAttributes = new List(); + + /// + /// Get all attributes with given local name. + /// Hash table is used for lookup so this is cheap. + /// + public IEnumerable GetByLocalName(string localName) + { + if (hashtable.ContainsKey(localName)) { + return hashtable[localName]; + } else { + return NoAttributes; + } + } + + /// + protected override void ClearItems() + { + foreach(RawAttribute item in this) { + RemoveFromHashtable(item); + item.Changing -= item_Changing; + item.Changed -= item_Changed; + } + base.ClearItems(); + } + + /// + protected override void InsertItem(int index, RawAttribute item) + { + AddToHashtable(item); + item.Changing += item_Changing; + item.Changed += item_Changed; + base.InsertItem(index, item); + } + + /// + protected override void RemoveItem(int index) + { + RemoveFromHashtable(this[index]); + this[index].Changing -= item_Changing; + this[index].Changed -= item_Changed; + base.RemoveItem(index); + } + + /// + protected override void SetItem(int index, RawAttribute item) + { + throw new NotSupportedException(); + } + + // Every item in the collectoin should be registered to these handlers + // so that we can handle renames + + void item_Changing(object sender, RawObjectEventArgs e) + { + RemoveFromHashtable((RawAttribute)e.Object); + } + + void item_Changed(object sender, RawObjectEventArgs e) + { + AddToHashtable((RawAttribute)e.Object); + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/Cache.cs b/ICSharpCode.AvalonEdit/XmlParser/Cache.cs new file mode 100644 index 0000000..9110bcc --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/Cache.cs @@ -0,0 +1,127 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Holds all valid parsed items. + /// Also tracks their offsets as document changes. + /// + class Cache + { + /// Previously parsed items as long as they are valid + TextSegmentCollection parsedItems = new TextSegmentCollection(); + + /// + /// Is used to identify what memory range was touched by object + /// The default is (StartOffset, EndOffset + 1) which is not stored + /// + TextSegmentCollection touchedMemoryRanges = new TextSegmentCollection(); + + class TouchedMemoryRange: TextSegment + { + public RawObject TouchedByObject { get; set; } + } + + public void UpdateOffsetsAndInvalidate(IEnumerable changes) + { + foreach(DocumentChangeEventArgs change in changes) { + // Update offsets of all items + parsedItems.UpdateOffsets(change); + touchedMemoryRanges.UpdateOffsets(change); + + // Remove any items affected by the change + XmlParser.Log("Changed offset {0}", change.Offset); + // Removing will cause one of the ends to be set to change.Offset + // FindSegmentsContaining includes any segments touching + // so that conviniently takes care of the +1 byte + foreach(RawObject obj in parsedItems.FindSegmentsContaining(change.Offset)) { + Remove(obj, false); + } + foreach(TouchedMemoryRange memory in touchedMemoryRanges.FindSegmentsContaining(change.Offset)) { + XmlParser.Log("Found that {0} dependeds on memory ({1}-{2})", memory.TouchedByObject, memory.StartOffset, memory.EndOffset); + Remove(memory.TouchedByObject, true); + touchedMemoryRanges.Remove(memory); + } + } + } + + /// Add object to cache, optionally adding extra memory tracking + public void Add(RawObject obj, int? maxTouchedLocation) + { + XmlParser.Assert(obj.Length > 0 || obj is RawDocument, string.Format("Invalid object {0}. It has zero length.", obj)); + if (obj is RawContainer) { + int objStartOffset = obj.StartOffset; + int objEndOffset = obj.EndOffset; + foreach(RawObject child in ((RawContainer)obj).Children) { + XmlParser.Assert(objStartOffset <= child.StartOffset && child.EndOffset <= objEndOffset, "Wrong nesting"); + } + } + parsedItems.Add(obj); + obj.IsInCache = true; + if (maxTouchedLocation != null) { + // location is assumed to be read so the range ends at (location + 1) + // For example eg for "a_" it is (0-2) + TouchedMemoryRange memRange = new TouchedMemoryRange() { + StartOffset = obj.StartOffset, + EndOffset = maxTouchedLocation.Value + 1, + TouchedByObject = obj + }; + touchedMemoryRanges.Add(memRange); + XmlParser.Log("{0} touched memory range ({1}-{2})", obj, memRange.StartOffset, memRange.EndOffset); + } + } + + List FindParents(RawObject child) + { + List parents = new List(); + foreach(RawObject parent in parsedItems.FindSegmentsContaining(child.StartOffset)) { + // Parent is anyone wholy containg the child + if (parent.StartOffset <= child.StartOffset && child.EndOffset <= parent.EndOffset && parent != child) { + parents.Add(parent); + } + } + return parents; + } + + /// Remove from cache + public void Remove(RawObject obj, bool includeParents) + { + if (includeParents) { + List parents = FindParents(obj); + + foreach(RawObject r in parents) { + if (parsedItems.Remove(r)) { + r.IsInCache = false; + XmlParser.Log("Removing cached item {0} (it is parent)", r); + } + } + } + + if (parsedItems.Remove(obj)) { + obj.IsInCache = false; + XmlParser.Log("Removed cached item {0}", obj); + } + } + + public T GetObject(int offset, int lookaheadCount, Predicate conditon) where T: RawObject, new() + { + RawObject obj = parsedItems.FindFirstSegmentWithStartAfter(offset); + while(obj != null && offset <= obj.StartOffset && obj.StartOffset <= offset + lookaheadCount) { + if (obj is T && conditon((T)obj)) { + return (T)obj; + } + obj = parsedItems.GetNextSegment(obj); + } + return null; + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/ChildrenCollection.cs b/ICSharpCode.AvalonEdit/XmlParser/ChildrenCollection.cs new file mode 100644 index 0000000..24923f8 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/ChildrenCollection.cs @@ -0,0 +1,94 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Linq; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Collection that is publicly read-only and has support + /// for adding/removing multiple items at a time. + /// + public class ChildrenCollection: Collection, INotifyCollectionChanged + { + /// Occurs when the collection is changed + public event NotifyCollectionChangedEventHandler CollectionChanged; + + /// Raises event + // Do not inherit - it is not called if event is null + void OnCollectionChanged(NotifyCollectionChangedEventArgs e) + { + if (CollectionChanged != null) { + CollectionChanged(this, e); + } + } + + /// + protected override void ClearItems() + { + throw new NotSupportedException(); + } + + /// + protected override void InsertItem(int index, T item) + { + throw new NotSupportedException(); + } + + /// + protected override void RemoveItem(int index) + { + throw new NotSupportedException(); + } + + /// + protected override void SetItem(int index, T item) + { + throw new NotSupportedException(); + } + + internal void InsertItemAt(int index, T item) + { + base.InsertItem(index, item); + if (CollectionChanged != null) + OnCollectionChanged(new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Add, new T[] { item }.ToList(), index)); + } + + internal void RemoveItemAt(int index) + { + T removed = this[index]; + base.RemoveItem(index); + if (CollectionChanged != null) + OnCollectionChanged(new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Remove, new T[] { removed }.ToList(), index)); + } + + internal void InsertItemsAt(int index, IList items) + { + for(int i = 0; i < items.Count; i++) { + base.InsertItem(index + i, items[i]); + } + if (CollectionChanged != null) + OnCollectionChanged(new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Add, (IList)items, index)); + } + + internal void RemoveItemsAt(int index, int count) + { + List removed = new List(); + for(int i = 0; i < count; i++) { + removed.Add(this[index]); + base.RemoveItem(index); + } + if (CollectionChanged != null) + OnCollectionChanged(new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Remove, (IList)removed, index)); + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/Collections.cs b/ICSharpCode.AvalonEdit/XmlParser/Collections.cs deleted file mode 100644 index 6732874..0000000 --- a/ICSharpCode.AvalonEdit/XmlParser/Collections.cs +++ /dev/null @@ -1,330 +0,0 @@ -// -// -// -// -// $Revision$ -// - -using System; -using System.Collections; -using System.Collections.Generic; -using System.Collections.ObjectModel; -using System.Collections.Specialized; -using System.Linq; - -namespace ICSharpCode.AvalonEdit.XmlParser -{ - /// - /// Collection that is publicly read-only and has support - /// for adding/removing multiple items at a time. - /// - public class ChildrenCollection: Collection, INotifyCollectionChanged - { - /// Occurs when the collection is changed - public event NotifyCollectionChangedEventHandler CollectionChanged; - - /// Raises event - // Do not inherit - it is not called if event is null - void OnCollectionChanged(NotifyCollectionChangedEventArgs e) - { - if (CollectionChanged != null) { - CollectionChanged(this, e); - } - } - - /// - protected override void ClearItems() - { - throw new NotSupportedException(); - } - - /// - protected override void InsertItem(int index, T item) - { - throw new NotSupportedException(); - } - - /// - protected override void RemoveItem(int index) - { - throw new NotSupportedException(); - } - - /// - protected override void SetItem(int index, T item) - { - throw new NotSupportedException(); - } - - internal void InsertItemAt(int index, T item) - { - base.InsertItem(index, item); - if (CollectionChanged != null) - OnCollectionChanged(new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Add, new T[] { item }.ToList(), index)); - } - - internal void RemoveItemAt(int index) - { - T removed = this[index]; - base.RemoveItem(index); - if (CollectionChanged != null) - OnCollectionChanged(new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Remove, new T[] { removed }.ToList(), index)); - } - - internal void InsertItemsAt(int index, IList items) - { - for(int i = 0; i < items.Count; i++) { - base.InsertItem(index + i, items[i]); - } - if (CollectionChanged != null) - OnCollectionChanged(new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Add, (IList)items, index)); - } - - internal void RemoveItemsAt(int index, int count) - { - List removed = new List(); - for(int i = 0; i < count; i++) { - removed.Add(this[index]); - base.RemoveItem(index); - } - if (CollectionChanged != null) - OnCollectionChanged(new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Remove, (IList)removed, index)); - } - } - - /// - /// Specailized attribute collection with attribute name caching - /// - public class AttributeCollection: FilteredCollection> - { - /// Wrap the given collection. Non-attributes are filtered - public AttributeCollection(ChildrenCollection source): base(source) {} - - /// Wrap the given collection. Non-attributes are filtered. Items not matching the condition are filtered. - public AttributeCollection(ChildrenCollection source, Predicate condition): base(source, condition) {} - - Dictionary> hashtable = new Dictionary>(); - - void AddToHashtable(RawAttribute attr) - { - string localName = attr.LocalName; - if (!hashtable.ContainsKey(localName)) { - hashtable[localName] = new List(1); - } - hashtable[localName].Add(attr); - } - - void RemoveFromHashtable(RawAttribute attr) - { - string localName = attr.LocalName; - hashtable[localName].Remove(attr); - } - - static List NoAttributes = new List(); - - /// - /// Get all attributes with given local name. - /// Hash table is used for lookup so this is cheap. - /// - public IEnumerable GetByLocalName(string localName) - { - if (hashtable.ContainsKey(localName)) { - return hashtable[localName]; - } else { - return NoAttributes; - } - } - - /// - protected override void ClearItems() - { - foreach(RawAttribute item in this) { - RemoveFromHashtable(item); - item.Changing -= item_Changing; - item.Changed -= item_Changed; - } - base.ClearItems(); - } - - /// - protected override void InsertItem(int index, RawAttribute item) - { - AddToHashtable(item); - item.Changing += item_Changing; - item.Changed += item_Changed; - base.InsertItem(index, item); - } - - /// - protected override void RemoveItem(int index) - { - RemoveFromHashtable(this[index]); - this[index].Changing -= item_Changing; - this[index].Changed -= item_Changed; - base.RemoveItem(index); - } - - /// - protected override void SetItem(int index, RawAttribute item) - { - throw new NotSupportedException(); - } - - // Every item in the collectoin should be registered to these handlers - // so that we can handle renames - - void item_Changing(object sender, RawObjectEventArgs e) - { - RemoveFromHashtable((RawAttribute)e.Object); - } - - void item_Changed(object sender, RawObjectEventArgs e) - { - AddToHashtable((RawAttribute)e.Object); - } - } - - /// - /// Collection that presents only some items from the wrapped collection. - /// It implicitely filters object that are not of type T (or derived). - /// - public class FilteredCollection: ObservableCollection where C: INotifyCollectionChanged, IList - { - C source; - Predicate condition; - List srcPtrs = new List(); // Index to the original collection - - /// Wrap the given collection. Items of type other then T are filtered - public FilteredCollection(C source) : this (source, x => true) { } - - /// Wrap the given collection. Items of type other then T are filtered. Items not matching the condition are filtered. - public FilteredCollection(C source, Predicate condition) - { - this.source = source; - this.condition = condition; - - this.source.CollectionChanged += SourceCollectionChanged; - - Reset(); - } - - void Reset() - { - this.Clear(); - srcPtrs.Clear(); - for(int i = 0; i < source.Count; i++) { - if (source[i] is T && condition(source[i])) { - this.Add((T)source[i]); - srcPtrs.Add(i); - } - } - } - - void SourceCollectionChanged(object sender, NotifyCollectionChangedEventArgs e) - { - switch(e.Action) { - case NotifyCollectionChangedAction.Add: - // Update pointers - for(int i = 0; i < srcPtrs.Count; i++) { - if (srcPtrs[i] >= e.NewStartingIndex) { - srcPtrs[i] += e.NewItems.Count; - } - } - // Find where to add items - int addIndex = srcPtrs.FindIndex(srcPtr => srcPtr >= e.NewStartingIndex); - if (addIndex == -1) addIndex = this.Count; - // Add items to collection - for(int i = 0; i < e.NewItems.Count; i++) { - if (e.NewItems[i] is T && condition(e.NewItems[i])) { - this.InsertItem(addIndex, (T)e.NewItems[i]); - srcPtrs.Insert(addIndex, e.NewStartingIndex + i); - addIndex++; - } - } - break; - case NotifyCollectionChangedAction.Remove: - // Remove the item from our collection - for(int i = 0; i < e.OldItems.Count; i++) { - // Anyone points to the removed item? - int removeIndex = srcPtrs.IndexOf(e.OldStartingIndex + i); - // Remove - if (removeIndex != -1) { - this.RemoveAt(removeIndex); - srcPtrs.RemoveAt(removeIndex); - } - } - // Update pointers - for(int i = 0; i < srcPtrs.Count; i++) { - if (srcPtrs[i] >= e.OldStartingIndex) { - srcPtrs[i] -= e.OldItems.Count; - } - } - break; - case NotifyCollectionChangedAction.Reset: - Reset(); - break; - default: - throw new NotSupportedException(e.Action.ToString()); - } - } - } - - /// - /// Two collections in sequence - /// - public class MergedCollection: ObservableCollection where C: INotifyCollectionChanged, IList - { - C a; - C b; - - /// Create a wrapper containing elements of 'a' and then 'b' - public MergedCollection(C a, C b) - { - this.a = a; - this.b = b; - - this.a.CollectionChanged += SourceCollectionAChanged; - this.b.CollectionChanged += SourceCollectionBChanged; - - Reset(); - } - - void Reset() - { - this.Clear(); - foreach(T item in a) this.Add(item); - foreach(T item in b) this.Add(item); - } - - void SourceCollectionAChanged(object sender, NotifyCollectionChangedEventArgs e) - { - SourceCollectionChanged(0, e); - } - - void SourceCollectionBChanged(object sender, NotifyCollectionChangedEventArgs e) - { - SourceCollectionChanged(a.Count, e); - } - - void SourceCollectionChanged(int collectionStart, NotifyCollectionChangedEventArgs e) - { - switch(e.Action) { - case NotifyCollectionChangedAction.Add: - for (int i = 0; i < e.NewItems.Count; i++) { - this.InsertItem(collectionStart + e.NewStartingIndex + i, (T)e.NewItems[i]); - } - break; - case NotifyCollectionChangedAction.Remove: - for (int i = 0; i < e.OldItems.Count; i++) { - this.RemoveAt(collectionStart + e.OldStartingIndex); - } - break; - case NotifyCollectionChangedAction.Reset: - Reset(); - break; - default: - throw new NotSupportedException(e.Action.ToString()); - } - } - } -} diff --git a/ICSharpCode.AvalonEdit/XmlParser/ExtensionMethods.cs b/ICSharpCode.AvalonEdit/XmlParser/ExtensionMethods.cs new file mode 100644 index 0000000..4a046a9 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/ExtensionMethods.cs @@ -0,0 +1,51 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Diagnostics; +using System.Linq; + +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + static class ExtensionMethods + { + // Copied from ICSharpCode.SharpDevelop.Dom.ExtensionMethods + /// + /// Converts a recursive data structure into a flat list. + /// + /// The root elements of the recursive data structure. + /// The function that gets the children of an element. + /// Iterator that enumerates the tree structure in preorder. + public static IEnumerable Flatten(this IEnumerable input, Func> recursion) + { + Stack> stack = new Stack>(); + try { + stack.Push(input.GetEnumerator()); + while (stack.Count > 0) { + while (stack.Peek().MoveNext()) { + T element = stack.Peek().Current; + yield return element; + IEnumerable children = recursion(element); + if (children != null) { + stack.Push(children.GetEnumerator()); + } + } + stack.Pop().Dispose(); + } + } finally { + while (stack.Count > 0) { + stack.Pop().Dispose(); + } + } + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/FilteredCollection.cs b/ICSharpCode.AvalonEdit/XmlParser/FilteredCollection.cs new file mode 100644 index 0000000..2783f88 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/FilteredCollection.cs @@ -0,0 +1,100 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Collection that presents only some items from the wrapped collection. + /// It implicitely filters object that are not of type T (or derived). + /// + public class FilteredCollection: ObservableCollection where C: INotifyCollectionChanged, IList + { + C source; + Predicate condition; + List srcPtrs = new List(); // Index to the original collection + + /// Wrap the given collection. Items of type other then T are filtered + public FilteredCollection(C source) : this (source, x => true) { } + + /// Wrap the given collection. Items of type other then T are filtered. Items not matching the condition are filtered. + public FilteredCollection(C source, Predicate condition) + { + this.source = source; + this.condition = condition; + + this.source.CollectionChanged += SourceCollectionChanged; + + Reset(); + } + + void Reset() + { + this.Clear(); + srcPtrs.Clear(); + for(int i = 0; i < source.Count; i++) { + if (source[i] is T && condition(source[i])) { + this.Add((T)source[i]); + srcPtrs.Add(i); + } + } + } + + void SourceCollectionChanged(object sender, NotifyCollectionChangedEventArgs e) + { + switch(e.Action) { + case NotifyCollectionChangedAction.Add: + // Update pointers + for(int i = 0; i < srcPtrs.Count; i++) { + if (srcPtrs[i] >= e.NewStartingIndex) { + srcPtrs[i] += e.NewItems.Count; + } + } + // Find where to add items + int addIndex = srcPtrs.FindIndex(srcPtr => srcPtr >= e.NewStartingIndex); + if (addIndex == -1) addIndex = this.Count; + // Add items to collection + for(int i = 0; i < e.NewItems.Count; i++) { + if (e.NewItems[i] is T && condition(e.NewItems[i])) { + this.InsertItem(addIndex, (T)e.NewItems[i]); + srcPtrs.Insert(addIndex, e.NewStartingIndex + i); + addIndex++; + } + } + break; + case NotifyCollectionChangedAction.Remove: + // Remove the item from our collection + for(int i = 0; i < e.OldItems.Count; i++) { + // Anyone points to the removed item? + int removeIndex = srcPtrs.IndexOf(e.OldStartingIndex + i); + // Remove + if (removeIndex != -1) { + this.RemoveAt(removeIndex); + srcPtrs.RemoveAt(removeIndex); + } + } + // Update pointers + for(int i = 0; i < srcPtrs.Count; i++) { + if (srcPtrs[i] >= e.OldStartingIndex) { + srcPtrs[i] -= e.OldItems.Count; + } + } + break; + case NotifyCollectionChangedAction.Reset: + Reset(); + break; + default: + throw new NotSupportedException(e.Action.ToString()); + } + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/IXmlVisitor.cs b/ICSharpCode.AvalonEdit/XmlParser/IXmlVisitor.cs new file mode 100644 index 0000000..0cb3132 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/IXmlVisitor.cs @@ -0,0 +1,32 @@ +// +// +// +// +// $Revision$ +// +using System; +using System.Text; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Visitor for the XML tree + /// + public interface IXmlVisitor + { + /// Visit RawDocument + void VisitDocument(RawDocument document); + + /// Visit RawElement + void VisitElement(RawElement element); + + /// Visit RawTag + void VisitTag(RawTag tag); + + /// Visit RawAttribute + void VisitAttribute(RawAttribute attribute); + + /// Visit RawText + void VisitText(RawText text); + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/MergedCollection.cs b/ICSharpCode.AvalonEdit/XmlParser/MergedCollection.cs new file mode 100644 index 0000000..04fb80a --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/MergedCollection.cs @@ -0,0 +1,73 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Two collections in sequence + /// + public class MergedCollection: ObservableCollection where C: INotifyCollectionChanged, IList + { + C a; + C b; + + /// Create a wrapper containing elements of 'a' and then 'b' + public MergedCollection(C a, C b) + { + this.a = a; + this.b = b; + + this.a.CollectionChanged += SourceCollectionAChanged; + this.b.CollectionChanged += SourceCollectionBChanged; + + Reset(); + } + + void Reset() + { + this.Clear(); + foreach(T item in a) this.Add(item); + foreach(T item in b) this.Add(item); + } + + void SourceCollectionAChanged(object sender, NotifyCollectionChangedEventArgs e) + { + SourceCollectionChanged(0, e); + } + + void SourceCollectionBChanged(object sender, NotifyCollectionChangedEventArgs e) + { + SourceCollectionChanged(a.Count, e); + } + + void SourceCollectionChanged(int collectionStart, NotifyCollectionChangedEventArgs e) + { + switch(e.Action) { + case NotifyCollectionChangedAction.Add: + for (int i = 0; i < e.NewItems.Count; i++) { + this.InsertItem(collectionStart + e.NewStartingIndex + i, (T)e.NewItems[i]); + } + break; + case NotifyCollectionChangedAction.Remove: + for (int i = 0; i < e.OldItems.Count; i++) { + this.RemoveAt(collectionStart + e.OldStartingIndex); + } + break; + case NotifyCollectionChangedAction.Reset: + Reset(); + break; + default: + throw new NotSupportedException(e.Action.ToString()); + } + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/Visitors.cs b/ICSharpCode.AvalonEdit/XmlParser/PrettyPrintXmlVisitor.cs similarity index 51% rename from ICSharpCode.AvalonEdit/XmlParser/Visitors.cs rename to ICSharpCode.AvalonEdit/XmlParser/PrettyPrintXmlVisitor.cs index 9edd7cc..dcf9c9e 100644 --- a/ICSharpCode.AvalonEdit/XmlParser/Visitors.cs +++ b/ICSharpCode.AvalonEdit/XmlParser/PrettyPrintXmlVisitor.cs @@ -9,63 +9,6 @@ using System.Text; namespace ICSharpCode.AvalonEdit.XmlParser { - /// - /// Visitor for the XML tree - /// - public interface IXmlVisitor - { - /// Visit RawDocument - void VisitDocument(RawDocument document); - - /// Visit RawElement - void VisitElement(RawElement element); - - /// Visit RawTag - void VisitTag(RawTag tag); - - /// Visit RawAttribute - void VisitAttribute(RawAttribute attribute); - - /// Visit RawText - void VisitText(RawText text); - } - - /// - /// Derive from this class to create visitor for the XML tree - /// - public abstract class AbstractXmlVisitor : IXmlVisitor - { - /// Visit RawDocument - public virtual void VisitDocument(RawDocument document) - { - foreach(RawObject child in document.Children) child.AcceptVisitor(this); - } - - /// Visit RawElement - public virtual void VisitElement(RawElement element) - { - foreach(RawObject child in element.Children) child.AcceptVisitor(this); - } - - /// Visit RawTag - public virtual void VisitTag(RawTag tag) - { - foreach(RawObject child in tag.Children) child.AcceptVisitor(this); - } - - /// Visit RawAttribute - public virtual void VisitAttribute(RawAttribute attribute) - { - - } - - /// Visit RawText - public virtual void VisitText(RawText text) - { - - } - } - /// /// Converts the XML tree back to text. /// The text should exactly match the original. diff --git a/ICSharpCode.AvalonEdit/XmlParser/RawAttribute.cs b/ICSharpCode.AvalonEdit/XmlParser/RawAttribute.cs new file mode 100644 index 0000000..c0293a7 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/RawAttribute.cs @@ -0,0 +1,129 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Diagnostics; +using System.Linq; + +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Name-value pair in a tag + /// + public class RawAttribute: RawObject + { + /// Name with namespace prefix - exactly as in source file + public string Name { get; internal set; } + /// Equals sign and surrounding whitespace + public string EqualsSign { get; internal set; } + /// The raw value - exactly as in source file (*probably* quoted and escaped) + public string QuotedValue { get; internal set; } + /// Unquoted and dereferenced value of the attribute + public string Value { get; internal set; } + + internal override void DebugCheckConsistency(bool allowNullParent) + { + DebugAssert(Name != null, "Null Name"); + DebugAssert(EqualsSign != null, "Null EqualsSign"); + DebugAssert(QuotedValue != null, "Null QuotedValue"); + DebugAssert(Value != null, "Null Value"); + base.DebugCheckConsistency(allowNullParent); + } + + #region Helpper methods + + /// The element containing this attribute + /// Null if orphaned + public RawElement ParentElement { + get { + RawTag tag = this.Parent as RawTag; + if (tag != null) { + return tag.Parent as RawElement; + } + return null; + } + } + + /// The part of name before ":" + /// Empty string if not found + public string Prefix { + get { + return GetNamespacePrefix(this.Name); + } + } + + /// The part of name after ":" + /// Whole name if ":" not found + public string LocalName { + get { + return GetLocalName(this.Name); + } + } + + /// + /// Resolved namespace of the name. Empty string if not found + /// From the specification: "The namespace name for an unprefixed attribute name always has no value." + /// + public string Namespace { + get { + if (string.IsNullOrEmpty(this.Prefix)) return NoNamespace; + + RawElement elem = this.ParentElement; + if (elem != null) { + return elem.ReslovePrefix(this.Prefix); + } + return NoNamespace; // Orphaned attribute + } + } + + /// Attribute is declaring namespace ("xmlns" or "xmlns:*") + public bool IsNamespaceDeclaration { + get { + return this.Name == "xmlns" || this.Prefix == "xmlns"; + } + } + + #endregion + + /// + public override void AcceptVisitor(IXmlVisitor visitor) + { + visitor.VisitAttribute(this); + } + + /// + internal override void UpdateDataFrom(RawObject source) + { + base.UpdateDataFrom(source); // Check asserts + if (this.LastUpdatedFrom == source) return; + RawAttribute src = (RawAttribute)source; + if (this.Name != src.Name || + this.EqualsSign != src.EqualsSign || + this.QuotedValue != src.QuotedValue || + this.Value != src.Value) + { + OnChanging(); + this.Name = src.Name; + this.EqualsSign = src.EqualsSign; + this.QuotedValue = src.QuotedValue; + this.Value = src.Value; + OnChanged(); + } + } + + /// + public override string ToString() + { + return string.Format("[{0} '{1}{2}{3}']", base.ToString(), this.Name, this.EqualsSign, this.Value); + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/RawContainer.cs b/ICSharpCode.AvalonEdit/XmlParser/RawContainer.cs new file mode 100644 index 0000000..4f29f5d --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/RawContainer.cs @@ -0,0 +1,256 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Diagnostics; +using System.Linq; + +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Abstact base class for all types that can contain child nodes + /// + public abstract class RawContainer: RawObject + { + /// + /// Children of the node. It is read-only. + /// Note that is has CollectionChanged event. + /// + public ChildrenCollection Children { get; private set; } + + /// Create new container + public RawContainer() + { + this.Children = new ChildrenCollection(); + } + + #region Helpper methods + + ObservableCollection elements; + + /// Gets direcly nested elements (non-recursive) + public ObservableCollection Elements { + get { + if (elements == null) { + elements = new FilteredCollection>(this.Children); + } + return elements; + } + } + + internal RawObject FirstChild { + get { + return this.Children[0]; + } + } + + internal RawObject LastChild { + get { + return this.Children[this.Children.Count - 1]; + } + } + + #endregion + + /// + public override IEnumerable GetSelfAndAllChildren() + { + return new RawObject[] { this }.Flatten(i => i is RawContainer ? ((RawContainer)i).Children : null); + } + + /// + /// Gets a child fully containg the given offset. + /// Goes recursively down the tree. + /// Specail case if at the end of attribute or text + /// + public RawObject GetChildAtOffset(int offset) + { + foreach(RawObject child in this.Children) { + if ((child is RawAttribute || child is RawText) && offset == child.EndOffset) return child; + if (child.StartOffset < offset && offset < child.EndOffset) { + if (child is RawContainer) { + return ((RawContainer)child).GetChildAtOffset(offset); + } else { + return child; + } + } + } + return this; // No childs at offset + } + + // Only these four methods should be used to modify the collection + + /// To be used exlucively by the parser + internal void AddChild(RawObject item) + { + // Childs can be only added to newly parsed items + Assert(this.Parent == null, "I have to be new"); + Assert(item.IsInCache, "Added item must be in cache"); + // Do not set parent pointer + this.Children.InsertItemAt(this.Children.Count, item); + } + + /// To be used exlucively by the parser + internal void AddChildren(IEnumerable items) + { + // Childs can be only added to newly parsed items + Assert(this.Parent == null, "I have to be new"); + // Do not set parent pointer + this.Children.InsertItemsAt(this.Children.Count, items.ToList()); + } + + /// + /// To be used exclusively by the children update algorithm. + /// Insert child and keep links consistent. + /// + void InsertChild(int index, RawObject item) + { + LogDom("Inserting {0} at index {1}", item, index); + + RawDocument document = this.Document; + Assert(document != null, "Can not insert to dangling object"); + Assert(item.Parent != this, "Can not own item twice"); + + SetParentPointersInTree(item); + + this.Children.InsertItemAt(index, item); + + document.OnObjectInserted(index, item); + } + + /// Recursively fix all parent pointer in a tree + /// + /// Cache constraint: + /// If cached item has parent set, then the whole subtree must be consistent + /// + void SetParentPointersInTree(RawObject item) + { + // All items come from the parser cache + + if (item.Parent == null) { + // Dangling object - either a new parser object or removed tree (still cached) + item.Parent = this; + if (item is RawContainer) { + foreach(RawObject child in ((RawContainer)item).Children) { + ((RawContainer)item).SetParentPointersInTree(child); + } + } + } else if (item.Parent == this) { + // If node is attached and then deattached, it will have null parent pointer + // but valid subtree - so its children will alredy have correct parent pointer + // like in this case + item.DebugCheckConsistency(false); + // Rest of the tree is consistent - do not recurse + } else { + // From cache & parent set => consitent subtree + item.DebugCheckConsistency(false); + // The parent (or any futher parents) can not be part of parsed document + // becuase otherwise this item would be included twice => safe to change parents + DebugAssert(item.Parent.Document == null, "Old parent is part of document as well"); + // Maintain cache constraint by setting parents to null + foreach(RawObject ancest in item.GetAncestors().ToList()) { + ancest.Parent = null; + } + item.Parent = this; + // Rest of the tree is consistent - do not recurse + } + } + + /// + /// To be used exclusively by the children update algorithm. + /// Remove child, set parent to null and notify the document + /// + void RemoveChild(int index) + { + RawObject removed = this.Children[index]; + LogDom("Removing {0} at index {1}", removed, index); + + // Null parent pointer + Assert(removed.Parent == this, "Inconsistent child"); + removed.Parent = null; + + this.Children.RemoveItemAt(index); + + this.Document.OnObjectRemoved(index, removed); + } + + /// Verify that the subtree is consistent. Only in debug build. + internal override void DebugCheckConsistency(bool allowNullParent) + { + base.DebugCheckConsistency(allowNullParent); + RawObject prevChild = null; + int myStartOffset = this.StartOffset; + int myEndOffset = this.EndOffset; + foreach(RawObject child in this.Children) { + Assert(child.Length != 0, "Empty child"); + if (!allowNullParent) { + Assert(child.Parent != null, "Null parent reference"); + } + Assert(child.Parent == null || child.Parent == this, "Inccorect parent reference"); + Assert(myStartOffset <= child.StartOffset && child.EndOffset <= myEndOffset, "Child not within parent text range"); + if (this.IsInCache) + Assert(child.IsInCache, "Child not in cache"); + if (prevChild != null) + Assert(prevChild.EndOffset <= child.StartOffset, "Overlaping childs"); + child.DebugCheckConsistency(allowNullParent); + prevChild = child; + } + } + + internal void UpdateTreeFrom(RawContainer srcContainer) + { + RemoveChildrenNotIn(srcContainer.Children); + InsertAndUpdateChildrenFrom(srcContainer.Children); + } + + void RemoveChildrenNotIn(IList srcList) + { + Dictionary srcChildren = srcList.ToDictionary(i => i.StartOffset); + for(int i = 0; i < this.Children.Count;) { + RawObject child = this.Children[i]; + RawObject srcChild; + + if (srcChildren.TryGetValue(child.StartOffset, out srcChild) && child.CanUpdateDataFrom(srcChild)) { + // Keep only one item with given offset (we might have several due to deletion) + srcChildren.Remove(child.StartOffset); + if (child is RawContainer) + ((RawContainer)child).RemoveChildrenNotIn(((RawContainer)srcChild).Children); + i++; + } else { + RemoveChild(i); + } + } + } + + void InsertAndUpdateChildrenFrom(IList srcList) + { + for(int i = 0; i < srcList.Count; i++) { + // End of our list? + if (i == this.Children.Count) { + InsertChild(i, srcList[i]); + continue; + } + RawObject child = this.Children[i]; + RawObject srcChild = srcList[i]; + + if (child.CanUpdateDataFrom(srcChild) /* includes offset test */) { + child.UpdateDataFrom(srcChild); + if (child is RawContainer) + ((RawContainer)child).InsertAndUpdateChildrenFrom(((RawContainer)srcChild).Children); + } else { + InsertChild(i, srcChild); + } + } + Assert(this.Children.Count == srcList.Count, "List lengths differ after update"); + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/RawDocument.cs b/ICSharpCode.AvalonEdit/XmlParser/RawDocument.cs new file mode 100644 index 0000000..c6a1df6 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/RawDocument.cs @@ -0,0 +1,72 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Diagnostics; +using System.Linq; + +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// The root object of the XML document + /// + public class RawDocument: RawContainer + { + /// Parser that produced this document + internal XmlParser Parser { get; set; } + + /// Occurs when object is added to any part of the document + public event EventHandler ObjectInserted; + /// Occurs when object is removed from any part of the document + public event EventHandler ObjectRemoved; + /// Occurs before local data of any object in the document changes + public event EventHandler ObjectChanging; + /// Occurs after local data of any object in the document changed + public event EventHandler ObjectChanged; + + internal void OnObjectInserted(int index, RawObject obj) + { + if (ObjectInserted != null) + ObjectInserted(this, new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Add, new RawObject[] { obj }.ToList(), index)); + } + + internal void OnObjectRemoved(int index, RawObject obj) + { + if (ObjectRemoved != null) + ObjectRemoved(this, new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Remove, new RawObject[] { obj }.ToList(), index)); + } + + internal void OnObjectChanging(RawObject obj) + { + if (ObjectChanging != null) + ObjectChanging(this, new RawObjectEventArgs() { Object = obj } ); + } + + internal void OnObjectChanged(RawObject obj) + { + if (ObjectChanged != null) + ObjectChanged(this, new RawObjectEventArgs() { Object = obj } ); + } + + /// + public override void AcceptVisitor(IXmlVisitor visitor) + { + visitor.VisitDocument(this); + } + + /// + public override string ToString() + { + return string.Format("[{0} Chld:{1}]", base.ToString(), this.Children.Count); + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/RawElement.cs b/ICSharpCode.AvalonEdit/XmlParser/RawElement.cs new file mode 100644 index 0000000..0a07b58 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/RawElement.cs @@ -0,0 +1,186 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Diagnostics; +using System.Linq; + +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Logical grouping of other nodes together. + /// + public class RawElement: RawContainer + { + /// No tags are missing anywhere within this element (recursive) + public bool IsProperlyNested { get; set; } + /// True in wellformed XML + public bool HasStartOrEmptyTag { get; set; } + /// True in wellformed XML + public bool HasEndTag { get; set; } + + /// StartTag of an element. + public RawTag StartTag { + get { + return (RawTag)this.Children[0]; + } + } + + internal override void DebugCheckConsistency(bool allowNullParent) + { + DebugAssert(Children.Count > 0, "No children"); + base.DebugCheckConsistency(allowNullParent); + } + + #region Helpper methods + + AttributeCollection attributes; + + /// Gets attributes of the element + public AttributeCollection Attributes { + get { + if (attributes == null) { + attributes = new AttributeCollection(this.StartTag.Children); + } + return attributes; + } + } + + ObservableCollection attributesAndElements; + + // TODO: Identity + /// Gets both attributes and elements + public ObservableCollection AttributesAndElements { + get { + if (attributesAndElements == null) { + attributesAndElements = new MergedCollection> ( + // New wrapper with RawObject types + new FilteredCollection>(this.StartTag.Children, x => x is RawAttribute), + new FilteredCollection>(this.Children, x => x is RawElement) + ); + } + return attributesAndElements; + } + } + + /// Name with namespace prefix - exactly as in source + public string Name { + get { + return this.StartTag.Name; + } + } + + /// The part of name before ":" + /// Empty string if not found + public string Prefix { + get { + return GetNamespacePrefix(this.StartTag.Name); + } + } + + /// The part of name after ":" + /// Empty string if not found + public string LocalName { + get { + return GetLocalName(this.StartTag.Name); + } + } + + /// Resolved namespace of the name + /// Empty string if prefix is not found + public string Namespace { + get { + string prefix = this.Prefix; + if (string.IsNullOrEmpty(prefix)) { + return FindDefaultNamesapce(); + } else { + return ReslovePrefix(prefix); + } + } + } + + /// Find the defualt namesapce for this context + public string FindDefaultNamesapce() + { + RawElement current = this; + while(current != null) { + string namesapce = current.GetAttributeValue(NoNamespace, "xmlns"); + if (namesapce != null) return namesapce; + current = current.Parent as RawElement; + } + return string.Empty; // No namesapce + } + + /// + /// Recursively resolve given prefix in this context. Prefix must have some value. + /// + /// Empty string if prefix is not found + public string ReslovePrefix(string prefix) + { + if (string.IsNullOrEmpty(prefix)) throw new ArgumentException("No prefix given", "prefix"); + + // Implicit namesapces + if (prefix == "xml") return XmlNamespace; + if (prefix == "xmlns") return XmlnsNamespace; + + RawElement current = this; + while(current != null) { + string namesapce = current.GetAttributeValue(XmlnsNamespace, prefix); + if (namesapce != null) return namesapce; + current = current.Parent as RawElement; + } + return NoNamespace; // Can not find prefix + } + + /// + /// Get unquoted value of attribute. + /// It looks in the no namespace (empty string). + /// + /// Null if not found + public string GetAttributeValue(string localName) + { + return GetAttributeValue(NoNamespace, localName); + } + + /// + /// Get unquoted value of attribute + /// + /// Namespace. Can be no namepace (empty string), which is the default for attributes. + /// Local name - text after ":" + /// Null if not found + public string GetAttributeValue(string @namespace, string localName) + { + @namespace = @namespace ?? string.Empty; + foreach(RawAttribute attr in this.Attributes.GetByLocalName(localName)) { + DebugAssert(attr.LocalName == localName, "Bad hashtable"); + if (attr.Namespace == @namespace) { + return attr.Value; + } + } + return null; + } + + #endregion + + /// + public override void AcceptVisitor(IXmlVisitor visitor) + { + visitor.VisitElement(this); + } + + /// + public override string ToString() + { + return string.Format("[{0} '{1}{2}{3}' Attr:{4} Chld:{5} Nest:{6}]", base.ToString(), this.StartTag.OpeningBracket, this.StartTag.Name, this.StartTag.ClosingBracket, this.StartTag.Children.Count, this.Children.Count, this.IsProperlyNested ? "Ok" : "Bad"); + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/RawObject.cs b/ICSharpCode.AvalonEdit/XmlParser/RawObject.cs new file mode 100644 index 0000000..e280c9a --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/RawObject.cs @@ -0,0 +1,251 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Diagnostics; +using System.Linq; + +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Abstact base class for all types + /// + public abstract class RawObject: TextSegment + { + /// Empty string. The namespace used if there is no "xmlns" specified + public static readonly string NoNamespace = string.Empty; + + /// Namespace for "xml:" prefix: "http://www.w3.org/XML/1998/namespace" + public static readonly string XmlNamespace = "http://www.w3.org/XML/1998/namespace"; + + /// Namesapce for "xmlns:" prefix: "http://www.w3.org/2000/xmlns/" + public static readonly string XmlnsNamespace = "http://www.w3.org/2000/xmlns/"; + + /// Parent node. + /// + /// New cached items start with null parent. + /// Cache constraint: + /// If cached item has parent set, then the whole subtree must be consistent + /// + public RawObject Parent { get; set; } + + /// Gets the document owning this object or null if orphaned + public RawDocument Document { + get { + if (this.Parent != null) { + return this.Parent.Document; + } else if (this is RawDocument) { + return (RawDocument)this; + } else { + return null; + } + } + } + + /// Creates new object + public RawObject() + { + this.LastUpdatedFrom = this; + } + + /// Occurs before the value of any local properties changes. Nested changes do not cause the event to occur + public event EventHandler Changing; + + /// Occurs after the value of any local properties changed. Nested changes do not cause the event to occur + public event EventHandler Changed; + + /// Raises Changing event + protected void OnChanging() + { + LogDom("Changing {0}", this); + if (Changing != null) { + Changing(this, new RawObjectEventArgs() { Object = this } ); + } + RawDocument doc = this.Document; + if (doc != null) { + doc.OnObjectChanging(this); + } + } + + /// Raises Changed event + protected void OnChanged() + { + LogDom("Changed {0}", this); + if (Changed != null) { + Changed(this, new RawObjectEventArgs() { Object = this } ); + } + RawDocument doc = this.Document; + if (doc != null) { + doc.OnObjectChanged(this); + } + } + + List syntaxErrors; + + /// + /// The error that occured in the context of this node (excluding nested nodes) + /// + public IEnumerable SyntaxErrors { + get { + if (syntaxErrors == null) { + return new SyntaxError[] {}; + } else { + return syntaxErrors; + } + } + } + + internal void AddSyntaxError(SyntaxError error) + { + DebugAssert(error.Object == this, "Must own the error"); + if (this.syntaxErrors == null) this.syntaxErrors = new List(); + syntaxErrors.Add(error); + } + + /// Throws exception if condition is false + /// Present in release mode - use only for very cheap aserts + protected static void Assert(bool condition, string message) + { + if (!condition) { + throw new Exception("Assertion failed: " + message); + } + } + + /// Throws exception if condition is false + [Conditional("DEBUG")] + protected static void DebugAssert(bool condition, string message) + { + if (!condition) { + throw new Exception("Assertion failed: " + message); + } + } + + /// Recursively gets self and all nested nodes. + public virtual IEnumerable GetSelfAndAllChildren() + { + return new RawObject[] { this }; + } + + /// Get all ancestors of this node + public IEnumerable GetAncestors() + { + RawObject curr = this.Parent; + while(curr != null) { + yield return curr; + curr = curr.Parent; + } + } + + /// Call appropriate visit method on the given visitor + public abstract void AcceptVisitor(IXmlVisitor visitor); + + /// The parser tree object this object was updated from + internal object LastUpdatedFrom { get; private set; } + + internal bool IsInCache { get; set; } + + /// Is call to UpdateDataFrom is allowed? + internal bool CanUpdateDataFrom(RawObject source) + { + return + this.GetType() == source.GetType() && + this.StartOffset == source.StartOffset && + (this.LastUpdatedFrom == source || !this.IsInCache); + } + + /// Copy all data from the 'source' to this object + internal virtual void UpdateDataFrom(RawObject source) + { + Assert(this.GetType() == source.GetType(), "Source has different type"); + DebugAssert(this.StartOffset == source.StartOffset, "Source has different StartOffset"); + + if (this.LastUpdatedFrom == source) { + DebugAssert(this.EndOffset == source.EndOffset, "Source has different EndOffset"); + return; + } + + Assert(!this.IsInCache, "Can not update cached item"); + Assert(source.IsInCache, "Must update from cache"); + + this.LastUpdatedFrom = source; + this.StartOffset = source.StartOffset; + // In some cases we are just updating objects of that same + // type and position and hoping to be luckily right + this.EndOffset = source.EndOffset; + + // Do not bother comparing - assume changed if non-null + if (this.syntaxErrors != null || source.syntaxErrors != null) { + // May be called again in derived class - oh, well, nevermind + OnChanging(); + if (source.syntaxErrors == null) { + this.syntaxErrors = null; + } else { + this.syntaxErrors = new List(); + foreach(var error in source.SyntaxErrors) { + // The object differs, so create our own copy + // The source still might need it in the future and we do not want to break it + this.AddSyntaxError(error.Clone(this)); + } + } + OnChanged(); + } + } + + /// Verify that the item is consistent. Only in debug build. + [Conditional("DEBUG")] + internal virtual void DebugCheckConsistency(bool allowNullParent) + { + + } + + /// + public override string ToString() + { + return string.Format("{0}({1}-{2})", this.GetType().Name.Remove(0, 3), this.StartOffset, this.EndOffset); + } + + internal static void LogDom(string format, params object[] args) + { + System.Diagnostics.Debug.WriteLine(string.Format("XML DOM: " + format, args)); + } + + #region Helpper methods + + /// The part of name before ":" + /// Empty string if not found + protected static string GetNamespacePrefix(string name) + { + if (string.IsNullOrEmpty(name)) return string.Empty; + int colonIndex = name.IndexOf(':'); + if (colonIndex != -1) { + return name.Substring(0, colonIndex); + } else { + return string.Empty; + } + } + + /// The part of name after ":" + /// Whole name if ":" not found + protected static string GetLocalName(string name) + { + if (string.IsNullOrEmpty(name)) return string.Empty; + int colonIndex = name.IndexOf(':'); + if (colonIndex != -1) { + return name.Remove(0, colonIndex + 1); + } else { + return name ?? string.Empty; + } + } + + #endregion + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/RawObjectEventArgs.cs b/ICSharpCode.AvalonEdit/XmlParser/RawObjectEventArgs.cs new file mode 100644 index 0000000..f7869bf --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/RawObjectEventArgs.cs @@ -0,0 +1,25 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Diagnostics; +using System.Linq; + +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// Holds event args for event caused by + public class RawObjectEventArgs: EventArgs + { + /// The object that caused the event + public RawObject Object { get; set; } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/RawObjects.cs b/ICSharpCode.AvalonEdit/XmlParser/RawObjects.cs deleted file mode 100644 index 537bd4d..0000000 --- a/ICSharpCode.AvalonEdit/XmlParser/RawObjects.cs +++ /dev/null @@ -1,1026 +0,0 @@ -// -// -// -// -// $Revision$ -// - -using System; -using System.Collections.Generic; -using System.Collections.ObjectModel; -using System.Collections.Specialized; -using System.Diagnostics; -using System.Linq; - -using ICSharpCode.AvalonEdit.Document; - -namespace ICSharpCode.AvalonEdit.XmlParser -{ - /// Holds event args for event caused by - public class RawObjectEventArgs: EventArgs - { - /// The object that caused the event - public RawObject Object { get; set; } - } - - /// - /// Abstact base class for all types - /// - public abstract class RawObject: TextSegment - { - /// Empty string. The namespace used if there is no "xmlns" specified - public static readonly string NoNamespace = string.Empty; - - /// Namespace for "xml:" prefix: "http://www.w3.org/XML/1998/namespace" - public static readonly string XmlNamespace = "http://www.w3.org/XML/1998/namespace"; - - /// Namesapce for "xmlns:" prefix: "http://www.w3.org/2000/xmlns/" - public static readonly string XmlnsNamespace = "http://www.w3.org/2000/xmlns/"; - - /// Parent node. - /// - /// New cached items start with null parent. - /// Cache constraint: - /// If cached item has parent set, then the whole subtree must be consistent - /// - public RawObject Parent { get; set; } - - /// Gets the document owning this object or null if orphaned - public RawDocument Document { - get { - if (this.Parent != null) { - return this.Parent.Document; - } else if (this is RawDocument) { - return (RawDocument)this; - } else { - return null; - } - } - } - - /// Creates new object - public RawObject() - { - this.LastUpdatedFrom = this; - } - - /// Occurs before the value of any local properties changes. Nested changes do not cause the event to occur - public event EventHandler Changing; - - /// Occurs after the value of any local properties changed. Nested changes do not cause the event to occur - public event EventHandler Changed; - - /// Raises Changing event - protected void OnChanging() - { - LogDom("Changing {0}", this); - if (Changing != null) { - Changing(this, new RawObjectEventArgs() { Object = this } ); - } - RawDocument doc = this.Document; - if (doc != null) { - doc.OnObjectChanging(this); - } - } - - /// Raises Changed event - protected void OnChanged() - { - LogDom("Changed {0}", this); - if (Changed != null) { - Changed(this, new RawObjectEventArgs() { Object = this } ); - } - RawDocument doc = this.Document; - if (doc != null) { - doc.OnObjectChanged(this); - } - } - - List syntaxErrors; - - /// - /// The error that occured in the context of this node (excluding nested nodes) - /// - public IEnumerable SyntaxErrors { - get { - if (syntaxErrors == null) { - return new SyntaxError[] {}; - } else { - return syntaxErrors; - } - } - } - - internal void AddSyntaxError(SyntaxError error) - { - DebugAssert(error.Object == this, "Must own the error"); - if (this.syntaxErrors == null) this.syntaxErrors = new List(); - syntaxErrors.Add(error); - } - - /// Throws exception if condition is false - /// Present in release mode - use only for very cheap aserts - protected static void Assert(bool condition, string message) - { - if (!condition) { - throw new Exception("Assertion failed: " + message); - } - } - - /// Throws exception if condition is false - [Conditional("DEBUG")] - protected static void DebugAssert(bool condition, string message) - { - if (!condition) { - throw new Exception("Assertion failed: " + message); - } - } - - /// Recursively gets self and all nested nodes. - public virtual IEnumerable GetSelfAndAllChildren() - { - return new RawObject[] { this }; - } - - /// Get all ancestors of this node - public IEnumerable GetAncestors() - { - RawObject curr = this.Parent; - while(curr != null) { - yield return curr; - curr = curr.Parent; - } - } - - /// Call appropriate visit method on the given visitor - public abstract void AcceptVisitor(IXmlVisitor visitor); - - /// The parser tree object this object was updated from - internal object LastUpdatedFrom { get; private set; } - - internal bool IsInCache { get; set; } - - /// Is call to UpdateDataFrom is allowed? - internal bool CanUpdateDataFrom(RawObject source) - { - return - this.GetType() == source.GetType() && - this.StartOffset == source.StartOffset && - (this.LastUpdatedFrom == source || !this.IsInCache); - } - - /// Copy all data from the 'source' to this object - internal virtual void UpdateDataFrom(RawObject source) - { - Assert(this.GetType() == source.GetType(), "Source has different type"); - DebugAssert(this.StartOffset == source.StartOffset, "Source has different StartOffset"); - - if (this.LastUpdatedFrom == source) { - DebugAssert(this.EndOffset == source.EndOffset, "Source has different EndOffset"); - return; - } - - Assert(!this.IsInCache, "Can not update cached item"); - Assert(source.IsInCache, "Must update from cache"); - - this.LastUpdatedFrom = source; - this.StartOffset = source.StartOffset; - // In some cases we are just updating objects of that same - // type and position and hoping to be luckily right - this.EndOffset = source.EndOffset; - - // Do not bother comparing - assume changed if non-null - if (this.syntaxErrors != null || source.syntaxErrors != null) { - // May be called again in derived class - oh, well, nevermind - OnChanging(); - if (source.syntaxErrors == null) { - this.syntaxErrors = null; - } else { - this.syntaxErrors = new List(); - foreach(var error in source.SyntaxErrors) { - // The object differs, so create our own copy - // The source still might need it in the future and we do not want to break it - this.AddSyntaxError(error.Clone(this)); - } - } - OnChanged(); - } - } - - /// Verify that the item is consistent. Only in debug build. - [Conditional("DEBUG")] - internal virtual void DebugCheckConsistency(bool allowNullParent) - { - - } - - /// - public override string ToString() - { - return string.Format("{0}({1}-{2})", this.GetType().Name.Remove(0, 3), this.StartOffset, this.EndOffset); - } - - internal static void LogDom(string format, params object[] args) - { - System.Diagnostics.Debug.WriteLine(string.Format("XML DOM: " + format, args)); - } - - #region Helpper methods - - /// The part of name before ":" - /// Empty string if not found - protected static string GetNamespacePrefix(string name) - { - if (string.IsNullOrEmpty(name)) return string.Empty; - int colonIndex = name.IndexOf(':'); - if (colonIndex != -1) { - return name.Substring(0, colonIndex); - } else { - return string.Empty; - } - } - - /// The part of name after ":" - /// Whole name if ":" not found - protected static string GetLocalName(string name) - { - if (string.IsNullOrEmpty(name)) return string.Empty; - int colonIndex = name.IndexOf(':'); - if (colonIndex != -1) { - return name.Remove(0, colonIndex + 1); - } else { - return name ?? string.Empty; - } - } - - #endregion - } - - /// - /// Abstact base class for all types that can contain child nodes - /// - public abstract class RawContainer: RawObject - { - /// - /// Children of the node. It is read-only. - /// Note that is has CollectionChanged event. - /// - public ChildrenCollection Children { get; private set; } - - /// Create new container - public RawContainer() - { - this.Children = new ChildrenCollection(); - } - - #region Helpper methods - - ObservableCollection elements; - - /// Gets direcly nested elements (non-recursive) - public ObservableCollection Elements { - get { - if (elements == null) { - elements = new FilteredCollection>(this.Children); - } - return elements; - } - } - - internal RawObject FirstChild { - get { - return this.Children[0]; - } - } - - internal RawObject LastChild { - get { - return this.Children[this.Children.Count - 1]; - } - } - - #endregion - - /// - public override IEnumerable GetSelfAndAllChildren() - { - return new RawObject[] { this }.Flatten(i => i is RawContainer ? ((RawContainer)i).Children : null); - } - - /// - /// Gets a child fully containg the given offset. - /// Goes recursively down the tree. - /// Specail case if at the end of attribute or text - /// - public RawObject GetChildAtOffset(int offset) - { - foreach(RawObject child in this.Children) { - if ((child is RawAttribute || child is RawText) && offset == child.EndOffset) return child; - if (child.StartOffset < offset && offset < child.EndOffset) { - if (child is RawContainer) { - return ((RawContainer)child).GetChildAtOffset(offset); - } else { - return child; - } - } - } - return this; // No childs at offset - } - - // Only these four methods should be used to modify the collection - - /// To be used exlucively by the parser - internal void AddChild(RawObject item) - { - // Childs can be only added to newly parsed items - Assert(this.Parent == null, "I have to be new"); - Assert(item.IsInCache, "Added item must be in cache"); - // Do not set parent pointer - this.Children.InsertItemAt(this.Children.Count, item); - } - - /// To be used exlucively by the parser - internal void AddChildren(IEnumerable items) - { - // Childs can be only added to newly parsed items - Assert(this.Parent == null, "I have to be new"); - // Do not set parent pointer - this.Children.InsertItemsAt(this.Children.Count, items.ToList()); - } - - /// - /// To be used exclusively by the children update algorithm. - /// Insert child and keep links consistent. - /// - void InsertChild(int index, RawObject item) - { - LogDom("Inserting {0} at index {1}", item, index); - - RawDocument document = this.Document; - Assert(document != null, "Can not insert to dangling object"); - Assert(item.Parent != this, "Can not own item twice"); - - SetParentPointersInTree(item); - - this.Children.InsertItemAt(index, item); - - document.OnObjectInserted(index, item); - } - - /// Recursively fix all parent pointer in a tree - /// - /// Cache constraint: - /// If cached item has parent set, then the whole subtree must be consistent - /// - void SetParentPointersInTree(RawObject item) - { - // All items come from the parser cache - - if (item.Parent == null) { - // Dangling object - either a new parser object or removed tree (still cached) - item.Parent = this; - if (item is RawContainer) { - foreach(RawObject child in ((RawContainer)item).Children) { - ((RawContainer)item).SetParentPointersInTree(child); - } - } - } else if (item.Parent == this) { - // If node is attached and then deattached, it will have null parent pointer - // but valid subtree - so its children will alredy have correct parent pointer - // like in this case - item.DebugCheckConsistency(false); - // Rest of the tree is consistent - do not recurse - } else { - // From cache & parent set => consitent subtree - item.DebugCheckConsistency(false); - // The parent (or any futher parents) can not be part of parsed document - // becuase otherwise this item would be included twice => safe to change parents - DebugAssert(item.Parent.Document == null, "Old parent is part of document as well"); - // Maintain cache constraint by setting parents to null - foreach(RawObject ancest in item.GetAncestors().ToList()) { - ancest.Parent = null; - } - item.Parent = this; - // Rest of the tree is consistent - do not recurse - } - } - - /// - /// To be used exclusively by the children update algorithm. - /// Remove child, set parent to null and notify the document - /// - void RemoveChild(int index) - { - RawObject removed = this.Children[index]; - LogDom("Removing {0} at index {1}", removed, index); - - // Null parent pointer - Assert(removed.Parent == this, "Inconsistent child"); - removed.Parent = null; - - this.Children.RemoveItemAt(index); - - this.Document.OnObjectRemoved(index, removed); - } - - /// Verify that the subtree is consistent. Only in debug build. - internal override void DebugCheckConsistency(bool allowNullParent) - { - base.DebugCheckConsistency(allowNullParent); - RawObject prevChild = null; - int myStartOffset = this.StartOffset; - int myEndOffset = this.EndOffset; - foreach(RawObject child in this.Children) { - Assert(child.Length != 0, "Empty child"); - if (!allowNullParent) { - Assert(child.Parent != null, "Null parent reference"); - } - Assert(child.Parent == null || child.Parent == this, "Inccorect parent reference"); - Assert(myStartOffset <= child.StartOffset && child.EndOffset <= myEndOffset, "Child not within parent text range"); - if (this.IsInCache) - Assert(child.IsInCache, "Child not in cache"); - if (prevChild != null) - Assert(prevChild.EndOffset <= child.StartOffset, "Overlaping childs"); - child.DebugCheckConsistency(allowNullParent); - prevChild = child; - } - } - - internal void UpdateTreeFrom(RawContainer srcContainer) - { - RemoveChildrenNotIn(srcContainer.Children); - InsertAndUpdateChildrenFrom(srcContainer.Children); - } - - void RemoveChildrenNotIn(IList srcList) - { - Dictionary srcChildren = srcList.ToDictionary(i => i.StartOffset); - for(int i = 0; i < this.Children.Count;) { - RawObject child = this.Children[i]; - RawObject srcChild; - - if (srcChildren.TryGetValue(child.StartOffset, out srcChild) && child.CanUpdateDataFrom(srcChild)) { - // Keep only one item with given offset (we might have several due to deletion) - srcChildren.Remove(child.StartOffset); - if (child is RawContainer) - ((RawContainer)child).RemoveChildrenNotIn(((RawContainer)srcChild).Children); - i++; - } else { - RemoveChild(i); - } - } - } - - void InsertAndUpdateChildrenFrom(IList srcList) - { - for(int i = 0; i < srcList.Count; i++) { - // End of our list? - if (i == this.Children.Count) { - InsertChild(i, srcList[i]); - continue; - } - RawObject child = this.Children[i]; - RawObject srcChild = srcList[i]; - - if (child.CanUpdateDataFrom(srcChild) /* includes offset test */) { - child.UpdateDataFrom(srcChild); - if (child is RawContainer) - ((RawContainer)child).InsertAndUpdateChildrenFrom(((RawContainer)srcChild).Children); - } else { - InsertChild(i, srcChild); - } - } - Assert(this.Children.Count == srcList.Count, "List lengths differ after update"); - } - } - - /// Information about syntax error that occured during parsing - public class SyntaxError: TextSegment - { - /// Object for which the error occured - public RawObject Object { get; internal set; } - /// Textual description of the error - public string Message { get; internal set; } - /// Any user data - public object Tag { get; set; } - - internal SyntaxError Clone(RawObject newOwner) - { - return new SyntaxError { - Object = newOwner, - Message = Message, - Tag = Tag, - StartOffset = StartOffset, - EndOffset = EndOffset, - }; - } - } - - /// - /// The root object of the XML document - /// - public class RawDocument: RawContainer - { - /// Parser that produced this document - internal XmlParser Parser { get; set; } - - /// Occurs when object is added to any part of the document - public event EventHandler ObjectInserted; - /// Occurs when object is removed from any part of the document - public event EventHandler ObjectRemoved; - /// Occurs before local data of any object in the document changes - public event EventHandler ObjectChanging; - /// Occurs after local data of any object in the document changed - public event EventHandler ObjectChanged; - - internal void OnObjectInserted(int index, RawObject obj) - { - if (ObjectInserted != null) - ObjectInserted(this, new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Add, new RawObject[] { obj }.ToList(), index)); - } - - internal void OnObjectRemoved(int index, RawObject obj) - { - if (ObjectRemoved != null) - ObjectRemoved(this, new NotifyCollectionChangedEventArgs(NotifyCollectionChangedAction.Remove, new RawObject[] { obj }.ToList(), index)); - } - - internal void OnObjectChanging(RawObject obj) - { - if (ObjectChanging != null) - ObjectChanging(this, new RawObjectEventArgs() { Object = obj } ); - } - - internal void OnObjectChanged(RawObject obj) - { - if (ObjectChanged != null) - ObjectChanged(this, new RawObjectEventArgs() { Object = obj } ); - } - - /// - public override void AcceptVisitor(IXmlVisitor visitor) - { - visitor.VisitDocument(this); - } - - /// - public override string ToString() - { - return string.Format("[{0} Chld:{1}]", base.ToString(), this.Children.Count); - } - } - - /// - /// Represents any markup starting with "<" and (hopefully) ending with ">" - /// - public class RawTag: RawContainer - { - /// These identify the start of DTD elements - public static readonly string[] DTDNames = new string[] {" Opening bracket - usually "<" - public string OpeningBracket { get; internal set; } - /// Name following the opening bracket - public string Name { get; internal set; } - /// Opening bracket - usually ">" - public string ClosingBracket { get; internal set; } - - /// True if tag starts with "<" - public bool IsStartOrEmptyTag { get { return OpeningBracket == "<"; } } - /// True if tag starts with "<" and ends with ">" - public bool IsStartTag { get { return OpeningBracket == "<" && ClosingBracket == ">"; } } - /// True if tag starts with "<" and does not end with ">" - public bool IsEmptyTag { get { return OpeningBracket == "<" && ClosingBracket != ">" ; } } - /// True if tag starts with "</" - public bool IsEndTag { get { return OpeningBracket == " True if tag starts with "<?" - public bool IsProcessingInstruction { get { return OpeningBracket == " True if tag starts with "<!--" - public bool IsComment { get { return OpeningBracket == ""; "--" is error - Comment, - - /// Ends with "]]>" - CData, - - /// Ends with "?>" - ProcessingInstruction, - - /// Ends with "<" or ">" - UnknownBang, - - /// Unknown - Other - } - - /// - /// Whitespace or character data - /// - public class RawText: RawObject - { - /// The context in which the text occured - public RawTextType Type { get; set; } - /// The text exactly as in source - public string EscapedValue { get; set; } - /// The text with all entity references resloved - public string Value { get; set; } - - /// - public override void AcceptVisitor(IXmlVisitor visitor) - { - visitor.VisitText(this); - } - - /// - internal override void UpdateDataFrom(RawObject source) - { - base.UpdateDataFrom(source); // Check asserts - if (this.LastUpdatedFrom == source) return; - RawText src = (RawText)source; - if (this.EscapedValue != src.EscapedValue || - this.Value != src.Value) - { - OnChanging(); - this.EscapedValue = src.EscapedValue; - this.Value = src.Value; - OnChanged(); - } - } - - /// - public override string ToString() - { - return string.Format("[{0} Text.Length={1}]", base.ToString(), this.EscapedValue.Length); - } - } - - static class ExtensionMethods - { - // Copied from ICSharpCode.SharpDevelop.Dom.ExtensionMethods - /// - /// Converts a recursive data structure into a flat list. - /// - /// The root elements of the recursive data structure. - /// The function that gets the children of an element. - /// Iterator that enumerates the tree structure in preorder. - public static IEnumerable Flatten(this IEnumerable input, Func> recursion) - { - Stack> stack = new Stack>(); - try { - stack.Push(input.GetEnumerator()); - while (stack.Count > 0) { - while (stack.Peek().MoveNext()) { - T element = stack.Peek().Current; - yield return element; - IEnumerable children = recursion(element); - if (children != null) { - stack.Push(children.GetEnumerator()); - } - } - stack.Pop().Dispose(); - } - } finally { - while (stack.Count > 0) { - stack.Pop().Dispose(); - } - } - } - } -} diff --git a/ICSharpCode.AvalonEdit/XmlParser/RawTag.cs b/ICSharpCode.AvalonEdit/XmlParser/RawTag.cs new file mode 100644 index 0000000..69ed3e2 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/RawTag.cs @@ -0,0 +1,94 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Diagnostics; +using System.Linq; + +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// + /// Represents any markup starting with "<" and (hopefully) ending with ">" + /// + public class RawTag: RawContainer + { + /// These identify the start of DTD elements + public static readonly string[] DTDNames = new string[] {" Opening bracket - usually "<" + public string OpeningBracket { get; internal set; } + /// Name following the opening bracket + public string Name { get; internal set; } + /// Opening bracket - usually ">" + public string ClosingBracket { get; internal set; } + + /// True if tag starts with "<" + public bool IsStartOrEmptyTag { get { return OpeningBracket == "<"; } } + /// True if tag starts with "<" and ends with ">" + public bool IsStartTag { get { return OpeningBracket == "<" && ClosingBracket == ">"; } } + /// True if tag starts with "<" and does not end with ">" + public bool IsEmptyTag { get { return OpeningBracket == "<" && ClosingBracket != ">" ; } } + /// True if tag starts with "</" + public bool IsEndTag { get { return OpeningBracket == " True if tag starts with "<?" + public bool IsProcessingInstruction { get { return OpeningBracket == " True if tag starts with "<!--" + public bool IsComment { get { return OpeningBracket == ""; "--" is error + Comment, + + /// Ends with "]]>" + CData, + + /// Ends with "?>" + ProcessingInstruction, + + /// Ends with "<" or ">" + UnknownBang, + + /// Unknown + Other + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/SyntaxError.cs b/ICSharpCode.AvalonEdit/XmlParser/SyntaxError.cs new file mode 100644 index 0000000..f9642ee --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/SyntaxError.cs @@ -0,0 +1,40 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Diagnostics; +using System.Linq; + +using ICSharpCode.AvalonEdit.Document; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + /// Information about syntax error that occured during parsing + public class SyntaxError: TextSegment + { + /// Object for which the error occured + public RawObject Object { get; internal set; } + /// Textual description of the error + public string Message { get; internal set; } + /// Any user data + public object Tag { get; set; } + + internal SyntaxError Clone(RawObject newOwner) + { + return new SyntaxError { + Object = newOwner, + Message = Message, + Tag = Tag, + StartOffset = StartOffset, + EndOffset = EndOffset, + }; + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/TagMatchingHeuristics.cs b/ICSharpCode.AvalonEdit/XmlParser/TagMatchingHeuristics.cs new file mode 100644 index 0000000..17c8a82 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/TagMatchingHeuristics.cs @@ -0,0 +1,409 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +using ICSharpCode.AvalonEdit.Utils; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + class TagMatchingHeuristics + { + const int maxConfigurationCount = 10; + + XmlParser parser; + Cache cache; + string input; + List tags; + + public TagMatchingHeuristics(XmlParser parser, string input, List tags) + { + this.parser = parser; + this.cache = parser.Cache; + this.input = input; + this.tags = tags; + } + + public RawDocument ReadDocument() + { + RawDocument doc = new RawDocument() { Parser = parser }; + + XmlParser.Log("Flat stream: {0}", PrintObjects(tags)); + List valid = MatchTags(tags); + XmlParser.Log("Fixed stream: {0}", PrintObjects(valid)); + IEnumerator validStream = valid.GetEnumerator(); + validStream.MoveNext(); // Move to first + while(true) { + // End of stream? + try { + if (validStream.Current == null) break; + } catch (InvalidCastException) { + break; + } + doc.AddChild(ReadTextOrElement(validStream)); + } + + if (doc.Children.Count > 0) { + doc.StartOffset = doc.FirstChild.StartOffset; + doc.EndOffset = doc.LastChild.EndOffset; + } + + XmlParser.Log("Constructed {0}", doc); + cache.Add(doc, null); + return doc; + } + + RawObject ReadSingleObject(IEnumerator objStream) + { + RawObject obj = objStream.Current; + objStream.MoveNext(); + return obj; + } + + RawObject ReadTextOrElement(IEnumerator objStream) + { + RawObject curr = objStream.Current; + if (curr is RawText || curr is RawElement) { + return ReadSingleObject(objStream); + } else { + RawTag currTag = (RawTag)curr; + if (currTag == StartTagPlaceholder) { + return ReadElement(objStream); + } else if (currTag.IsStartOrEmptyTag) { + return ReadElement(objStream); + } else { + return ReadSingleObject(objStream); + } + } + } + + RawElement ReadElement(IEnumerator objStream) + { + RawElement element = new RawElement(); + element.IsProperlyNested = true; + + // Read start tag + RawTag startTag = ReadSingleObject(objStream) as RawTag; + XmlParser.DebugAssert(startTag != null, "Start tag expected"); + XmlParser.DebugAssert(startTag.IsStartOrEmptyTag || startTag == StartTagPlaceholder, "Start tag expected"); + if (startTag == StartTagPlaceholder) { + element.HasStartOrEmptyTag = false; + element.IsProperlyNested = false; + TagReader.OnSyntaxError(element, objStream.Current.StartOffset, objStream.Current.EndOffset, + "Matching openning tag was not found"); + } else { + element.HasStartOrEmptyTag = true; + element.AddChild(startTag); + } + + // Read content and end tag + if (element.StartTag.IsStartTag || startTag == StartTagPlaceholder) { + while(true) { + RawTag currTag = objStream.Current as RawTag; // Peek + if (currTag == EndTagPlaceholder) { + TagReader.OnSyntaxError(element, element.LastChild.EndOffset, element.LastChild.EndOffset, + "Expected ''", element.StartTag.Name); + ReadSingleObject(objStream); + element.HasEndTag = false; + element.IsProperlyNested = false; + break; + } else if (currTag != null && currTag.IsEndTag) { + if (currTag.Name != element.StartTag.Name) { + TagReader.OnSyntaxError(element, currTag.StartOffset + 2, currTag.StartOffset + 2 + currTag.Name.Length, + "Expected '{0}'. End tag must have same name as start tag.", element.StartTag.Name); + } + element.AddChild(ReadSingleObject(objStream)); + element.HasEndTag = true; + break; + } + RawObject nested = ReadTextOrElement(objStream); + if (nested is RawElement) { + if (!((RawElement)nested).IsProperlyNested) + element.IsProperlyNested = false; + element.AddChildren(Split((RawElement)nested).ToList()); + } else { + element.AddChild(nested); + } + } + } else { + element.HasEndTag = false; + } + + element.StartOffset = element.FirstChild.StartOffset; + element.EndOffset = element.LastChild.EndOffset; + + XmlParser.Log("Constructed {0}", element); + cache.Add(element, null); // Need all elements in cache for offset tracking + return element; + } + + IEnumerable Split(RawElement elem) + { + int myIndention = GetIndentLevel(elem); + // If has virtual end and is indented + if (!elem.HasEndTag && myIndention != -1) { + int lastAccepted = 0; // Accept start tag + while (lastAccepted + 1 < elem.Children.Count - 1 /* no end tag */) { + RawObject nextItem = elem.Children[lastAccepted + 1]; + if (nextItem is RawText) { + lastAccepted++; continue; // Accept + } else { + // Include all more indented items + if (GetIndentLevel(nextItem) > myIndention) { + lastAccepted++; continue; // Accept + } else { + break; // Reject + } + } + } + // Accepted everything? + if (lastAccepted + 1 == elem.Children.Count - 1) { + yield return elem; + yield break; + } + XmlParser.Log("Splitting {0} - take {1} of {2} nested", elem, lastAccepted, elem.Children.Count - 2); + RawElement topHalf = new RawElement(); + topHalf.HasStartOrEmptyTag = elem.HasStartOrEmptyTag; + topHalf.HasEndTag = elem.HasEndTag; + topHalf.AddChildren(elem.Children.Take(lastAccepted + 1)); // Start tag + nested + topHalf.StartOffset = topHalf.FirstChild.StartOffset; + topHalf.EndOffset = topHalf.LastChild.EndOffset; + TagReader.OnSyntaxError(topHalf, topHalf.LastChild.EndOffset, topHalf.LastChild.EndOffset, + "Expected ''", topHalf.StartTag.Name); + + XmlParser.Log("Constructed {0}", topHalf); + cache.Add(topHalf, null); + yield return topHalf; + for(int i = lastAccepted + 1; i < elem.Children.Count - 1; i++) { + yield return elem.Children[i]; + } + } else { + yield return elem; + } + } + + int GetIndentLevel(RawObject obj) + { + int offset = obj.StartOffset - 1; + int level = 0; + while(true) { + if (offset < 0) break; + char c = input[offset]; + if (c == ' ') { + level++; + } else if (c == '\t') { + level += 4; + } else if (c == '\r' || c == '\n') { + break; + } else { + return -1; + } + offset--; + } + return level; + } + + /// + /// Stack of still unmatched start tags. + /// It includes the cost and backtack information. + /// + class Configuration + { + /// Unmatched start tags + public ImmutableStack StartTags { get; set; } + /// Properly nested tags + public ImmutableStack Document { get; set; } + /// Number of needed modificaitons to the document + public int Cost { get; set; } + } + + /// + /// Dictionary which stores the cheapest configuration + /// + class Configurations: Dictionary, Configuration> + { + public Configurations() + { + } + + public Configurations(IEnumerable configs) + { + foreach(Configuration config in configs) { + this.Add(config); + } + } + + /// Overwrite only if cheaper + public void Add(Configuration newConfig) + { + Configuration oldConfig; + if (this.TryGetValue(newConfig.StartTags, out oldConfig)) { + if (newConfig.Cost < oldConfig.Cost) { + this[newConfig.StartTags] = newConfig; + } + } else { + base.Add(newConfig.StartTags, newConfig); + } + } + + public override string ToString() + { + StringBuilder sb = new StringBuilder(); + foreach(var kvp in this) { + sb.Append("\n - '"); + foreach(RawTag startTag in kvp.Value.StartTags.Reverse()) { + sb.Append('<'); + sb.Append(startTag.Name); + sb.Append('>'); + } + sb.AppendFormat("' = {0}", kvp.Value.Cost); + } + return sb.ToString(); + } + } + + // Tags used to guide the element creation + readonly RawTag StartTagPlaceholder = new RawTag(); + readonly RawTag EndTagPlaceholder = new RawTag(); + + /// + /// Add start or end tag placeholders so that the documment is properly nested + /// + List MatchTags(IEnumerable objs) + { + Configurations configurations = new Configurations(); + configurations.Add(new Configuration { + StartTags = ImmutableStack.Empty, + Document = ImmutableStack.Empty, + Cost = 0, + }); + foreach(RawObject obj in objs) { + configurations = ProcessObject(configurations, obj); + } + // Close any remaining start tags + foreach(Configuration conifg in configurations.Values) { + while(!conifg.StartTags.IsEmpty) { + conifg.StartTags = conifg.StartTags.Pop(); + conifg.Document = conifg.Document.Push(EndTagPlaceholder); + conifg.Cost += 1; + } + } + XmlParser.Log("Configurations after closing all remaining tags:" + configurations.ToString()); + Configuration bestConfig = configurations.Values.OrderBy(v => v.Cost).First(); + XmlParser.Log("Best configuration has cost {0}", bestConfig.Cost); + + return bestConfig.Document.Reverse().ToList(); + } + + /// Get posible configurations after considering fiven object + Configurations ProcessObject(Configurations oldConfigs, RawObject obj) + { + XmlParser.Log("Processing {0}", obj); + + RawTag tag = obj as RawTag; + XmlParser.Assert(obj is RawTag || obj is RawText || obj is RawElement, obj.GetType().Name + " not expected"); + if (obj is RawElement) + XmlParser.Assert(((RawElement)obj).IsProperlyNested, "Element not proprly nested"); + + Configurations newConfigs = new Configurations(); + + foreach(var kvp in oldConfigs) { + Configuration oldConfig = kvp.Value; + var oldStartTags = oldConfig.StartTags; + var oldDocument = oldConfig.Document; + int oldCost = oldConfig.Cost; + + if (tag != null && tag.IsStartTag) { + newConfigs.Add(new Configuration { // Push start-tag (cost 0) + StartTags = oldStartTags.Push(tag), + Document = oldDocument.Push(tag), + Cost = oldCost, + }); + } else if (tag != null && tag.IsEndTag) { + newConfigs.Add(new Configuration { // Ignore (cost 1) + StartTags = oldStartTags, + Document = oldDocument.Push(StartTagPlaceholder).Push(tag), + Cost = oldCost + 1, + }); + if (!oldStartTags.IsEmpty && oldStartTags.Peek().Name != tag.Name) { + newConfigs.Add(new Configuration { // Pop 1 item (cost 1) - not mathcing + StartTags = oldStartTags.Pop(), + Document = oldDocument.Push(tag), + Cost = oldCost + 1, + }); + } + int popedCount = 0; + var startTags = oldStartTags; + var doc = oldDocument; + foreach(RawTag poped in oldStartTags) { + popedCount++; + if (poped.Name == tag.Name) { + newConfigs.Add(new Configuration { // Pop 'x' items (cost x-1) - last one is matching + StartTags = startTags.Pop(), + Document = doc.Push(tag), + Cost = oldCost + popedCount - 1, + }); + } + startTags = startTags.Pop(); + doc = doc.Push(EndTagPlaceholder); + } + } else { + // Empty tag or other tag type or text or properly nested element + newConfigs.Add(new Configuration { // Ignore (cost 0) + StartTags = oldStartTags, + Document = oldDocument.Push(obj), + Cost = oldCost, + }); + } + } + + // Log("New configurations:" + newConfigs.ToString()); + + Configurations bestNewConfigurations = new Configurations( + newConfigs.Values.OrderBy(v => v.Cost).Take(maxConfigurationCount) + ); + + XmlParser.Log("Best new configurations:" + bestNewConfigurations.ToString()); + + return bestNewConfigurations; + } + + #region Helper methods + + string PrintObjects(IEnumerable objs) + { + StringBuilder sb = new StringBuilder(); + foreach(RawObject obj in objs) { + if (obj is RawTag) { + if (obj == StartTagPlaceholder) { + sb.Append("#StartTag#"); + } else if (obj == EndTagPlaceholder) { + sb.Append("#EndTag#"); + } else { + sb.Append(((RawTag)obj).OpeningBracket); + sb.Append(((RawTag)obj).Name); + sb.Append(((RawTag)obj).ClosingBracket); + } + } else if (obj is RawElement) { + sb.Append('['); + sb.Append(PrintObjects(((RawElement)obj).Children)); + sb.Append(']'); + } else if (obj is RawText) { + sb.Append('~'); + } else { + throw new Exception("Should not be here: " + obj); + } + } + return sb.ToString(); + } + + #endregion + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/TagReader.cs b/ICSharpCode.AvalonEdit/XmlParser/TagReader.cs new file mode 100644 index 0000000..e5a6fe4 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/TagReader.cs @@ -0,0 +1,708 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + class TagReader: TokenReader + { + XmlParser parser; + Cache cache; + string input; + + public TagReader(XmlParser parser, string input): base(input) + { + this.parser = parser; + this.cache = parser.Cache; + this.input = input; + } + + bool TryReadFromCacheOrNew(out T res) where T: RawObject, new() + { + return TryReadFromCacheOrNew(out res, t => true); + } + + bool TryReadFromCacheOrNew(out T res, Predicate condition) where T: RawObject, new() + { + T cached = cache.GetObject(this.CurrentLocation, 0, condition); + if (cached != null) { + Skip(cached.Length); + res = cached; + return true; + } else { + res = new T(); + return false; + } + } + + void OnParsed(RawObject obj) + { + XmlParser.Log("Parsed {0}", obj); + cache.Add(obj, this.MaxTouchedLocation > this.CurrentLocation ? (int?)this.MaxTouchedLocation : null); + } + + /// + /// Read all tags in the document in a flat sequence. + /// It also includes the text between tags and possibly some properly nested Elements from cache. + /// + public List ReadAllTags() + { + List stream = new List(); + + while(true) { + if (IsEndOfFile()) { + break; + } else if (TryPeek('<')) { + RawElement elem; + if (TryReadFromCacheOrNew(out elem, e => e.IsProperlyNested)) { + stream.Add(elem); + } else { + stream.Add(ReadTag()); + } + } else { + stream.AddRange(ReadText(RawTextType.CharacterData)); + } + } + + return stream; + } + + /// + /// Context: "<" + /// + RawTag ReadTag() + { + AssertHasMoreData(); + + RawTag tag; + if (TryReadFromCacheOrNew(out tag)) return tag; + + tag.StartOffset = this.CurrentLocation; + + // Read the opening bracket + // It identifies the type of tag and parsing behavior for the rest of it + tag.OpeningBracket = ReadOpeningBracket(); + + if (tag.IsStartOrEmptyTag || tag.IsEndTag || tag.IsProcessingInstruction) { + // Read the name + string name; + if (TryReadName(out name)) { + if (!IsValidName(name)) { + OnSyntaxError(tag, this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name); + } + } else { + OnSyntaxError(tag, "Element name expected"); + } + tag.Name = name; + } + + if (tag.IsStartOrEmptyTag || tag.IsEndTag) { + // Read attributes for the tag + while(true) { + // Chech for all forbiden 'name' charcters first - see ReadName + if (IsEndOfFile()) break; + if (TryPeekWhiteSpace()) { + tag.AddChildren(ReadText(RawTextType.WhiteSpace)); + continue; // End of file might be next + } + if (TryPeek('<')) break; + if (TryPeek('>') || TryPeek('/') || TryPeek('?')) break; // End tag + + // We have "=\'\"" or name - read attribute + tag.AddChild(ReadAttribulte()); + } + } else if (tag.IsDocumentType) { + tag.AddChildren(ReadContentOfDTD()); + } else { + int start = this.CurrentLocation; + IEnumerable text; + if (tag.IsComment) { + text = ReadText(RawTextType.Comment); + } else if (tag.IsCData) { + text = ReadText(RawTextType.CData); + } else if (tag.IsProcessingInstruction) { + text = ReadText(RawTextType.ProcessingInstruction); + } else if (tag.IsUnknownBang) { + text = ReadText(RawTextType.UnknownBang); + } else { + throw new Exception(string.Format("Unknown opening bracket '{0}'", tag.OpeningBracket)); + } + // Enumerate + text = text.ToList(); + // Backtrack at complete start + if (IsEndOfFile() || (tag.IsUnknownBang && TryPeek('<'))) { + GoBack(start); + } else { + tag.AddChildren(text); + } + } + + // Read closing bracket + string bracket; + TryReadClosingBracket(out bracket); + tag.ClosingBracket = bracket; + + // Error check + int brStart = this.CurrentLocation - (tag.ClosingBracket ?? string.Empty).Length; + int brEnd = this.CurrentLocation; + if (tag.Name == null) { + // One error was reported already + } else if (tag.IsStartOrEmptyTag) { + if (tag.ClosingBracket != ">" && tag.ClosingBracket != "/>") OnSyntaxError(tag, brStart, brEnd, "'>' or '/>' expected"); + } else if (tag.IsEndTag) { + if (tag.ClosingBracket != ">") OnSyntaxError(tag, brStart, brEnd, "'>' expected"); + } else if (tag.IsComment) { + if (tag.ClosingBracket != "-->") OnSyntaxError(tag, brStart, brEnd, "'-->' expected"); + } else if (tag.IsCData) { + if (tag.ClosingBracket != "]]>") OnSyntaxError(tag, brStart, brEnd, "']]>' expected"); + } else if (tag.IsProcessingInstruction) { + if (tag.ClosingBracket != "?>") OnSyntaxError(tag, brStart, brEnd, "'?>' expected"); + } else if (tag.IsUnknownBang) { + if (tag.ClosingBracket != ">") OnSyntaxError(tag, brStart, brEnd, "'>' expected"); + } else if (tag.IsDocumentType) { + if (tag.ClosingBracket != ">") OnSyntaxError(tag, brStart, brEnd, "'>' expected"); + } else { + throw new Exception(string.Format("Unknown opening bracket '{0}'", tag.OpeningBracket)); + } + + // Attribute name may not apper multiple times + var duplicates = tag.Children.OfType().GroupBy(attr => attr.Name).SelectMany(g => g.Skip(1)); + foreach(RawAttribute attr in duplicates) { + OnSyntaxError(tag, attr.StartOffset, attr.EndOffset, "Attribute with name '{0}' already exists", attr.Name); + } + + tag.EndOffset = this.CurrentLocation; + + OnParsed(tag); + return tag; + } + + /// + /// Reads any of the know opening brackets. (only full bracket) + /// Context: "<" + /// + string ReadOpeningBracket() + { + // We are using a lot of string literals so that the memory instances are shared + int start = this.CurrentLocation; + if (TryRead('<')) { + if (TryRead('/')) { + return " + /// Reads any of the know closing brackets. (only full bracket) + /// Context: any + /// + bool TryReadClosingBracket(out string bracket) + { + // We are using a lot of string literals so that the memory instances are shared + if (TryRead('>')) { + bracket = ">"; + } else if (TryRead("/>")) { + bracket = "/>"; + } else if (TryRead("?>")) { + bracket = "?>"; + } else if (TryRead("-->")) { + bracket = "-->"; + } else if (TryRead("]]>")) { + bracket = "]]>"; + } else { + bracket = string.Empty; + return false; + } + return true; + } + + IEnumerable ReadContentOfDTD() + { + int start = this.CurrentLocation; + while(true) { + if (IsEndOfFile()) break; // End of file + TryMoveToNonWhiteSpace(); // Skip whitespace + if (TryRead('\'')) TryMoveTo('\''); // Skip single quoted string TODO: Bug + if (TryRead('\"')) TryMoveTo('\"'); // Skip single quoted string + if (TryRead('[')) { // Start of nested infoset + // Reading infoset + while(true) { + if (IsEndOfFile()) break; + TryMoveToAnyOf('<', ']'); + if (TryPeek('<')) { + if (start != this.CurrentLocation) { // Two following tags + yield return MakeText(start, this.CurrentLocation); + } + yield return ReadTag(); + start = this.CurrentLocation; + } + if (TryPeek(']')) break; + } + } + TryRead(']'); // End of nested infoset + if (TryPeek('>')) break; // Proper closing + if (TryPeek('<')) break; // Malformed XML + TryMoveNext(); // Skip anything else + } + if (start != this.CurrentLocation) { + yield return MakeText(start, this.CurrentLocation); + } + } + + /// + /// Context: name or "=\'\"" + /// + RawAttribute ReadAttribulte() + { + AssertHasMoreData(); + + RawAttribute attr; + if (TryReadFromCacheOrNew(out attr)) return attr; + + attr.StartOffset = this.CurrentLocation; + + // Read name + string name; + if (TryReadName(out name)) { + if (!IsValidName(name)) { + OnSyntaxError(attr, this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name); + } + } else { + OnSyntaxError(attr, "Attribute name expected"); + } + attr.Name = name; + + // Read equals sign and surrounding whitespace + int checkpoint = this.CurrentLocation; + TryMoveToNonWhiteSpace(); + if (TryRead('=')) { + int chk2 = this.CurrentLocation; + TryMoveToNonWhiteSpace(); + if (!TryPeek('"') && !TryPeek('\'')) { + // Do not read whitespace if quote does not follow + GoBack(chk2); + } + attr.EqualsSign = GetText(checkpoint, this.CurrentLocation); + } else { + GoBack(checkpoint); + OnSyntaxError(attr, "'=' expected"); + attr.EqualsSign = string.Empty; + } + + // Read attribute value + int start = this.CurrentLocation; + char quoteChar = TryPeek('"') ? '"' : '\''; + bool startsWithQuote; + if (TryRead(quoteChar)) { + startsWithQuote = true; + int valueStart = this.CurrentLocation; + TryMoveToAnyOf(quoteChar, '<'); + if (TryRead(quoteChar)) { + if (!TryPeekAnyOf(' ', '\t', '\n', '\r', '/', '>', '?')) { + if (TryPeekPrevious('=', 2) || (TryPeekPrevious('=', 3) && TryPeekPrevious(' ', 2))) { + // This actually most likely means that we are in the next attribute value + GoBack(valueStart); + ReadAttributeValue(quoteChar); + if (TryRead(quoteChar)) { + OnSyntaxError(attr, "White space or end of tag expected"); + } else { + OnSyntaxError(attr, "Quote {0} expected (or add whitespace after the following one)", quoteChar); + } + } else { + OnSyntaxError(attr, "White space or end of tag expected"); + } + } + } else { + // '<' or end of file + GoBack(valueStart); + ReadAttributeValue(quoteChar); + OnSyntaxError(attr, "Quote {0} expected", quoteChar); + } + } else { + startsWithQuote = false; + int valueStart = this.CurrentLocation; + ReadAttributeValue(null); + TryRead('\"'); + TryRead('\''); + if (valueStart == this.CurrentLocation) { + OnSyntaxError(attr, "Attribute value expected"); + } else { + OnSyntaxError(attr, valueStart, this.CurrentLocation, "Attribute value must be quoted"); + } + } + attr.QuotedValue = GetText(start, this.CurrentLocation); + attr.Value = Unquote(attr.QuotedValue); + attr.Value = Dereference(attr, attr.Value, startsWithQuote ? start + 1 : start); + + attr.EndOffset = this.CurrentLocation; + + OnParsed(attr); + return attr; + } + + /// + /// Read everything up to quote (excluding), opening/closing tag or attribute signature + /// + void ReadAttributeValue(char? quote) + { + while(true) { + if (IsEndOfFile()) return; + // What is next? + int start = this.CurrentLocation; + TryMoveToNonWhiteSpace(); // Read white space (if any) + if (quote.HasValue) { + if (TryPeek(quote.Value)) return; + } else { + if (TryPeek('"') || TryPeek('\'')) return; + } + // Opening/closing tag + if (TryPeekAnyOf('<', '/', '>')) { + GoBack(start); + return; + } + // Try reading attribute signature + string name; + if (TryReadName(out name)) { + int nameEnd = this.CurrentLocation; + if (TryMoveToNonWhiteSpace() && TryRead("=") && + TryMoveToNonWhiteSpace() && TryPeekAnyOf('"', '\'')) + { + // Start of attribute. Great + GoBack(start); + return; // Done + } else { + // Just some gargabe - make it part of the value + GoBack(nameEnd); + continue; // Read more + } + } + TryMoveNext(); // Accept everyting else + } + } + + RawText MakeText(int start, int end) + { + XmlParser.DebugAssert(end > start, "Empty text"); + + RawText text = new RawText() { + StartOffset = start, + EndOffset = end, + EscapedValue = GetText(start, end), + Type = RawTextType.Other + }; + + OnParsed(text); + return text; + } + + const int maxEntityLength = 12; // The longest build-in one is 10 ("􏿿") + const int maxTextFragmentSize = 8; + const int lookAheadLenght = (3 * maxTextFragmentSize) / 2; // More so that we do not get small "what was inserted" fragments + + /// + /// Reads text and optionaly separates it into fragments. + /// It can also return empty set for no appropriate text input. + /// Make sure you enumerate it only once + /// + IEnumerable ReadText(RawTextType type) + { + bool lookahead = false; + while(true) { + RawText text; + if (TryReadFromCacheOrNew(out text, t => t.Type == type)) { + // Cached text found + yield return text; + continue; // Read next fragment; the method can handle "no text left" + } + text.Type = type; + + // Limit the reading to just a few characters + // (the first character not to be read) + int fragmentEnd = Math.Min(this.CurrentLocation + maxTextFragmentSize, this.InputLength); + + // Look if some futher text has been already processed and align so that + // we hit that chache point. It is expensive so it is off for the first run + if (lookahead) { + // Note: Must fit entity + RawObject nextFragment = cache.GetObject(this.CurrentLocation + maxEntityLength, lookAheadLenght - maxEntityLength, t => t.Type == type); + if (nextFragment != null) { + fragmentEnd = Math.Min(nextFragment.StartOffset, this.InputLength); + XmlParser.Log("Parsing only text ({0}-{1}) because later text was already processed", this.CurrentLocation, fragmentEnd); + } + } + lookahead = true; + + text.StartOffset = this.CurrentLocation; + int start = this.CurrentLocation; + + // Try move to the terminator given by the context + if (type == RawTextType.WhiteSpace) { + TryMoveToNonWhiteSpace(fragmentEnd); + } else if (type == RawTextType.CharacterData) { + while(true) { + if (!TryMoveToAnyOf(new char[] {'<', ']'}, fragmentEnd)) break; // End of fragment + if (TryPeek('<')) break; + if (TryPeek(']')) { + if (TryPeek("]]>")) { + OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 3, "']]>' is not allowed in text"); + } + TryMoveNext(); + continue; + } + throw new Exception("Infinite loop"); + } + } else if (type == RawTextType.Comment) { + // Do not report too many errors + bool errorReported = false; + while(true) { + if (!TryMoveTo('-', fragmentEnd)) break; // End of fragment + if (TryPeek("-->")) break; + if (TryPeek("--") && !errorReported) { + OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 2, "'--' is not allowed in comment"); + errorReported = true; + } + TryMoveNext(); + } + } else if (type == RawTextType.CData) { + while(true) { + // We can not use use TryMoveTo("]]>", fragmentEnd) because it may incorectly accept "]" at the end of fragment + if (!TryMoveTo(']', fragmentEnd)) break; // End of fragment + if (TryPeek("]]>")) break; + TryMoveNext(); + } + } else if (type == RawTextType.ProcessingInstruction) { + while(true) { + if (!TryMoveTo('?', fragmentEnd)) break; // End of fragment + if (TryPeek("?>")) break; + TryMoveNext(); + } + } else if (type == RawTextType.UnknownBang) { + TryMoveToAnyOf(new char[] {'<', '>'}, fragmentEnd); + } else { + throw new Exception("Uknown type " + type); + } + + // Terminal found or real end was reached; + bool finished = this.CurrentLocation < fragmentEnd || IsEndOfFile(); + + if (!finished) { + // We have to continue reading more text fragments + + // If there is entity reference, make sure the next segment starts with it to prevent framentation + int entitySearchStart = Math.Max(start + 1 /* data for us */, this.CurrentLocation - maxEntityLength); + int entitySearchLength = this.CurrentLocation - entitySearchStart; + if (entitySearchLength > 0) { + // Note that LastIndexOf works backward + int entityIndex = input.LastIndexOf('&', this.CurrentLocation - 1, entitySearchLength); + if (entityIndex != -1) { + GoBack(entityIndex); + } + } + } + + text.EscapedValue = GetText(start, this.CurrentLocation); + if (type == RawTextType.CharacterData) { + text.Value = Dereference(text, text.EscapedValue, start); + } else { + text.Value = text.EscapedValue; + } + text.EndOffset = this.CurrentLocation; + + if (text.EscapedValue.Length > 0) { + OnParsed(text); + yield return text; + } + + if (finished) { + yield break; + } + } + } + + #region Helper methods + + void OnSyntaxError(RawObject obj, string message, params object[] args) + { + OnSyntaxError(obj, this.CurrentLocation, this.CurrentLocation + 1, message, args); + } + + public static void OnSyntaxError(RawObject obj, int start, int end, string message, params object[] args) + { + if (end <= start) end = start + 1; + XmlParser.Log("Syntax error ({0}-{1}): {2}", start, end, string.Format(message, args)); + obj.AddSyntaxError(new SyntaxError() { + Object = obj, + StartOffset = start, + EndOffset = end, + Message = string.Format(message, args), + }); + } + + static bool IsValidName(string name) + { + try { + System.Xml.XmlConvert.VerifyName(name); + return true; + } catch (System.Xml.XmlException) { + return false; + } + } + + /// Remove quoting from the given string + static string Unquote(string quoted) + { + if (string.IsNullOrEmpty(quoted)) return string.Empty; + char first = quoted[0]; + if (quoted.Length == 1) return (first == '"' || first == '\'') ? string.Empty : quoted; + char last = quoted[quoted.Length - 1]; + if (first == '"' || first == '\'') { + if (first == last) { + // Remove both quotes + return quoted.Substring(1, quoted.Length - 2); + } else { + // Remove first quote + return quoted.Remove(0, 1); + } + } else { + if (last == '"' || last == '\'') { + // Remove last quote + return quoted.Substring(0, quoted.Length - 1); + } else { + // Keep whole string + return quoted; + } + } + } + + string Dereference(RawObject owner, string text, int textLocation) + { + StringBuilder sb = null; // The dereferenced text so far (all up to 'curr') + int curr = 0; + while(true) { + // Reached end of input + if (curr == text.Length) { + if (sb != null) { + return sb.ToString(); + } else { + return text; + } + } + + // Try to find reference + int start = text.IndexOf('&', curr); + + // No more references found + if (start == -1) { + if (sb != null) { + sb.Append(text, curr, text.Length - curr); // Add rest + return sb.ToString(); + } else { + return text; + } + } + + // Append text before the enitiy reference + if (sb == null) sb = new StringBuilder(text.Length); + sb.Append(text, curr, start - curr); + curr = start; + + // Process the entity + int errorLoc = textLocation + sb.Length; + + // Find entity name + int end = text.IndexOfAny(new char[] {'&', ';'}, start + 1, Math.Min(maxEntityLength, text.Length - (start + 1))); + if (end == -1 || text[end] == '&') { + // Not found + OnSyntaxError(owner, errorLoc, errorLoc + 1, "Entity reference must be terminated with ';'"); + // Keep '&' + sb.Append('&'); + curr++; + continue; // Restart and next character location + } + string name = text.Substring(start + 1, end - (start + 1)); + + // Resolve the name + string replacement; + if (name == "amp") { + replacement = "&"; + } else if (name == "lt") { + replacement = "<"; + } else if (name == "gt") { + replacement = ">"; + } else if (name == "apos") { + replacement = "'"; + } else if (name == "quot") { + replacement = "\""; + } else if (name.Length > 0 && name[0] == '#') { + int num; + if (name.Length > 1 && name[1] == 'x') { + if (!int.TryParse(name.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture.NumberFormat, out num)) { + num = -1; + OnSyntaxError(owner, errorLoc + 3, errorLoc + 1 + name.Length, "Hexadecimal code of unicode character expected"); + } + } else { + if (!int.TryParse(name.Substring(1), NumberStyles.None, CultureInfo.InvariantCulture.NumberFormat, out num)) { + num = -1; + OnSyntaxError(owner, errorLoc + 2, errorLoc + 1 + name.Length, "Numeric code of unicode character expected"); + } + } + if (num != -1) { + try { + replacement = char.ConvertFromUtf32(num); + } catch (ArgumentOutOfRangeException) { + replacement = null; + OnSyntaxError(owner, errorLoc + 2, errorLoc + 1 + name.Length, "Invalid unicode character U+{0:X} ({0})", num); + } + } else { + replacement = null; + } + } else { + replacement = null; + if (parser.EntityReferenceIsError) { + OnSyntaxError(owner, errorLoc, errorLoc + 1 + name.Length + 1, "Unknown entity reference '{0}'", name); + } + } + + // Append the replacement to output + if (replacement != null) { + sb.Append(replacement); + } else { + sb.Append('&'); + sb.Append(name); + sb.Append(';'); + } + curr = end + 1; + continue; + } + } + + #endregion + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/TokenReader.cs b/ICSharpCode.AvalonEdit/XmlParser/TokenReader.cs new file mode 100644 index 0000000..be8ad33 --- /dev/null +++ b/ICSharpCode.AvalonEdit/XmlParser/TokenReader.cs @@ -0,0 +1,306 @@ +// +// +// +// +// $Revision$ +// + +using System; +using System.Collections.Generic; +using System.Linq; + +namespace ICSharpCode.AvalonEdit.XmlParser +{ + class TokenReader + { + string input; + int inputLength; + int currentLocation; + + // CurrentLocation is assumed to be touched and the fact does not + // have to be recorded in this variable. + // This stores any value bigger than that if applicable. + // Acutal value is max(currentLocation, maxTouchedLocation). + int maxTouchedLocation; + + public int InputLength { + get { return inputLength; } + } + + public int CurrentLocation { + get { return currentLocation; } + } + + public int MaxTouchedLocation { + get { return Math.Max(currentLocation, maxTouchedLocation); } + } + + public TokenReader(string input) + { + this.input = input; + this.inputLength = input.Length; + } + + protected bool IsEndOfFile() + { + return currentLocation == inputLength; + } + + protected void AssertIsEndOfFile() + { + XmlParser.Assert(IsEndOfFile(), "End of file expected at this point"); + } + + protected bool HasMoreData() + { + return currentLocation < inputLength; + } + + protected void AssertHasMoreData() + { + XmlParser.Assert(HasMoreData(), "Unexpected end of file"); + } + + protected bool TryMoveNext() + { + if (currentLocation == inputLength) return false; + + currentLocation++; + return true; + } + + protected void Skip(int count) + { + if (currentLocation + count > inputLength) throw new Exception("Skipping after the end of file"); + currentLocation += count; + } + + protected void GoBack(int oldLocation) + { + if (oldLocation > currentLocation) throw new Exception("Trying to move forward"); + maxTouchedLocation = Math.Max(maxTouchedLocation, currentLocation); + currentLocation = oldLocation; + } + + protected bool TryRead(char c) + { + if (currentLocation == inputLength) return false; + + if (input[currentLocation] == c) { + currentLocation++; + return true; + } else { + return false; + } + } + + protected bool TryReadAnyOf(params char[] c) + { + if (currentLocation == inputLength) return false; + + if (c.Contains(input[currentLocation])) { + currentLocation++; + return true; + } else { + return false; + } + } + + protected bool TryRead(string text) + { + if (TryPeek(text)) { + currentLocation += text.Length; + return true; + } else { + return false; + } + } + + protected bool TryPeekPrevious(char c, int back) + { + if (currentLocation - back == inputLength) return false; + if (currentLocation - back < 0 ) return false; + + return input[currentLocation - back] == c; + } + + protected bool TryPeek(char c) + { + if (currentLocation == inputLength) return false; + + return input[currentLocation] == c; + } + + protected bool TryPeekAnyOf(params char[] chars) + { + if (currentLocation == inputLength) return false; + + return chars.Contains(input[currentLocation]); + } + + protected bool TryPeek(string text) + { + if (!TryPeek(text[0])) return false; // Early exit + + maxTouchedLocation = Math.Max(maxTouchedLocation, currentLocation + (text.Length - 1)); + // The following comparison 'touches' the end of file - it does depend on the end being there + if (currentLocation + text.Length > inputLength) return false; + + return input.Substring(currentLocation, text.Length) == text; + } + + protected bool TryPeekWhiteSpace() + { + if (currentLocation == inputLength) return false; + + char c = input[currentLocation]; + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; + } + + // The move functions do not have to move if already at target + // The move functions allow 'overriding' of the document length + + protected bool TryMoveTo(char c) + { + return TryMoveTo(c, inputLength); + } + + protected bool TryMoveTo(char c, int inputLength) + { + if (currentLocation == inputLength) return false; + int index = input.IndexOf(c, currentLocation, inputLength - currentLocation); + if (index != -1) { + currentLocation = index; + return true; + } else { + currentLocation = inputLength; + return false; + } + } + + protected bool TryMoveToAnyOf(params char[] c) + { + return TryMoveToAnyOf(c, inputLength); + } + + protected bool TryMoveToAnyOf(char[] c, int inputLength) + { + if (currentLocation == inputLength) return false; + int index = input.IndexOfAny(c, currentLocation, inputLength - currentLocation); + if (index != -1) { + currentLocation = index; + return true; + } else { + currentLocation = inputLength; + return false; + } + } + + protected bool TryMoveTo(string text) + { + return TryMoveTo(text, inputLength); + } + + protected bool TryMoveTo(string text, int inputLength) + { + if (currentLocation == inputLength) return false; + int index = input.IndexOf(text, currentLocation, inputLength - currentLocation, StringComparison.Ordinal); + if (index != -1) { + maxTouchedLocation = index + text.Length - 1; + currentLocation = index; + return true; + } else { + currentLocation = inputLength; + return false; + } + } + + protected bool TryMoveToNonWhiteSpace() + { + return TryMoveToNonWhiteSpace(inputLength); + } + + protected bool TryMoveToNonWhiteSpace(int inputLength) + { + while(TryPeekWhiteSpace()) currentLocation++; + return HasMoreData(); + } + + /// + /// Read a name token. + /// The following characters are not allowed: + /// "" End of file + /// " \n\r\t" Whitesapce + /// "=\'\"" Attribute value + /// "<>/?" Tags + /// + /// True if read at least one character + protected bool TryReadName(out string res) + { + int start = currentLocation; + // Keep reading up to invalid character + while(true) { + if (currentLocation == inputLength) break; // Reject end of file + char c = input[currentLocation]; + if (0x41 <= (int)c && (int)c <= 0x7A) { // Accpet 0x41-0x7A (A-Z[\]^_`a-z) + currentLocation++; + continue; + } + if (c == ' ' || c == '\n' || c == '\r' || c == '\t' || // Reject whitesapce + c == '=' || c == '\'' || c == '"' || // Reject attributes + c == '<' || c == '>' || c == '/' || c == '?') { // Reject tags + break; + } else { + currentLocation++; + continue; // Accept other character + } + } + if (start == currentLocation) { + res = string.Empty; + return false; + } else { + res = GetText(start, currentLocation); + return true; + } + } + + protected string GetText(int start, int end) + { + if (end > currentLocation) throw new Exception("Reading ahead of current location"); + if (start == inputLength && end == inputLength) { + return string.Empty; + } else { + return GetCachedString(input.Substring(start, end - start)); + } + } + + Dictionary stringCache = new Dictionary(); + int stringCacheRequestedCount; + int stringCacheRequestedSize; + int stringCacheStoredCount; + int stringCacheStoredSize; + + string GetCachedString(string cached) + { + stringCacheRequestedCount += 1; + stringCacheRequestedSize += 8 + 2 * cached.Length; + // Do not bother with long strings + if (cached.Length <= 32) return cached; + if (stringCache.ContainsKey(cached)) { + // Get the instance from the cache instead + return stringCache[cached]; + } else { + // Add to cache + stringCacheStoredCount += 1; + stringCacheStoredSize += 8 + 2 * cached.Length; + stringCache.Add(cached, cached); + return cached; + } + } + + public void PrintStringCacheStats() + { + XmlParser.Log("String cache: Requested {0} ({1} bytes); Actaully stored {2} ({3} bytes); {4}% stored", stringCacheRequestedCount, stringCacheRequestedSize, stringCacheStoredCount, stringCacheStoredSize, stringCacheRequestedSize == 0 ? 0 : stringCacheStoredSize * 100 / stringCacheRequestedSize); + } + } +} diff --git a/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs b/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs index 8c3fc50..ecedfae 100644 --- a/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs +++ b/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs @@ -5,13 +5,10 @@ // $Revision$ // -using ICSharpCode.AvalonEdit.Utils; using System; using System.Collections.Generic; using System.Diagnostics; -using System.Globalization; -using System.Linq; -using System.Text; + using ICSharpCode.AvalonEdit.Document; namespace ICSharpCode.AvalonEdit.XmlParser @@ -169,1497 +166,4 @@ namespace ICSharpCode.AvalonEdit.XmlParser return userDocument; } } - - /// - /// Holds all valid parsed items. - /// Also tracks their offsets as document changes. - /// - class Cache - { - /// Previously parsed items as long as they are valid - TextSegmentCollection parsedItems = new TextSegmentCollection(); - - /// - /// Is used to identify what memory range was touched by object - /// The default is (StartOffset, EndOffset + 1) which is not stored - /// - TextSegmentCollection touchedMemoryRanges = new TextSegmentCollection(); - - class TouchedMemoryRange: TextSegment - { - public RawObject TouchedByObject { get; set; } - } - - public void UpdateOffsetsAndInvalidate(IEnumerable changes) - { - foreach(DocumentChangeEventArgs change in changes) { - // Update offsets of all items - parsedItems.UpdateOffsets(change); - touchedMemoryRanges.UpdateOffsets(change); - - // Remove any items affected by the change - XmlParser.Log("Changed offset {0}", change.Offset); - // Removing will cause one of the ends to be set to change.Offset - // FindSegmentsContaining includes any segments touching - // so that conviniently takes care of the +1 byte - foreach(RawObject obj in parsedItems.FindSegmentsContaining(change.Offset)) { - Remove(obj, false); - } - foreach(TouchedMemoryRange memory in touchedMemoryRanges.FindSegmentsContaining(change.Offset)) { - XmlParser.Log("Found that {0} dependeds on memory ({1}-{2})", memory.TouchedByObject, memory.StartOffset, memory.EndOffset); - Remove(memory.TouchedByObject, true); - touchedMemoryRanges.Remove(memory); - } - } - } - - /// Add object to cache, optionally adding extra memory tracking - public void Add(RawObject obj, int? maxTouchedLocation) - { - XmlParser.Assert(obj.Length > 0 || obj is RawDocument, string.Format("Invalid object {0}. It has zero length.", obj)); - if (obj is RawContainer) { - int objStartOffset = obj.StartOffset; - int objEndOffset = obj.EndOffset; - foreach(RawObject child in ((RawContainer)obj).Children) { - XmlParser.Assert(objStartOffset <= child.StartOffset && child.EndOffset <= objEndOffset, "Wrong nesting"); - } - } - parsedItems.Add(obj); - obj.IsInCache = true; - if (maxTouchedLocation != null) { - // location is assumed to be read so the range ends at (location + 1) - // For example eg for "a_" it is (0-2) - TouchedMemoryRange memRange = new TouchedMemoryRange() { - StartOffset = obj.StartOffset, - EndOffset = maxTouchedLocation.Value + 1, - TouchedByObject = obj - }; - touchedMemoryRanges.Add(memRange); - XmlParser.Log("{0} touched memory range ({1}-{2})", obj, memRange.StartOffset, memRange.EndOffset); - } - } - - List FindParents(RawObject child) - { - List parents = new List(); - foreach(RawObject parent in parsedItems.FindSegmentsContaining(child.StartOffset)) { - // Parent is anyone wholy containg the child - if (parent.StartOffset <= child.StartOffset && child.EndOffset <= parent.EndOffset && parent != child) { - parents.Add(parent); - } - } - return parents; - } - - /// Remove from cache - public void Remove(RawObject obj, bool includeParents) - { - if (includeParents) { - List parents = FindParents(obj); - - foreach(RawObject r in parents) { - if (parsedItems.Remove(r)) { - r.IsInCache = false; - XmlParser.Log("Removing cached item {0} (it is parent)", r); - } - } - } - - if (parsedItems.Remove(obj)) { - obj.IsInCache = false; - XmlParser.Log("Removed cached item {0}", obj); - } - } - - public T GetObject(int offset, int lookaheadCount, Predicate conditon) where T: RawObject, new() - { - RawObject obj = parsedItems.FindFirstSegmentWithStartAfter(offset); - while(obj != null && offset <= obj.StartOffset && obj.StartOffset <= offset + lookaheadCount) { - if (obj is T && conditon((T)obj)) { - return (T)obj; - } - obj = parsedItems.GetNextSegment(obj); - } - return null; - } - } - - class TokenReader - { - string input; - int inputLength; - int currentLocation; - - // CurrentLocation is assumed to be touched and the fact does not - // have to be recorded in this variable. - // This stores any value bigger than that if applicable. - // Acutal value is max(currentLocation, maxTouchedLocation). - int maxTouchedLocation; - - public int InputLength { - get { return inputLength; } - } - - public int CurrentLocation { - get { return currentLocation; } - } - - public int MaxTouchedLocation { - get { return Math.Max(currentLocation, maxTouchedLocation); } - } - - public TokenReader(string input) - { - this.input = input; - this.inputLength = input.Length; - } - - protected bool IsEndOfFile() - { - return currentLocation == inputLength; - } - - protected void AssertIsEndOfFile() - { - XmlParser.Assert(IsEndOfFile(), "End of file expected at this point"); - } - - protected bool HasMoreData() - { - return currentLocation < inputLength; - } - - protected void AssertHasMoreData() - { - XmlParser.Assert(HasMoreData(), "Unexpected end of file"); - } - - protected bool TryMoveNext() - { - if (currentLocation == inputLength) return false; - - currentLocation++; - return true; - } - - protected void Skip(int count) - { - if (currentLocation + count > inputLength) throw new Exception("Skipping after the end of file"); - currentLocation += count; - } - - protected void GoBack(int oldLocation) - { - if (oldLocation > currentLocation) throw new Exception("Trying to move forward"); - maxTouchedLocation = Math.Max(maxTouchedLocation, currentLocation); - currentLocation = oldLocation; - } - - protected bool TryRead(char c) - { - if (currentLocation == inputLength) return false; - - if (input[currentLocation] == c) { - currentLocation++; - return true; - } else { - return false; - } - } - - protected bool TryReadAnyOf(params char[] c) - { - if (currentLocation == inputLength) return false; - - if (c.Contains(input[currentLocation])) { - currentLocation++; - return true; - } else { - return false; - } - } - - protected bool TryRead(string text) - { - if (TryPeek(text)) { - currentLocation += text.Length; - return true; - } else { - return false; - } - } - - protected bool TryPeekPrevious(char c, int back) - { - if (currentLocation - back == inputLength) return false; - if (currentLocation - back < 0 ) return false; - - return input[currentLocation - back] == c; - } - - protected bool TryPeek(char c) - { - if (currentLocation == inputLength) return false; - - return input[currentLocation] == c; - } - - protected bool TryPeekAnyOf(params char[] chars) - { - if (currentLocation == inputLength) return false; - - return chars.Contains(input[currentLocation]); - } - - protected bool TryPeek(string text) - { - if (!TryPeek(text[0])) return false; // Early exit - - maxTouchedLocation = Math.Max(maxTouchedLocation, currentLocation + (text.Length - 1)); - // The following comparison 'touches' the end of file - it does depend on the end being there - if (currentLocation + text.Length > inputLength) return false; - - return input.Substring(currentLocation, text.Length) == text; - } - - protected bool TryPeekWhiteSpace() - { - if (currentLocation == inputLength) return false; - - char c = input[currentLocation]; - return c == ' ' || c == '\t' || c == '\n' || c == '\r'; - } - - // The move functions do not have to move if already at target - // The move functions allow 'overriding' of the document length - - protected bool TryMoveTo(char c) - { - return TryMoveTo(c, inputLength); - } - - protected bool TryMoveTo(char c, int inputLength) - { - if (currentLocation == inputLength) return false; - int index = input.IndexOf(c, currentLocation, inputLength - currentLocation); - if (index != -1) { - currentLocation = index; - return true; - } else { - currentLocation = inputLength; - return false; - } - } - - protected bool TryMoveToAnyOf(params char[] c) - { - return TryMoveToAnyOf(c, inputLength); - } - - protected bool TryMoveToAnyOf(char[] c, int inputLength) - { - if (currentLocation == inputLength) return false; - int index = input.IndexOfAny(c, currentLocation, inputLength - currentLocation); - if (index != -1) { - currentLocation = index; - return true; - } else { - currentLocation = inputLength; - return false; - } - } - - protected bool TryMoveTo(string text) - { - return TryMoveTo(text, inputLength); - } - - protected bool TryMoveTo(string text, int inputLength) - { - if (currentLocation == inputLength) return false; - int index = input.IndexOf(text, currentLocation, inputLength - currentLocation, StringComparison.Ordinal); - if (index != -1) { - maxTouchedLocation = index + text.Length - 1; - currentLocation = index; - return true; - } else { - currentLocation = inputLength; - return false; - } - } - - protected bool TryMoveToNonWhiteSpace() - { - return TryMoveToNonWhiteSpace(inputLength); - } - - protected bool TryMoveToNonWhiteSpace(int inputLength) - { - while(TryPeekWhiteSpace()) currentLocation++; - return HasMoreData(); - } - - /// - /// Read a name token. - /// The following characters are not allowed: - /// "" End of file - /// " \n\r\t" Whitesapce - /// "=\'\"" Attribute value - /// "<>/?" Tags - /// - /// True if read at least one character - protected bool TryReadName(out string res) - { - int start = currentLocation; - // Keep reading up to invalid character - while(true) { - if (currentLocation == inputLength) break; // Reject end of file - char c = input[currentLocation]; - if (0x41 <= (int)c && (int)c <= 0x7A) { // Accpet 0x41-0x7A (A-Z[\]^_`a-z) - currentLocation++; - continue; - } - if (c == ' ' || c == '\n' || c == '\r' || c == '\t' || // Reject whitesapce - c == '=' || c == '\'' || c == '"' || // Reject attributes - c == '<' || c == '>' || c == '/' || c == '?') { // Reject tags - break; - } else { - currentLocation++; - continue; // Accept other character - } - } - if (start == currentLocation) { - res = string.Empty; - return false; - } else { - res = GetText(start, currentLocation); - return true; - } - } - - protected string GetText(int start, int end) - { - if (end > currentLocation) throw new Exception("Reading ahead of current location"); - if (start == inputLength && end == inputLength) { - return string.Empty; - } else { - return GetCachedString(input.Substring(start, end - start)); - } - } - - Dictionary stringCache = new Dictionary(); - int stringCacheRequestedCount; - int stringCacheRequestedSize; - int stringCacheStoredCount; - int stringCacheStoredSize; - - string GetCachedString(string cached) - { - stringCacheRequestedCount += 1; - stringCacheRequestedSize += 8 + 2 * cached.Length; - // Do not bother with long strings - if (cached.Length <= 32) return cached; - if (stringCache.ContainsKey(cached)) { - // Get the instance from the cache instead - return stringCache[cached]; - } else { - // Add to cache - stringCacheStoredCount += 1; - stringCacheStoredSize += 8 + 2 * cached.Length; - stringCache.Add(cached, cached); - return cached; - } - } - - public void PrintStringCacheStats() - { - XmlParser.Log("String cache: Requested {0} ({1} bytes); Actaully stored {2} ({3} bytes); {4}% stored", stringCacheRequestedCount, stringCacheRequestedSize, stringCacheStoredCount, stringCacheStoredSize, stringCacheRequestedSize == 0 ? 0 : stringCacheStoredSize * 100 / stringCacheRequestedSize); - } - } - - class TagReader: TokenReader - { - XmlParser parser; - Cache cache; - string input; - - public TagReader(XmlParser parser, string input): base(input) - { - this.parser = parser; - this.cache = parser.Cache; - this.input = input; - } - - bool TryReadFromCacheOrNew(out T res) where T: RawObject, new() - { - return TryReadFromCacheOrNew(out res, t => true); - } - - bool TryReadFromCacheOrNew(out T res, Predicate condition) where T: RawObject, new() - { - T cached = cache.GetObject(this.CurrentLocation, 0, condition); - if (cached != null) { - Skip(cached.Length); - res = cached; - return true; - } else { - res = new T(); - return false; - } - } - - void OnParsed(RawObject obj) - { - XmlParser.Log("Parsed {0}", obj); - cache.Add(obj, this.MaxTouchedLocation > this.CurrentLocation ? (int?)this.MaxTouchedLocation : null); - } - - /// - /// Read all tags in the document in a flat sequence. - /// It also includes the text between tags and possibly some properly nested Elements from cache. - /// - public List ReadAllTags() - { - List stream = new List(); - - while(true) { - if (IsEndOfFile()) { - break; - } else if (TryPeek('<')) { - RawElement elem; - if (TryReadFromCacheOrNew(out elem, e => e.IsProperlyNested)) { - stream.Add(elem); - } else { - stream.Add(ReadTag()); - } - } else { - stream.AddRange(ReadText(RawTextType.CharacterData)); - } - } - - return stream; - } - - /// - /// Context: "<" - /// - RawTag ReadTag() - { - AssertHasMoreData(); - - RawTag tag; - if (TryReadFromCacheOrNew(out tag)) return tag; - - tag.StartOffset = this.CurrentLocation; - - // Read the opening bracket - // It identifies the type of tag and parsing behavior for the rest of it - tag.OpeningBracket = ReadOpeningBracket(); - - if (tag.IsStartOrEmptyTag || tag.IsEndTag || tag.IsProcessingInstruction) { - // Read the name - string name; - if (TryReadName(out name)) { - if (!IsValidName(name)) { - OnSyntaxError(tag, this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name); - } - } else { - OnSyntaxError(tag, "Element name expected"); - } - tag.Name = name; - } - - if (tag.IsStartOrEmptyTag || tag.IsEndTag) { - // Read attributes for the tag - while(true) { - // Chech for all forbiden 'name' charcters first - see ReadName - if (IsEndOfFile()) break; - if (TryPeekWhiteSpace()) { - tag.AddChildren(ReadText(RawTextType.WhiteSpace)); - continue; // End of file might be next - } - if (TryPeek('<')) break; - if (TryPeek('>') || TryPeek('/') || TryPeek('?')) break; // End tag - - // We have "=\'\"" or name - read attribute - tag.AddChild(ReadAttribulte()); - } - } else if (tag.IsDocumentType) { - tag.AddChildren(ReadContentOfDTD()); - } else { - int start = this.CurrentLocation; - IEnumerable text; - if (tag.IsComment) { - text = ReadText(RawTextType.Comment); - } else if (tag.IsCData) { - text = ReadText(RawTextType.CData); - } else if (tag.IsProcessingInstruction) { - text = ReadText(RawTextType.ProcessingInstruction); - } else if (tag.IsUnknownBang) { - text = ReadText(RawTextType.UnknownBang); - } else { - throw new Exception(string.Format("Unknown opening bracket '{0}'", tag.OpeningBracket)); - } - // Enumerate - text = text.ToList(); - // Backtrack at complete start - if (IsEndOfFile() || (tag.IsUnknownBang && TryPeek('<'))) { - GoBack(start); - } else { - tag.AddChildren(text); - } - } - - // Read closing bracket - string bracket; - TryReadClosingBracket(out bracket); - tag.ClosingBracket = bracket; - - // Error check - int brStart = this.CurrentLocation - (tag.ClosingBracket ?? string.Empty).Length; - int brEnd = this.CurrentLocation; - if (tag.Name == null) { - // One error was reported already - } else if (tag.IsStartOrEmptyTag) { - if (tag.ClosingBracket != ">" && tag.ClosingBracket != "/>") OnSyntaxError(tag, brStart, brEnd, "'>' or '/>' expected"); - } else if (tag.IsEndTag) { - if (tag.ClosingBracket != ">") OnSyntaxError(tag, brStart, brEnd, "'>' expected"); - } else if (tag.IsComment) { - if (tag.ClosingBracket != "-->") OnSyntaxError(tag, brStart, brEnd, "'-->' expected"); - } else if (tag.IsCData) { - if (tag.ClosingBracket != "]]>") OnSyntaxError(tag, brStart, brEnd, "']]>' expected"); - } else if (tag.IsProcessingInstruction) { - if (tag.ClosingBracket != "?>") OnSyntaxError(tag, brStart, brEnd, "'?>' expected"); - } else if (tag.IsUnknownBang) { - if (tag.ClosingBracket != ">") OnSyntaxError(tag, brStart, brEnd, "'>' expected"); - } else if (tag.IsDocumentType) { - if (tag.ClosingBracket != ">") OnSyntaxError(tag, brStart, brEnd, "'>' expected"); - } else { - throw new Exception(string.Format("Unknown opening bracket '{0}'", tag.OpeningBracket)); - } - - // Attribute name may not apper multiple times - var duplicates = tag.Children.OfType().GroupBy(attr => attr.Name).SelectMany(g => g.Skip(1)); - foreach(RawAttribute attr in duplicates) { - OnSyntaxError(tag, attr.StartOffset, attr.EndOffset, "Attribute with name '{0}' already exists", attr.Name); - } - - tag.EndOffset = this.CurrentLocation; - - OnParsed(tag); - return tag; - } - - /// - /// Reads any of the know opening brackets. (only full bracket) - /// Context: "<" - /// - string ReadOpeningBracket() - { - // We are using a lot of string literals so that the memory instances are shared - int start = this.CurrentLocation; - if (TryRead('<')) { - if (TryRead('/')) { - return " - /// Reads any of the know closing brackets. (only full bracket) - /// Context: any - /// - bool TryReadClosingBracket(out string bracket) - { - // We are using a lot of string literals so that the memory instances are shared - if (TryRead('>')) { - bracket = ">"; - } else if (TryRead("/>")) { - bracket = "/>"; - } else if (TryRead("?>")) { - bracket = "?>"; - } else if (TryRead("-->")) { - bracket = "-->"; - } else if (TryRead("]]>")) { - bracket = "]]>"; - } else { - bracket = string.Empty; - return false; - } - return true; - } - - IEnumerable ReadContentOfDTD() - { - int start = this.CurrentLocation; - while(true) { - if (IsEndOfFile()) break; // End of file - TryMoveToNonWhiteSpace(); // Skip whitespace - if (TryRead('\'')) TryMoveTo('\''); // Skip single quoted string TODO: Bug - if (TryRead('\"')) TryMoveTo('\"'); // Skip single quoted string - if (TryRead('[')) { // Start of nested infoset - // Reading infoset - while(true) { - if (IsEndOfFile()) break; - TryMoveToAnyOf('<', ']'); - if (TryPeek('<')) { - if (start != this.CurrentLocation) { // Two following tags - yield return MakeText(start, this.CurrentLocation); - } - yield return ReadTag(); - start = this.CurrentLocation; - } - if (TryPeek(']')) break; - } - } - TryRead(']'); // End of nested infoset - if (TryPeek('>')) break; // Proper closing - if (TryPeek('<')) break; // Malformed XML - TryMoveNext(); // Skip anything else - } - if (start != this.CurrentLocation) { - yield return MakeText(start, this.CurrentLocation); - } - } - - /// - /// Context: name or "=\'\"" - /// - RawAttribute ReadAttribulte() - { - AssertHasMoreData(); - - RawAttribute attr; - if (TryReadFromCacheOrNew(out attr)) return attr; - - attr.StartOffset = this.CurrentLocation; - - // Read name - string name; - if (TryReadName(out name)) { - if (!IsValidName(name)) { - OnSyntaxError(attr, this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name); - } - } else { - OnSyntaxError(attr, "Attribute name expected"); - } - attr.Name = name; - - // Read equals sign and surrounding whitespace - int checkpoint = this.CurrentLocation; - TryMoveToNonWhiteSpace(); - if (TryRead('=')) { - int chk2 = this.CurrentLocation; - TryMoveToNonWhiteSpace(); - if (!TryPeek('"') && !TryPeek('\'')) { - // Do not read whitespace if quote does not follow - GoBack(chk2); - } - attr.EqualsSign = GetText(checkpoint, this.CurrentLocation); - } else { - GoBack(checkpoint); - OnSyntaxError(attr, "'=' expected"); - attr.EqualsSign = string.Empty; - } - - // Read attribute value - int start = this.CurrentLocation; - char quoteChar = TryPeek('"') ? '"' : '\''; - bool startsWithQuote; - if (TryRead(quoteChar)) { - startsWithQuote = true; - int valueStart = this.CurrentLocation; - TryMoveToAnyOf(quoteChar, '<'); - if (TryRead(quoteChar)) { - if (!TryPeekAnyOf(' ', '\t', '\n', '\r', '/', '>', '?')) { - if (TryPeekPrevious('=', 2) || (TryPeekPrevious('=', 3) && TryPeekPrevious(' ', 2))) { - // This actually most likely means that we are in the next attribute value - GoBack(valueStart); - ReadAttributeValue(quoteChar); - if (TryRead(quoteChar)) { - OnSyntaxError(attr, "White space or end of tag expected"); - } else { - OnSyntaxError(attr, "Quote {0} expected (or add whitespace after the following one)", quoteChar); - } - } else { - OnSyntaxError(attr, "White space or end of tag expected"); - } - } - } else { - // '<' or end of file - GoBack(valueStart); - ReadAttributeValue(quoteChar); - OnSyntaxError(attr, "Quote {0} expected", quoteChar); - } - } else { - startsWithQuote = false; - int valueStart = this.CurrentLocation; - ReadAttributeValue(null); - TryRead('\"'); - TryRead('\''); - if (valueStart == this.CurrentLocation) { - OnSyntaxError(attr, "Attribute value expected"); - } else { - OnSyntaxError(attr, valueStart, this.CurrentLocation, "Attribute value must be quoted"); - } - } - attr.QuotedValue = GetText(start, this.CurrentLocation); - attr.Value = Unquote(attr.QuotedValue); - attr.Value = Dereference(attr, attr.Value, startsWithQuote ? start + 1 : start); - - attr.EndOffset = this.CurrentLocation; - - OnParsed(attr); - return attr; - } - - /// - /// Read everything up to quote (excluding), opening/closing tag or attribute signature - /// - void ReadAttributeValue(char? quote) - { - while(true) { - if (IsEndOfFile()) return; - // What is next? - int start = this.CurrentLocation; - TryMoveToNonWhiteSpace(); // Read white space (if any) - if (quote.HasValue) { - if (TryPeek(quote.Value)) return; - } else { - if (TryPeek('"') || TryPeek('\'')) return; - } - // Opening/closing tag - if (TryPeekAnyOf('<', '/', '>')) { - GoBack(start); - return; - } - // Try reading attribute signature - string name; - if (TryReadName(out name)) { - int nameEnd = this.CurrentLocation; - if (TryMoveToNonWhiteSpace() && TryRead("=") && - TryMoveToNonWhiteSpace() && TryPeekAnyOf('"', '\'')) - { - // Start of attribute. Great - GoBack(start); - return; // Done - } else { - // Just some gargabe - make it part of the value - GoBack(nameEnd); - continue; // Read more - } - } - TryMoveNext(); // Accept everyting else - } - } - - RawText MakeText(int start, int end) - { - XmlParser.DebugAssert(end > start, "Empty text"); - - RawText text = new RawText() { - StartOffset = start, - EndOffset = end, - EscapedValue = GetText(start, end), - Type = RawTextType.Other - }; - - OnParsed(text); - return text; - } - - const int maxEntityLength = 12; // The longest build-in one is 10 ("􏿿") - const int maxTextFragmentSize = 8; - const int lookAheadLenght = (3 * maxTextFragmentSize) / 2; // More so that we do not get small "what was inserted" fragments - - /// - /// Reads text and optionaly separates it into fragments. - /// It can also return empty set for no appropriate text input. - /// Make sure you enumerate it only once - /// - IEnumerable ReadText(RawTextType type) - { - bool lookahead = false; - while(true) { - RawText text; - if (TryReadFromCacheOrNew(out text, t => t.Type == type)) { - // Cached text found - yield return text; - continue; // Read next fragment; the method can handle "no text left" - } - text.Type = type; - - // Limit the reading to just a few characters - // (the first character not to be read) - int fragmentEnd = Math.Min(this.CurrentLocation + maxTextFragmentSize, this.InputLength); - - // Look if some futher text has been already processed and align so that - // we hit that chache point. It is expensive so it is off for the first run - if (lookahead) { - // Note: Must fit entity - RawObject nextFragment = cache.GetObject(this.CurrentLocation + maxEntityLength, lookAheadLenght - maxEntityLength, t => t.Type == type); - if (nextFragment != null) { - fragmentEnd = Math.Min(nextFragment.StartOffset, this.InputLength); - XmlParser.Log("Parsing only text ({0}-{1}) because later text was already processed", this.CurrentLocation, fragmentEnd); - } - } - lookahead = true; - - text.StartOffset = this.CurrentLocation; - int start = this.CurrentLocation; - - // Try move to the terminator given by the context - if (type == RawTextType.WhiteSpace) { - TryMoveToNonWhiteSpace(fragmentEnd); - } else if (type == RawTextType.CharacterData) { - while(true) { - if (!TryMoveToAnyOf(new char[] {'<', ']'}, fragmentEnd)) break; // End of fragment - if (TryPeek('<')) break; - if (TryPeek(']')) { - if (TryPeek("]]>")) { - OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 3, "']]>' is not allowed in text"); - } - TryMoveNext(); - continue; - } - throw new Exception("Infinite loop"); - } - } else if (type == RawTextType.Comment) { - // Do not report too many errors - bool errorReported = false; - while(true) { - if (!TryMoveTo('-', fragmentEnd)) break; // End of fragment - if (TryPeek("-->")) break; - if (TryPeek("--") && !errorReported) { - OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 2, "'--' is not allowed in comment"); - errorReported = true; - } - TryMoveNext(); - } - } else if (type == RawTextType.CData) { - while(true) { - // We can not use use TryMoveTo("]]>", fragmentEnd) because it may incorectly accept "]" at the end of fragment - if (!TryMoveTo(']', fragmentEnd)) break; // End of fragment - if (TryPeek("]]>")) break; - TryMoveNext(); - } - } else if (type == RawTextType.ProcessingInstruction) { - while(true) { - if (!TryMoveTo('?', fragmentEnd)) break; // End of fragment - if (TryPeek("?>")) break; - TryMoveNext(); - } - } else if (type == RawTextType.UnknownBang) { - TryMoveToAnyOf(new char[] {'<', '>'}, fragmentEnd); - } else { - throw new Exception("Uknown type " + type); - } - - // Terminal found or real end was reached; - bool finished = this.CurrentLocation < fragmentEnd || IsEndOfFile(); - - if (!finished) { - // We have to continue reading more text fragments - - // If there is entity reference, make sure the next segment starts with it to prevent framentation - int entitySearchStart = Math.Max(start + 1 /* data for us */, this.CurrentLocation - maxEntityLength); - int entitySearchLength = this.CurrentLocation - entitySearchStart; - if (entitySearchLength > 0) { - // Note that LastIndexOf works backward - int entityIndex = input.LastIndexOf('&', this.CurrentLocation - 1, entitySearchLength); - if (entityIndex != -1) { - GoBack(entityIndex); - } - } - } - - text.EscapedValue = GetText(start, this.CurrentLocation); - if (type == RawTextType.CharacterData) { - text.Value = Dereference(text, text.EscapedValue, start); - } else { - text.Value = text.EscapedValue; - } - text.EndOffset = this.CurrentLocation; - - if (text.EscapedValue.Length > 0) { - OnParsed(text); - yield return text; - } - - if (finished) { - yield break; - } - } - } - - #region Helper methods - - void OnSyntaxError(RawObject obj, string message, params object[] args) - { - OnSyntaxError(obj, this.CurrentLocation, this.CurrentLocation + 1, message, args); - } - - public static void OnSyntaxError(RawObject obj, int start, int end, string message, params object[] args) - { - if (end <= start) end = start + 1; - XmlParser.Log("Syntax error ({0}-{1}): {2}", start, end, string.Format(message, args)); - obj.AddSyntaxError(new SyntaxError() { - Object = obj, - StartOffset = start, - EndOffset = end, - Message = string.Format(message, args), - }); - } - - static bool IsValidName(string name) - { - try { - System.Xml.XmlConvert.VerifyName(name); - return true; - } catch (System.Xml.XmlException) { - return false; - } - } - - /// Remove quoting from the given string - static string Unquote(string quoted) - { - if (string.IsNullOrEmpty(quoted)) return string.Empty; - char first = quoted[0]; - if (quoted.Length == 1) return (first == '"' || first == '\'') ? string.Empty : quoted; - char last = quoted[quoted.Length - 1]; - if (first == '"' || first == '\'') { - if (first == last) { - // Remove both quotes - return quoted.Substring(1, quoted.Length - 2); - } else { - // Remove first quote - return quoted.Remove(0, 1); - } - } else { - if (last == '"' || last == '\'') { - // Remove last quote - return quoted.Substring(0, quoted.Length - 1); - } else { - // Keep whole string - return quoted; - } - } - } - - string Dereference(RawObject owner, string text, int textLocation) - { - StringBuilder sb = null; // The dereferenced text so far (all up to 'curr') - int curr = 0; - while(true) { - // Reached end of input - if (curr == text.Length) { - if (sb != null) { - return sb.ToString(); - } else { - return text; - } - } - - // Try to find reference - int start = text.IndexOf('&', curr); - - // No more references found - if (start == -1) { - if (sb != null) { - sb.Append(text, curr, text.Length - curr); // Add rest - return sb.ToString(); - } else { - return text; - } - } - - // Append text before the enitiy reference - if (sb == null) sb = new StringBuilder(text.Length); - sb.Append(text, curr, start - curr); - curr = start; - - // Process the entity - int errorLoc = textLocation + sb.Length; - - // Find entity name - int end = text.IndexOfAny(new char[] {'&', ';'}, start + 1, Math.Min(maxEntityLength, text.Length - (start + 1))); - if (end == -1 || text[end] == '&') { - // Not found - OnSyntaxError(owner, errorLoc, errorLoc + 1, "Entity reference must be terminated with ';'"); - // Keep '&' - sb.Append('&'); - curr++; - continue; // Restart and next character location - } - string name = text.Substring(start + 1, end - (start + 1)); - - // Resolve the name - string replacement; - if (name == "amp") { - replacement = "&"; - } else if (name == "lt") { - replacement = "<"; - } else if (name == "gt") { - replacement = ">"; - } else if (name == "apos") { - replacement = "'"; - } else if (name == "quot") { - replacement = "\""; - } else if (name.Length > 0 && name[0] == '#') { - int num; - if (name.Length > 1 && name[1] == 'x') { - if (!int.TryParse(name.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture.NumberFormat, out num)) { - num = -1; - OnSyntaxError(owner, errorLoc + 3, errorLoc + 1 + name.Length, "Hexadecimal code of unicode character expected"); - } - } else { - if (!int.TryParse(name.Substring(1), NumberStyles.None, CultureInfo.InvariantCulture.NumberFormat, out num)) { - num = -1; - OnSyntaxError(owner, errorLoc + 2, errorLoc + 1 + name.Length, "Numeric code of unicode character expected"); - } - } - if (num != -1) { - try { - replacement = char.ConvertFromUtf32(num); - } catch (ArgumentOutOfRangeException) { - replacement = null; - OnSyntaxError(owner, errorLoc + 2, errorLoc + 1 + name.Length, "Invalid unicode character U+{0:X} ({0})", num); - } - } else { - replacement = null; - } - } else { - replacement = null; - if (parser.EntityReferenceIsError) { - OnSyntaxError(owner, errorLoc, errorLoc + 1 + name.Length + 1, "Unknown entity reference '{0}'", name); - } - } - - // Append the replacement to output - if (replacement != null) { - sb.Append(replacement); - } else { - sb.Append('&'); - sb.Append(name); - sb.Append(';'); - } - curr = end + 1; - continue; - } - } - - #endregion - } - - class TagMatchingHeuristics - { - const int maxConfigurationCount = 10; - - XmlParser parser; - Cache cache; - string input; - List tags; - - public TagMatchingHeuristics(XmlParser parser, string input, List tags) - { - this.parser = parser; - this.cache = parser.Cache; - this.input = input; - this.tags = tags; - } - - public RawDocument ReadDocument() - { - RawDocument doc = new RawDocument() { Parser = parser }; - - XmlParser.Log("Flat stream: {0}", PrintObjects(tags)); - List valid = MatchTags(tags); - XmlParser.Log("Fixed stream: {0}", PrintObjects(valid)); - IEnumerator validStream = valid.GetEnumerator(); - validStream.MoveNext(); // Move to first - while(true) { - // End of stream? - try { - if (validStream.Current == null) break; - } catch (InvalidCastException) { - break; - } - doc.AddChild(ReadTextOrElement(validStream)); - } - - if (doc.Children.Count > 0) { - doc.StartOffset = doc.FirstChild.StartOffset; - doc.EndOffset = doc.LastChild.EndOffset; - } - - XmlParser.Log("Constructed {0}", doc); - cache.Add(doc, null); - return doc; - } - - RawObject ReadSingleObject(IEnumerator objStream) - { - RawObject obj = objStream.Current; - objStream.MoveNext(); - return obj; - } - - RawObject ReadTextOrElement(IEnumerator objStream) - { - RawObject curr = objStream.Current; - if (curr is RawText || curr is RawElement) { - return ReadSingleObject(objStream); - } else { - RawTag currTag = (RawTag)curr; - if (currTag == StartTagPlaceholder) { - return ReadElement(objStream); - } else if (currTag.IsStartOrEmptyTag) { - return ReadElement(objStream); - } else { - return ReadSingleObject(objStream); - } - } - } - - RawElement ReadElement(IEnumerator objStream) - { - RawElement element = new RawElement(); - element.IsProperlyNested = true; - - // Read start tag - RawTag startTag = ReadSingleObject(objStream) as RawTag; - XmlParser.DebugAssert(startTag != null, "Start tag expected"); - XmlParser.DebugAssert(startTag.IsStartOrEmptyTag || startTag == StartTagPlaceholder, "Start tag expected"); - if (startTag == StartTagPlaceholder) { - element.HasStartOrEmptyTag = false; - element.IsProperlyNested = false; - TagReader.OnSyntaxError(element, objStream.Current.StartOffset, objStream.Current.EndOffset, - "Matching openning tag was not found"); - } else { - element.HasStartOrEmptyTag = true; - element.AddChild(startTag); - } - - // Read content and end tag - if (element.StartTag.IsStartTag || startTag == StartTagPlaceholder) { - while(true) { - RawTag currTag = objStream.Current as RawTag; // Peek - if (currTag == EndTagPlaceholder) { - TagReader.OnSyntaxError(element, element.LastChild.EndOffset, element.LastChild.EndOffset, - "Expected ''", element.StartTag.Name); - ReadSingleObject(objStream); - element.HasEndTag = false; - element.IsProperlyNested = false; - break; - } else if (currTag != null && currTag.IsEndTag) { - if (currTag.Name != element.StartTag.Name) { - TagReader.OnSyntaxError(element, currTag.StartOffset + 2, currTag.StartOffset + 2 + currTag.Name.Length, - "Expected '{0}'. End tag must have same name as start tag.", element.StartTag.Name); - } - element.AddChild(ReadSingleObject(objStream)); - element.HasEndTag = true; - break; - } - RawObject nested = ReadTextOrElement(objStream); - if (nested is RawElement) { - if (!((RawElement)nested).IsProperlyNested) - element.IsProperlyNested = false; - element.AddChildren(Split((RawElement)nested).ToList()); - } else { - element.AddChild(nested); - } - } - } else { - element.HasEndTag = false; - } - - element.StartOffset = element.FirstChild.StartOffset; - element.EndOffset = element.LastChild.EndOffset; - - XmlParser.Log("Constructed {0}", element); - cache.Add(element, null); // Need all elements in cache for offset tracking - return element; - } - - IEnumerable Split(RawElement elem) - { - int myIndention = GetIndentLevel(elem); - // If has virtual end and is indented - if (!elem.HasEndTag && myIndention != -1) { - int lastAccepted = 0; // Accept start tag - while (lastAccepted + 1 < elem.Children.Count - 1 /* no end tag */) { - RawObject nextItem = elem.Children[lastAccepted + 1]; - if (nextItem is RawText) { - lastAccepted++; continue; // Accept - } else { - // Include all more indented items - if (GetIndentLevel(nextItem) > myIndention) { - lastAccepted++; continue; // Accept - } else { - break; // Reject - } - } - } - // Accepted everything? - if (lastAccepted + 1 == elem.Children.Count - 1) { - yield return elem; - yield break; - } - XmlParser.Log("Splitting {0} - take {1} of {2} nested", elem, lastAccepted, elem.Children.Count - 2); - RawElement topHalf = new RawElement(); - topHalf.HasStartOrEmptyTag = elem.HasStartOrEmptyTag; - topHalf.HasEndTag = elem.HasEndTag; - topHalf.AddChildren(elem.Children.Take(lastAccepted + 1)); // Start tag + nested - topHalf.StartOffset = topHalf.FirstChild.StartOffset; - topHalf.EndOffset = topHalf.LastChild.EndOffset; - TagReader.OnSyntaxError(topHalf, topHalf.LastChild.EndOffset, topHalf.LastChild.EndOffset, - "Expected ''", topHalf.StartTag.Name); - - XmlParser.Log("Constructed {0}", topHalf); - cache.Add(topHalf, null); - yield return topHalf; - for(int i = lastAccepted + 1; i < elem.Children.Count - 1; i++) { - yield return elem.Children[i]; - } - } else { - yield return elem; - } - } - - int GetIndentLevel(RawObject obj) - { - int offset = obj.StartOffset - 1; - int level = 0; - while(true) { - if (offset < 0) break; - char c = input[offset]; - if (c == ' ') { - level++; - } else if (c == '\t') { - level += 4; - } else if (c == '\r' || c == '\n') { - break; - } else { - return -1; - } - offset--; - } - return level; - } - - /// - /// Stack of still unmatched start tags. - /// It includes the cost and backtack information. - /// - class Configuration - { - /// Unmatched start tags - public ImmutableStack StartTags { get; set; } - /// Properly nested tags - public ImmutableStack Document { get; set; } - /// Number of needed modificaitons to the document - public int Cost { get; set; } - } - - /// - /// Dictionary which stores the cheapest configuration - /// - class Configurations: Dictionary, Configuration> - { - public Configurations() - { - } - - public Configurations(IEnumerable configs) - { - foreach(Configuration config in configs) { - this.Add(config); - } - } - - /// Overwrite only if cheaper - public void Add(Configuration newConfig) - { - Configuration oldConfig; - if (this.TryGetValue(newConfig.StartTags, out oldConfig)) { - if (newConfig.Cost < oldConfig.Cost) { - this[newConfig.StartTags] = newConfig; - } - } else { - base.Add(newConfig.StartTags, newConfig); - } - } - - public override string ToString() - { - StringBuilder sb = new StringBuilder(); - foreach(var kvp in this) { - sb.Append("\n - '"); - foreach(RawTag startTag in kvp.Value.StartTags.Reverse()) { - sb.Append('<'); - sb.Append(startTag.Name); - sb.Append('>'); - } - sb.AppendFormat("' = {0}", kvp.Value.Cost); - } - return sb.ToString(); - } - } - - // Tags used to guide the element creation - readonly RawTag StartTagPlaceholder = new RawTag(); - readonly RawTag EndTagPlaceholder = new RawTag(); - - /// - /// Add start or end tag placeholders so that the documment is properly nested - /// - List MatchTags(IEnumerable objs) - { - Configurations configurations = new Configurations(); - configurations.Add(new Configuration { - StartTags = ImmutableStack.Empty, - Document = ImmutableStack.Empty, - Cost = 0, - }); - foreach(RawObject obj in objs) { - configurations = ProcessObject(configurations, obj); - } - // Close any remaining start tags - foreach(Configuration conifg in configurations.Values) { - while(!conifg.StartTags.IsEmpty) { - conifg.StartTags = conifg.StartTags.Pop(); - conifg.Document = conifg.Document.Push(EndTagPlaceholder); - conifg.Cost += 1; - } - } - XmlParser.Log("Configurations after closing all remaining tags:" + configurations.ToString()); - Configuration bestConfig = configurations.Values.OrderBy(v => v.Cost).First(); - XmlParser.Log("Best configuration has cost {0}", bestConfig.Cost); - - return bestConfig.Document.Reverse().ToList(); - } - - /// Get posible configurations after considering fiven object - Configurations ProcessObject(Configurations oldConfigs, RawObject obj) - { - XmlParser.Log("Processing {0}", obj); - - RawTag tag = obj as RawTag; - XmlParser.Assert(obj is RawTag || obj is RawText || obj is RawElement, obj.GetType().Name + " not expected"); - if (obj is RawElement) - XmlParser.Assert(((RawElement)obj).IsProperlyNested, "Element not proprly nested"); - - Configurations newConfigs = new Configurations(); - - foreach(var kvp in oldConfigs) { - Configuration oldConfig = kvp.Value; - var oldStartTags = oldConfig.StartTags; - var oldDocument = oldConfig.Document; - int oldCost = oldConfig.Cost; - - if (tag != null && tag.IsStartTag) { - newConfigs.Add(new Configuration { // Push start-tag (cost 0) - StartTags = oldStartTags.Push(tag), - Document = oldDocument.Push(tag), - Cost = oldCost, - }); - } else if (tag != null && tag.IsEndTag) { - newConfigs.Add(new Configuration { // Ignore (cost 1) - StartTags = oldStartTags, - Document = oldDocument.Push(StartTagPlaceholder).Push(tag), - Cost = oldCost + 1, - }); - if (!oldStartTags.IsEmpty && oldStartTags.Peek().Name != tag.Name) { - newConfigs.Add(new Configuration { // Pop 1 item (cost 1) - not mathcing - StartTags = oldStartTags.Pop(), - Document = oldDocument.Push(tag), - Cost = oldCost + 1, - }); - } - int popedCount = 0; - var startTags = oldStartTags; - var doc = oldDocument; - foreach(RawTag poped in oldStartTags) { - popedCount++; - if (poped.Name == tag.Name) { - newConfigs.Add(new Configuration { // Pop 'x' items (cost x-1) - last one is matching - StartTags = startTags.Pop(), - Document = doc.Push(tag), - Cost = oldCost + popedCount - 1, - }); - } - startTags = startTags.Pop(); - doc = doc.Push(EndTagPlaceholder); - } - } else { - // Empty tag or other tag type or text or properly nested element - newConfigs.Add(new Configuration { // Ignore (cost 0) - StartTags = oldStartTags, - Document = oldDocument.Push(obj), - Cost = oldCost, - }); - } - } - - // Log("New configurations:" + newConfigs.ToString()); - - Configurations bestNewConfigurations = new Configurations( - newConfigs.Values.OrderBy(v => v.Cost).Take(maxConfigurationCount) - ); - - XmlParser.Log("Best new configurations:" + bestNewConfigurations.ToString()); - - return bestNewConfigurations; - } - - #region Helper methods - - string PrintObjects(IEnumerable objs) - { - StringBuilder sb = new StringBuilder(); - foreach(RawObject obj in objs) { - if (obj is RawTag) { - if (obj == StartTagPlaceholder) { - sb.Append("#StartTag#"); - } else if (obj == EndTagPlaceholder) { - sb.Append("#EndTag#"); - } else { - sb.Append(((RawTag)obj).OpeningBracket); - sb.Append(((RawTag)obj).Name); - sb.Append(((RawTag)obj).ClosingBracket); - } - } else if (obj is RawElement) { - sb.Append('['); - sb.Append(PrintObjects(((RawElement)obj).Children)); - sb.Append(']'); - } else if (obj is RawText) { - sb.Append('~'); - } else { - throw new Exception("Should not be here: " + obj); - } - } - return sb.ToString(); - } - - #endregion - } }