Friday 21 January 2011

A simple paged IList, arrays of doubles and the LOH

Turns out large (1000+ elements) arrays of doubles get put on the Large Object Heap, rather than living out their days on the normal heap. Apparently, it’s an optimization by Microsoft to keep the arrays aligned to 8-byte boundaries (as the LOH is collected but not compacted).

This leads to some exciting behaviour if you’re using a List<double> that reaches 1000 doubles. Especially if you’re not thinking about it, and just allowing it to expand, as you’ll keep recreating new arrays whenever you reach the array size limit. Did MS choose 1000 or 1024? Not sure.

A colleague hit this recently in our production code. In our specific case the right thing to do was stop using so many large arrays of doubles, as they weren’t needed.

Alternatively, one way to avoid fragmentation and LOH usage would be to preallocate large arrays or Lists of doubles – if you know how large an array you’re going to need.

Another way would be to create a paged IList implementation, one keeps its contents in a set of arrays that are sized just under the maximum to avoid LOH use.

Yes, it subverts Microsoft’s performance enhancement, and yes, I’m sure they know what they’re doing. I’m not sure what they’re doing though, so if nothing else it means I can run some comparisons, iterating through a list of doubles, for example.

Conveniently it also makes another nice workout – some code below, using a List<T[]> as the underlying page collection. I've not implemented Remove, RemoveAt and Insert methods as I didn't immediately need them for the tests I had in mind.

public class PagedList<T> : IList<T>
{
 private int _pageSize = 100;

 private int _currentPage = 0;

 private int _currentPageHighwatermark = 0;

 private List<T[]> _underlyingList;

 public PagedList(int pageSize)
 {
  _pageSize = pageSize;
  _underlyingList = new List();
  _underlyingList.Add(new T[_pageSize]);      
                    // if it has pages already, we'll be tagging an 
                    // extra empty page on the end ...
  _currentPage = _underlyingList.Count - 1;                   
                    // also, if it has pages already then the 
                    // highwatermark will be wrong            
 }

 /// <returns>Index of element, -1 if not found</returns>
 public int IndexOf(T item) // consider boxing, unboxing
 {
  int index = 0;
  var iter = _underlyingList.GetEnumerator();
  while(iter.MoveNext())
  {
   foreach(var t in iter.Current)
   {
    if(t.Equals(item))
    {
     return index;
    }
    index++;
   }
  }
  return -1;
 }

 public void Insert(int index, T item)
 {
  throw new NotImplementedException();
 }

 public void RemoveAt(int index)
 {
  throw new NotImplementedException();
 }

 public T this[int index]
 {
  get
  {
   int desiredPage = Math.Abs(index/_pageSize);
   int desiredPageIndex = index%_pageSize;
   if (desiredPage * _pageSize + desiredPageIndex > 
                                _currentPage * _pageSize + _currentPageHighwatermark 
                                || index < 0)
   {
    throw new IndexOutOfRangeException("Index was out of bounds");
   }
   return _underlyingList[desiredPage][desiredPageIndex]; // Consider what to do about nulls here
  }
  set
  {
   // Check whether index is out of range
   int desiredPage = Math.Abs(index / _pageSize);
   int desiredPageIndex = index % _pageSize;
   if (desiredPage * _pageSize + desiredPageIndex > 
                                _currentPage * _pageSize + _currentPageHighwatermark 
                                || index < 0)   {
    throw new IndexOutOfRangeException("Index was out of bounds");
   }
   _underlyingList[desiredPage][desiredPageIndex] = value;
  }
 }

 public void Add(T item)
 {
  var page = _underlyingList.Last();
  if (_currentPageHighwatermark == _pageSize)
  {
   _underlyingList.Add(new T[_pageSize]);
   _currentPage++;
   _currentPageHighwatermark = 0;
  }
  _underlyingList[_currentPage][_currentPageHighwatermark] = item;
  _currentPageHighwatermark++;
 }

 public void Clear()
 {
  _underlyingList.Clear();
  _underlyingList.Add(new T[_pageSize]);
  _currentPage = 0;
  _currentPageHighwatermark = 0;
 }

 public bool Contains(T item)
 {
  var iter = _underlyingList.GetEnumerator();
  while (iter.MoveNext())
  {
   foreach (var t in iter.Current)
   {
    if (t.Equals(item))
    {
     return true;
    }
   }
  }
  return false;
 }

        public void CopyTo(T[] array, int arrayIndex)
        {
            // Do out-of-bounds checking on arrayIndex, sizeof array
            for(int i = arrayIndex; i < Count; i++)
            {
                array[i - arrayIndex] = this[i];
            }
        }

 public int Count
 {
  get { return (_currentPage*_pageSize) + _currentPageHighwatermark; }
 }

 public bool IsReadOnly
 {
  get { throw new NotImplementedException(); }
 }

 public bool Remove(T item)
 {
  throw new NotImplementedException();
 }

 public IEnumerator<T> GetEnumerator()
 {
  return new PagedListEnumerator<T>(this);
 }

 private class PagedListEnumerator<T> : IEnumerator<T>
 {
  private PagedList<T> _underlyingPagedList;
  private int _cursor;

  public PagedListEnumerator(PagedList<T> list)
  {
   _underlyingPagedList = list;
   _cursor = -1;
  }

  public void Dispose()
  {
  }

  public bool MoveNext()
  {
   if (_cursor < _underlyingPagedList.Count)
    _cursor++;

   return (!(_cursor == _underlyingPagedList.Count));
  }

  public void Reset()
  {
   _cursor = -1;
  }

  public T Current
  {
   get { return _underlyingPagedList[_cursor]; }   // Handle fail when out of bounds
  }

  object IEnumerator.Current
  {
   get { return Current; }
  }
 }

        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }
}

No comments: