演算法基礎<三> 字典
查詢
統計多本書中每個單詞出現的頻率,使用符號表(字典),但是怎麼快速定位到Key是一個難題,當Key資料量上億了之後還能快速定位嗎
查詢的成本模型:比較次數,陣列的訪問次數
字典
/// <summary>
/// Contract shared by the symbol-table (dictionary) implementations in this file.
/// </summary>
/// <typeparam name="TKey">Type of the keys.</typeparam>
/// <typeparam name="TValue">Type of the values associated with the keys.</typeparam>
public interface ISearchDict<TKey, TValue> : IDisposable
{
    /// <summary>Whether the table currently holds no entries.</summary>
    bool IsEmpty { get; }

    /// <summary>Number of entries in the table.</summary>
    int Length { get; }

    /// <summary>Reports whether <paramref name="key"/> is present.</summary>
    bool Contains(TKey key);

    /// <summary>Indexer form of reading and writing the value stored for a key.</summary>
    TValue this[TKey key] { get; set; }

    /// <summary>Reads the value stored for <paramref name="key"/>.</summary>
    TValue Get(TKey key);

    /// <summary>Stores <paramref name="value"/> under <paramref name="key"/>.</summary>
    void Add(TKey key, TValue value);

    /// <summary>Removes the entry stored under <paramref name="key"/>.</summary>
    void Delete(TKey key);
}
無序連結串列字典順序查詢
值得注意的是:這邊使用遞迴實現刪除,這個設計非常巧妙
/// <summary>
/// Symbol table backed by an unordered singly linked list and searched sequentially.
/// New keys are pushed at the head of the list; every lookup walks the chain from the head.
/// </summary>
/// <typeparam name="TKey">Key type; keys are compared with the default equality comparer.</typeparam>
/// <typeparam name="TValue">Value type.</typeparam>
public class SequentialSearchSTNet<TKey, TValue> : ISearchDict<TKey, TValue>
{
    // Fully qualified so the class does not need a using directive for System.Collections.Generic.
    private static readonly System.Collections.Generic.EqualityComparer<TKey> KeyComparer =
        System.Collections.Generic.EqualityComparer<TKey>.Default;

    private int _length;
    private Node _first;

    /// <summary>One key/value pair plus the link to the next pair in the chain.</summary>
    private sealed class Node
    {
        public TKey Key { get; }
        public TValue Value { get; set; }
        public Node Next { get; set; }

        public Node(TKey key, TValue value, Node next)
        {
            Key = key;
            Value = value;
            Next = next;
        }
    }

    /// <summary>Creates an empty table.</summary>
    public SequentialSearchSTNet()
    {
        _length = 0;
        _first = null;
    }

    /// <summary>True when the table holds no entries.</summary>
    public bool IsEmpty => _length == 0;

    /// <summary>Number of entries currently stored.</summary>
    public int Length => _length;

    /// <summary>
    /// Enumerates the stored keys from the head of the list to its tail.
    /// The table must not be modified while the sequence is being enumerated.
    /// </summary>
    public System.Collections.Generic.IEnumerable<TKey> Keys
    {
        get
        {
            for (Node x = _first; x != null; x = x.Next)
            {
                yield return x.Key;
            }
        }
    }

    /// <summary>Gets or sets the value stored under <paramref name="key"/>.</summary>
    public TValue this[TKey key]
    {
        get => Get(key);
        set => Add(key, value);
    }

    /// <summary>
    /// Inserts a new pair at the head of the list, or overwrites the value when the key already exists.
    /// </summary>
    /// <exception cref="ArgumentNullException">The key or the value is null.</exception>
    public void Add(TKey key, TValue value)
    {
        // Comparing an unconstrained type parameter with null is legal C#; comparing it with
        // the `default` literal is not (CS8761), which is why null checks are used throughout.
        if (key == null) throw new ArgumentNullException(nameof(key), "this key or value is default value");
        if (value == null) throw new ArgumentNullException(nameof(value), "this key or value is default value");

        Node hit = find(key);
        if (hit != null)
        {
            hit.Value = value;
            return;
        }
        _first = new Node(key, value, _first);
        _length++;
    }

    /// <summary>Reports whether <paramref name="key"/> is stored in the table.</summary>
    /// <exception cref="ArgumentNullException">The key is null.</exception>
    public bool Contains(TKey key)
    {
        if (key == null) throw new ArgumentNullException(nameof(key), " this key is default value");
        return find(key) != null;
    }

    /// <summary>
    /// Returns the value stored under <paramref name="key"/>, or the default value of
    /// <typeparamref name="TValue"/> when the key is null or absent.
    /// </summary>
    public TValue Get(TKey key)
    {
        if (key == null) return default;
        Node hit = find(key);
        if (hit == null) return default;
        return hit.Value;
    }

    /// <summary>Removes the entry for <paramref name="key"/>; does nothing when the key is absent.</summary>
    /// <exception cref="ArgumentNullException">The key is null.</exception>
    public void Delete(TKey key)
    {
        if (key == null) throw new ArgumentNullException(nameof(key), " this key is default value");
        _first = delete(_first, key);
    }

    /// <summary>
    /// Recursive removal: returns the chain that starts at <paramref name="x"/> with the node
    /// holding <paramref name="key"/> unlinked. Every caller re-attaches the returned chain, so
    /// the matching node is skipped simply by returning its successor.
    /// Recursion depth equals the number of nodes visited before the match.
    /// </summary>
    private Node delete(Node x, TKey key)
    {
        if (x == null) return null;
        if (KeyComparer.Equals(key, x.Key))
        {
            _length--;
            return x.Next;
        }
        x.Next = delete(x.Next, key);
        return x;
    }

    /// <summary>Walks the chain and returns the node holding <paramref name="key"/>, or null.</summary>
    private Node find(TKey key)
    {
        for (Node x = _first; x != null; x = x.Next)
        {
            if (KeyComparer.Equals(key, x.Key)) return x;
        }
        return null;
    }

    /// <summary>Drops every entry so the nodes become unreachable.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Clears the list when called from <see cref="Dispose()"/>.</summary>
    protected virtual void Dispose(bool disposing)
    {
        if (!disposing) return;
        // Releasing the head is enough: the rest of the chain is only reachable through it.
        _first = null;
        _length = 0;
    }
}
命題A:在含有N對鍵值的基於(無序)連結串列的符號表中,未命中的查詢和插入操作都需要N次比較。命中的查詢在最壞情況下需要N次比較。特別的,向一個空表插入N個不同的鍵需要~N^2/2次比較。
推論:向一個空表中插入N個不同的鍵需要~N^2/2次比較。
有序陣列字典二分查詢
有序字典的查詢效率肯定高的多。
/// <summary>
/// Extends <see cref="ISearchDict{TKey, TValue}"/> with the additional members declared below
/// (capacity, rank, min/max and floor/ceiling queries).
/// </summary>
/// <typeparam name="TKey">Type of the keys.</typeparam>
/// <typeparam name="TValue">Type of the values.</typeparam>
public interface IBinarySearchDict<TKey, TValue> : ISearchDict<TKey, TValue>
{
    /// <summary>Capacity reported by the implementation.</summary>
    int Capacity { get; }

    /// <summary>
    /// Finds the index that corresponds to <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Key to locate.</param>
    /// <returns>The index for the key.</returns>
    int Rank(TKey key);

    /// <summary>Removes the entry with the smallest key.</summary>
    void DeleteMin();

    /// <summary>Removes the entry with the largest key.</summary>
    void DeleteMax();

    /// <summary>Returns the smallest key.</summary>
    TKey Min();

    /// <summary>Returns the largest key.</summary>
    TKey Max();

    /// <summary>Floor query for <paramref name="key"/>.</summary>
    TKey Floor(TKey key);

    /// <summary>Ceiling query for <paramref name="key"/>.</summary>
    TKey Ceiling(TKey key);
}
/// <summary>
/// Ordered symbol table that keeps keys and values in two parallel arrays sorted by key and
/// locates keys with binary search. Lookups take logarithmic time; insertions and deletions
/// shift the array tail and are therefore linear.
/// </summary>
/// <typeparam name="TKey">Key type; ordering comes from <see cref="IComparable"/>.</typeparam>
/// <typeparam name="TValue">Value type.</typeparam>
public class BinarySearchSTNet<TKey, TValue> : IBinarySearchDict<TKey, TValue> where TKey : IComparable
{
    private int _length;
    private TKey[] _keys;
    private TValue[] _values;

    /// <summary>Creates a table with room for two entries.</summary>
    public BinarySearchSTNet() : this(2)
    {
    }

    /// <summary>Creates a table with the given initial capacity.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="capacity"/> is negative.</exception>
    public BinarySearchSTNet(int capacity)
    {
        if (capacity < 0)
            throw new ArgumentOutOfRangeException(nameof(capacity), "capacity must not be negative");
        _keys = new TKey[capacity];
        _values = new TValue[capacity];
        _length = 0;
    }

    /// <summary>Gets or sets the value stored under <paramref name="key"/>.</summary>
    public TValue this[TKey key] { get => Get(key); set => Add(key, value); }

    /// <summary>True when the table holds no entries.</summary>
    public bool IsEmpty => _length == 0;

    /// <summary>Number of entries currently stored.</summary>
    public int Length => _length;

    /// <summary>Current size of the backing arrays; always reflects the latest resize.</summary>
    public int Capacity => _keys.Length;

    /// <summary>Reallocates both backing arrays to <paramref name="capacity"/> slots.</summary>
    /// <exception cref="ArgumentException">The new capacity cannot hold the stored entries.</exception>
    private void resize(int capacity)
    {
        if (capacity < _length)
            throw new ArgumentException("this capacity is less than length");
        var keys = new TKey[capacity];
        var values = new TValue[capacity];
        Array.Copy(_keys, keys, _length);
        Array.Copy(_values, values, _length);
        _keys = keys;
        _values = values;
    }

    /// <summary>
    /// Inserts the pair at its sorted position, or overwrites the value of an existing key.
    /// A null value removes the key instead.
    /// </summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public void Add(TKey key, TValue value)
    {
        if (key == null)
            throw new ArgumentException("this key is default");
        if (value == null)
        {
            Delete(key);
            return;
        }
        int i = Rank(key);
        if (i < _length && _keys[i].CompareTo(key) == 0)
        {
            _values[i] = value;
            return; // existing key: update in place, never insert a duplicate
        }
        // Math.Max keeps a table created with capacity 0 able to grow.
        if (_length == _keys.Length) resize(Math.Max(1, 2 * _keys.Length));
        // Open a gap at i; Array.Copy handles the overlapping source and destination ranges.
        Array.Copy(_keys, i, _keys, i + 1, _length - i);
        Array.Copy(_values, i, _values, i + 1, _length - i);
        _keys[i] = key;
        _values[i] = value;
        _length++;
    }

    /// <summary>Reports whether <paramref name="key"/> is stored; a null key is never present.</summary>
    public bool Contains(TKey key)
    {
        if (key == null || IsEmpty)
            return false;
        int i = Rank(key);
        return i < _length && key.CompareTo(_keys[i]) == 0;
    }

    /// <summary>Removes <paramref name="key"/>; does nothing for a null or absent key.</summary>
    public void Delete(TKey key)
    {
        if (key == null || IsEmpty)
            return;
        int i = Rank(key);
        if (i >= _length || key.CompareTo(_keys[i]) != 0)
            return;
        int tail = _length - i - 1;
        Array.Copy(_keys, i + 1, _keys, i, tail);
        Array.Copy(_values, i + 1, _values, i, tail);
        _length--;
        // Clear the vacated slot so the arrays do not keep the removed objects alive.
        _keys[_length] = default;
        _values[_length] = default;
        if (_length > 0 && _length == _keys.Length / 4) resize(_keys.Length / 2);
    }

    /// <summary>Removes the entry with the largest key, if any.</summary>
    public void DeleteMax()
    {
        if (!IsEmpty)
            Delete(Max());
    }

    /// <summary>Removes the entry with the smallest key, if any.</summary>
    public void DeleteMin()
    {
        if (!IsEmpty)
            Delete(Min());
    }

    /// <summary>Releases the stored entries.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Drops all entries when called from <see cref="Dispose()"/>. The arrays are replaced by
    /// empty ones rather than null so later calls do not fail with a null reference.
    /// </summary>
    protected virtual void Dispose(bool disposing)
    {
        if (!disposing) return;
        _length = 0;
        _keys = new TKey[0];
        _values = new TValue[0];
    }

    /// <summary>
    /// Smallest stored key that is greater than or equal to <paramref name="key"/>;
    /// the default value when the key is null or no such key exists.
    /// </summary>
    public TKey Ceiling(TKey key)
    {
        if (key == null)
            return default;
        int i = Rank(key);
        if (i == _length) return default;
        return _keys[i];
    }

    /// <summary>
    /// Largest stored key that is less than or equal to <paramref name="key"/>;
    /// the default value when the key is null or no such key exists.
    /// </summary>
    public TKey Floor(TKey key)
    {
        if (key == null)
            return default;
        int i = Rank(key);
        if (i < _length && key.CompareTo(_keys[i]) == 0) return _keys[i];
        if (i == 0) return default;
        return _keys[i - 1];
    }

    /// <summary>Value stored under <paramref name="key"/>, or the default value when absent.</summary>
    public TValue Get(TKey key)
    {
        if (key == null || IsEmpty)
            return default;
        int i = Rank(key);
        if (i < _length && _keys[i].CompareTo(key) == 0) return _values[i];
        return default;
    }

    /// <summary>Largest key, or the default value when the table is empty.</summary>
    public TKey Max()
    {
        if (!IsEmpty)
            return _keys[_length - 1];
        return default;
    }

    /// <summary>Smallest key, or the default value when the table is empty.</summary>
    public TKey Min()
    {
        if (!IsEmpty)
            return _keys[0];
        return default;
    }

    /// <summary>
    /// Rank of <paramref name="key"/>: the number of stored keys strictly smaller than it,
    /// which is also the index where the key is (or would be inserted). Iterative binary search.
    /// </summary>
    /// <param name="key">Key to rank.</param>
    /// <returns>An index in the range 0..Length.</returns>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public int Rank(TKey key)
    {
        if (key == null)
            throw new ArgumentException("this key is default");
        int lo = 0;
        int hi = _length - 1;
        while (lo <= hi)
        {
            int mid = lo + (hi - lo) / 2; // avoids overflow of (lo + hi)
            int cmp = key.CompareTo(_keys[mid]);
            if (cmp < 0) hi = mid - 1;
            else if (cmp > 0) lo = mid + 1;
            else return mid;
        }
        return lo;
    }
}
命題B:在N個鍵的有序陣列中進行二分查詢最多需要(lgN+1)次比較(無論是否成功)。
缺點:Add太慢了,基於有序陣列的字典所需要訪問陣列的次數是陣列長度的平方級別。
命題B(續):向大小為N的有序陣列中插入一個新的元素在最壞情況下需要訪問2N次陣列,向空字典中插入N個元素在最壞情況下需要訪問N^2次陣列。
查詢是LgN是目標之一,但是插入是2N似乎代價太大了,要支援高效的插入,似乎鏈式結構可以滿足,但是鏈式結構是無法使用二分查詢的。那麼二叉查詢樹似乎就是我們一直追尋的目標。
二叉查詢樹
二叉樹中,每個結點只有一個父結點指向自己,每個結點都只有左右兩個連結。分別指向左子結點和右子結點。
二叉查詢樹的定義:每個結點都含有一個Comparable的鍵(以及相關聯的值),且每個結點的鍵都大於其左子樹中的任意結點的鍵而小於右子樹的任意結點的鍵。
計數器
插入
查詢
最好和最壞的情況
命題C:在由N個隨機鍵構造的二叉查詢樹中,查詢命中平均所需的比較次數為~2lnN(約1.39lgN)。
命題D:在由N個隨機鍵構造的二叉查詢樹中,插入操作和查詢未命中平均所需的比較次數為~2lnN(約1.39lgN)。
Floor
主要就是更新結點指向
命題E:在一棵二叉查詢樹中,所有操作在最壞情況下所需的時間都和樹的高度成正比。
/// <summary>
/// Unbalanced binary search tree symbol table. Get, Add and Delete are implemented
/// recursively; every node caches the size of the subtree rooted at it.
/// </summary>
/// <typeparam name="TKey">Key type; ordering comes from <see cref="IComparable"/>.</typeparam>
/// <typeparam name="TValue">Value type.</typeparam>
public class BSTNet<TKey, TValue> : IBSTSearchDict<TKey, TValue> where TKey : IComparable
{
    private class Node
    {
        public TKey Key { set; get; }
        public TValue Value { set; get; }
        public Node Left { set; get; }
        public Node Right { set; get; }

        /// <summary>
        /// Number of nodes in the subtree rooted at this node, this node included.
        /// </summary>
        public int Length { set; get; }

        public Node(TKey key, TValue value, int length)
        {
            this.Key = key;
            this.Value = value;
            this.Length = length;
        }
    }

    private Node _root;

    /// <summary>Creates an empty tree.</summary>
    public BSTNet()
    {
        _root = null;
    }

    /// <summary>Gets or sets the value stored under <paramref name="key"/>.</summary>
    public TValue this[TKey key] { get => Get(key); set => Add(key, value); }

    /// <summary>True when the tree holds no entries.</summary>
    public bool IsEmpty => length() == 0;

    /// <summary>Number of entries in the tree.</summary>
    public int Length => length();

    private int length()
    {
        return length(_root);
    }

    /// <summary>Subtree size; a null link counts as zero.</summary>
    private int length(Node node)
    {
        if (node == null) return 0;
        return node.Length;
    }

    /// <summary>Number of stored keys in the closed range [lo, hi].</summary>
    private int length(TKey lo, TKey hi)
    {
        if (lo == null) throw new ArgumentException("this low key is default");
        if (hi == null) throw new ArgumentException("this high is default");
        if (lo.CompareTo(hi) > 0) return 0;
        if (Contains(hi))
            return Rank(hi) - Rank(lo) + 1;
        else
            return Rank(hi) - Rank(lo);
    }

    /// <summary>Number of stored keys strictly smaller than <paramref name="key"/>.</summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public int Rank(TKey key)
    {
        if (key == null) throw new ArgumentException("this key is default");
        return rank(key, _root);
    }

    private int rank(TKey key, Node node)
    {
        if (node == null) return 0;
        int cmp = key.CompareTo(node.Key);
        if (cmp < 0) return rank(key, node.Left);
        else if (cmp > 0) return 1 + length(node.Left) + rank(key, node.Right);
        else return length(node.Left);
    }

    /// <summary>Reports whether <paramref name="key"/> is stored in the tree.</summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public bool Contains(TKey key)
    {
        if (key == null) throw new ArgumentException("this key is default value");
        // Look for the node itself instead of comparing the stored value with a default.
        return find(_root, key) != null;
    }

    /// <summary>Value stored under <paramref name="key"/>, or the default value when absent or null.</summary>
    public TValue Get(TKey key)
    {
        return get(_root, key);
    }

    private TValue get(Node node, TKey key)
    {
        if (key == null || node == null) return default;
        int cmp = key.CompareTo(node.Key);
        if (cmp < 0) return get(node.Left, key);
        if (cmp > 0) return get(node.Right, key);
        return node.Value;
    }

    /// <summary>Returns the node holding <paramref name="key"/>, or null.</summary>
    private Node find(Node node, TKey key)
    {
        while (node != null)
        {
            int cmp = key.CompareTo(node.Key);
            if (cmp == 0) return node;
            node = cmp < 0 ? node.Left : node.Right;
        }
        return null;
    }

    /// <summary>
    /// Inserts the pair or overwrites the value of an existing key. A null value removes the key.
    /// </summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public void Add(TKey key, TValue value)
    {
        if (key == null) throw new ArgumentException("this key is default value");
        if (value == null)
        {
            Delete(key);
            return;
        }
        _root = add(_root, key, value);
    }

    /// <summary>
    /// Recursive insert. Every node on the search path has its subtree size recomputed
    /// on the way back up.
    /// </summary>
    /// <param name="node">Root of the subtree to insert into.</param>
    /// <param name="key">Key to insert.</param>
    /// <param name="value">Value to associate with the key.</param>
    /// <returns>The (possibly new) root of the subtree.</returns>
    private Node add(Node node, TKey key, TValue value)
    {
        if (node == null) return new Node(key, value, 1);
        int cmp = key.CompareTo(node.Key);
        // The three cases must be mutually exclusive: only an exact match may overwrite
        // this node's value, otherwise a left descent would clobber every ancestor.
        if (cmp < 0) node.Left = add(node.Left, key, value);
        else if (cmp > 0) node.Right = add(node.Right, key, value);
        else node.Value = value;
        node.Length = 1 + length(node.Left) + length(node.Right);
        return node;
    }

    /// <summary>
    /// Smallest stored key greater than or equal to <paramref name="key"/>.
    /// Returns the default value for a null key or an empty tree.
    /// </summary>
    /// <exception cref="ArgumentException">Every stored key is smaller than <paramref name="key"/>.</exception>
    public TKey Ceiling(TKey key)
    {
        if (key == null || IsEmpty) return default;
        Node tem = ceiling(_root, key);
        if (tem == null) throw new ArgumentException("this key is too large");
        else return tem.Key;
    }

    private Node ceiling(Node node, TKey key)
    {
        if (node == null) return null;
        int cmp = key.CompareTo(node.Key);
        if (cmp == 0) return node;
        if (cmp < 0)
        {
            // This node qualifies; a smaller qualifying key can only be in the left subtree.
            Node tem = ceiling(node.Left, key);
            if (tem != null) return tem;
            else return node;
        }
        return ceiling(node.Right, key);
    }

    /// <summary>Removes <paramref name="key"/>; does nothing for a null or absent key.</summary>
    public void Delete(TKey key)
    {
        if (key == null) return;
        _root = delete(_root, key);
    }

    /// <summary>
    /// Recursive removal. A node with two children is replaced by the minimum of its
    /// right subtree (its successor).
    /// </summary>
    private Node delete(Node node, TKey key)
    {
        if (node == null || key == null) return null;
        int cmp = key.CompareTo(node.Key);
        if (cmp < 0) node.Left = delete(node.Left, key);
        else if (cmp > 0) node.Right = delete(node.Right, key);
        else
        {
            // This is the node to remove.
            if (node.Right == null) return node.Left;
            else if (node.Left == null) return node.Right;
            else
            {
                Node tem = node;                    // both children present
                node = min(tem.Right);              // successor takes this node's place
                node.Right = deleteMin(tem.Right);  // detach the successor from the right subtree
                node.Left = tem.Left;
            }
        }
        node.Length = 1 + length(node.Left) + length(node.Right);
        return node;
    }

    /// <summary>Removes the entry with the largest key, if any.</summary>
    public void DeleteMax()
    {
        if (IsEmpty) return;
        _root = deleteMax(_root);
    }

    private Node deleteMax(Node node)
    {
        if (node == null) return null;
        if (node.Right == null) return node.Left;
        node.Right = deleteMax(node.Right);
        node.Length = length(node.Left) + length(node.Right) + 1;
        return node;
    }

    /// <summary>Removes the entry with the smallest key, if any.</summary>
    public void DeleteMin()
    {
        if (IsEmpty) return;
        _root = deleteMin(_root);
    }

    private Node deleteMin(Node node)
    {
        if (node == null) return null;
        if (node.Left == null) return node.Right;
        node.Left = deleteMin(node.Left);
        node.Length = 1 + length(node.Left) + length(node.Right);
        return node;
    }

    /// <summary>Releases every entry.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Drops the whole tree when called from <see cref="Dispose()"/>.</summary>
    protected virtual void Dispose(bool disposing)
    {
        if (disposing)
        {
            // Releasing the root makes every node unreachable.
            _root = null;
        }
    }

    /// <summary>
    /// Largest stored key less than or equal to <paramref name="key"/>.
    /// Returns the default value for a null key or an empty tree.
    /// </summary>
    /// <exception cref="ArgumentException">Every stored key is larger than <paramref name="key"/>.</exception>
    public TKey Floor(TKey key)
    {
        if (key == null || IsEmpty) return default;
        Node tem = floor(_root, key);
        if (tem == null) throw new ArgumentException("this key is too small");
        else return tem.Key;
    }

    private Node floor(Node node, TKey key)
    {
        if (node == null) return null; // fell off the tree: nothing qualifies on this path
        int cmp = key.CompareTo(node.Key);
        if (cmp == 0) return node;
        if (cmp < 0) return floor(node.Left, key);
        // This node qualifies; a larger qualifying key can only be in the right subtree.
        Node tem = floor(node.Right, key);
        if (tem != null) return tem;
        else return node;
    }

    /// <summary>
    /// Iterative variant of <see cref="Floor"/> that remembers the best candidate seen so far.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// The key is null, or no stored key is less than or equal to it.
    /// </exception>
    public TKey Floor2(TKey key)
    {
        if (key == null) throw new ArgumentException("argument to floor() is null");
        // Track the candidate node rather than its key so that "no candidate" stays
        // distinguishable even when TKey is a value type.
        Node best = null;
        Node node = _root;
        while (node != null)
        {
            int cmp = key.CompareTo(node.Key);
            if (cmp == 0) return node.Key;
            if (cmp < 0)
            {
                node = node.Left;
            }
            else
            {
                best = node;
                node = node.Right;
            }
        }
        if (best == null) throw new ArgumentException("argument to floor() is too small");
        return best.Key;
    }

    /// <summary>Largest key, or the default value when the tree is empty.</summary>
    public TKey Max()
    {
        if (IsEmpty) return default;
        return max(_root).Key;
    }

    private Node max(Node node)
    {
        if (node == null) return null;
        if (node.Right == null) return node;
        else return max(node.Right);
    }

    /// <summary>Smallest key, or the default value when the tree is empty.</summary>
    public TKey Min()
    {
        if (IsEmpty) return default;
        return min(_root).Key;
    }

    private Node min(Node node)
    {
        if (node == null) return null;
        if (node.Left == null) return node;
        else return min(node.Left);
    }
}
平衡查詢樹(2-3查詢樹)
二叉查詢樹最壞的情況還是很糟糕的,平衡查詢樹可以有效解決這個問題,無論陣列的初始狀態如何,它的執行時間都是對數級別的。
定義:一顆2-3查詢樹由以下結點組成:
- 2-結點,含有一個鍵(及其對應的值)和兩條連結,左連結指向的2-3樹中的鍵都小於該結點,右連結指向的2-3樹中的鍵都大於該結點。
- 3-結點,含有兩個鍵(及其對應的值)和三條連結,左連結指向的2-3樹中的鍵都小於該結點,中連結指向的2-3樹中的鍵都位於該結點的兩個鍵之間,右連結指向的2-3樹中的鍵都大於該結點。
一棵完美平衡的2-3查詢樹中的所有空連結到根結點的距離都應該是相同的。
查詢
插入
先進行一次未命中的查詢,然後把新結點掛在樹的底部,如果未命中的查詢結束於一個2-結點,就將2-結點換成3-結點。如果未命中的查詢結束於一個3-結點,就先將3-結點換成4-結點,然後轉換成2-3樹。
4結點轉換成2-3樹要麻煩,如果4-結點的父結點是2-結點,那麼4-結點就轉換成一個3-結點和2個2-結點。
如果4-結點的父結點是3-結點,爺結點也是3-結點,那麼就依次向上分解,直到根結點,然後樹的高度就會加一。
分解4-結點一共有6種情況
4結點的分解不會影響樹的有序性和平衡性
命題F:在一棵大小為N的2-3樹中,查詢和插入操作訪問的結點必然不超過lgN個。
含有10億個結點的一棵2-3樹的高度僅在19-30之間,最多訪問30個結點就能夠在10億個鍵中進行任意插入和查詢。
紅黑二叉查詢樹(紅黑樹)
通過紅連結將2-3查詢樹的3-結點變成兩個2結點的連結。
紅黑樹的定義:
- 紅連結均為左連結
- 沒有任何一個結點同時和兩條紅連結相連。
- 該樹是一個完美黑色平衡樹,任意空連線到根結點的路徑上的黑連結數量相同。
如果將紅連結畫平
結點中通過一個屬性Color來判定指向該結點是紅色還是黑色。同時約定空連線為黑色
旋轉
左旋轉:右連結轉化為左連結
2-結點插入
3-結點插入
有三種情況,通過0次,1次,2次旋轉以及顏色的變化得到期望的結果。
顏色變化
底部插入
插入總結
刪除最小鍵
如果查詢的鍵在最底部,可以直接刪除它。
如果不在最底部,就需要和後繼結點交換。問題就可以轉換成在一棵根結點不是2-結點的子樹中刪除最小的鍵。
命題G:一棵大小為N的紅黑樹的高度不會超過2lgN
命題H:一棵大小為N的紅黑樹中,根結點到任意結點的平均路徑長度為~1.00LgN。
命題I:在一棵紅黑樹中,以下操作在最壞情況下所需的時間是對數級別:查詢,插入,查詢最小鍵,查詢最大鍵,floor,ceiling,rank,select(),刪除最小鍵,刪除最大鍵,刪除,範圍查詢。
千億的資料量十幾次比較就可以找到。
/// <summary>
/// Left-leaning red-black binary search tree symbol table. Red links lean left, no node
/// touches two red links, and every path from the root to a null link crosses the same
/// number of black links.
/// </summary>
/// <typeparam name="TKey">Key type; ordering comes from <see cref="IComparable{T}"/>.</typeparam>
/// <typeparam name="TValue">Value type.</typeparam>
public class RedBlackBSTNet<TKey, TValue> : IRedBlackBST<TKey, TValue> where TKey : IComparable<TKey>
{
    private const bool RED = true;
    private const bool BLACK = false;

    private Node _root;

    private class Node
    {
        public TKey Key { set; get; }
        public TValue Value { set; get; }
        public Node Right { set; get; }
        public Node Left { set; get; }

        /// <summary>
        /// Colour of the link that points to this node.
        /// </summary>
        public bool Color { set; get; }

        /// <summary>
        /// Number of nodes in the subtree rooted at this node, this node included.
        /// </summary>
        public int Length { set; get; }

        public Node(TKey key, TValue value, bool color, int length)
        {
            this.Key = key;
            this.Value = value;
            this.Color = color;
            this.Length = length;
        }
    }

    /// <summary>Creates an empty tree.</summary>
    public RedBlackBSTNet()
    {
    }

    /// <summary>
    /// Whether the link to <paramref name="node"/> is red; null links count as black.
    /// </summary>
    private bool isRed(Node node)
    {
        if (node == null) return false;
        return node.Color == RED;
    }

    /// <summary>Subtree size; a null link counts as zero.</summary>
    private int length(Node node)
    {
        if (node == null) return 0;
        return node.Length;
    }

    private int length()
    {
        return length(_root);
    }

    /// <summary>Gets or sets the value stored under <paramref name="key"/>.</summary>
    public TValue this[TKey key] { get => Get(key); set => Add(key, value); }

    /// <summary>True when the tree holds no entries.</summary>
    public bool IsEmpty => _root == null;

    /// <summary>Number of entries in the tree.</summary>
    public int Length => length();

    /// <summary>
    /// Inserts the pair or overwrites the value of an existing key. A null key is ignored
    /// and a null value removes the key.
    /// </summary>
    public void Add(TKey key, TValue value)
    {
        if (key == null) return;
        if (value == null)
        {
            Delete(key);
            return;
        }
        _root = add(_root, key, value);
        _root.Color = BLACK; // the root link is always black
    }

    /// <summary>
    /// Recursive insert; the colours are repaired one level up as the recursion unwinds.
    /// </summary>
    /// <param name="node">Root of the subtree to insert into.</param>
    /// <param name="key">Key to insert.</param>
    /// <param name="value">Value to associate with the key.</param>
    /// <returns>The (possibly new) root of the subtree.</returns>
    private Node add(Node node, TKey key, TValue value)
    {
        if (node == null) return new Node(key, value, RED, 1); // new nodes hang off a red link
        int cmp = key.CompareTo(node.Key);
        if (cmp < 0) node.Left = add(node.Left, key, value);
        else if (cmp > 0) node.Right = add(node.Right, key, value);
        else node.Value = value;

        // Restore the invariants: red links lean left, no two reds in a row, no 4-nodes.
        if (isRed(node.Right) && !isRed(node.Left)) node = rotateLeft(node);
        if (isRed(node.Left) && isRed(node.Left.Left)) node = rotateRight(node);
        if (isRed(node.Left) && isRed(node.Right)) flipColors(node);
        node.Length = length(node.Left) + length(node.Right) + 1;
        return node;
    }

    /// <summary>
    /// Inverts the colour of <paramref name="node"/> and of both its children.
    /// The node must have two children whose colour is the opposite of its own.
    /// </summary>
    private void flipColors(Node node)
    {
        node.Color = !node.Color;
        node.Left.Color = !node.Left.Color;
        node.Right.Color = !node.Right.Color;
    }

    /// <summary>
    /// Turns a left-leaning red link at <paramref name="h"/> into a right-leaning one.
    /// </summary>
    /// <returns>The node that takes the place of <paramref name="h"/>.</returns>
    private Node rotateRight(Node h)
    {
        Node x = h.Left;
        h.Left = x.Right;
        x.Right = h;
        x.Color = h.Color;
        h.Color = RED;
        x.Length = h.Length;
        h.Length = length(h.Left) + length(h.Right) + 1;
        return x;
    }

    /// <summary>
    /// Turns a right-leaning red link at <paramref name="h"/> into a left-leaning one.
    /// </summary>
    /// <returns>The node that takes the place of <paramref name="h"/>.</returns>
    private Node rotateLeft(Node h)
    {
        Node x = h.Right;
        h.Right = x.Left;
        x.Left = h;
        x.Color = h.Color;
        h.Color = RED;
        x.Length = h.Length;
        h.Length = length(h.Left) + length(h.Right) + 1;
        return x;
    }

    /// <summary>
    /// Smallest stored key greater than or equal to <paramref name="key"/>.
    /// Returns the default value for a null key or an empty tree.
    /// </summary>
    /// <exception cref="ArgumentException">Every stored key is smaller than <paramref name="key"/>.</exception>
    public TKey Ceiling(TKey key)
    {
        if (key == null || IsEmpty) return default;
        Node node = ceiling(_root, key);
        if (node == null) throw new ArgumentException("this key is too large");
        else return node.Key;
    }

    private Node ceiling(Node node, TKey key)
    {
        if (node == null) return null;
        int cmp = key.CompareTo(node.Key);
        if (cmp == 0) return node;
        if (cmp > 0) return ceiling(node.Right, key);
        Node tem = ceiling(node.Left, key);
        if (tem != null) return tem;
        else return node;
    }

    /// <summary>Reports whether <paramref name="key"/> is stored; a null key is never present.</summary>
    public bool Contains(TKey key)
    {
        if (key == null) return false;
        return find(key) != null;
    }

    /// <summary>Removes <paramref name="key"/>; does nothing for a null or absent key.</summary>
    public void Delete(TKey key)
    {
        // The recursive delete assumes the key is present, so absent keys stop here.
        if (key == null || find(key) == null) return;
        // If both children of the root are black, colour the root red so that the
        // descent always has a red link to push down.
        if (!isRed(_root.Left) && !isRed(_root.Right))
            _root.Color = RED;
        _root = delete(_root, key);
        if (!IsEmpty) _root.Color = BLACK;
    }

    /// <summary>
    /// Removes <paramref name="key"/> from the subtree rooted at <paramref name="node"/>.
    /// The key must be present in that subtree.
    /// </summary>
    private Node delete(Node node, TKey key)
    {
        if (key.CompareTo(node.Key) < 0)
        {
            if (!isRed(node.Left) && !isRed(node.Left.Left))
                node = moveRedLeft(node);
            node.Left = delete(node.Left, key);
        }
        else
        {
            if (isRed(node.Left))
                node = rotateRight(node);
            if (key.CompareTo(node.Key) == 0 && (node.Right == null))
                return null;
            if (!isRed(node.Right) && !isRed(node.Right.Left))
                node = moveRedRight(node);
            if (key.CompareTo(node.Key) == 0)
            {
                // Replace this entry with its successor, then remove the successor below.
                Node x = min(node.Right);
                node.Key = x.Key;
                node.Value = x.Value;
                node.Right = deleteMin(node.Right);
            }
            else node.Right = delete(node.Right, key);
        }
        return balance(node);
    }

    /// <summary>Removes the entry with the largest key, if any.</summary>
    public void DeleteMax()
    {
        if (IsEmpty)
            return;
        if (!isRed(_root.Left) && !isRed(_root.Right))
            _root.Color = RED;
        _root = deleteMax(_root);
        if (!IsEmpty) _root.Color = BLACK;
    }

    private Node deleteMax(Node node)
    {
        if (isRed(node.Left))
            node = rotateRight(node);
        if (node.Right == null)
            return null;
        if (!isRed(node.Right) && !isRed(node.Right.Left))
            node = moveRedRight(node);
        node.Right = deleteMax(node.Right);
        return balance(node);
    }

    /// <summary>Removes the entry with the smallest key, if any.</summary>
    public void DeleteMin()
    {
        if (IsEmpty) return;
        if (!isRed(_root.Left) && !isRed(_root.Right))
            _root.Color = RED;
        _root = deleteMin(_root);
        if (!IsEmpty) _root.Color = BLACK;
    }

    /// <summary>
    /// Recursively removes the minimum of the subtree, pushing a red link down the left
    /// spine so the removed node is never a 2-node.
    /// </summary>
    private Node deleteMin(Node node)
    {
        if (node.Left == null)
            return null;
        if (!isRed(node.Left) && !isRed(node.Left.Left))
            node = moveRedLeft(node);
        node.Left = deleteMin(node.Left);
        return balance(node);
    }

    /// <summary>
    /// Restores the red-black invariants at <paramref name="node"/> after a deletion below it.
    /// </summary>
    /// <returns>The node that takes the place of <paramref name="node"/>.</returns>
    private Node balance(Node node)
    {
        if (isRed(node.Right)) node = rotateLeft(node);
        // Two consecutive left red links are fixed by a RIGHT rotation.
        if (isRed(node.Left) && isRed(node.Left.Left)) node = rotateRight(node);
        if (isRed(node.Left) && isRed(node.Right)) flipColors(node);
        node.Length = length(node.Left) + length(node.Right) + 1;
        return node;
    }

    /// <summary>
    /// Assuming <paramref name="node"/> is red and both node.Left and node.Left.Left are
    /// black, makes node.Left or one of its children red.
    /// </summary>
    private Node moveRedLeft(Node node)
    {
        flipColors(node);
        if (isRed(node.Right.Left))
        {
            node.Right = rotateRight(node.Right);
            node = rotateLeft(node);
            flipColors(node);
        }
        return node;
    }

    /// <summary>
    /// Assuming <paramref name="node"/> is red and both node.Right and node.Right.Left are
    /// black, makes node.Right or one of its children red.
    /// </summary>
    private Node moveRedRight(Node node)
    {
        flipColors(node);
        if (isRed(node.Left.Left))
        {
            node = rotateRight(node);
            flipColors(node);
        }
        return node;
    }

    /// <summary>Releases every entry.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Drops the whole tree when called from <see cref="Dispose()"/>.</summary>
    protected virtual void Dispose(bool disposing)
    {
        if (disposing)
        {
            // Releasing the root makes every node unreachable.
            _root = null;
        }
    }

    /// <summary>
    /// Key of the given rank (the key that has exactly <paramref name="rank"/> smaller keys),
    /// or the default value when the rank is out of range.
    /// </summary>
    public TKey Select(int rank)
    {
        if (rank < 0 || rank >= length())
            return default;
        return select(_root, rank);
    }

    private TKey select(Node node, int rank)
    {
        if (node == null) return default;
        int leftlength = length(node.Left);
        if (leftlength > rank) return select(node.Left, rank);
        else if (leftlength < rank) return select(node.Right, rank - leftlength - 1);
        else return node.Key;
    }

    /// <summary>
    /// Largest stored key less than or equal to <paramref name="key"/>.
    /// Returns the default value for a null key or an empty tree.
    /// </summary>
    /// <exception cref="ArgumentException">Every stored key is larger than <paramref name="key"/>.</exception>
    public TKey Floor(TKey key)
    {
        if (key == null || IsEmpty)
            return default;
        Node node = floor(_root, key);
        if (node == null) throw new ArgumentException("this key is too small");
        else return node.Key;
    }

    private Node floor(Node node, TKey key)
    {
        if (node == null) return null;
        int cmp = key.CompareTo(node.Key);
        if (cmp == 0) return node;
        if (cmp < 0) return floor(node.Left, key);
        Node tem = floor(node.Right, key);
        if (tem != null) return tem;
        else return node;
    }

    /// <summary>Value stored under <paramref name="key"/>, or the default value when absent or null.</summary>
    public TValue Get(TKey key)
    {
        if (key == null || IsEmpty)
            return default;
        Node hit = find(key);
        if (hit == null) return default;
        return hit.Value;
    }

    /// <summary>Iterative search for the node holding <paramref name="key"/>; null when absent.</summary>
    private Node find(TKey key)
    {
        Node node = _root;
        while (node != null)
        {
            int cmp = key.CompareTo(node.Key);
            if (cmp < 0) node = node.Left;
            else if (cmp > 0) node = node.Right;
            else return node;
        }
        return null;
    }

    /// <summary>Height of the tree; an empty tree has height -1.</summary>
    public int Height()
    {
        return height(_root);
    }

    private int height(Node node)
    {
        if (node == null) return -1;
        return 1 + Math.Max(height(node.Left), height(node.Right));
    }

    /// <summary>Largest key, or the default value when the tree is empty.</summary>
    public TKey Max()
    {
        if (IsEmpty) return default;
        return max(_root).Key;
    }

    private Node max(Node node)
    {
        if (node.Right == null) return node;
        else return max(node.Right);
    }

    /// <summary>Smallest key, or the default value when the tree is empty.</summary>
    public TKey Min()
    {
        if (IsEmpty) return default;
        return min(_root).Key;
    }

    private Node min(Node node)
    {
        if (node.Left == null) return node;
        else return min(node.Left);
    }

    /// <summary>Number of stored keys strictly smaller than <paramref name="key"/>.</summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public int Rank(TKey key)
    {
        if (key == null) throw new ArgumentException("this key is default value");
        return rank(key, _root);
    }

    private int rank(TKey key, Node node)
    {
        if (node == null) return 0;
        int cmp = key.CompareTo(node.Key);
        if (cmp < 0) return rank(key, node.Left);
        else if (cmp > 0) return 1 + length(node.Left) + rank(key, node.Right);
        else return length(node.Left);
    }
}
散列表
如果所有的鍵都是小整數,可以用一個數組來實現無序的符號表,將鍵作為陣列的索引而陣列中鍵i處儲存的就是它對應的值。用算術操作將鍵轉化為陣列的索引來訪問陣列中的鍵值對。
- 用雜湊函式將被查詢的鍵轉化為陣列的一個索引。
- 處理碰撞和衝突過程,有兩個方法:拉鍊法和線性探測法。
散列表是時間和空間上作出權衡的例子,如果沒有記憶體限制,可以直接將鍵作為陣列的索引,查詢操作只需要訪問記憶體一次就可以完成。
概率論是數學分析的重大成果,使用散列表,可以實現在一般應用中有常數級別的查詢和插入操作的符號表。
雜湊函式
- 將key轉化為[0, M-1]內的整數。
- 轉化的整數在[0, M-1]上是均勻分佈的。
餘留法
使用素數餘留法
HashCode
將hashcode的返回值轉化為陣列的索引。通過hashcode和餘留法結合起來產生0到M-1的整數。
private int hash(Key x)
{
return (x.hashCode() & 0x7fffffff) & M
}
軟快取
將每個鍵的雜湊值快取起來,這樣減少計算雜湊值的時間
一致性,高效性,均勻性
假設J:使用雜湊函式能夠均勻並獨立地將所有的鍵散佈於0到M-1之間。
處理碰撞
拉鍊法
將M的陣列中每個元素指向一個連結串列。連結串列中每個結點都儲存了雜湊值為該元素的索引鍵值對。
需要M足夠大,這樣連結串列就比較小。
命題K:在一張含有M條連結串列和N個鍵的散列表中,任意一條連結串列中的鍵的數量均在N/M的常數因子範圍內的概率無限趨向於1。
性質L:在一張含有M條連結串列和N個鍵的散列表中,未命中查詢和插入操作所需的比較次數為~N/M
/// <summary>
/// Hash table with separate chaining: an array of buckets, each bucket being a
/// <see cref="SequentialSearchSTNet{TKey, TValue}"/> that holds the keys hashing to its index.
/// </summary>
/// <typeparam name="TKey">Key type; hashed with <see cref="object.GetHashCode"/>.</typeparam>
/// <typeparam name="TValue">Value type.</typeparam>
public class SeparateChainingHashSTNet<TKey, TValue>
{
    private static readonly int INIT_CAPACITY = 4;
    private int _length;
    private int _capacity;
    private SequentialSearchSTNet<TKey, TValue>[] st;

    /// <summary>Number of key/value pairs stored.</summary>
    public int Length => _length;

    /// <summary>Number of buckets.</summary>
    public int Capacity => _capacity;

    /// <summary>Creates a table with the default number of buckets.</summary>
    public SeparateChainingHashSTNet() : this(INIT_CAPACITY)
    {
    }

    /// <summary>Creates a table with <paramref name="capacity"/> buckets.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="capacity"/> is less than 1.</exception>
    public SeparateChainingHashSTNet(int capacity)
    {
        // hash() divides by the bucket count, so at least one bucket is required.
        if (capacity < 1)
            throw new ArgumentOutOfRangeException(nameof(capacity), "capacity must be at least 1");
        this._capacity = capacity;
        st = new SequentialSearchSTNet<TKey, TValue>[capacity];
        for (int i = 0; i < capacity; i++)
            st[i] = new SequentialSearchSTNet<TKey, TValue>();
    }

    /// <summary>
    /// Rebuilds the table with <paramref name="chains"/> buckets and re-inserts every pair.
    /// NOTE(review): relies on the bucket type exposing a Keys enumeration — confirm that
    /// SequentialSearchSTNet provides it.
    /// </summary>
    private void resize(int chains)
    {
        SeparateChainingHashSTNet<TKey, TValue> temp = new SeparateChainingHashSTNet<TKey, TValue>(chains);
        for (int i = 0; i < _capacity; i++)
        {
            foreach (TKey key in st[i].Keys)
            {
                temp.Add(key, st[i].Get(key));
            }
        }
        this._capacity = temp.Capacity;
        this._length = temp.Length;
        this.st = temp.st;
    }

    /// <summary>
    /// Maps a key to a bucket index in 0..Capacity-1. The mask clears the sign bit of the
    /// hash code so the remainder is never negative.
    /// </summary>
    /// <param name="key">Key to hash; must not be null.</param>
    /// <returns>The bucket index for the key.</returns>
    private int hash(TKey key)
    {
        return (key.GetHashCode() & 0x7fffffff) % _capacity;
    }

    /// <summary>True when the table holds no entries.</summary>
    public bool isEmpty() => Length == 0;

    /// <summary>Reports whether <paramref name="key"/> is stored in the table.</summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public bool Contains(TKey key)
    {
        if (key == null) throw new ArgumentException("argument to contains() is null");
        // Ask the bucket directly: comparing the stored value with null is always true
        // for value-type TValue and would report every key as present.
        return st[hash(key)].Contains(key);
    }

    /// <summary>Value stored under <paramref name="key"/>, as returned by the key's bucket.</summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public TValue get(TKey key)
    {
        if (key == null) throw new ArgumentException("argument to get() is null");
        int i = hash(key);
        return st[i].Get(key);
    }

    /// <summary>
    /// Stores the pair, overwriting an existing value. A null value removes the key.
    /// The bucket array doubles once the average chain length reaches 10.
    /// </summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public void Add(TKey key, TValue val)
    {
        if (key == null) throw new ArgumentException("first argument to put() is null");
        if (val == null)
        {
            delete(key);
            return;
        }
        if (_length >= 10 * _capacity) resize(2 * _capacity);
        SequentialSearchSTNet<TKey, TValue> bucket = st[hash(key)];
        // Adjust the count from the bucket's own size change: one chain walk instead of
        // two, and the count stays correct if the bucket rejects the pair by throwing.
        int before = bucket.Length;
        bucket.Add(key, val);
        _length += bucket.Length - before;
    }

    /// <summary>
    /// Removes <paramref name="key"/> when present, then halves the bucket array when
    /// the table is above its initial size and holds at most two entries per bucket.
    /// </summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public void delete(TKey key)
    {
        if (key == null) throw new ArgumentException("argument to delete() is null");
        SequentialSearchSTNet<TKey, TValue> bucket = st[hash(key)];
        int before = bucket.Length;
        bucket.Delete(key);
        _length -= before - bucket.Length;
        if (_capacity > INIT_CAPACITY && _length <= 2 * _capacity) resize(_capacity / 2);
    }
}
線性探測法
用大小為M的陣列儲存N個鍵值對,M>N,依靠陣列中的空位解決碰撞衝突。開放地址散列表。
當發生碰撞時,直接使用散列表中的下一個位置。
鍵
/// <summary>
/// Open-addressing hash table with linear probing: pairs live in parallel arrays and a
/// collision is resolved by moving to the next slot, wrapping around at the end.
/// Slot occupancy is tracked in a separate flag array, so value-type keys (whose default
/// value is indistinguishable from an empty slot) are supported as well.
/// </summary>
/// <typeparam name="TKey">Key type; hashed with GetHashCode and compared with Equals.</typeparam>
/// <typeparam name="TValue">Value type.</typeparam>
public class LinearProbingHashSTNet<TKey, TValue>
{
    private static readonly int INIT_CAPACITY = 4;
    private int _length;
    private int _capacity;
    private TKey[] _keys;
    private TValue[] _values;
    private bool[] _used; // _used[i] is true while slot i holds a pair

    /// <summary>Number of key/value pairs stored.</summary>
    public int Length => _length;

    /// <summary>Number of slots in the table.</summary>
    public int Capacity => _capacity;

    /// <summary>The raw key slots; unoccupied slots hold the default value.</summary>
    public TKey[] Keys => _keys;

    /// <summary>The raw value slots, parallel to <see cref="Keys"/>.</summary>
    public TValue[] Values => _values;

    /// <summary>Creates a table with the default number of slots.</summary>
    public LinearProbingHashSTNet() : this(INIT_CAPACITY)
    {
    }

    /// <summary>Creates a table with <paramref name="capacity"/> slots.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="capacity"/> is less than 1.</exception>
    public LinearProbingHashSTNet(int capacity)
    {
        // hash() divides by the slot count, so at least one slot is required.
        if (capacity < 1)
            throw new ArgumentOutOfRangeException(nameof(capacity), "capacity must be at least 1");
        _capacity = capacity;
        _length = 0;
        _keys = new TKey[capacity];
        _values = new TValue[capacity];
        _used = new bool[capacity];
    }

    /// <summary>True when the table holds no entries.</summary>
    public bool IsEmpty => _length == 0;

    /// <summary>Reports whether <paramref name="key"/> is stored in the table.</summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public bool Contains(TKey key)
    {
        if (key == null) throw new ArgumentException("argument to contains() is null");
        return indexOf(key) >= 0;
    }

    /// <summary>
    /// Maps a key to a slot index in 0..Capacity-1; the mask clears the sign bit of the hash code.
    /// </summary>
    private int hash(TKey key)
    {
        return (key.GetHashCode() & 0x7fffffff) % _capacity;
    }

    /// <summary>
    /// Probes from the key's home slot until the key or an unoccupied slot is found.
    /// Terminates because Add keeps the table at most half full.
    /// </summary>
    /// <returns>The slot holding <paramref name="key"/>, or -1 when the key is absent.</returns>
    private int indexOf(TKey key)
    {
        for (int i = hash(key); _used[i]; i = (i + 1) % _capacity)
        {
            if (_keys[i].Equals(key)) return i;
        }
        return -1;
    }

    /// <summary>
    /// Stores a pair whose key is known to be absent in the first free slot of its probe
    /// sequence. Never resizes and never changes the entry count.
    /// </summary>
    private void place(TKey key, TValue val)
    {
        int i = hash(key);
        while (_used[i])
        {
            i = (i + 1) % _capacity;
        }
        _keys[i] = key;
        _values[i] = val;
        _used[i] = true;
    }

    /// <summary>Empties slot <paramref name="i"/>.</summary>
    private void clear(int i)
    {
        _keys[i] = default;
        _values[i] = default;
        _used[i] = false;
    }

    /// <summary>Reallocates the table with <paramref name="capacity"/> slots and rehashes every pair.</summary>
    private void resize(int capacity)
    {
        TKey[] oldKeys = _keys;
        TValue[] oldValues = _values;
        bool[] oldUsed = _used;

        _keys = new TKey[capacity];
        _values = new TValue[capacity];
        _used = new bool[capacity];
        _capacity = capacity;

        for (int i = 0; i < oldUsed.Length; i++)
        {
            if (oldUsed[i]) place(oldKeys[i], oldValues[i]);
        }
    }

    /// <summary>
    /// Stores the pair, overwriting an existing value. A null value removes the key.
    /// </summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public void Add(TKey key, TValue val)
    {
        if (key == null) throw new ArgumentException("first argument to put() is null");
        if (val == null)
        {
            Delete(key);
            return;
        }
        // double table size if 50% full
        if (_length >= _capacity / 2) resize(2 * _capacity);

        int i = hash(key);
        while (_used[i])
        {
            if (_keys[i].Equals(key))
            {
                _values[i] = val;
                return;
            }
            i = (i + 1) % _capacity;
        }
        _keys[i] = key;
        _values[i] = val;
        _used[i] = true;
        _length++;
    }

    /// <summary>Value stored under <paramref name="key"/>, or the default value when absent.</summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public TValue Get(TKey key)
    {
        if (key == null) throw new ArgumentException("argument to get() is null");
        int i = indexOf(key);
        if (i < 0) return default;
        return _values[i];
    }

    /// <summary>
    /// Removes <paramref name="key"/> (no-op when absent) and re-places the rest of its
    /// cluster so that no later key becomes unreachable behind the new gap.
    /// </summary>
    /// <exception cref="ArgumentException">The key is null.</exception>
    public void Delete(TKey key)
    {
        if (key == null) throw new ArgumentException("argument to delete() is null");
        int i = indexOf(key);
        if (i < 0) return;

        // delete key and associated value
        clear(i);
        _length--;

        // rehash all keys in same cluster
        for (i = (i + 1) % _capacity; _used[i]; i = (i + 1) % _capacity)
        {
            TKey keyToRehash = _keys[i];
            TValue valToRehash = _values[i];
            clear(i);
            // place() cannot trigger a resize, so the loop index stays valid.
            place(keyToRehash, valToRehash);
        }

        // halves size of array if it's 12.5% full or less
        if (_length > 0 && _length <= _capacity / 8) resize(_capacity / 2);
    }
}
字典總結
大多數程式設計師的第一選擇都是散列表,然後才是紅黑樹。