集合和映射

集合

集合(set)作为存储数据容器时:

  • 它不允许存储相同元素,只能保留一份。
  • 能快速帮助我们进行去重操作,过滤掉重复元素。

典型应用

词汇量统计

统计一篇英文文章的总单词数,使用集合进行去重,判断英文文章难度。

创建集合接口

新建Iset接口

namespace AlgorithmTest11_集合ISet
{
interface Iset<T>
{
/// <summary>
/// 判空
/// </summary>
bool IsEmpty { get; }

/// <summary>
/// 元素个数
/// </summary>
int Count { get; }

/// <summary>
/// 添加
/// </summary>
/// <param name="t"></param>
void Add(T t);

/// <summary>
/// 删除
/// </summary>
/// <param name="t"></param>
void Remove(T t);

/// <summary>
/// 查询是否有某个元素
/// </summary>
/// <param name="e"></param>
/// <returns></returns>
bool Contains(T e);
}
}

实现集合接口

使用无序链表LinkedList1Set实现集合接口

using AlgorithmTest08_链表;

namespace AlgorithmTest11_集合ISet
{
/// <summary>
/// 链表实现集合
/// </summary>
/// <typeparam name="T"></typeparam>
public class LinkedList1Set<T> : Iset<T>
{
/// <summary>
/// 引用08链表的LinkedList1
/// </summary>
private LinkedList1<T> list;
public LinkedList1Set()
{
list = new LinkedList1<T>();
}

public bool IsEmpty { get { return list.IsEmpty; } }

public int Count { get { return list.Count; } }

public void Add(T t)
{
//如果不包含该元素
if (!list.Contains(t))
{
list.AddFirst(t);//头部添加
}
}

public bool Contains(T t)
{//查询是否有某个元素
return list.Contains(t);
}

public void Remove(T t)
{//根据值删除
list.Remove(t);
}
}
}

创建工具类

创建TestHelper,用来将文件里文本存储到列表

using System;
using System.Collections.Generic;
using System.IO;

namespace AlgorithmTest11_集合ISet
{
class TestHelper
{
//读取名为filename的文件,并将他分词,分词后存入list
public static List<string> ReadFile(string filename)
{
//使用FileStream类打开filename文件
FileStream fs = new FileStream(filename, FileMode.Open);
//使用streamreader读取filename文件信息
StreamReader sr = new StreamReader(fs);
//将读取的单词存入动态数组words中
List<string> words = new List<string>();
//分词操作,简陋的分词方式
//只要单词拼写不一样,就认为是不同单词,不考虑词性时态
while (!sr.EndOfStream)//如果没有督导filename末尾,就继续
{
//去除头部尾部空格,存在contents
string contents = sr.ReadLine().Trim();
//寻找contents第一个字母的位置
int start = FirstCharacterIndex(contents, 0);
//开始分词逻辑,将一个个的单词存储在数组words中
for (int i = start + 1; i < contents.Length;)
{
if (i == contents.Length || !Char.IsLetter(contents[i]))
{
string word = contents.Substring(start, i - start).ToLower();
words.Add(word);
start = FirstCharacterIndex(contents, i);
i = start + 1;
}
else
i++;
}
}
//关闭流对象释放资源
fs.Close();
sr.Close();
return words;
}
//寻找字符串s中,从start的位置开始的第一个字母字符的位置.
private static int FirstCharacterIndex(string s, int start)
{
for (int i = start; i < s.Length; i++)
{
if (Char.IsLetter(s[i]))
{
return i;//返回位置
}
}
return s.Length;
}
}
}

添加文档,修改文档属性

image-20221112135150016

image-20221112135127159

测试结果

using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace AlgorithmTest11_集合ISet
{
class Program
{
static void Main(string[] args)
{
#region 集合
Stopwatch t = new Stopwatch();
Console.WriteLine("双城记");
List<string> words = TestHelper.ReadFile("txt/A Tale Of Two Cities.txt");
Console.WriteLine("总单词数:" + words.Count);
//去重
LinkedList1Set<string> linkedset = new LinkedList1Set<string>();
t.Start();
foreach (var item in words)
{
linkedset.Add(item);
}
t.Stop();

Console.WriteLine("去重后:" + linkedset.Count);
Console.WriteLine("执行时长," + t.ElapsedMilliseconds + "ms");

#endregion
Console.ReadKey();
}
}
}

image-20221112142519811

用时接近六秒,可以看出顺序查找非常耗时

映射/字典

定义接口

创建IDictionary<Key, Value>,定义接口

namespace AlgorithmTest12_映射or字典IDictionary
{
interface IDictionary<Key, Value>
{
/// <summary>
/// 添加
/// </summary>
/// <param name="key"></param>
/// <param name="value"></param>
void Add(Key key, Value value);
/// <summary>
/// 通过键删除
/// </summary>
/// <param name="key"></param>
void Remove(Key key);
/// <summary>
/// 查看是否有这个键
/// </summary>
/// <param name="key"></param>
/// <returns></returns>
bool ContainsKey(Key key);
/// <summary>
/// 通过键获取值
/// </summary>
/// <param name="key"></param>
/// <returns></returns>
Value Get(Key key);
/// <summary>
/// 修改键对应的值
/// </summary>
/// <param name="key"></param>
/// <param name="newValue"></param>
void Set(Key key, Value newValue);
/// <summary>
/// 元素个数
/// </summary>
int Count { get; }
/// <summary>
/// 判空
/// </summary>
bool IsEmpty { get; }
}
}

实现接口功能

新建字典专用链表

新建带两个泛型参数的链表

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AlgorithmTest12_映射or字典IDictionary
{
class LinkedList3<Key, Value>
{
private class Node
{
public Key key;
public Value value;
public Node next;

public Node(Key key, Value value, Node next)
{
this.key = key;
this.value = value;
this.next = next;
}
public override string ToString()
{
return key.ToString() + ":" + value.ToString();
}
}
private Node head;
private int N;
public LinkedList3()
{
head = null;
N = 0;
}
public int Count { get { return N; } }
public bool IsEmpty { get { return N == 0; } }

//返回key所在节点
private Node GetNode(Key key)
{
Node cur = head;//从头查找
while (cur != null)
{
//如果键在链表中
if (cur.key.Equals(key))
{
return cur;
}
cur = cur.next;
}
return null;
}
/// <summary>
/// 添加
/// </summary>
/// <param name="key"></param>
/// <param name="value"></param>
public void Add(Key key, Value value)
{
Node node = GetNode(key);
if (node == null)
{//如果为空,则新增加节点
head = new Node(key, value, head);
N++;
}
else
{//如果不为空则覆盖内容
node.value = value;
}
}
/// <summary>
/// 查询key是否在链表中
/// </summary>
/// <param name="key"></param>
/// <returns></returns>
public bool Contains(Key key)
{
//不为空说明能找到对应值
return GetNode(key) != null;
}
/// <summary>
/// 通过key获取值
/// </summary>
/// <param name="key"></param>
/// <returns></returns>
public Value Get(Key key)
{
Node node = GetNode(key);
if (node == null)
{
throw new ArgumentException("键" + key + "不存在");
}
else
{
return node.value;
}
}
/// <summary>
/// 修改键对应的值
/// </summary>
/// <param name="key"></param>
/// <param name="newValue"></param>
public void Set(Key key, Value newValue)
{
Node node = GetNode(key);
if (node == null)
{
throw new ArgumentException("键" + key + "不存在");
}
else
{
node.value = newValue;
}
}
/// <summary>
/// 根据值删除
/// </summary>
/// <param name="key"></param>
public void Remove(Key key)
{
if (head == null)
{//如果链表为空则弹出
return;
}
if (head.key.Equals(key))
{//删除的值为头部
head = head.next;
N--;
}
else
{
Node cur = head;
Node pre = null;
while (cur != null)
{
if (cur.t.Equals(key))
{
break;
}
pre = cur;//第一轮pre移动到head,第二轮pre右移
cur = cur.next;//第一轮cur右移,第二轮cur右移
//直到查询到相同内容,弹出.或者cur为null(查到底了)
}
if (cur != null)
{//删除操作,跳过cur
pre.next = pre.next.next;
N--;
}
}
}
}
}

实现字典类

新建字典类,通过链表实现字典功能

namespace AlgorithmTest12_映射or字典IDictionary
{
class LinkedList3Dictionary<Key, Value> : IDictionary<Key, Value>
{
private LinkedList3<Key, Value> list;

public LinkedList3Dictionary()
{
list = new LinkedList3<Key, Value>();
}

public int Count { get { return list.Count; } }

public bool IsEmpty { get { return list.IsEmpty; } }

public void Add(Key key, Value value)
{
list.Add(key, value);
}

public bool ContainsKey(Key key)
{
return list.Contains(key);
}

public Value Get(Key key)
{
return list.Get(key);
}

public void Remove(Key key)
{
list.Remove(key);
}

public void Set(Key key, Value newValue)
{
list.Set(key, newValue);
}
}
}

运行调试

using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace AlgorithmTest12_映射or字典IDictionary
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("字典双城记");
List<string> words = TestHelper.ReadFile("txt/A Tale Of Two Cities.txt");
Console.WriteLine("总单词数:" + words.Count);
Stopwatch t = new Stopwatch();
//key为单词,值为频率
LinkedList3Dictionary<string, int> dic = new LinkedList3Dictionary<string, int>();
t.Start();
foreach (var key in words)
{
//key不在字典中 说明是第一次遇到此单词
if (!dic.ContainsKey(key))
{
dic.Add(key, 1);//频率为1
}
//否则让它加一
else
{
//修改dic对应key的值,自增1
dic.Set(key, dic.Get(key) + 1);
}
}
t.Stop();
Console.WriteLine("不同的单词总数: " + dic.Count);
Console.WriteLine("city出现次数: " + dic.Get("city"));
Console.WriteLine("运行时长: " + t.ElapsedMilliseconds + "ms");
Console.ReadKey();
}
}
}

image-20221112183711742

总结

可以看出查找用时非常久,这是因为Set和Get每次都调用GetNode循环遍历节点,复杂度n。