C++: Thuật toán đồ thị với C++

Khóa học qua video:

Lời khuyên: hãy copy code để chạy, sau khi chạy được rồi hãy phân tích từng đoạn code để hiểu bản chất vấn đề.

Một số biến quan trọng được dùng cho nhiều giải thuật:

1. Danh sách cạnh kề:

std::vector<std::vector<int>> adj(MAX_VERTEX);

Ở đây nếu chúng ta có adj[u][v] nghĩa là có một cạnh từ đỉnh u sang đỉnh v.

2. Thêm cạnh vào đồ thị:

- Vô hướng (undirected):

adj[u].push_back(v);
adj[v].push_back(u);

- Có hướng (directed):

adj[u].push_back(v);

3. Đánh dấu đỉnh đã duyệt:

bool seen[MAX_VERTEX];

Nếu seen[u] == true thì đỉnh u đã được duyệt qua.

4. Ma trận chi phí cho các bài toán tìm đường đi ngắn nhất:

const int oo = std::numeric_limits<int>::max();
std::vector<std::vector<int>> cost;

Ví dụ cost[u][v] = 10, có nghĩa là chi phí cho cạnh đi từ u sang v tốn 10 unit.

Thuật toán Breadth First Search - Graph Traversal

Mở đầu sẽ là thuật toán tìm kiếm theo chiều rộng dựa trên sự vận hành của queue (FIFO).

Lý thuyết

http://en.wikipedia.org/wiki/Breadth-first_search

Cài đặt bằng C++

#include <iostream>
#include <queue>
#include <vector>

const int MAX_VERTEX = 100;
bool seen[MAX_VERTEX];
std::vector<std::vector<int>> adj(MAX_VERTEX);

void visit(int u) {
  std::cout << u << "\n";
}

void bfs(int s) {
  std::queue<int> q;
  q.push(s);
  seen[s] = true;
  while (!q.empty()) {
    int u = q.front();
    visit(u);
    q.pop();
    for (auto v : adj[u]) {
      if (!seen[v]) {
        seen[v] = true;
        q.push(v);
      }
    }
  }
}

/**
 * For undirected graph
 */
void add_edge(int u, int v) {
  adj[u].push_back(v);
  adj[v].push_back(u);
}

int main() {
  add_edge(0, 1);
  add_edge(0, 2);
  add_edge(0, 3);
  add_edge(1, 4);
  add_edge(1, 5);
  add_edge(2, 3);
  bfs(0);
  return 0;
}

Ứng dụng

1. Tìm đường đi ngắn nhất (số cạnh) cho từng cặp đỉnh:

#include<iostream>
#include<queue>
#include<vector>
#include<limits>

const int MAX_VERTEX = 100;
const int oo = std::numeric_limits<int>::max();
bool seen[MAX_VERTEX];
std::vector<std::vector<int>> adj(MAX_VERTEX);
std::vector<int> distance(MAX_VERTEX, oo);

void bfs(int s) {
  std::queue<int> q;
  q.push(s);
  seen[s] = true;
  distance[s] = 0;
  while (!q.empty()) {
    int u = q.front();
    q.pop();
    for (auto v : adj[u]) {
      if (!seen[v]) {
        // recompute distance to v
        distance[v] = distance[u] + 1;
        seen[v] = true;
        q.push(v);
      }
    }
  }
}

/**
 * For undirected graph
 */
void add_edge(int u, int v) {
  adj[u].push_back(v);
  adj[v].push_back(u);
}

void shortest_distance() {
  add_edge(0, 1);
  add_edge(0, 2);
  add_edge(0, 3);
  add_edge(1, 4);
  add_edge(4, 5);
  bfs(0);
  for (int v = 0; v < 6; ++v) {
    std::cout << "shortest distance from 0->" << v << ": " << distance[v] << std::endl;
  }
}

int main() {
  shortest_distance();
  return 0;
}

2. Check bipartite graph (2-coloring):

#include<iostream>
#include<queue>
#include<vector>
#include<limits>

enum class color {
  NONE, BLACK, WHITE
};

const int MAX_VERTEX = 100;
const int oo = std::numeric_limits<int>::max();
std::vector<std::vector<int>> adj(MAX_VERTEX);
std::vector<color> colors(MAX_VERTEX, color::NONE);
bool is_bipartite = true;

color other(const color &c) {
  if (c == color::NONE) {
    return c;
  }
  return (c == color::BLACK) ? color::WHITE : color::BLACK;
}

void bfs(int s) {
  std::queue<int> q;
  q.push(s);
  colors[s] = color::WHITE;
  while (!q.empty()) {
    int u = q.front();
    q.pop();
    for (auto v : adj[u]) {
      if (colors[v] == color::NONE) {
        colors[v] = other(colors[u]);
        q.push(v);
      } else if (colors[v] == colors[u]) {
        is_bipartite = false;
        break;
      }
    }
  }
}

void add_edge(int u, int v) {
  adj[u].push_back(v);
  adj[v].push_back(u);
}

void check_bipartite() {
  add_edge(0, 3);
  add_edge(0, 4);
  add_edge(0, 5);
  add_edge(0, 6);

  // add this edge will break bipartite property
  // add_edge(0, 1);

  add_edge(1, 3);
  add_edge(1, 4);
  add_edge(1, 5);
  add_edge(1, 6);

  add_edge(2, 3);
  add_edge(2, 4);
  add_edge(2, 5);
  add_edge(2, 6);
  bfs(0);
  if (is_bipartite) {
    std::cout << "G is bipartite." << std::endl;
  } else {
    std::cout << "G is non-bipartite." << std::endl;
  }
}

int main() {
  check_bipartite();
  return 0;
}

Thuật toánDepth First Search - Graph Traversal

Thuật toán kế tiếp là thuật toán tìm kiếm theo chiều sâu (DFS), nó cũng là một thuật toán dùng để viếng thăm các đỉnh của đồ thị. Điều khác biệt rõ ràng nhất với BFS là DFS hoạt động dựa trên sự vận hành của stack (FILO) thay vì queue (FIFO). Vì bản chất của DFS là dùng lời gọi đệ qui để gọi hàm, cho nên mình sẽ implement DFS dùng đệ qui thay vì dùng stack.

Lý thuyết

http://en.wikipedia.org/wiki/Depth-first_search

Cài đặt bằng C++

#include <iostream>
#include <vector>

const int MAX_VERTEX = 100;
bool seen[MAX_VERTEX];
std::vector<std::vector<int>> adj(MAX_VERTEX);

void visit(int u) {
  std::cout << u << "\n";
}

void dfs(int u) {
  visit(u);
  seen[u] = true;
  for (auto v : adj[u]) {
    if (!seen[v]) {
      dfs(v);
    }
  }
}

/**
 * For undirected graph
 */
void add_edge(int u, int v) {
  adj[u].push_back(v);
  adj[v].push_back(u);
}

int main() {
  add_edge(0, 1);
  add_edge(0, 2);
  add_edge(0, 3);
  add_edge(1, 4);
  add_edge(1, 5);
  add_edge(2, 3);
  dfs(0);
  return 0;
}

Ứng dụng

1. Tìm thành phần liên thông của đồ thị (connected components):

#include <iostream>
#include <vector>

const int MAX_VERTEX = 100;
bool seen[MAX_VERTEX];
std::vector<std::vector<int>> adj(MAX_VERTEX);

void dfs(int u) {
  seen[u] = true;
  for (auto v : adj[u]) {
    if (!seen[v]) {
      dfs(v);
    }
  }
}

int count_connected_components(int no_vertex) {
  int cnt = 0;
  // for all vertex that has not been visited
  for (int v = 0; v < no_vertex; ++v) {
    if (!seen[v]) {
      // increase the number of components
      cnt++;
      // visit
      dfs(v);
    }
  }
  return cnt;
}

void add_edge(int u, int v) {
  adj[u].push_back(v);
  adj[v].push_back(u);
}

int main() {
  add_edge(0, 1);
  add_edge(0, 2);
  add_edge(3, 4);
  add_edge(4, 5);
  std::cout << count_connected_components(6) << std::endl;
  return 0;
}

2. Toposort

#include <iostream>
#include <vector>

const int MAX_VERTEX = 100;
bool seen[MAX_VERTEX];
std::vector<std::vector<int>> adj(MAX_VERTEX);
std::vector<int> topo_order;

void dfs(int u) {
  seen[u] = true;
  for (auto v : adj[u]) {
    if (!seen[v]) {
      dfs(v);
    }
  }
  // keep track the order of visited
  topo_order.push_back(u);
}

void add_edge(int u, int v) {
  adj[u].push_back(v);
  adj[v].push_back(u);
}

void toposort(int no_vertex) {
  for (int v = 0; v < no_vertex; ++v) {
    if (!seen[v]) {
      dfs(v);
    }
  }
  // print out topo order
  for (auto v : topo_order) {
    std::cout << v << " ";
  }
}

int main() {
  add_edge(0, 1);
  add_edge(0, 2);
  add_edge(3, 4);
  add_edge(4, 5);
  toposort(6);
  return 0;
}

3. Cycle detection:

#include <iostream>
#include <vector>
#include <algorithm>

enum class color {
  WHITE, GRAY, BLACK
};

const int MAX_VERTEX = 100;
std::vector<std::vector<int>> adj(MAX_VERTEX);
std::vector<color> colors(MAX_VERTEX);
bool is_cyclic = false;

void dfs(int u) {
  // start exploring
  colors[u] = color::GRAY;
  for (auto v : adj[u]) {
    if (colors[v] == color::WHITE) {
      dfs(v);
    } else {
    // check if back-edge exists
      if (colors[v] == color::BLACK && colors[u] == color::GRAY) {
        is_cyclic = true;
        return;
      }
    }
  }
  // finish exploring
  colors[u] = color::BLACK;
}

void add_edge(int u, int v) {
  adj[u].push_back(v);
  adj[v].push_back(u);
}

void check_cycle(int no_vertex) {
  // initially, all vertex are not visited
  std::fill(colors.begin(), colors.end(), color::WHITE);
  is_cyclic = false;
  for (int v = 0; v < no_vertex; ++v) {
    if (colors[v] == color::WHITE) {
      dfs(v);
    }
  }
  if (is_cyclic) {
    std::cout << "There is a cycle in this graph." << std::endl;
  } else {
    std::cout << "No cycle found." << std::endl;
  }
}

void test_cycle() {
  add_edge(0, 1);
  add_edge(1, 2);
  add_edge(2, 0);
  add_edge(4, 5);
  check_cycle(6);
}

void test_non_cycle() {
  add_edge(0, 1);
  add_edge(1, 2);
  add_edge(4, 5);
  check_cycle(6);
}

int main() {
  test_non_cycle();
  // test_cycle();
  return 0;
}

Thuật toán Roy Warshall's Algorithm - Shortest Path Problem

Thuật toán tiếp theo có tên là Floyd Warshall dùng để tìm đường với chi phi thấp nhất từ 2 cặp đỉnh (u, v). Running time cho giải thuật này là O(n^3) khá chậm so với Dijsktra, tuy nhiên nếu chúng ta cần query nhiều lần với những cặp đỉnh khác nhau thì việc precompute trước cho toàn bộ các cặp đỉnh thì thuật toán này sẽ nhanh hơn Dijkstra rất nhiều.

Giả sử chúng ta đồ thị với V đỉnh, E cạnh, và M query thì:

- Floyd Warshall sẽ có running time O(V^3) + M

- Dijsktra Heap: M * O(E * log(V))

Lý thuyết

http://en.wikipedia.org/wiki/Floyd%E...hall_algorithm

Cài đặt bằng C++

#include <iostream>
#include <vector>
#include <limits>

const int oo = std::numeric_limits<int>::max();
std::vector<std::vector<int>> cost;

void floyd_warshall(int no_vertex) {
  for (int bridge = 0; bridge < no_vertex; ++bridge) {
    for (int u = 0; u < no_vertex; ++u) {
      for (int v = 0; v < no_vertex; ++v) {
        cost[u][v] = std::min(cost[u][v], cost[u][bridge] + cost[bridge][v]);
      }
    }
  }
}

std::vector<std::vector<int>> generate_sample_cost() {
  return std::vector<std::vector<int>> {
    std::vector<int> {0, 2, 1, 3},
    std::vector<int> {1, 0, 4, 5},
    std::vector<int> {3, 1, 0, 3},
    std::vector<int> {1, 1, 1, 0},
  };
}

void query(int u, int v) {
  std::cout << "shortest path from " << u << "->" << v << ": " << cost[u][v] << "\n";
}

int main() {
  cost = generate_sample_cost();
  floyd_warshall(cost.size());
  query(0, 2);
  query(1, 2);
  return 0;
}

Thuật toán Prim's Algorithm - Minimum Spanning Tree

Thuật toán tiếp theo là thuật toán Prim, dùng để giải bài toán tìm cây khung nhỏ nhất trong đồ thị vô hướng. Thuật toán Prim là một dạng của thuật toán "tham lam" (greedy) vì nó luôn chọn cạnh với chi phí thấp nhất nhờ vào sự trợ giúp của min-heap. Về phần lý thuyết, các bạn có thể tham khảo những link sau.

Lý thuyết

http://en.wikipedia.org/wiki/Prim's_algorithm

http://en.wikipedia.org/wiki/Minimum_spanning_tree

Cài đặt bằng C++

#include <algorithm>
#include <iostream>
#include <queue>
#include <vector>
#include <utility>

struct edge {
  int v;
  int cost;

  edge(int v, int cost)
    :v(v), cost(cost) {
  }

  // sort by cost first then vertex
  bool operator >(const edge &e) const {
    return (cost != e.cost ? cost > e.cost : v > e.v);
  }
};

// alias for min priority queue
typedef std::priority_queue<edge, std::vector<edge>, std::greater<edge>> min_heap;
// infinity
const int oo = std::numeric_limits<int>::max();
// maximum # of vertex
const int MAX_VERTEX = 100;
// mark array
bool seen[MAX_VERTEX];
// adjacency list
std::vector<std::vector<int>> adj(MAX_VERTEX);
// cost matrix, initially all cost are infinity
std::vector<std::vector<int>> cost(MAX_VERTEX, std::vector<int>(MAX_VERTEX, oo));
// minimum cost heap
min_heap pq;

/**
 * For all vertex that are adjacent to u,
 * if it's not visited, we add them to
 * our queue with its cost
 */
void relax(int u) {
  seen[u] = true;
  for (auto v : adj[u]) {
    if (!seen[v]) {
      pq.push(edge(v, cost[u][v]));
    }
  }
}

/**
 * Prim algorithm for finding the minimum
 * spanning tree of undirected graph
 */
int prim(int s) {
  int min_cost = 0;
  relax(s);
  while (!pq.empty()) {
    // get the minimum cost edge
    edge e = pq.top();
    pq.pop();
    // if we have seen v
    // visit v, and update new cost
    if (!seen[e.v]) {
      min_cost += e.cost;
      relax(e.v);
    }
  }
  return min_cost;
}

void add_edge(int u, int v, int c) {
  adj[u].push_back(v);
  adj[v].push_back(u);
  cost[u][v] = c;
  cost[v][u] = c;
}

void test_minimum_spanning_tree() {
  add_edge(0, 1, 3);
  add_edge(0, 2, 3);
  add_edge(0, 3, 1);
  add_edge(0, 4, 2);
  add_edge(1, 2, 2);
  add_edge(2, 3, 11);
  add_edge(3, 4, 9);
  std::cout << "min cost: " << prim(0) << std::endl;
}

int main() {
  test_minimum_spanning_tree();
  return 0;
}

Thuật toán Kruskal's Algorithm - Minimum Spanning Tree

Thuật toán này cũng là một dạng thuật toán tham lam dùng để giải bài toán cây khung nhỏ nhất (giống như Prim). Điểm khác biệt giữa Kruskal và Prim là Kruskal hoạt động dựa trên cấu trúc dữ liệu có tên là "Disjoint Set".

Lý thuyết

http://en.wikipedia.org/wiki/Kruskal's_algorithm

http://en.wikipedia.org/wiki/Disjoin...data_structure

Cài đặt bằng C++

#include <algorithm>
#include <iostream>
#include <vector>
#include <utility>

const int MAX_SIZE = 100;

class disjoint_set {
  private:
  int components;
  int id[MAX_SIZE];
  int sizeof_ids[MAX_SIZE];
  int n;

  public:
  disjoint_set(int n)
        :components(n), n(n) {
    for (int i = 0; i < components; ++i) {
      id[i] = i;
      sizeof_ids[i] = 1;
    }
  }

  void join(int p, int q) {
    int i = find(p);
    int j = find(q);
    if (i == j) {
      return;
    }
    if (sizeof_ids[i] < sizeof_ids[j]) {
      id[i] = j;
      sizeof_ids[j] += sizeof_ids[i];
      sizeof_ids[i] = 1;
    } else {
      id[j] = i;
      sizeof_ids[i] += sizeof_ids[j];
      sizeof_ids[j] = 1;
    }
    components--;
  }

  int find(int p) {
    if (p != id[p]) {
      id[p] = find(id[p]);
    }
    return id[p];
  }

  bool is_connected(int p, int q) {
    return find(p) == find(q);
  }

  int size() const {
    return components;
  }
};

struct edge {
  int u;
  int v;
  int cost;

  edge(int u, int v, int cost)
    :u(u), v(v), cost(cost) {
  }

  // sort by cost
  bool operator <(const edge &e) const {
    return cost < e.cost;
  }
};

std::vector<edge> edges;

int kruskal(int no_vertex) {
  int min_cost = 0;
  std::sort(edges.begin(), edges.end());
  disjoint_set ds(no_vertex);
  for (auto e : edges) {
    if (!ds.is_connected(e.u, e.v)) {
      min_cost += e.cost;
      ds.join(e.u, e.v);
    }
  }
  return min_cost;
}

void test_minimum_spanning_tree() {
  edges.push_back(edge(0, 1, 3));
  edges.push_back(edge(0, 2, 3));
  edges.push_back(edge(0, 3, 1));
  edges.push_back(edge(0, 4, 2));
  edges.push_back(edge(1, 2, 2));
  edges.push_back(edge(2, 3, 11));
  edges.push_back(edge(3, 4, 9));
  std::cout << "min cost: " << kruskal(5) << std::endl;
}

int main() {
  test_minimum_spanning_tree();
  return 0;
}

Thuật toán Dijkstra's Algorithm - Shortest Path Problem

Tiếp theo là giải thuật Dijkstra dùng để giải bài toán tìm đường đi ngắn nhất giữa các đỉnh trên đồ thị có trọng số. Cho đồ thị G = (V, E) thì Dijkstra có running time là O(E log(V)).

Lý thuyết

http://en.wikipedia.org/wiki/Dijkstra's_algorithm

Cài đặt bằng C++

#include <iostream>
#include <vector>
#include <limits>

#include <iostream>
#include <queue>
#include <vector>
#include <limits>
#include <queue>

// priority queue
typedef std::priority_queue<std::pair<int, int>, std::vector<std::pair<int, int> >, std::greater<std::pair<int, int>> > min_heap;
// cost and vertex
typedef std::pair<int, int> vertex;

const int oo = std::numeric_limits<int>::max();
const int MAX_VERTEX = 100;
std::vector<std::vector<int>> adj(MAX_VERTEX);
std::vector<std::vector<int>> cost(MAX_VERTEX, std::vector<int>(MAX_VERTEX, oo));
std::vector<int> dist(MAX_VERTEX, oo);

void dijkstra(int s) {
  min_heap pq;
  dist[s] = 0;
  pq.push(vertex(0, s));
  while (!pq.empty()) {
    int u = pq.top().second;
    int cost_to_u = pq.top().first;
    pq.pop();
    if (dist[u] == cost_to_u) {
      for (auto v : adj[u]) {
        if (dist[v] > dist[u] + cost[u][v]) {
          dist[v] = dist[u] + cost[u][v];
          pq.push(vertex(dist[v], v));
        }
      }
    }
  }
}

void add_edge(int u, int v, int c) {
  adj[u].push_back(v);
  cost[u][v] = c;
}

int main() {
  add_edge(0, 1, 13);
  add_edge(1, 2, 8);
  add_edge(2, 4, 1);
  add_edge(0, 2, 18);
  add_edge(0, 3, 20);
  add_edge(0, 4, 23);
  dijkstra(0);
  for (int v = 0; v <= 4; ++v) {
    std::cout << "shortest path from 0->" << v << ": " << dist[v] << "\n";
  }
  return 0;
}