非負整数値を扱う Trie について

はじめに

非負整数を二分木のトライ木で管理するアレに関する日本語記事があんまり無いっぽいので雑にメモ. （というかそもそも専用の呼び名ないのかな？）

とりあえずここでは Binary Trie って呼んどきます.

Binary Trie とは

整数をビット列とみなしてトライ木っぽく持つ set 的なことができるデータ構造です.
正確には要素の重複を許す multiset っぽく実装することが多そう.

整数集合を管理できますが, 平衡二分木よりも実装が楽なので最高です.

こんな感じ
f:id:kazuma8128:20180506004833p:plain

ノードに書いてある数字は部分木に含まれる要素の個数です.

できること

ビット長を B とすると, 以下の操作が全て O(B) でできます.

insert(x) : 値 x を集合に（一つ）追加
erase(x) : 値 x を集合から（一つ）削除
max_element/min_element() : 集合内の最大/最小値を取得
lower_bound/upper_bound(x) : 集合内で値 x 以上の/より大きい最小の要素の番号を取得. ここでの番号とは小さい方から何番目かということ.

ここまではただの multiset でもできますが, 次の操作が重要.

kth_element(k) : k 番目に小さい要素を取得
max_element/min_element(x) : 集合内の値の中で, 値 x と XOR したときに最大/最小になる値の取得
kth_element(k, x) : 集合内の値の中で, 値 x で XOR したときの k 番目に小さい値の取得

これも O(B) でできます.

さらに遅延評価を行うと以下の操作が O(1) で可能になります

xor_all(x) : 全要素を値 x で XOR した値に変更

これと lower_bound とかを組み合わせることがよくあります.
（追記：よく考えたら遅延評価せずに XOR したときの lower_bound とかもできそうなので, 遅延評価は実装を楽にするため以外の価値がないかも？）

あと木構造なので, 当然永続化も簡単です.
詳しくはこっちを参照.

kazuma8128.hatenablog.com

方法

最低限のノードに持つ値は, 部分木に含まれる要素の個数, 左の子のポインタ, 右の子のポインタの三つ.

見方を変えると, 各値ごとにカウントを持たせる動的な Segment Tree という風にも言えるかもしれません.

insert, erase ではノードを作成, 削除したり個数を±1したりするだけです.
max_element/min_element では個数が 0 でない部分木に右/左ノード優先で潜っていきます.
kth_element では, 二分探索木と同じ要領で, 左の子の部分木の個数と k を比較しながら根から降りていけば求まります (k が左の個数より小さければ左へ, そうでなければ k から左の個数を引いて右へ).

XOR した優先順位での操作系では各深さに対応するビットが立っていれば左右の優先順位を反転させて同じことをやればよいです.

遅延評価するときはさらに, 部分木に対して XOR したい値を持ちます.
あとは各操作で子ノードに降りる前に伝搬していく.
ついでにその深さのビットが立っていれば左右の子ノードを swap.

よく分からなければ実装を見た方が早いかも.

実装例

通常版
procon-lib/binary_trie.cpp at master · kazuma8128/procon-lib · GitHub

/// Multiset of non-negative integers stored as a binary trie.
///
/// Every value is a root-to-leaf path of length B (most significant of the
/// B low-order bits first); each node stores how many values lie in its
/// subtree. insert / erase / queries walk one such path, i.e. O(B) each.
///
/// @tparam U unsigned integer type of the stored values.
/// @tparam B number of low-order bits that are significant; bits at
///           positions >= B are ignored. Values are shifted by up to B - 1,
///           so B must not exceed the bit width of U.
template<typename U = unsigned, int B = 32>
class binary_trie {
    struct node {
        int cnt;      // number of stored values inside this subtree
        node *ch[2];  // ch[0] / ch[1]: subtree whose next bit is 0 / 1
        node() : cnt(0), ch{ nullptr, nullptr } {}
    };

    // Bit `b` of `v`, usable directly as an index into node::ch.
    static bool bit(U v, int b) {
        return ((v >> b) & static_cast<U>(1)) != 0;
    }

    // Frees `t` and everything below it.
    static void destroy(node* t) {
        if (!t) return;
        destroy(t->ch[0]);
        destroy(t->ch[1]);
        delete t;
    }

    // Deep-copies the subtree rooted at `t`.
    static node* clone(const node* t) {
        if (!t) return nullptr;
        node* c = new node;
        c->cnt = t->cnt;
        try {
            c->ch[0] = clone(t->ch[0]);
            c->ch[1] = clone(t->ch[1]);
        } catch (...) {
            destroy(c);  // do not leak the partially built copy
            throw;
        }
        return c;
    }

    // Adds one occurrence of `val` below `t` (creating nodes on demand) and
    // returns the possibly new subtree root.
    node* add(node* t, U val, int b = B - 1) {
        if (!t) t = new node;
        t->cnt += 1;
        if (b >= 0) {
            const bool f = bit(val, b);
            t->ch[f] = add(t->ch[f], val, b - 1);
        }
        return t;
    }

    // Removes one occurrence of `val` below `t`; the caller guarantees that
    // `val` is present. Returns the new subtree root (nullptr once empty).
    node* sub(node* t, U val, int b = B - 1) {
        assert(t);
        t->cnt -= 1;
        if (t->cnt == 0) {
            // The subtree held only the removed value: free the whole chain
            // instead of merely unlinking it.
            destroy(t);
            return nullptr;
        }
        if (b >= 0) {
            const bool f = bit(val, b);
            t->ch[f] = sub(t->ch[f], val, b - 1);
        }
        return t;
    }

    // Returns the stored value v that minimises (v XOR val): at every level
    // follow the child matching val's bit when it exists, else the other one.
    U get_min(const node* t, U val) const {
        U res = 0;
        for (int b = B - 1; b >= 0; --b) {
            assert(t);
            bool f = bit(val, b);
            if (!t->ch[f]) f = !f;
            if (f) res |= static_cast<U>(1) << b;
            t = t->ch[f];
        }
        assert(t);
        return res;
    }

    // Returns the k-th smallest stored value (0-indexed); requires
    // 0 <= k < t->cnt.
    U get(const node* t, int k) const {
        U res = 0;
        for (int b = B - 1; b >= 0; --b) {
            assert(t);
            const int m = t->ch[0] ? t->ch[0]->cnt : 0;
            if (k < m) {
                t = t->ch[0];
            } else {
                k -= m;
                res |= static_cast<U>(1) << b;
                t = t->ch[1];
            }
        }
        return res;
    }

    // Counts stored values v with v < val, or v <= val when `inclusive`.
    // Counting "<= val" directly avoids computing val + 1, which wraps
    // around when val is the largest B-bit value.
    int count_below(const node* t, U val, bool inclusive) const {
        int total = 0;
        for (int b = B - 1; t && b >= 0; --b) {
            const bool f = bit(val, b);
            if (f && t->ch[0]) total += t->ch[0]->cnt;
            t = t->ch[f];
        }
        if (t && inclusive) total += t->cnt;  // t is the leaf of val itself
        return total;
    }

    node *root;

public:
    binary_trie() : root(nullptr) {}
    ~binary_trie() { destroy(root); }

    // Copies are deep so that two tries never share (and double-free) nodes.
    binary_trie(const binary_trie& other) : root(clone(other.root)) {}
    binary_trie(binary_trie&& other) noexcept : root(other.root) {
        other.root = nullptr;
    }
    binary_trie& operator=(const binary_trie& other) {
        if (this != &other) {
            node* copy = clone(other.root);  // may throw; *this still intact
            destroy(root);
            root = copy;
        }
        return *this;
    }
    binary_trie& operator=(binary_trie&& other) noexcept {
        if (this != &other) {
            destroy(root);
            root = other.root;
            other.root = nullptr;
        }
        return *this;
    }

    /// Number of stored values, counting duplicates.
    int size() const {
        return root ? root->cnt : 0;
    }
    bool empty() const {
        return !root;
    }
    /// Adds one occurrence of `val`.
    void insert(U val) {
        root = add(root, val);
    }
    /// Removes one occurrence of `val`. `val` must be present: this asserts
    /// otherwise, and with assertions disabled the set is left untouched.
    void erase(U val) {
        // Checked up front: decrementing counts along the path of a missing
        // value would silently drop some other element.
        const bool present = count(val) > 0;
        assert(present);
        if (!present) return;
        root = sub(root, val);
    }
    /// Stored value maximising (value XOR bias); the set must be non-empty.
    U max_element(U bias = 0) const {
        return get_min(root, ~bias);
    }
    /// Stored value minimising (value XOR bias); the set must be non-empty.
    U min_element(U bias = 0) const {
        return get_min(root, bias);
    }
    /// Number of stored values < val, i.e. the rank of the first value >= val.
    int lower_bound(U val) const { // return id
        return count_below(root, val, false);
    }
    /// Number of stored values <= val, i.e. the rank of the first value > val.
    int upper_bound(U val) const { // return id
        return count_below(root, val, true);
    }
    /// k-th smallest stored value, 0-indexed.
    U operator[](int k) const {
        assert(0 <= k && k < size());
        return get(root, k);
    }
    /// Number of occurrences of `val`.
    int count(U val) const {
        const node* t = root;
        for (int i = B - 1; t && i >= 0; --i) {
            t = t->ch[bit(val, i)];
        }
        return t ? t->cnt : 0;
    }
};

遅延評価版
procon-lib/lazy_binary_trie.cpp at master · kazuma8128/procon-lib · GitHub

/// Multiset of non-negative integers stored as a binary trie, with a lazily
/// propagated "XOR every element" operation.
///
/// Every value is a root-to-leaf path of length B (most significant of the
/// B low-order bits first); each node stores how many values lie in its
/// subtree plus a pending XOR mask for that subtree. insert / erase /
/// queries walk one path (O(B)); xor_all only touches the root.
///
/// @tparam U unsigned integer type of the stored values.
/// @tparam B number of low-order bits that are significant; bits at
///           positions >= B are ignored. Values are shifted by up to B - 1,
///           so B must not exceed the bit width of U.
template<typename U = unsigned, int B = 32>
class lazy_binary_trie {
    struct node {
        int cnt;      // number of stored values inside this subtree
        U lazy;       // XOR mask still to be applied to this subtree
        node *ch[2];  // ch[0] / ch[1]: subtree whose next bit is 0 / 1
        node() : cnt(0), lazy(0), ch{ nullptr, nullptr } {}
    };

    // Bit `b` of `v`, usable directly as an index into node::ch.
    static bool bit(U v, int b) {
        return ((v >> b) & static_cast<U>(1)) != 0;
    }

    // Frees `t` and everything below it.
    static void destroy(node* t) {
        if (!t) return;
        destroy(t->ch[0]);
        destroy(t->ch[1]);
        delete t;
    }

    // Deep-copies the subtree rooted at `t`, pending masks included.
    static node* clone(const node* t) {
        if (!t) return nullptr;
        node* c = new node;
        c->cnt = t->cnt;
        c->lazy = t->lazy;
        try {
            c->ch[0] = clone(t->ch[0]);
            c->ch[1] = clone(t->ch[1]);
        } catch (...) {
            destroy(c);  // do not leak the partially built copy
            throw;
        }
        return c;
    }

    // Applies the pending mask of `t` (a node at bit level `b`): exchanges
    // the children when bit `b` of the mask is set, then hands the mask down.
    static void push(node* t, int b) {
        if (bit(t->lazy, b)) {
            // Manual exchange: unqualified swap() on raw pointers would only
            // resolve if a using-directive for std precedes this class.
            node* tmp = t->ch[0];
            t->ch[0] = t->ch[1];
            t->ch[1] = tmp;
        }
        if (t->ch[0]) t->ch[0]->lazy ^= t->lazy;
        if (t->ch[1]) t->ch[1]->lazy ^= t->lazy;
        t->lazy = 0;
    }

    // Adds one occurrence of `val` below `t` (creating nodes on demand) and
    // returns the possibly new subtree root.
    node* add(node* t, U val, int b = B - 1) {
        if (!t) t = new node;
        t->cnt += 1;
        if (b >= 0) {
            push(t, b);
            const bool f = bit(val, b);
            t->ch[f] = add(t->ch[f], val, b - 1);
        }
        return t;
    }

    // Removes one occurrence of `val` below `t`; the caller guarantees that
    // `val` is present. Returns the new subtree root (nullptr once empty).
    node* sub(node* t, U val, int b = B - 1) {
        assert(t);
        t->cnt -= 1;
        if (t->cnt == 0) {
            // The subtree held only the removed value: free the whole chain
            // instead of merely unlinking it.
            destroy(t);
            return nullptr;
        }
        if (b >= 0) {
            push(t, b);
            const bool f = bit(val, b);
            t->ch[f] = sub(t->ch[f], val, b - 1);
        }
        return t;
    }

    // Returns the stored value v that minimises (v XOR val): at every level
    // follow the child matching val's bit when it exists, else the other one.
    U get_min(node* t, U val) {
        U res = 0;
        for (int b = B - 1; b >= 0; --b) {
            assert(t);
            push(t, b);
            bool f = bit(val, b);
            if (!t->ch[f]) f = !f;
            if (f) res |= static_cast<U>(1) << b;
            t = t->ch[f];
        }
        assert(t);
        return res;
    }

    // Returns the k-th smallest stored value (0-indexed); requires
    // 0 <= k < t->cnt.
    U get(node* t, int k) {
        U res = 0;
        for (int b = B - 1; b >= 0; --b) {
            assert(t);
            push(t, b);
            const int m = t->ch[0] ? t->ch[0]->cnt : 0;
            if (k < m) {
                t = t->ch[0];
            } else {
                k -= m;
                res |= static_cast<U>(1) << b;
                t = t->ch[1];
            }
        }
        return res;
    }

    // Counts stored values v with v < val, or v <= val when `inclusive`.
    // Counting "<= val" directly avoids computing val + 1, which wraps
    // around when val is the largest B-bit value.
    int count_below(node* t, U val, bool inclusive) {
        int total = 0;
        for (int b = B - 1; t && b >= 0; --b) {
            push(t, b);
            const bool f = bit(val, b);
            if (f && t->ch[0]) total += t->ch[0]->cnt;
            t = t->ch[f];
        }
        if (t && inclusive) total += t->cnt;  // t is the leaf of val itself
        return total;
    }

    node *root;

public:
    lazy_binary_trie() : root(nullptr) {}
    ~lazy_binary_trie() { destroy(root); }

    // Copies are deep so that two tries never share (and double-free) nodes.
    lazy_binary_trie(const lazy_binary_trie& other) : root(clone(other.root)) {}
    lazy_binary_trie(lazy_binary_trie&& other) noexcept : root(other.root) {
        other.root = nullptr;
    }
    lazy_binary_trie& operator=(const lazy_binary_trie& other) {
        if (this != &other) {
            node* copy = clone(other.root);  // may throw; *this still intact
            destroy(root);
            root = copy;
        }
        return *this;
    }
    lazy_binary_trie& operator=(lazy_binary_trie&& other) noexcept {
        if (this != &other) {
            destroy(root);
            root = other.root;
            other.root = nullptr;
        }
        return *this;
    }

    /// Number of stored values, counting duplicates.
    int size() const {
        return root ? root->cnt : 0;
    }
    bool empty() const {
        return !root;
    }
    /// Adds one occurrence of `val`.
    void insert(U val) {
        root = add(root, val);
    }
    /// Removes one occurrence of `val`. `val` must be present: this asserts
    /// otherwise, and with assertions disabled the set is left untouched.
    void erase(U val) {
        // Checked up front: decrementing counts along the path of a missing
        // value would silently drop some other element.
        const bool present = count(val) > 0;
        assert(present);
        if (!present) return;
        root = sub(root, val);
    }
    /// Replaces every stored value v by (v XOR val); only records the mask
    /// at the root, the actual work is deferred to later traversals.
    void xor_all(U val) {
        if (root) root->lazy ^= val;
    }
    /// Stored value maximising (value XOR bias); the set must be non-empty.
    U max_element(U bias = 0) {
        return get_min(root, ~bias);
    }
    /// Stored value minimising (value XOR bias); the set must be non-empty.
    U min_element(U bias = 0) {
        return get_min(root, bias);
    }
    /// Number of stored values < val, i.e. the rank of the first value >= val.
    int lower_bound(U val) { // return id
        return count_below(root, val, false);
    }
    /// Number of stored values <= val, i.e. the rank of the first value > val.
    int upper_bound(U val) { // return id
        return count_below(root, val, true);
    }
    /// k-th smallest stored value, 0-indexed.
    U operator[](int k) {
        assert(0 <= k && k < size());
        return get(root, k);
    }
    /// Number of occurrences of `val`.
    int count(U val) {
        node* t = root;
        for (int i = B - 1; t && i >= 0; --i) {
            push(t, i);
            t = t->ch[bit(val, i)];
        }
        return t ? t->cnt : 0;
    }
};

存在しない要素を erase すると assert で落ちるので注意.

ノードをプールしてないのでちょっと遅いです. 定数倍で困ったらプールするとよさげ.

永続化したい場合はクラス化しない方が便利ですが, ここではクラス化してます.

たぶん大丈夫だと思いますがバグってたらごめんなさい.

練習問題

Binary Trie を使わなくても解ける問題もありますが, verifyに便利なので載せます.
白文字で方針も軽く書いてます（ネタバレ注意）

https://arc033.contest.atcoder.jp/tasks/arc033_3 (ARC033 C データ構造)
set 操作の verify 用
http://codeforces.com/contest/947/problem/C (Codeforces Round #470 C Perfect Security)
XOR での最小値を使う
http://www.spoj.com/problems/SUBXOR/ (SPOJ SubXor)
遅延で lower_bound
http://codeforces.com/contest/966/problem/C (Codeforces Round #477 C Big Secret)
遅延の upper_bound で殴れる
https://www.codechef.com/problems/GPD (CodeChef Gotham PD)
永続化
https://www.codechef.com/problems/PSHTTR (CodeChef Pishty and tree)
永続化して, 木のノードに部分木の総 XOR も持たせるとオンラインで各クエリO(B)で解ける. 想定解はたぶんオフラインでBIT
http://judge.u-aizu.ac.jp/onlinejudge/description.jsp?id=2270 (AOJ UTPC The L-th Number)
解説では永続 Segment Tree のイメージで説明されてますが, 永続 Binary Trie の問題とも見なせる

kazuma8128’s blog

競プロの面白い問題を解きます

非負整数値を扱う Trie について

はじめに

Binary Trie とは

できること

方法

実装例

練習問題