Chaos Slover 补档计划 - 后缀自动机的应用

2015.11.20 09:11 Fri| 79 visits oi_2016| ChaosSlover补档计划| Text

后缀自动机也就是这个样子了吧,暂时还有一道股市的预测没有去做,挖了一个坑。其他的题大概也不会更加麻烦,后缀自动机到这里就告一个段落吧!

BZOJ3277&3473 串

求一个目标串出现在多少模板串中,可以用后缀自动机+树状数组解决。参见 BZOJ2780。

这道题是求一个串有多少子串出现在至少 k 个模板串中。我们可以将问题转化为询问一个串的每一个后缀有多少前缀出现在至少 k 个模板串中。考虑单个字符串,能表示它的前缀的结点存在于后缀树中从它的匹配结点到根的路径上,其中每一个结点都能够表示一个长度区间上的前缀。

于是我们为后缀树上结点赋权值。如果这个结点出现在 k 个以上的模板串中,令它的权值是它的 pre 边能表示的长度,否则它的权值为 0。然后处理出从根到每个结点路径上的权值和。

最后对于每一个目标串,将所有表示它的后缀的点的路径上权值和都加起来,就是答案了。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#include <bits/stdc++.h>
using namespace std;

#define N 200005

struct node
{
    node *son[26], *pre;
    int len;
    vector<int> col;
    node(int _len = 0) : len(_len) {}
    void *operator new(size_t s);
} sam[N], *pos;

void *node::operator new(size_t s) { return pos++; }

node *root, *last;
char str[N];
int n, k, tot, head[N], cnt[N], p[N], ans[N], f[N], dist[N];
vector<int> col[N], query[N];
pair<int, int> id[N];

void reset()
{
    pos = sam;
    last = root = new node();
}

void insert(int id, int c)
{
    node *p = last;
    if (p->son[c])
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) last = q;
        else
        {
            node *nq = new node(p->len + 1);
            memcpy(nq->son, q->son, sizeof nq->son);
            nq->pre = q->pre; q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
            last = nq;
        }
    }
    else
    {
        node *np = new node(p->len + 1);
        while (p && !p->son[c])
            p->son[c] = np, p = p->pre;
        if (!p) np->pre = root;
        else
        {
            node *q = p->son[c];
            if (q->len == p->len + 1) np->pre = q;
            else
            {
                node *nq = new node(p->len + 1);
                memcpy(nq->son, q->son, sizeof nq->son);
                nq->pre = q->pre;
                np->pre = q->pre = nq;
                while (p && p->son[c] == q)
                    p->son[c] = nq, p = p->pre;
            }
        }
        last = np;
    }
    last->col.push_back(id);
}

struct graph
{
    int next, to;
    graph() {}
    graph(int _next, int _to)
    : next(_next), to(_to) {}
} edge[N];

inline void add(int x, int y)
{
    static int cnt = 0;
    edge[++cnt] = graph(head[x], y);
    head[x] = cnt;
}

void build()
{
    for (int i = 1; sam + i != pos; ++i)
        add(sam[i].pre - sam, i);
}

void dfs(int x)
{
    id[x].first = ++tot; p[tot] = x;
    for (int i = 0; i < (int)sam[x].col.size(); ++i)
        col[sam[x].col[i]].push_back(tot);
    for (int i = head[x]; i; i = edge[i].next)
        dfs(edge[i].to);
    id[x].second = tot;
}

void insert(int x)
{
    for (int i = x; i <= tot; i += i & -i)
        ++f[i];
}

int getsum(int x)
{
    int re = 0;
    for (int i = x; i; i -= i & -i)
        re += f[i];
    return re;
}

void dfs2(int x, int fa)
{
    dist[x] = dist[fa];
    if (ans[x] >= k) dist[x] += sam[x].len - sam[fa].len;
    for (int i = head[x]; i; i = edge[i].next)
        dfs2(edge[i].to, x);
}

int main()
{
    cin >> n >> k;
    reset();
    for (int i = 1; i <= n; ++i)
    {
        scanf("%s", str);
        int len = strlen(str);
        for (int j = len - 1; j >= 0; --j)
            insert(i, str[j] - 'a'),
            query[i].push_back(last - sam);
        last = root;
    }
    build();
    dfs(0);
    for (int i = 1; i <= n; ++i)
        insert(col[i][0]);
    for (int i = 1; i <= tot; ++i)
    {
        ans[p[i]] = getsum(id[p[i]].second) - getsum(id[p[i]].first - 1);
        for (int k = 0; k < (int)sam[p[i]].col.size(); ++k)
        {
            int j = sam[p[i]].col[k];
            if (++cnt[j] < (int)col[j].size())
                insert(col[j][cnt[j]]);
        }
    }
    dfs2(0, 0);
    for (int i = 1; i <= n; ++i)
    {
        int trueans = 0;
        for (int j = 0; j < (int)query[i].size(); ++j)
            trueans += dist[query[i][j]];
        printf("%d ", trueans);
    }
    return 0;
}

BZOJ3413 匹配

设目标串在原串中的第一次出现的位置的左端点是 x。那么若目标串是 (123),那么答案就是 1~x 的区间上串 (1) 出现的次数+ 1~x+1 的区间上串 (12) 出现的次数+ 1~x+2 的区间上串 (123) 出现的次数+x-1 次失败的匹配。这只需要求后缀自动机上分别求表示串 (1)、(12)、(123) 的结点的 right 集合中数值在相应范围中的元素个数。处理出 pre 树的 DFS 序,建立主席树,处理询问吧……

算一下复杂度:

空间上 20W 个后缀自动机结点,乘上字符集加上乱七八糟的还有连边各种数组一共约 400W;建立主席树 20W*log20W=400W,乘上乱七八糟的大概要 1200W。加起来一共 1500W。

时间上 300W 个在主席树上的询问,6000W。不是特别令人安心啊……

然而犯了一个把线段树上的 l 写成 1 的 SB 错误(大概是不总写只确定右端点的线段树)。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#include <bits/stdc++.h>
using namespace std;

#define N 200005

struct node
{
    node *son[10], *pre;
    int len, right, minr;
    node(int _len = 0) : len(_len), right(N) {}
    void *operator new(size_t s);
} sam[N], *pos;

void *node::operator new(size_t s) { return pos++; }

struct segnode
{
    segnode *son[2];
    int sum;
    void *operator new(size_t s);
} seg[N * 18], *segpos = seg;

void *segnode::operator new(size_t s) { return segpos++; }

node *root, *last;
segnode *segroot[N];
int n, m, tot, head[N], p[N]; char str[N];
pair<int, int> id[N];

void reset()
{
    pos = sam;
    last = root = new node();
}

void insert(int id, int c)
{
    node *p = last, *np = new node(p->len + 1);
    while (p && !p->son[c])
        p->son[c] = np, p = p->pre;
    if (!p) np->pre = root;
    else
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) np->pre = q;
        else
        {
            node *nq = new node(p->len + 1);
            memcpy(nq->son, q->son, sizeof nq->son);
            nq->pre = q->pre;
            np->pre = q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
        }
    }
    last = np;
    last->right = id;
}

struct graph
{
    int next, to;
    graph() {}
    graph(int _next, int _to)
    : next(_next), to(_to) {}
} edge[N];

inline void add(int x, int y)
{
    static int cnt = 0;
    edge[++cnt] = graph(head[x], y);
    head[x] = cnt;
}

void build()
{
    for (int i = 1; sam + i != pos; ++i)
        add(sam[i].pre - sam, i);
}

void dfs(int x)
{
    id[x].first = ++tot; p[tot] = x;
    sam[x].minr = sam[x].right;
    for (int i = head[x]; i; i = edge[i].next)
        dfs(edge[i].to),
        sam[x].minr = min(sam[x].minr, sam[edge[i].to].minr);
    id[x].second = tot;
}

void insert(segnode *pre, segnode *&pos, int l, int r, int x)
{
    pos = new segnode();
    *pos = *pre; ++pos->sum;
    if (l == r) return;
    int mid = (l + r) >> 1;
    if (x <= mid) insert(pre->son[0], pos->son[0], l, mid, x);
    else insert(pre->son[1], pos->son[1], mid + 1, r, x);
}

int query(segnode *pos, int l, int r, int x)
{
    if (r <= x) return pos->sum;
    int mid = (l + r) >> 1;
    if (x <= mid) return query(pos->son[0], l, mid, x);
    return pos->son[0]->sum + query(pos->son[1], mid + 1, r, x);
}

vector<int> v;

int check(char str[], int len)
{
    node *p = root;
    v.resize(0);
    for (int i = 0; i < len; ++i)
    {
        if (!p->son[str[i] - '0'])
            return n + len;
        p = p->son[str[i] - '0'];
        v.push_back(p - sam);
    }
    return p->minr;
}

int main()
{
    cin >> n;
    scanf("%s", str);
    reset();
    for (int i = 0; i < n; ++i)
        insert(i + 1, str[i] - '0');
    build();
    dfs(0);
    segroot[0] = new segnode();
    segroot[0]->son[0] = segroot[0]->son[1] = segroot[0];
    for (int i = 1; i <= tot; ++i)
        if (sam[p[i]].right != N)
            insert(segroot[i - 1], segroot[i], 1, n, sam[p[i]].right);
        else segroot[i] = segroot[i - 1];
    cin >> m;
    while (m--)
    {
        scanf("%s", str);
        int len = strlen(str);
        int t = check(str, len);
        t = t - len + 1;
        long long ans = t;
        for (int i = 0; i < (int)v.size(); ++i, ++t)
            ans += query(segroot[id[v[i]].second], 1, n, min(t, n))
                 - query(segroot[id[v[i]].first - 1], 1, n, min(t, n));
        printf("%lld\n", ans - 1);
    }
    return 0;
}

BZOJ2555 SubString

最开始看排行榜感觉自己的常数好大,后来知道在五月二十日之前的代码都是暴力……我的 LCT 的常数还是挺好看的。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#include <bits/stdc++.h>
using namespace std;

#define N 1200005

struct link_cut_tree
{
    struct node
    {
        static node *nil;
        node *fa, *son[2];
        int key, tag, rev;
        node() : key(0), tag(0), rev(0)
        { fa = son[0] = son[1] = nil; }
        bool root() { return fa->son[0] != this && fa->son[1] != this; }
        int check() { return fa->son[1] == this; }
        void combine(node *x, int d) { son[d] = x; x->fa = this; }
        void pushdown() { if (!root()) fa->pushdown(); lazy(); }
        void lazy()
        {
            if (tag)
                son[0]->key += tag, son[1]->key += tag,
                son[0]->tag += tag, son[1]->tag += tag, tag = 0;
            if (rev)
                rev ^= 1, swap(son[0], son[1]),
                son[0]->rev ^= 1, son[1]->rev ^= 1;
        }
    };

    node *tree[N];

    void rotate(node *x)
    {
        node *k = x->fa;
        int d = !x->check();
        k->combine(x->son[d], d ^ 1);
        if (!k->root())
            k->fa->son[k->check()] = x;
        x->fa = k->fa;
        x->combine(k, d);
    }

    void splay(node *x)
    {
        x->pushdown();
        while (!x->root())
        {
            if (!x->fa->root())
                rotate(x->check() ^ x->fa->check() ? x : x->fa);
            rotate(x);
        }
    }

    void access(node *x)
    {
        node *now = x, *pre = node::nil;
        while (now != node::nil)
            splay(now), now->son[1] = pre,
            pre = now, now = now->fa;
        splay(x);
    }

    void link(int x, int y)
    {
        tree[x]->fa = tree[y];
        access(tree[y]); addval(tree[y], tree[x]->key);
    }

    void cut(int x)
    {
        access(tree[x]);
        node *s = tree[x]->son[0];
        addval(s, -tree[x]->key);
        s->fa = tree[x]->son[0] = node::nil;
    }

    void addval(node *x, int y)
    {
        x->key += y; x->tag += y;
    }
};

link_cut_tree::node *link_cut_tree::node::nil = new link_cut_tree::node();
link_cut_tree t;

char str[3000005];
int q, mask;

struct node
{
    node *son[26], *pre;
    int len;
    node(int _len = 0) : len(_len) {}
    void *operator new(size_t s);
} sam[N], *pos;

void *node::operator new(size_t s)
{ t.tree[pos - sam] = new link_cut_tree::node(); return pos++; }

node *root, *last;

void reset()
{
    pos = sam;
    last = root = new node();
}

void insert(int c)
{
    node *p = last, *np = new node(p->len + 1);
    t.tree[np - sam]->key = 1;
    while (p && !p->son[c])
        p->son[c] = np, p = p->pre;
    if (!p) np->pre = root, t.link(np - sam, 0);
    else
    {
        node *q = p->son[c];
        if (q->len == p->len + 1)
            np->pre = q, t.link(np - sam, q - sam);
        else
        {
            node *nq = new node(p->len + 1);
            memcpy(nq->son, q->son, sizeof nq->son);
            nq->pre = q->pre;
            t.link(nq - sam, q->pre - sam);
            t.cut(q - sam);
            np->pre = q->pre = nq;
            t.link(q - sam, nq - sam);
            t.link(np - sam, nq - sam);
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
        }
    }
    last = np;
}

int query(char str[], int len)
{
    node *p = root;
    for (int i = 0; i < len; ++i)
    {
        if (!p->son[str[i] - 'A']) return 0;
        p = p->son[str[i] - 'A'];
    }
    t.access(t.tree[p - sam]);
    return t.tree[p - sam]->key;
}

int getstr(int mask)
{
    scanf("%s", str);
    int len = strlen(str);
    for (int j = 0; j < len; ++j) 
        mask = (mask * 131 + j) % len,
        swap(str[j], str[mask]);
    return len;
}

int main()
{
    cin >> q;
    scanf("%s", str);
    int len = strlen(str);
    reset();
    for (int j = 0; j < len; ++j)
        insert(str[j] - 'A');
    char opt[10]; int ans = 0;
    for (int i = 1; i <= q; ++i)
    {
        scanf("%s", opt);
        if (opt[0] == 'A')
        {
            len = getstr(mask);
            for (int j = 0; j < len; ++j)
                insert(str[j] - 'A');
        }
        else
        {
            len = getstr(mask);
            printf("%d\n", ans = query(str, len));
            mask ^= ans;
        }
    }
    return 0;
}

BZOJ3926 [Zjoi2015]诸神眷顾的幻想乡

现在就发现这种题变得好水的了……

只有 20 个叶子结点,然后就枚举根,dfs 往广义后缀自动机里面加字符,最后把各个结点能表示的长度加到一起就好了。

然而这道题为什么不用广义的后缀自动机也能过呢?

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
#include <bits/stdc++.h>
using namespace std;

#define N 100005

struct node
{
    node *son[10], *pre;
    int len;
    node(int _len = 0) : len(_len) {}
    void *operator new(size_t s);
} sam[N * 40], *pos;

void *node::operator new(size_t s) { return pos++; }

node *root;

void reset()
{
    pos = sam;
    root = new node();
}

node *insert(node *p, int c)
{
    if (p->son[c])
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) return q;
        node *nq = new node(p->len + 1);
        memcpy(nq->son, q->son, sizeof nq->son);
        nq->pre = q->pre; q->pre = nq;
        while (p && p->son[c] == q)
            p->son[c] = nq, p = p->pre;
        return nq;
    }
    else
    {
        node *np = new node(p->len + 1);
        while (p && !p->son[c])
            p->son[c] = np, p = p->pre;
        if (!p) np->pre = root;
        else
        {
            node *q = p->son[c];
            if (q->len == p->len + 1) np->pre = q;
            else
            {
                node *nq = new node(p->len + 1);
                memcpy(nq->son, q->son, sizeof nq->son);
                nq->pre = q->pre; np->pre = q->pre = nq;
                while (p && p->son[c] == q)
                    p->son[c] = nq, p = p->pre;
            }
        }
        return np;
    }
}

int n, m, head[N], in[N], col[N];
long long ans;
node *p[N];

struct graph
{
    int next, to;
    graph() {}
    graph(int _next, int _to)
    : next(_next), to(_to) {}
} edge[N * 2];

inline void add(int x, int y)
{
    static int cnt = 0;
    edge[++cnt] = graph(head[x], y);
    head[x] = cnt;
}

void dfs(int x, int fa)
{
    p[x] = insert(p[fa], col[x]);
    for (int i = head[x]; i; i = edge[i].next)
        if (edge[i].to != fa)
            dfs(edge[i].to, x);
}

int main()
{
    cin >> n >> m;
    for (int i = 1; i <= n; ++i)
        scanf("%d", &col[i]);
    for (int x, y, i = 1; i < n; ++i)
        scanf("%d%d", &x, &y),
        add(x, y), add(y, x), ++in[x], ++in[y];
    reset();
    for (int i = 1; i <= n; ++i)
        if (in[i] == 1)
            memset(p, 0, sizeof p), p[0] = root, dfs(i, 0);
    for (node *i = sam + 1; i != pos; ++i)
        ans += i->len - i->pre->len;
    cout << ans << endl;
    return 0;
}