Chaos Slover 补档计划 - 后缀自动机(3)

2015.11.18 13:00 Wed| 32 visits oi_2016| ChaosSlover补档计划| Text

BZOJ2780 [Spoj]8093 Sevenk Love Oimaster

真是辛苦。BZOJ 里面没有 C++11,不能用 auto 差评!用 cout 输出会 RE 差评!

下面暂且就贴带 auto 的代码好了,反正在 SPOJ 过了……

// 为什么网上所有这道题的题解都出自我们机房……

给出多个字符串,然后对于每一次询问某一个字符串在多少个原串中出现过。

首先,很显然要对字符串建立广义后缀自动机。广义后缀自动机就是多了几行和下面差不多的代码,然后每一次插入新字符串的时候 last=root。如何处理询问呢?实际上就是将询问串在自动机中匹配,询问 pre 树中以匹配到的结点为根的子树中出现了多少个原串的结点。

将每一个原串看作一种颜色,这样就相当于用后缀自动机给结点涂色。而在处理出 pre 树的 DFS 序之后,询问的操作就和 HH 的项链基本一样了,用树状数组维护即可。

细节:字符集可能很大,听说开满儿子指针一定会 MLE。然而我们可以用 map。谁说用 map 不能写指针版?直接把 map 当数组用就好了啊 - -|| 我是不会放弃指针的!

代码好长……广义后缀自动机√

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#include <bits/stdc++.h>
using namespace std;

struct node
{
    node *pre;
    map<char, node*> son;
    int len;
    vector<int> col;
    node(int _len = 0) : pre(0), len(_len) {}
    void *operator new(size_t s);
} newnode[200005], *pos;

void *node::operator new(size_t s)
{
    return pos++;
}

int n, m; char str[100005];
node *root, *last;
pair<int, int> id[200005];
pair<pair<int, int>, int> q[60005];
vector<int> col[10005];
int tot, cnt[10005], p[200005], ans[60005], fenwick[200005];

void reset()
{
    pos = newnode;
    last = root = new node();
}

void insert(int id, char c)
{
    node *p = last;
    if (p->son.find(c) != p->son.end())
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) last = q;
        else
        {
            node *nq = new node(p->len + 1);
            nq->son = map<char, node*>(q->son);
            nq->pre = q->pre; q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
            last = nq;
        }
    }
    else
    {
        node *np = new node(p->len + 1);
        while (p && p->son.find(c) == p->son.end())
            p->son[c] = np, p = p->pre;
        if (!p) np->pre = root;
        else
        {
            node *q = p->son[c];
            if (q->len == p->len + 1) np->pre = q;
            else
            {
                node *nq = new node(p->len + 1);
                nq->son = map<char, node*>(q->son);
                nq->pre = q->pre;
                np->pre = q->pre = nq;
                while (p && p->son[c] == q)
                    p->son[c] = nq, p = p->pre;
            }
        }
        last = np;
    }
    last->col.push_back(id);
}

int head[100005];

struct graph
{
    int next, to;
    graph() {}
    graph(int _next, int _to)
    : next(_next), to(_to) {}
} edge[100005];

inline void add(int x, int y)
{
    static int cnt = 0;
    edge[++cnt] = graph(head[x], y);
    head[x] = cnt;
}

void dfs(int x)
{
    id[x].first = ++tot;
    p[tot] = x;
    for (auto i : newnode[x].col)
        col[i].push_back(tot);
    for (int i = head[x]; i; i = edge[i].next)
        dfs(edge[i].to);
    id[x].second = tot;
}

pair<int, int> find(char str[], int len)
{
    node *p = root;
    for (int i = 0; i < len; ++i)
    {
        if (p->son.find(str[i]) == p->son.end())
            return make_pair(-1, -1);
        p = p->son[str[i]];
    }
    return id[p - newnode];
}

void insert(int x)
{
    for (int i = x; i <= tot; i += i & -i)
        ++fenwick[i];
}

int query(int x)
{
    int re = 0;
    for (int i = x; i; i -= i & -i)
        re += fenwick[i];
    return re;
}

int main()
{
    cin >> n >> m;
    reset();
    for (int i = 1; i <= n; ++i)
    {
        last = root;
        scanf("%s", str);
        int l = strlen(str);
        for (int j = 0; j < l; ++j)
            insert(i, str[j]);
    }
    for (node *i = newnode + 1; i != pos; ++i)
        add(i->pre - newnode, i - newnode);
    dfs(0);
    for (int i = 1; i <= m; ++i)
    {
        scanf("%s", str);
        int l = strlen(str);
        q[i] = make_pair(find(str, l), i);
    }
    sort(q + 1, q + m + 1);
    for (int i = 1; i <= n; ++i)
        insert(col[i][0]);
    for (int i = 1; i <= m; ++i)
    {
        if (q[i].first.first == -1) continue;
        for (int j = max(q[i - 1].first.first, 0); j < q[i].first.first; ++j)
        {
            for (auto k : newnode[p[j]].col)
                if (++cnt[k] < (int)col[k].size())
                    insert(col[k][cnt[k]]);
        }
        ans[q[i].second] =
            query(q[i].first.second) - query(q[i].first.first - 1);
    }
    for (int i = 1; i <= m; ++i)
        printf("%d\n", ans[i]);
    return 0;
}

BZOJ1396&2865 识别子串

很显然,识别子串都是只出现过一次的串,即可以用后缀自动机中 pre 树的叶子结点表示的串。那么对于某一个 pre 树上的叶子结点 p,p->pre->len+1 就是右端点在 p->len 处的最短识别子串长度。然后可以在线段树上用以当前点为右端点的识别子串去更新答案。维护两颗线段树,输出较小答案即可。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
#include <bits/stdc++.h>
using namespace std;

#define N 100005

struct node
{
    node *son[26], *pre;
    int len, vis;
    node(int _len = 0) : len(_len), vis(0)
    {
        memset(son, 0, sizeof son); pre = 0;
    }
    void *operator new(size_t s);
} newnode[N * 2], *pos;

void *node::operator new(size_t s)
{
    return pos++;
}

node *root, *last;

void reset()
{
    pos = newnode;
    last = root = new node();
}

void insert(int c)
{
    node *p = last, *np = new node(p->len + 1);
    while (p && !p->son[c])
        p->son[c] = np, p = p->pre;
    if (!p) np->pre = root;
    else
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) np->pre = q;
        else
        {
            node *nq = new node(p->len + 1);
            memcpy(nq->son, q->son, sizeof nq->son);
            nq->pre = q->pre;
            np->pre = q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
        }
    }
    last = np;
}

#define lson (pos << 1)
#define rson (pos << 1 | 1)

char str[N];
int n, mint1[N * 4], mint2[N * 4];

void modify(int mint[], int pos, int l, int r, int x, int y, int z)
{
    if (x <= l && r <= y) { mint[pos] = min(mint[pos], z); return; }
    int mid = (l + r) >> 1;
    if (mint[pos] != 0x3f3f3f3f)
        mint[lson] = min(mint[lson], mint[pos]),
        mint[rson] = min(mint[rson], mint[pos]), mint[pos] = 0x3f3f3f3f;
    if (x <= mid)
        modify(mint, lson, l, mid, x, y, z);
    if (y > mid)
        modify(mint, rson, mid + 1, r, x, y, z);
}

int query(int mint[], int pos, int l, int r, int x)
{
    if (l == r) return mint[pos];
    int mid = (l + r) >> 1;
    if (mint[pos] != 0x3f3f3f3f)
        mint[lson] = min(mint[lson], mint[pos]),
        mint[rson] = min(mint[rson], mint[pos]), mint[pos] = 0x3f3f3f3f;
    if (x <= mid) return query(mint, lson, l, mid, x);
    else return query(mint, rson, mid + 1, r, x);
}

int main()
{
    scanf("%s", str);
    n = strlen(str);
    reset();
    for (int i = 0; i < n; ++i)
        insert(str[i] - 'a');
    for (node *i = newnode + 1; i != pos; ++i)
        i->pre->vis = 1;
    memset(mint1, 0x3f, sizeof mint1);
    memset(mint2, 0x3f, sizeof mint2);
    for (node *i = newnode + 1; i != pos; ++i)
        if (!i->vis)
        {
            int l = i->len - i->pre->len, r = i->len;
            modify(mint1, 1, 1, n, l, r, r - l + 1);
            if (l - 1)
                modify(mint2, 1, 1, n, 1, l - 1, r);
        }
    for (int i = 1; i <= n; ++i)
        printf("%d\n", min(query(mint1, 1, 1, n, i),
            query(mint2, 1, 1, n, i) - i + 1));
    return 0;
}

BZOJ2806 [Ctsc2012]Cheat

小强和阿米巴!小强和阿米巴!小强和阿米巴!

首先建立广义后缀自动机,匹配作文串,预处理出每一个结点能够匹配的最长长度(过程类似于求两个串的最长公共子序列那道题)。

之后我们考虑二分答案,通过询问用不重叠且长度都大于 mid 的区间最多能够覆盖多长的序列来判断是否可行。设 f[i] 表示 0~i 的区间内有多长会被覆盖,可以得到转移方程:

f[i]=max(f[i-1], f[j]+i-j),i-a[j] <= j <=i-mid。

然后发现 i-a[j] 是单调不减的,所以我们可以对 f[j]-j 维护一个单调队列。

为什么 i-a[j] 单调不减呢?a[j] 的意义是能够匹配到的最长长度,所以 a[i]<=a[i-1]+1,然后就没了。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#include <bits/stdc++.h>
using namespace std;

#define N 1100000

struct node
{
    node *son[2], *pre;
    int len;
    node(int _len = 0) : len(_len) {}
    void *operator new(size_t s);
} sam[N * 2], *pos;

void *node::operator new(size_t s) { return pos++; }

node *root, *last;

void reset()
{
    pos = sam;
    last = root = new node();
}

void insert(int c)
{
    node *p = last;
    if (p->son[c])
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) last = q;
        else
        {
            node *nq = new node(p->len + 1);
            nq->son[0] = q->son[0]; nq->son[1] = q->son[1];
            nq->pre = q->pre; q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
            last = nq;
        }
    }
    else
    {
        node *np = new node(p->len + 1);
        while (p && !p->son[c])
            p->son[c] = np, p = p->pre;
        if (!p) np->pre = root;
        else
        {
            node *q = p->son[c];
            if (q->len == p->len + 1) np->pre = q;
            else
            {
                node *nq = new node(p->len + 1);
                nq->son[0] = q->son[0]; nq->son[1] = q->son[1];
                nq->pre = q->pre;
                np->pre = q->pre = nq;
                while (p && p->son[c] == q)
                    p->son[c] = nq, p = p->pre;
            }
        }
        last = np;
    }
}


char str[N];
int n, m, l, need, a[N], f[N];

void find(char str[])
{
    node *p = root;
    int now = 0;
    for (int i = 0; i < l; ++i)
    {
        if (p->son[str[i] - '0'])
            ++now, p = p->son[str[i] - '0'];
        else
        {
            while (p && !p->son[str[i] - '0'])
                p = p->pre;
            if (!p) now = 0, p = root;
            else now = p->len + 1, p = p->son[str[i] - '0'];
        }
        a[i] = now;
    }
}

deque<pair<int, int> > q;

bool check(int mid)
{
    while (!q.empty()) q.pop_back();
    for (int i = 0; i < mid - 1; ++i)
        f[i] = 0;
    for (int i = mid - 1; i < l; ++i)
    {
        int t = i - mid;
        while (!q.empty() && q.back().first < f[t] - t)
            q.pop_back();
        q.push_back(make_pair(f[t] - t, t));
        if (i) f[i] = f[i - 1];
        else f[i] = 0;
        while (!q.empty() && q.front().second < i - a[i])
            q.pop_front();
        if (!q.empty())
            f[i] = max(f[i], q.front().first + i);
    }
    if (f[l - 1] >= need) return true;
    return false;
}

int main()
{
    cin >> n >> m;
    reset();
    for (int i = 1; i <= m; ++i)
    {
        scanf("%s", str);
        last = root;
        for (int j = 0; str[j]; ++j)
            insert(str[j] - '0');
    }
    for (int i = 1; i <= n; ++i)
    {
        scanf("%s", str);
        l = strlen(str);
        find(str);
        need = (l * 9 - 1) / 10 + 1;
        int left = 1, right = l, ans = 0;
        while (left <= right)
        {
            int mid = (left + right) >> 1;
            if (check(mid)) left = mid + 1, ans = mid;
            else right = mid - 1;
        }
        printf("%d\n", ans);
    }
    return 0;
}

BZOJ3172 [Tjoi2013]单词

本来是 AC 自动机的例题的,结果发现是后缀自动机傻题……

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include <bits/stdc++.h>
using namespace std;

#define N 1000005

struct node
{
    node *son[26], *pre;
    int len, right;
    node(int _len = 0) : len(_len)
    {
        memset(son, 0, sizeof son); pre = 0;
    }
    void *operator new(size_t s);
} newnode[N * 2], *pos;

void *node::operator new(size_t s)
{
    return pos++;
}

node *root, *last, *ss[205];
vector<node *> v[N];
int n; char str[N];

void reset()
{
    pos = newnode;
    last = root = new node();
}

void getright()
{
    for (node *i = newnode; i != pos; ++i)
        v[i->len].push_back(i);
    for (int i = 1000000; i; --i)
        for (int j = 0; j < (int)v[i].size(); ++j)
            v[i][j]->pre->right += v[i][j]->right;
}

void insert(int c)
{
    node *p = last;
    if (p->son[c])
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) last = q;
        else
        {
            node *nq = new node(p->len + 1);
            memcpy(nq->son, q->son, sizeof nq->son);
            nq->pre = q->pre; q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
            last = nq;
        }
    }
    else
    {
        node *np = new node(p->len + 1);
        while (p && !p->son[c])
            p->son[c] = np, p = p->pre;
        if (!p) np->pre = root;
        else
        {
            node *q = p->son[c];
            if (q->len == p->len + 1) np->pre = q;
            else
            {
                node *nq = new node(p->len + 1);
                memcpy(nq->son, q->son, sizeof nq->son);
                nq->pre = q->pre;
                np->pre = q->pre = nq;
                while (p && p->son[c] == q)
                    p->son[c] = nq, p = p->pre;
            }
        }
        last = np;
    }
    ++last->right;
}

int main()
{
    cin >> n;
    reset();
    for (int i = 1; i <= n; ++i)
    {
        scanf("%s", str);
        last = root;
        for (int j = 0; str[j]; ++j)
            insert(str[j] - 'a');
        ss[i] = last;
    }
    getright();
    for (int i = 1; i <= n; ++i)
        cout << ss[i]->right << endl;
    return 0;
}