Chaos Slover 补档计划 - 后缀自动机(2)

2015.11.17 09:42 Tue| 14 visits oi_2016| ChaosSlover补档计划| Text

据说都是后缀数组的经典题

POJ1743 Musical Theme

可以使用后缀数组+二分答案。

也可以使用后缀自动机,处理出每一个结点的 right 集合中的最大和最小值,它们的差-1(其实不 -1 也能过,可是这里真的需要 -1)和长度之间的最小值就是这个结点上的答案。

注意最后答案需要 +1,因为在最开始差分了。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <vector>
using namespace std;

int n, a[20005];

struct node
{
    node *son[175], *pre;
    int len, minr, maxr;
    node(int _len = 0) : len(_len), minr(20000), maxr(0)
    {
        memset(son, 0, sizeof son); pre = 0;
    }
    void *operator new(size_t s);
} newnode[40001], *pos;

void *node::operator new(size_t s)
{
    return pos++;
}

node *root, *last;
vector<node *> v[20005];

void reset()
{
    for (int i = 0; i <= n; ++i)
        v[i].resize(0);
    pos = newnode;
    last = root = new node();
}

void insert(int c)
{
    node *p = last, *np = new node(p->len + 1);
    while (p && !p->son[c])
        p->son[c] = np, p = p->pre;
    if (!p) np->pre = root;
    else
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) np->pre = q;
        else
        {
            node *nq = new node(p->len + 1);
            memcpy(nq->son, q->son, sizeof nq->son);
            nq->pre = q->pre;
            np->pre = q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
        }
    }
    last = np;
}

int getright()
{
    int re = 0;
    node *p = root;
    p->maxr = p->minr = 0;
    for (int i = 0; i < n; ++i)
        p = p->son[a[i]], p->maxr = p->minr = i + 1;
    for (node *i = newnode; i != pos; ++i)
        v[i->len].push_back(i);
    for (int i = n; i >= 1; --i)
        for (int j = 0; j < (int)v[i].size(); ++j)
            re = max(re, min(v[i][j]->len, v[i][j]->maxr - v[i][j]->minr - 1)),
            v[i][j]->pre->minr = min(v[i][j]->pre->minr, v[i][j]->minr),
            v[i][j]->pre->maxr = max(v[i][j]->pre->maxr, v[i][j]->maxr);
    return re < 4 ? 0 : re + 1;
}

int main()
{
    while (cin >> n, n)
    {
        for (int i = 0; i < n; ++i)
            scanf("%d", &a[i]); --n;
        for (int i = 0; i < n; ++i)
            a[i] = a[i + 1] - a[i] + 87;
        reset();
        for (int i = 0; i < n; ++i)
            insert(a[i]);
        cout << getright() << endl;
    }
}

SPOJSUBST1+DISUBSTR New Distinct Substrings

题目里面并没有说只有大写字母,真是令人悲伤的事情啊!

问互不相同的子串个数,其实就是每一个结点能够表示的字符串个数的和。p->len-p->pre->len 加起来就好了。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
using namespace std;

struct node
{
    node *son[128], *pre;
    int len;
    node(int _len = 0) : len(_len)
    {
        memset(son, 0, sizeof son); pre = 0;
    }
    void *operator new(size_t s);
} newnode[100010], *pos;

void *node::operator new(size_t s)
{
    return pos++;
}

node *root, *last;
int n, ans; char str[50010];

void reset()
{
    pos = newnode; ans = 0;
    last = root = new node();
}

void insert(int c)
{
    node *p = last, *np = new node(p->len + 1);
    while (p && !p->son[c])
        p->son[c] = np, p = p->pre;
    if (!p) np->pre = root;
    else
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) np->pre = q;
        else
        {
            node *nq = new node(p->len + 1);
            memcpy(nq->son, q->son, sizeof nq->son);
            nq->pre = q->pre;
            np->pre = q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
        }
    }
    last = np; ans += np->len - np->pre->len;
}

int main()
{
    cin >> n;
    for (int i = 1; i <= n; ++i)
    {
        scanf("%s", str);
        reset();
        int l = strlen(str);
        for (int i = 0; i < l; ++i)
            insert(str[i]);
        cout << ans << endl;
    }
    return 0;
}

BZOJ3676 回文串

用 Manacher 求出来所有的本质不同的回文串,然后放进后缀自动机里面跑。

本质不同的回文串实际上就是使得 maxp 增大的回文串,很显然最多也只有 N 个。注意使用 manacher 的时候好好判断边界 x_x 并且只能统计边缘是字母的回文串。

然后还需要在后缀自动机里面输出匹配的结点的 right 大小。这里可以用倍增迅速找到这个结点。预处理出从每一个结点开始沿着 pre 边向上跳 2^k 次能到达的位置,然后通过 len 的大小判断是否能跳。

然而这道题是一道回文自动机的裸题!

然而这道题卡后缀数组的时限!

然而这道题卡后缀自动机的内存!

然而这道题还需要开 long long!

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include <bits/stdc++.h>
using namespace std;

struct node
{
    node *son[26], *pre;
    int len, right, id;
    node(int _len = 0) : len(_len) {}
    void *operator new(size_t s);
} newnode[600005], *pos;

void *node::operator new(size_t s)
{
    return pos++;
}

node *root, *last, *top[600005];
vector<node *> v[300005];
int n, tot, fa[600005][18], p[600005], point[300005];
char str[300005], s[600005];

void reset()
{
    pos = newnode;
    last = root = new node();
}

void insert(int c)
{
    node *p = last, *np = new node(p->len + 1);
    while (p && !p->son[c])
        p->son[c] = np, p = p->pre;
    if (!p) np->pre = root;
    else
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) np->pre = q;
        else
        {
            node *nq = new node(p->len + 1);
            memcpy(nq->son, q->son, sizeof nq->son);
            nq->pre = q->pre;
            np->pre = q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
        }
    }
    last = np;
}

void build()
{
    for (node *i = newnode; i != pos; ++i)
        v[i->len].push_back(i);
    for (int i = 0; i <= n; ++i)
        for (int j = 0; j < (int)v[i].size(); ++j)
            top[tot] = v[i][j], v[i][j]->id = tot++;
    for (int i = 1; i < tot; ++i)
        fa[i][0] = top[i]->pre->id;
    for (int j = 1; j <= 17; ++j)
        for (int i = 1; i < tot; ++i)
            fa[i][j] = fa[fa[i][j - 1]][j - 1];
    node *p = root;
    for (int i = 0; i < n; ++i)
        p = p->son[str[i] - 'a'], p->right = 1,
        point[i + 1] = p->id;
    for (int i = tot - 1; i; --i)
        top[i]->pre->right += top[i]->right;
}

long long query(int l, int r)
{
    int len = r - l + 1;
    int x = point[r];
    for (int i = 17; i >= 0; --i)
        if (top[fa[x][i]]->len >= len)
            x = fa[x][i];
    return 1ll * top[x]->right * len;
}

long long manacher()
{
    s[0] = '$'; s[1] = '#';
    for (int i = 0; i < n; ++i)
        s[i * 2 + 2] = str[i], s[i * 2 + 3] = '#';
    int len = n * 2 + 2, maxp = 0, id = 0;
    long long re = 0;
    for (int i = 1; i < len; ++i)
    {
        if (maxp > i) p[i] = min(p[id * 2 - i], maxp - i);
        else
        {
            p[i] = 1;
            if (s[i] != '#')
                re = max(re, query((i - p[i] + 2) / 2, (i + p[i]) / 2));
        }
        while (s[i - p[i]] == s[i + p[i]])
        {
            ++p[i];
            if (s[i + p[i] - 1] != '#')
                re = max(re, query((i - p[i] + 2) / 2, (i + p[i]) / 2));
        }
        if (i + p[i] > maxp) maxp = i + p[i], id = i;
    }
    return re;
}

int main()
{
    scanf("%s", str);
    n = strlen(str);
    reset();
    for (int i = 0; i < n; ++i)
        insert(str[i] - 'a');
    build();
    cout << manacher() << endl;
    return 0;
}

POJ3294 Life Forms(伪)

广义后缀自动机,然而本人并不会输出方案,更不知道这份代码的正确性。这道题还是该用后缀数组做吧。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
#include <bits/stdc++.h>
using namespace std;

struct node
{
    node *son[26], *pre;
    int len;
    bitset<105> vis;
    node(int _len = 0) : len(_len)
    {
        vis.reset();
        memset(son, 0, sizeof son); pre = 0;
    }
    void *operator new(size_t s);
} newnode[200005], *pos;

void *node::operator new(size_t s)
{
    return pos++;
}

int n; char str[1005];
vector<node *> v[1005];
node *root, *last;

void reset()
{
    pos = newnode;
    last = root = new node();
}

void insert(int id, int c)
{
    node *p = last;
    if (p->son[c])
    {
        node *q = p->son[c];
        if (q->len == p->len + 1) last = q;
        else
        {
            node *nq = new node(p->len + 1);
            memcpy(nq->son, q->son, sizeof nq->son);
            nq->pre = q->pre; q->pre = nq;
            while (p && p->son[c] == q)
                p->son[c] = nq, p = p->pre;
            last = nq;
        }
    }
    else
    {
        node *np = new node(p->len + 1);
        while (p && !p->son[c])
            p->son[c] = np, p = p->pre;
        if (!p) np->pre = root;
        else
        {
            node *q = p->son[c];
            if (q->len == p->len + 1) np->pre = q;
            else
            {
                node *nq = new node(p->len + 1);
                memcpy(nq->son, q->son, sizeof nq->son);
                nq->pre = q->pre;
                np->pre = q->pre = nq;
                while (p && p->son[c] == q)
                    p->son[c] = nq, p = p->pre;
            }
        }
        last = np;
    }
    last->vis[id] = 1;
}

int main()
{
    while (cin >> n, n)
    {
        reset();
        for (int i = 1; i <= n; ++i)
        {
            last = root;
            scanf("%s", str);
            int l = strlen(str);
            for (int j = 0; j < l; ++j)
                insert(i, str[j] - 'a');
        }
        int maxl = 0;
        for (int i = 0; i <= 1000; ++i)
            v[i].resize();
        for (node *i = newnode; i != pos; ++i)
            v[i->len].push_back(i);
        for (int i = 1000; i; --i)
            for (auto j : v[i])
                j->pre->vis |= j->vis;
        int ans = 0;
        for (node *i = newnode; i != pos; ++i)
            if ((int)i->vis.count() > (n >> 1))
                ans = max(ans, i->len);
        if (ans == 0) puts("?");
        else cout << ans << endl;
    }
}

Update: POJ3294 Life Forms

用后缀数组二分答案将 height 分组。细节就是需要分配不同的分隔符。另外这题用 C++ 交蜜汁 RE,G++ 通过。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include <bits/stdc++.h>
using namespace std;

#define N 110005

char str[N];
int x[N], y[N], c[N], tot, vis[105];
int s, n, bel[N], sa[N], rnk[N], height[N];

inline bool same(int a, int b, int l)
{
    return y[a] == y[b] &&
                ((a + l >= n && b + l >= n) ||
                (a + l < n && b + l < n && y[a + l] == y[b + l]));
}

void get_sa(int m = 256)
{
    for (int i = 0; i < n; ++i)         ++c[x[i] = 128 + str[i]];
    for (int i = 1; i < m; ++i)         c[i] += c[i - 1];
    for (int i = n - 1; i >= 0; --i)    sa[--c[x[i]]] = i;
    for (int k = 1; k < n; k <<= 1)
    {
        int p = 0;
        for (int i = n - k; i < n; ++i)     y[p++] = i;
        for (int i = 0; i < n; ++i)         if (sa[i] >= k) y[p++] = sa[i] - k;
        memset(c, 0, m * sizeof(int));
        for (int i = 0; i < n; ++i)         ++c[x[y[i]]];
        for (int i = 1; i < m; ++i)         c[i] += c[i - 1];
        for (int i = n - 1; i >= 0; --i)    sa[--c[x[y[i]]]] = y[i];
        for (int i = 0; i < n; ++i)         y[i] = x[i];
        m = 0; x[sa[0]] = 0;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = same(sa[i], sa[i - 1], k) ? m : ++m;
        if (++m == n) break;
    }
}

void get_height()
{
    for (int i = 0; i < n; ++i) rnk[sa[i]] = i;
    for (int i = 0, j, k = 0; i < n; height[rnk[i++]] = k)
        if (rnk[i])
        {
            k = max(k - 1, 0); j = sa[rnk[i] - 1];
            while (str[i + k] == str[j + k]) ++k;
        }
}

void init()
{
    memset(sa, 0, sizeof sa);
    memset(rnk, 0, sizeof rnk);
    memset(height, 0, sizeof height);
    memset(x, 0, sizeof x);
    memset(y, 0, sizeof y);
    memset(c, 0, sizeof c);
}

bool check(int mid)
{
    int cnt = 0;
    for (int i = 1; i <= n; ++i)
    {
        if (height[i] < mid)
        {
            if (cnt > s / 2) return true;
            cnt = 0; ++tot;
        }
        else
        {
            if (vis[bel[sa[i - 1]]] != tot)
                ++cnt, vis[bel[sa[i - 1]]] = tot;
            if (vis[bel[sa[i]]] != tot)
                ++cnt, vis[bel[sa[i]]] = tot;
        }
    }
    return false;
}

void print(int ans)
{
    int cnt = 0;
    for (int i = 1; i <= n; ++i)
    {
        if (height[i] < ans)
        {
            if (cnt > s / 2)
            {
                for (int j = 0; j < ans; ++j)
                    putchar(str[sa[i - 1] + j]);
                puts("");
            }
            cnt = 0; ++tot;
        }
        else
        {
            if (vis[bel[sa[i - 1]]] != tot)
                ++cnt, vis[bel[sa[i - 1]]] = tot;
            if (vis[bel[sa[i]]] != tot)
                ++cnt, vis[bel[sa[i]]] = tot;
        }
    }
}

int main()
{
    while (cin >> s, s)
    {
        init();
        n = -1;
        for (int i = 1; i <= s; ++i)
        {
            str[n] = 150 + i,
            scanf("%s", str + n + 1);
            int t = n;
            n = strlen(str);
            for (int j = t + 1; j <= n; ++j)
                bel[j] = i;
        }
        get_sa();
        get_height();
        int l = 1, r = n, ans = 0;
        while (l <= r)
        {
            int mid = (l + r) >> 1;
            if (check(mid)) l = mid + 1, ans = mid;
            else r = mid - 1;
        }
        if (ans == 0) puts("?");
        else print(ans);
        puts("");
    }
    return 0;
}