2022年01月24日,第十天
A
m
a
z
i
n
g
!
!
!
Amazing!!!
Amazing!!! 这题居然有四种思路解决。
思路1:多项式解决,时间复杂度
O
(
n
l
o
g
?
n
)
O(nlog\ n)
O(nlog?n),总共跑了
12
12
12 次
F
F
T
FFT
FFT ,常数过大,开
O
2
O2
O2 才
A
C
AC
AC。由于
D
N
A
DNA
DNA 序列最多有
4
4
4 种不同的字符,我们可以考虑如下做法,分别考虑每一个字符
c
c
c ,对于两个串
s
,
?
t
s,\ t
s,?t ,长度分别为
n
,
?
m
n,\ m
n,?m,则有
a
[
i
]
=
[
s
[
i
]
=
=
c
]
,
?
b
[
i
]
=
[
t
[
i
]
=
=
c
]
a[i] = [s[i]==c],\ b[i]=[t[i]==c]
a[i]=[s[i]==c],?b[i]=[t[i]==c] ,
a
[
i
]
a[i]
a[i] 和
b
[
i
]
b[i]
b[i] 都是多项式,则有每一个位置的匹配数量为
∑
i
=
0
m
?
1
a
[
i
+
k
]
?
b
[
i
]
\displaystyle\sum_{i = 0}^{m - 1}a[i+k]\cdot b[i]
i=0∑m?1?a[i+k]?b[i] ,我们发现这东西很像
F
F
T
FFT
FFT ,我们使用一个技巧,将
b
[
i
]
b[i]
b[i] 翻转,令
T
[
i
]
=
b
[
m
?
1
?
i
]
T[i] = b[m - 1-i]
T[i]=b[m?1?i] ,那么
∑
i
=
0
m
?
1
a
[
i
+
k
]
?
T
[
m
?
1
?
i
]
\displaystyle\sum_{i = 0}^{m - 1}a[i + k]\cdot T[m - 1 - i]
i=0∑m?1?a[i+k]?T[m?1?i] ,下标和为定值
m
?
1
+
k
m - 1 + k
m?1+k ,直接上
N
T
T
NTT
NTT 或
F
F
T
FFT
FFT 即可。
#include <bits/stdc++.h>
#define ull unsigned long long
#define ll long long
#define re register
#define endl '\n'
using namespace std;
const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 4e5 + 5;
struct node {
double x, y;
node (double x=0, double y=0): x(x), y(y) {}
node operator * (node r) { return node (x*r.x - y*r.y, x*r.y + y*r.x); }
node operator - (node r) { return node (x - r.x, y - r.y); }
node operator + (node r) { return node (x + r.x, y + r.y); }
}a[N], b[N];
int rev[N], bit, lim;
void fft (node* f, int flag) {
for (int i = 0; i < lim; i ++)
if (rev[i] > i) swap (f[i], f[rev[i]]);
for (int k = 1; k < lim; k <<= 1) {
node wnk = node (cos (Pi/k), flag * sin (Pi/k));
for (int i = 0; i < lim; i += k << 1) {
node w = node (1, 0);
for (int j = 0; j < k; j ++, w = w * wnk) {
node nx = f[i + j], ny = w * f[i + j + k];
f[i + j] = nx + ny;
f[i + j + k] = nx - ny;
}
}
}
if (flag == -1) for (int i = 0; i < lim; i ++) f[i].x /= lim;
}
char ch[] = {'A', 'G', 'C', 'T'};
char s[N], t[N];
int ans[N], n, m;
int solve () {
n = strlen (s), m = strlen (t);
for (lim = 1, bit = 0; lim < n + m; lim <<= 1) bit ++;
for (int i = 0; i < lim; i ++)
rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (bit - 1));
for (int i = 0; i < lim; i ++) ans[i] = 0;
for (int j = 0; j < 4; j ++) {
for (int i = 0; i < n; i ++) a[i] = node (s[i] == ch[j], 0);
for (int i = n; i < lim; i ++) a[i] = node ();
for (int i = 0; i < m; i ++) b[i] = node (t[m - i - 1] == ch[j], 0);
for (int i = m; i < lim; i ++) b[i] = node ();
fft (a, 1), fft (b, 1);
for (int i = 0; i < lim; i ++) a[i] = a[i] * b[i];
fft (a, -1);
for (int i = 0; i < n - m + 1; i ++)
ans[i] += int(a[i + m - 1].x + 0.5);
}
int res = 0;
for (int i = 0; i < n - m + 1; i ++)
if (m - ans[i] <= 3) res ++;
return res;
}
int main() {
int T; scanf ("%d", &T);
while (T --) {
scanf ("%s%s", s, t);
printf ("%d\n", solve ());
}
return 0;
}
思路2:后缀自动机解决,时间复杂度
O
(
n
)
O(n)
O(n) 。根据后缀自动机的性质,从源点出发,得到的状态是原串的子串,每一个点的
e
n
d
p
o
s
endpos
endpos 大小就是该子串出现的次数。由此我们直接
d
f
s
dfs
dfs ,在允许有
3
3
3 次跳过的前提下线性求出贡献。
#include <bits/stdc++.h>
#define ull unsigned long long
#define ll long long
#define re register
#define endl '\n'
using namespace std;
const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 2e5 + 5;
struct node {
int link, len, ch[26];
}st[N << 1];
int sz, last, sum[N];
void sam_init () {
for (int i = 0; i <= sz; i ++) {
st[i].len = st[i].link = sum[i] = 0;
memset (st[i].ch, 0, sizeof (st[i].ch));
}
sz = last = 1;
}
void sam_extend (int c) {
int p = last, cur = ++ sz;
st[cur].len = st[p].len + 1, last = cur, sum[cur] = 1;
for ( ; p && ! st[p].ch[c]; p = st[p].link) st[p].ch[c] = cur;
if (! p) st[cur].link = 1;
else {
int q = st[p].ch[c];
if (st[p].len + 1 == st[q].len) st[cur].link = q;
else {
int clone = ++ sz;
st[clone].len = st[p].len + 1;
st[clone].link = st[q].link;
memcpy (st[clone].ch, st[q].ch, sizeof (st[q].ch));
for ( ; p && st[p].ch[c] == q; p = st[p].link) st[p].ch[c] = clone;
st[cur].link = st[q].link = clone;
}
}
}
char ch[] = {'A', 'C', 'G', 'T'};
char s[N], t[N];
int ns, nt, a[N], b[N], ans;
void dfs (int u, int j, int tot) {
if (j > nt) return void(ans += sum[u]);
for (int i = 0; i < 4; i ++) {
int v = ch[i] - 'A';
if (st[u].ch[v] == 0) continue;
if (ch[i] == t[j]) dfs (st[u].ch[v], j + 1, tot);
else if (tot < 3) dfs (st[u].ch[v], j + 1, tot + 1);
}
}
int main() {
int T; scanf ("%d", &T);
while (T --) {
scanf ("%s%s", s + 1, t + 1);
sam_init (), ns = strlen (s + 1), nt = strlen (t + 1);
for (int i = 1; i <= ns; i ++) sam_extend (s[i] - 'A');
for (int i = 1; i <= ns; i ++) b[i] = 0;
for (int i = 1; i <= sz; i ++) b[st[i].len] ++;
for (int i = 1; i <= ns; i ++) b[i] += b[i - 1];
for (int i = 1; i <= sz; i ++) a[b[st[i].len] --] = i;
for (int i = sz; i >= 1; i --) sum[st[a[i]].link] += sum[a[i]];
dfs (1, 1, ans = 0);
printf ("%d\n", ans);
}
return 0;
}
思路3:后缀数组解决,时间复杂度
O
(
n
?
l
o
g
?
n
)
O(n\ log\ n)
O(n?log?n) ,常数稍大,但不开
O
2
O2
O2 也可以过。具体做法是将两个串合并在一起(后缀数组解题常常需要这么做),然后求出
h
e
i
g
h
t
height
height 数组以及它的
S
T
ST
ST 表,用于
O
(
1
)
O(1)
O(1) 求出两个后缀的
L
C
P
LCP
LCP ,依照题目意思我们最多有三次机会可以跳过,即我们要对每一个可能的位置做三次
L
C
P
LCP
LCP ,如果可以匹配,那么答案贡献加一。
#include <bits/stdc++.h>
#define ull unsigned long long
#define ll long long
#define endl '\n'
using namespace std;
const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 2e5 + 5;
int sa[N], rk[N], hei[N];
int x[N], y[N << 1], c[N];
char s[N], t[N];
int ns, nt, m, n;
void get_sa () {
for (int i = 1; i <= m; i ++) c[i] = 0;
for (int i = 1; i <= n; i ++) c[x[i] = s[i]] ++;
for (int i = 2; i <= m; i ++) c[i] += c[i - 1];
for (int i = n; i >= 1; i --) sa[c[x[i]] --] = i;
for (int k = 1; k <= n; k <<= 1) {
int num = 0;
for (int i = n - k + 1; i <= n; i ++) y[++ num] = i;
for (int i = 1; i <= n; i ++) if (sa[i] > k) y[++ num] = sa[i] - k;
for (int i = 1; i <= m; i ++) c[i] = 0;
for (int i = 1; i <= n; i ++) c[x[y[i]]] ++;
for (int i = 2; i <= m; i ++) c[i] += c[i - 1];
for (int i = n; i >= 1; i --) sa[c[x[y[i]]] --] = y[i], y[i] = 0;
for (int i = 1; i <= n; i ++) swap (x[i], y[i]);
x[sa[1]] = num = 1;
for (int i = 2; i <= n; i ++)
x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++ num;
if (n == num) return ;
m = num;
}
}
void get_height () {
for (int i = 1; i <= n; i ++) rk[sa[i]] = i;
for (int i = 1, k = 0; i <= n; i ++) {
if (k) k --;
int j = sa[rk[i] - 1];
while (j + k <= n && s[i + k] == s[j + k]) k ++;
hei[rk[i]] = k;
}
}
int st[N][19], Lg[N];
void get_st () {
for (int i = 1; i <= n; i ++) st[i][0] = hei[i];
for (int i = 2; i <= n; i ++) Lg[i] = Lg[i >> 1] + 1;
for (int j = 1; j <= 18; j ++)
for (int i = 1; i + (1 << j) - 1 <= n; i ++)
st[i][j] = min (st[i][j - 1], st[i + (1 << (j - 1))][j - 1]);
}
int lcp (int x, int y) {
x = rk[x], y = rk[y];
if (x > y) swap (y, x);
int k = Lg [y - x];
return min (st[x + 1][k], st[y - (1 << k) + 1][k]);
}
int main() {
int T; scanf ("%d", &T);
while (T --) {
scanf ("%s%s", s + 1, t + 1);
ns = strlen (s + 1), nt = strlen (t + 1);
n = ns + nt, m = 256;
for (int i = 1; i <= nt; i ++) s[i + ns] = t[i];
int cas = ns - nt + 1;
if (cas <= 0) printf ("0\n");
else {
get_sa (), get_height (), get_st ();
int ans = 0;
for (int i = 1; i <= cas; i ++) {
int re = 0, len = 0;
while (re <= 3 && len <= nt) {
int num = lcp (i + len, ns + len + 1);
len += num + (re < 3), re ++;
}
if (len >= nt) ans ++;
}
printf ("%d\n", ans);
}
}
return 0;
}
思路4:二分加
h
a
s
h
hash
hash 解决,时间复杂度为
O
(
n
l
o
g
?
n
)
O(nlog\ n)
O(nlog?n) ,常数超小(
N
B
!
!
!
NB!!!
NB!!!),易于编程。具体操作和后缀数组那里一致。
#include <bits/stdc++.h>
#define ull unsigned long long
#define ll long long
#define re register
#define endl '\n'
using namespace std;
const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 1e5 + 5;
const int k1 = 13331;
ull hs1[N], hs2[N], bin[N];
char s[N], t[N];
int n, m;
inline ull calc1 (int l, int r) {
return hs1[r] - hs1[l - 1] * bin[r - l + 1];
}
inline ull calc2 (int l, int r) {
return hs2[r] - hs2[l - 1] * bin[r - l + 1];
}
inline int lcp (int x, int y) {
int l = 1, r = m - y + 1;
while (l <= r) {
int mid = (l + r) >> 1;
if (calc1 (x, x + mid - 1) == calc2 (y, y + mid - 1)) l = mid + 1;
else r = mid - 1;
}
return l - 1;
}
int main() {
ios::sync_with_stdio(false), cin.tie(0), cout.tie(0);
int T; cin >> T; bin[0] = 1;
while (T --) {
cin >> (s + 1) >> (t + 1);
n = strlen (s + 1), m = strlen (t + 1);
int ans = 0;
for (int i = 1; i <= n; i ++) {
bin[i] = bin[i - 1] * k1;
hs1[i] = hs1[i - 1] * k1 + s[i];
}
for (int i = 1; i <= m; i ++) hs2[i] = hs2[i - 1] * k1 + t[i];
for (int i = 1, p = 0; i <= n - m + 1; i ++, p = 0) {
for (int cas = 0; cas < 4 && p <= m; cas ++)
p += lcp (i + p, 1 + p) + (cas < 3);
ans += p >= m;
}
cout << ans << endl;
}
return 0;
}
思路1:后缀数组解法,时间复杂度
O
(
n
l
o
g
?
n
)
O(nlog\ n)
O(nlog?n) ,扩展两个字符串的最长公共子串到多个字符串的最长公共子串。我们将所有字符串连接起来,并且用一个特殊的字符分隔开,当
s
a
[
l
?
r
]
sa[l\cdots r]
sa[l?r] 里包含所有用于分隔的特殊字符,那么这个区间内
h
e
i
g
h
t
height
height 数组的最小值就是答案,依照这个思路,我们可以二分答案
O
(
n
l
o
g
?
n
)
O(nlog\ n)
O(nlog?n),或者用单调队列加
t
w
o
?
p
o
i
n
t
e
r
two-pointer
two?pointer
O
(
n
)
O(n)
O(n),下面程序使用单调队列维护
h
e
i
g
h
t
height
height 数组的最小值。
#include <bits/stdc++.h>
#define ull unsigned long long
#define ll long long
#define re register
#define endl '\n'
using namespace std;
const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 1e6 + 5;
int sa[N], rk[N], hei[N];
int x[N], y[N << 1], c[N];
char s[N], str[100100];
int n, m, idx;
void get_sa () {
for (int i = 1; i <= n; i ++) c[x[i] = s[i]] ++;
for (int i = 2; i <= m; i ++) c[i] += c[i - 1];
for (int i = n; i >= 1; i --) sa[c[x[i]] --] = i;
for (int k = 1; k <= n; k <<= 1) {
int num = 0;
for (int i = n - k + 1; i <= n; i ++) y[++ num] = i;
for (int i = 1; i <= n; i ++) if (sa[i] > k) y[++ num] = sa[i] - k;
for (int i = 1; i <= m; i ++) c[i] = 0;
for (int i = 1; i <= n; i ++) c[x[y[i]]] ++;
for (int i = 2; i <= m; i ++) c[i] += c[i - 1];
for (int i = n; i >= 1; i --) sa[c[x[y[i]]] --] = y[i], y[i] = 0;
for (int i = 1; i <= n; i ++) swap (x[i], y[i]);
x[sa[1]] = num = 1;
for (int i = 2; i <= n; i ++)
x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++ num;
if (n == num) break;
m = num;
}
}
void get_height () {
for (int i = 1; i <= n; i ++) rk[sa[i]] = i;
for (int i = 1, k = 0; i <= n; i ++) {
if (k) k --;
int j = sa[rk[i] - 1];
while (j + k <= n && s[i + k] == s[j + k]) k ++;
hei[rk[i]] = k;
}
}
int vis[N], tot, v[100], q[N], tt, hh;
void add (int i) {
if (! vis[sa[i]]) return ;
v[vis[sa[i]]] ++;
tot += v[vis[sa[i]]] == 1;
}
void del (int i) {
if (! vis[sa[i]]) return ;
v[vis[sa[i]]] --;
tot -= v[vis[sa[i]]] == 0;
}
int solve () {
int ans = 0;
add (hh = 1);
for (int l = 1, r = 2; r <= n; r ++) {
while (hh <= tt && hei[q[tt]] >= hei[r]) tt --;
add (r), q[++ tt] = r;
if (tot == idx) {
while (l < r && tot == idx) del (l ++); add (-- l);
while (hh <= tt && q[hh] <= l) hh ++;
ans = max (ans, hei[q[hh]]);
}
}
return ans;
}
int main() {
ios::sync_with_stdio(false), cin.tie(0), cout.tie(0);
while (cin >> (str + 1)) {
s[++ n] = ++ idx;
for (int i = 1; str[i]; i ++)
s[++ n] = str[i], vis[n] = idx;
}
m = 256;
get_sa ();
get_height ();
cout << solve () << endl;
return 0;
}
思路2:后缀自动机解法,时间复杂度
O
(
n
)
O(n)
O(n) ,具体操作见代码注释。
#include <bits/stdc++.h>
#define ull unsigned long long
#define ll long long
#define re register
#define endl '\n'
using namespace std;
const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 1e5 + 5;
struct node {
int len, link, ch[26];
}st[N << 1];
int sz, last;
void sam_init () { sz = last = 1; }
void sam_extend (int c) {
int cur = ++ sz, p = last;
st[cur].len = st[p].len + 1, last = cur;
for ( ; p && ! st[p].ch[c]; p = st[p].link) st[p].ch[c] = cur;
if (! p) st[cur].link = 1;
else {
int q = st[p].ch[c];
if (st[p].len + 1 == st[q].len) st[cur].link = q;
else {
int clone = ++ sz;
st[clone].len = st[p].len + 1;
st[clone].link = st[q].link;
memcpy (st[clone].ch, st[q].ch, sizeof (st[q].ch));
for ( ; p && st[p].ch[c] == q; p = st[p].link) st[p].ch[c] = clone;
st[q].link = st[cur].link = clone;
}
}
}
char s[N];
int a[N << 1], b[N], n;
int g[N << 1], f[N << 1];
int main() {
ios::sync_with_stdio(false), cin.tie(0), cout.tie(0);
sam_init ();
cin >> s, n = strlen (s);
for (int i = 0; i < n; i ++) sam_extend (s[i] - 'a');
for (int i = 1; i <= sz; i ++) b[st[i].len] ++;
for (int i = 1; i <= n; i ++) b[i] += b[i - 1];
for (int i = sz; i >= 1; i --) a[b[st[i].len] --] = i;
for (int i = 1; i <= sz; i ++) g[i] = st[i].len;
while (cin >> s) {
n = strlen (s);
for (int i = 1; i <= sz; i ++) f[i] = 0;
for (int i = 0, v = 1, len = 0; i < n; i ++) {
int c = s[i] - 'a';
while (v && ! st[v].ch[c]) v = st[v].link, len = st[v].len;
if (! v) v = 1, len = 0;
if (st[v].ch[c]) ++ len, v = st[v].ch[c];
f[v] = max (f[v], len);
}
for (int i = sz; i >= 1; i --)
f[st[a[i]].link] = max (f[st[a[i]].link], f[a[i]]);
for (int i = 1; i <= sz; i ++) g[i] = min (f[i], g[i]);
}
cout << *max_element (g + 1, g + sz + 1) << endl;
return 0;
}
思路:后缀自动机解决,时间复杂度
O
(
n
)
O(n)
O(n) ,求出每个状态
e
n
d
p
o
s
endpos
endpos 的大小
s
i
z
[
i
]
siz[i]
siz[i],对于每个结点
i
i
i 表示长度为
l
e
n
[
s
t
[
i
]
.
l
i
n
k
]
+
1
len[st[i].link]+1
len[st[i].link]+1 到
l
e
n
[
i
]
len[i]
len[i] 的子串出现次数为
s
i
z
[
i
]
siz[i]
siz[i] ,对于区间修改,可以考虑差分。
#include <bits/stdc++.h>
#define ull unsigned long long
#define ll long long
#define re register
#define endl '\n'
using namespace std;
const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 2e5 + 5;
struct node {
int len, link, ch[26];
}st[N];
int sz, last, siz[N];
void sam_init () {
for (int i = 0; i <= sz; i ++) {
st[i].link = st[i].len = siz[i] = 0;
memset (st[i].ch, 0, sizeof (st[i].ch));
}
sz = last = 1;
}
void sam_extend (int c) {
int cur = ++ sz, p = last;
st[cur].len = st[p].len + 1, last = cur, siz[cur] = 1;
for ( ; p && ! st[p].ch[c]; p = st[p].link) st[p].ch[c] = cur;
if (! p) st[cur].link = 1;
else {
int q = st[p].ch[c];
if (st[p].len + 1 == st[q].len) st[cur].link = q;
else {
int clone = ++ sz;
st[clone].len = st[p].len + 1;
st[clone].link = st[q].link;
memcpy (st[clone].ch, st[q].ch, sizeof (st[q].ch));
for ( ; p && st[p].ch[c] == q; p = st[p].link) st[p].ch[c] = clone;
st[cur].link = st[q].link = clone;
}
}
}
char s[N];
int k, n, a[N], b[N];
int main() {
ios::sync_with_stdio(false), cin.tie(0), cout.tie(0);
int T; cin >> T;
while (T --) {
cin >> (s + 1) >> k;
sam_init (), n = strlen (s + 1);
for (int i = 1; i <= n; i ++) sam_extend (s[i] - 'a');
for (int i = 0; i <= n; i ++) b[i] = 0;
for (int i = 1; i <= sz; i ++) b[st[i].len] ++;
for (int i = 1; i <= n; i ++) b[i] += b[i - 1];
for (int i = sz; i >= 1; i --) a[b[st[i].len] --] = i;
for (int i = sz; i >= 1; i --) siz[st[a[i]].link] += siz[a[i]];
for (int i = 0; i <= n; i ++) b[i] = 0;
int ans = -1, cnt = 1;
for (int i = 1; i <= sz; i ++)
if (siz[i] == k && st[i].len != n)
b[st[st[i].link].len + 1] ++, b[st[i].len + 1] --;
for (int i = 1; i <= n; i ++) {
b[i] += b[i - 1];
if (b[i] >= cnt) cnt = b[i], ans = i;
}
cout << ans << endl;
}
return 0;
}
|