126 строки
4.1 KiB
C++
126 строки
4.1 KiB
C++
/*
|
|
* esa.hxx
|
|
* Copyright (c) 2010 Daisuke Okanohara All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person
|
|
* obtaining a copy of this software and associated documentation
|
|
* files (the "Software"), to deal in the Software without
|
|
* restriction, including without limitation the rights to use,
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following
|
|
* conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#ifndef _ESA_HXX
|
|
#define _ESA_HXX
|
|
|
|
#include <vector>
|
|
#include <utility>
|
|
#include <cassert>
|
|
#include "sais.hxx"
|
|
|
|
namespace esaxx_private {
|
|
template<typename string_type, typename sarray_type, typename index_type>
|
|
index_type suffixtree(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, index_type n){
|
|
if (n == 0){
|
|
return 0;
|
|
}
|
|
sarray_type Psi = L;
|
|
Psi[SA[0]] = SA[n-1];
|
|
for (index_type i = 1; i < n; ++i){
|
|
Psi[SA[i]] = SA[i-1];
|
|
}
|
|
|
|
// Compare at most 2n log n charcters. Practically fastest
|
|
// "Permuted Longest-Common-Prefix Array", Juha Karkkainen, CPM 09
|
|
sarray_type PLCP = R;
|
|
index_type h = 0;
|
|
for (index_type i = 0; i < n; ++i){
|
|
index_type j = Psi[i];
|
|
while (i+h < n && j+h < n &&
|
|
T[i+h] == T[j+h]){
|
|
++h;
|
|
}
|
|
PLCP[i] = h;
|
|
if (h > 0) --h;
|
|
}
|
|
|
|
sarray_type H = L;
|
|
for (index_type i = 0; i < n; ++i){
|
|
H[i] = PLCP[SA[i]];
|
|
}
|
|
H[0] = -1;
|
|
|
|
std::vector<std::pair<index_type, index_type> > S;
|
|
S.push_back(std::make_pair((index_type)-1, (index_type)-1));
|
|
size_t nodeNum = 0;
|
|
for (index_type i = 0; ; ++i){
|
|
std::pair<index_type, index_type> cur (i, (i == n) ? -1 : H[i]);
|
|
std::pair<index_type, index_type> cand(S.back());
|
|
while (cand.second > cur.second){
|
|
if (i - cand.first > 1){
|
|
L[nodeNum] = cand.first;
|
|
R[nodeNum] = i;
|
|
D[nodeNum] = cand.second;
|
|
++nodeNum;
|
|
}
|
|
cur.first = cand.first;
|
|
S.pop_back();
|
|
cand = S.back();
|
|
}
|
|
if (cand.second < cur.second){
|
|
S.push_back(cur);
|
|
}
|
|
if (i == n) break;
|
|
S.push_back(std::make_pair(i, n - SA[i] + 1));
|
|
}
|
|
return nodeNum;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief Build an enhanced suffix array of a given string in linear time
|
|
* For an input text T, esaxx() builds an enhancd suffix array in linear time.
|
|
* i-th internal node is represented as a triple (L[i], R[i], D[i]);
|
|
* L[i] and R[i] is the left/right boundary of the suffix array as SA[L[i]....R[i]-1]
|
|
* D[i] is the depth of the internal node
|
|
* The number of internal node is at most N-1 and return the actual number by
|
|
* @param T[0...n-1] The input string. (random access iterator)
|
|
* @param SA[0...n-1] The output suffix array (random access iterator)
|
|
* @param L[0...n-1] The output left boundary of internal node (random access iterator)
|
|
* @param R[0...n-1] The output right boundary of internal node (random access iterator)
|
|
* @param D[0...n-1] The output depth of internal node (random access iterator)
|
|
* @param n The length of the input string
|
|
* @param k The alphabet size
|
|
* @pram nodeNum The output the number of internal node
|
|
* @return 0 if succeded, -1 or -2 otherwise
|
|
*/
|
|
|
|
template<typename string_type, typename sarray_type, typename index_type>
|
|
int esaxx(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D,
|
|
index_type n, index_type k, index_type& nodeNum) {
|
|
if ((n < 0) || (k <= 0)) return -1;
|
|
int err = saisxx(T, SA, n, k);
|
|
if (err != 0){
|
|
return err;
|
|
}
|
|
nodeNum = esaxx_private::suffixtree(T, SA, L, R, D, n);
|
|
return 0;
|
|
}
|
|
|
|
|
|
#endif // _ESA_HXX
|