java實作sunday演算法範例分享

作者：Eve Cole 更新時間：2025-01-13 14:36:01

字串比對查找演算法中，最著名的兩個是KMP演算法（Knuth-Morris-Pratt)和BM演算法（Boyer-Moore)。兩個演算法在最壞情況下均具有線性的查找時間。但在實用上，KMP演算法並不比最簡單的C函式庫函數strstr()快多少，而BM演算法則往往比KMP演算法快上3－5倍(未親身實作)。但是BM演算法還不是最快的演算法，這裡介紹一種比BM演算法更快一些的查找演算法Sunday演算法。

Sunday演算法的想法和BM演算法中的壞字元想法非常類似。差別只是在於Sunday演算法在匹配失敗之後，是取目標字串中當前和Pattern字串對應的部分後面一個位置的字元來做壞字元匹配。當發現符合失敗的時候就判斷母串中目前偏移量+Pattern字串長度+1處(假設為K位置)的字元在Pattern字串中是否存在。如果存在，則將該位置和Pattern字串中的該字元對齊，再從頭開始匹配；如果不存在，就將Pattern字串向後移動，和母串k+1處的字元對齊，再進行匹配。重複上面的動作直到找到，或母串被找完結束。動手寫了個小例子來實作以下這個演算法。

在程式碼中，實作了兩種字串比對演算法，一種是Sunday方式，一種是普通的每次移動一位的方式，二者的效率對比在main函數中有，都是奈秒等級。演算法的詳細步驟，在程式碼中已經加入了相應的註解。關於BM演算法，下次空了再一起對照分析。

複製代碼代碼如下:

import java.util.HashMap;

import java.util.LinkedList;

import java.util.List;

import java.util.Map;

/**

* @author Scott

* @date 2013年12月28日

* @description

public class SundySearch {

String text = null;

String pattern = null;

int currentPos = 0;

/**

* 匹配後的子字串第一個字元位置列表

List<Integer> matchedPosList = new LinkedList<Integer>();

/**

* 匹配字元的Map,記錄改匹配字串有哪些char且每個char最後出現的位移

Map<Character, Integer> map = new HashMap<Character, Integer>();

public SundySearch(String text, String pattern) {

this.text = text;

this.pattern = pattern;

this.initMap();

};

/**

* Sunday匹配時，用來儲存Pattern中每個字元最後一次出現的位置，從左到右的順序

private void initMap() {

for (int i = 0; i < pattern.length(); i++) {

this.map.put(pattern.charAt(i), i);

}

/**

* 普通的字串遞歸匹配，匹配失敗就前進一位

public List<Integer> normalMatch() {

//匹配失敗，繼續往下走

if (!matchFromSpecialPos(currentPos)) {

currentPos += 1;

if ((text.length() - currentPos) < pattern.length()) {

return matchedPosList;

}

normalMatch();

} else {

//匹配成功，記錄位置

matchedPosList.add(currentPos);

currentPos += 1;

normalMatch();

}

return matchedPosList;

}

/**

* Sunday匹配，假定Text中的K字元的位置為：目前偏移量+Pattern字串長度+1

public List<Integer> sundayMatch() {

// 如果沒有匹配成功

if (!matchFromSpecialPos(currentPos)) {

// 如果Text中K字元沒有在Pattern字串中出現，則跳過整個Pattern字串長度

if ((currentPos + pattern.length() + 1) < text.length()

&& !map.containsKey(text.charAt(currentPos + pattern.length() + 1))) {

currentPos += pattern.length();

}else {

// 如果Text中K字元在Pattern字串中出現，則將Text中K字元的位置和Pattern字串中的最後一次出現K字元的位置對齊

if ((currentPos + pattern.length() + 1) > text.length()) {

currentPos += 1;

} else {

currentPos += pattern.length() - (Integer) map.get(text.charAt(currentPos + pattern.length()));

}

// 匹配完成，傳回全部匹配成功的初始位移

if ((text.length() - currentPos) < pattern.length()) {

return matchedPosList;

}

sundayMatch();

}else {

// 匹配成功前進一位然後再次匹配

matchedPosList.add(currentPos);

currentPos += 1;

sundayMatch();

}

return matchedPosList;

}

/**

* 檢查從Text的指定偏移量開始的子字串是否和Pattern匹配

public boolean matchFromSpecialPos(int pos) {

if ((text.length()-pos) < pattern.length()) {

return false;

}

for (int i = 0; i < pattern.length(); i++) {

if (text.charAt(pos + i) == pattern.charAt(i)) {

if (i == (pattern.length()-1)) {

return true;

}

continue;

} else {

break;

}

return false;

}

public static void main(String[] args) {

SundySearch sundySearch = new SundySearch("hello 啊啊阿道夫adfsadfklf adf234masdfsdfdsfdsfdsffwerwrewrerwerwersdf2666sdflsdfk", "adf");

long begin = System.nanoTime();

System.out.println("NormalMatch:" + sundySearch.normalMatch());

System.out.println("NormalMatch:" + (System.nanoTime() - begin));

begin = System.nanoTime();

System.out.println("SundayMatch:" + sundySearch.sundayMatch());

System.out.println("SundayMatch:" + (System.nanoTime() - begin));

}