Skip to content

Commit

Permalink
fix judgements
Browse files Browse the repository at this point in the history
  • Loading branch information
mattn committed May 7, 2023
1 parent 0a397d4 commit cdc9c6f
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 27 deletions.
86 changes: 63 additions & 23 deletions haiku.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,77 @@ var (
reWord = regexp.MustCompile(`^[ァ-ヾ]+$`)
reIgnoreText = regexp.MustCompile(`[\[\]「」『』、。?!]`)
reIgnoreChar = regexp.MustCompile(`[ァィゥェォャュョ]`)
reKana = regexp.MustCompile(`[ァ-タダ-ヶ]`)
reKana = regexp.MustCompile(`^[ァ-タダ-ヶ]+$`)
)

type Opt struct {
Udic *dict.Dict
Debug bool
}

func isEnd(c []string) bool {
return c[1] != "非自立" && !strings.HasPrefix(c[5], "連用") && c[5] != "未然形"
// dictIdx looks up typ in d.ContentsMeta and returns the stored value as an
// int. It returns -1 when the dictionary has no entry for typ.
func dictIdx(d *dict.Dict, typ string) int {
	pos, found := d.ContentsMeta[typ]
	if !found {
		return -1
	}
	return int(pos)
}

// contains reports whether any element of c is equal to s.
func contains(c []string, s string) bool {
	for i := 0; i < len(c); i++ {
		if c[i] != s {
			continue
		}
		return true
	}
	return false
}

func isIgnore(c []string) bool {
// isEnd reports whether a token with features c is acceptable as the last
// token of a phrase.
//
// d is the dictionary the token came from; it is consulted (via dictIdx) for
// the positions of the pronunciation and inflectional-form features inside c.
// c[0] and c[1] are compared against part-of-speech labels.
//
// Features that are missing from c (the slice is shorter than the index being
// read) are treated as empty strings instead of causing an index-out-of-range
// panic, matching the `idx < len(c)` guard the callers use for the same lookup.
func isEnd(d *dict.Dict, c []string) bool {
	// feat returns c[i], or "" when i is outside c.
	feat := func(i int) string {
		if i < 0 || i >= len(c) {
			return ""
		}
		return c[i]
	}

	idx := dictIdx(d, dict.PronunciationIndex)
	if feat(0) == "接頭辞" {
		// A prefix is rejected only when the dictionary exposes a
		// pronunciation column and one of the features is "御".
		if idx >= 0 && contains(c, "御") {
			return false
		}
		return true
	}
	if feat(1) == "非自立" {
		if feat(0) == "名詞" {
			return true
		}
		if feat(0) == "動詞" {
			return true
		}
		// Other dependent tokens are accepted only when pronounced "ノ".
		if idx >= 0 && feat(idx) == "ノ" {
			return true
		}
		return false
	}
	idx = dictIdx(d, dict.InflectionalForm)
	if idx >= 0 {
		if feat(idx) == "未然形" {
			return false
		}
		// The equivalent rejection for forms prefixed with "連用" is
		// intentionally left disabled:
		//if strings.HasPrefix(c[idx], "連用") {
		//	return false
		//}
	}
	return true
}

// isIgnore reports whether a token with features c should be dropped before
// matching: c[0] is "空白" or "補助記号", or c[0] is "記号" and c[1] is "空白".
//
// d is not read by this function. An empty c is never ignored, and a c with
// only one feature is handled without reading c[1], so a short feature slice
// cannot cause an index-out-of-range panic.
func isIgnore(d *dict.Dict, c []string) bool {
	if len(c) == 0 {
		return false
	}
	switch c[0] {
	case "空白", "補助記号":
		return true
	case "記号":
		return len(c) > 1 && c[1] == "空白"
	}
	return false
}

// isWord returns true when the kind of the word can appear at the start of a
// sentence.
func isWord(c []string) bool {
func isWord(d *dict.Dict, c []string) bool {
for _, f := range []string{"名詞", "形容詞", "形容動詞", "副詞", "連体詞", "接続詞", "感動詞", "接頭詞", "フィラー"} {
if f == c[0] && c[1] != "接尾" {
return true
}
}
if c[0] == "接頭辞" || (c[0] == "接続詞" && c[1] == "名詞接続") {
return false
}
if c[0] == "形状詞" && c[1] != "助動詞語幹" {
return true
}
Expand All @@ -48,7 +95,7 @@ func isWord(c []string) bool {
if c[0] == "記号" && c[1] == "一般" {
return true
}
if c[0] == "助詞" && c[1] != "副助詞" && c[1] != "準体助詞" && c[1] != "終助詞" && c[1] != "係助詞" && c[1] != "格助詞" && c[1] != "接続助詞" {
if c[0] == "助詞" && c[1] != "副助詞" && c[1] != "準体助詞" && c[1] != "終助詞" && /*c[1] != "係助詞" &&*/ c[1] != "格助詞" && c[1] != "接続助詞" {
return true
}
if c[0] == "動詞" && c[1] != "接尾" && c[1] != "非自立" {
Expand Down Expand Up @@ -92,7 +139,7 @@ func MatchWithOpt(text string, rule []int, opt *Opt) bool {
var tmp []tokenizer.Token
for _, token := range tokens {
c := token.Features()
if !isIgnore(c) {
if !isIgnore(d, c) {
tmp = append(tmp, token)
}
}
Expand All @@ -105,12 +152,7 @@ func MatchWithOpt(text string, rule []int, opt *Opt) bool {
if reKana.MatchString(tok.Surface) {
y = tok.Surface
} else {
var idx int
if ii, ok := d.ContentsMeta[dict.PronunciationIndex]; ok {
idx = int(ii)
} else {
idx = -1
}
idx := dictIdx(d, dict.PronunciationIndex)
if idx >= 0 && idx < len(c) {
y = c[idx]
} else {
Expand All @@ -126,12 +168,15 @@ func MatchWithOpt(text string, rule []int, opt *Opt) bool {
}
return false
}
if pos >= len(rule) || (r[pos] == rule[pos] && !isWord(c)) {
if pos >= len(rule) || (r[pos] == rule[pos] && !isWord(d, c)) {
return false
}
n := countChars(y)
r[pos] -= n
if r[pos] == 0 {
if !isEnd(d, c) {
return false
}
pos++
if pos == len(r) && i == len(tokens)-1 {
return true
Expand Down Expand Up @@ -165,7 +210,7 @@ func FindWithOpt(text string, rule []int, opt *Opt) ([]string, error) {
var tmp []tokenizer.Token
for _, token := range tokens {
c := token.Features()
if !isIgnore(c) {
if !isIgnore(d, c) {
tmp = append(tmp, token)
}
}
Expand Down Expand Up @@ -204,12 +249,7 @@ func FindWithOpt(text string, rule []int, opt *Opt) ([]string, error) {
if reKana.MatchString(tok.Surface) {
y = tok.Surface
} else {
var idx int
if ii, ok := d.ContentsMeta[dict.PronunciationIndex]; ok {
idx = int(ii)
} else {
idx = -1
}
idx := dictIdx(d, dict.PronunciationIndex)
if idx >= 0 && idx < len(c) {
y = c[idx]
} else {
Expand All @@ -226,7 +266,7 @@ func FindWithOpt(text string, rule []int, opt *Opt) ([]string, error) {
copy(r, rule)
continue
}
if pos >= len(rule) || (r[pos] == rule[pos] && !isWord(c)) {
if pos >= len(rule) || (r[pos] == rule[pos] && !isWord(d, c)) {
pos = 0
ambigous = 0
sentence = ""
Expand All @@ -240,7 +280,7 @@ func FindWithOpt(text string, rule []int, opt *Opt) ([]string, error) {
if r[pos] >= 0 && (r[pos] == 0 || r[pos]+ambigous == 0) {
pos++
if pos == len(r) || pos == len(r)+1 {
if isEnd(c) {
if isEnd(d, c) {
ret = append(ret, sentence)
start = i + 1
}
Expand Down
2 changes: 1 addition & 1 deletion haiku_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func testMatch(t *testing.T, filename string, rules []int, judge bool) {
if strings.HasPrefix(text, "#") {
continue
}
if Match(text, rules) != judge {
if MatchWithOpt(text, rules, &Opt{Debug: true}) != judge {
t.Fatalf("%q for %q must be %v", text, filename, rules)
}
}
Expand Down
1 change: 1 addition & 0 deletions testdata/haiku.bad
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
私も歩いてsatoshi貯めるか〜。
それはそう、でも暖房だよ暖房
オッドアイにはどうしても弱い。ふにゃ。
我々の業界ではご褒美です
5 changes: 2 additions & 3 deletions testdata/haiku.good
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,12 @@
ライブではむしろ俺氏も声を出す。
#夏草や兵どもが夢の跡
#Amethyst、あぁAmethyst、Amethyst
十一時、明日も早いしもう寝るか
十一時、あすも早いしもう寝るか
しらんけどたぶんそうって言っちゃった
たけのこの土佐荷が好きでたまらない
店員に声かけられた一人寿司
店員に声かけられたひとり寿司
サーモンを食べてないのは珍しい
東京に行くバス予約しないとな
店員に声かけられた一人寿司
竹林や ああ竹林や 竹林や
#冷凍のまま食ってるかと思った
靴下を脱がずに先にパンツから
Expand Down

0 comments on commit cdc9c6f

Please sign in to comment.