initial release
This commit is contained in:
397
cosmic rage/spellchecker.lua
Normal file
397
cosmic rage/spellchecker.lua
Normal file
@@ -0,0 +1,397 @@
|
||||
--[[
|
||||
Spell checker for MUSHclient, written by Nick Gammon.
|
||||
Written: 9th October 2006
|
||||
Updated: 11th October 2006
|
||||
Updated: 6th March 2007 to make progress bar optional
|
||||
Updated: 13th April 2007 to added IGNORE_MIXED_CASE, IGNORE_IMBEDDED_NUMBERS
|
||||
Updated: 15th February 2009 to convert to using SQLite database instead of Lua table
|
||||
Updated: 21st February 2009 to fix problem where words with 2 metaphones were only stored once.
|
||||
--]]
|
||||
|
||||
local SHOW_PROGRESS_BAR = true -- show progress bar? true or false
|
||||
|
||||
local METAPHONE_LENGTH = 4 -- how many characters of metaphone to get back
|
||||
local EDIT_DISTANCE = 4 -- how close a word must be to appear in the list of suggestions
|
||||
local CASE_SENSITIVE = false -- compare case? true or false
|
||||
local IGNORE_CAPITALIZED = false -- ignore words starting with a capital? true or false
|
||||
local IGNORE_MIXED_CASE = false -- ignore words in MixedCase (like that one)? true or false
|
||||
local IGNORE_IMBEDDED_NUMBERS = false -- ignore words with numbers in them? true or false
|
||||
|
||||
-- this is the pattern we use to find "words" in the text to be spell-checked
|
||||
local pattern = "%a+'?[%a%d]+" -- regexp to give us a word with a possible single imbedded quote
|
||||
|
||||
-- path to the spell check dictionaries
|
||||
local directory = utils.info ().app_directory .. "spell\\"
|
||||
-- file name of the user dictionary, in the above path
|
||||
local userdict = "userdict.txt"
|
||||
|
||||
-- stuff below used internally
|
||||
local make_upper -- this becomes the upper-case conversion function, see below
|
||||
local db -- SQLite3 dictionary database
|
||||
local cancelmessage = "spell check cancelled"
|
||||
local previousword --> not used right now
|
||||
local change, ignore -- tables of change-all, ignore-all words
|
||||
|
||||
-- dictionaries - add new entries along similar lines to add more dictionary files
|
||||
local files = {
|
||||
|
||||
-- lower-case words
|
||||
|
||||
"english-words.10",
|
||||
"english-words.20",
|
||||
"english-words.35",
|
||||
"english-words.40",
|
||||
|
||||
-- upper case words
|
||||
|
||||
"english-upper.10",
|
||||
"english-upper.35",
|
||||
"english-upper.40",
|
||||
|
||||
-- American words
|
||||
|
||||
"american-words.10",
|
||||
"american-words.20",
|
||||
|
||||
-- contractions (eg. aren't, doesn't)
|
||||
|
||||
"english-contractions.10",
|
||||
"english-contractions.35",
|
||||
|
||||
-- user dictionary
|
||||
userdict,
|
||||
}
|
||||
|
||||
-- trim leading and trailing spaces from a string
|
||||
local function trim (s)
|
||||
return (string.gsub (s, "^%s*(.-)%s*$", "%1"))
|
||||
end -- trim
|
||||
|
||||
-- insert a word into our metaphone table - called by reading dictionaries
|
||||
-- and also by adding a word during the spellcheck
|
||||
local function insert_word (word, user)
|
||||
|
||||
if word == "" then
|
||||
return
|
||||
end -- empty word
|
||||
|
||||
|
||||
-- get both metaphones
|
||||
local m1, m2 = utils.metaphone (word, METAPHONE_LENGTH)
|
||||
local fixed_word = string.gsub (word, "'", "''") -- convert ' to ''
|
||||
|
||||
assert (db:execute (string.format ("INSERT INTO words VALUES (NULL, '%s', '%s', %i)",
|
||||
fixed_word, m1, user)));
|
||||
|
||||
-- do 2nd metaphone, if any
|
||||
if m2 then
|
||||
assert (db:execute (string.format ("INSERT INTO words VALUES (NULL, '%s', '%s', %i)",
|
||||
fixed_word, m2, user)));
|
||||
end -- having alternative
|
||||
|
||||
end -- insert_word
|
||||
|
||||
|
||||
-- sort function for sorting the suggestions into edit-distance order
|
||||
local function suggestions_compare (word)
|
||||
return function (a, b)
|
||||
local diff = utils.edit_distance (make_upper (a), word) -
|
||||
utils.edit_distance (make_upper (b), word)
|
||||
if diff == 0 then
|
||||
return make_upper (a) < make_upper (b)
|
||||
else
|
||||
return diff < 0
|
||||
end -- differences the same?
|
||||
end -- compareit
|
||||
end -- function suggestions_compare
|
||||
|
||||
-- check for one word, called by spellcheck (invokes suggestion dialog)
|
||||
local function checkword_and_suggest (word)
|
||||
|
||||
if IGNORE_CAPITALIZED then
|
||||
-- ignore words starting in caps
|
||||
if string.find (word, "^[A-Z]") then
|
||||
return word, "ignore"
|
||||
end -- this round, ignore this word
|
||||
end -- if IGNORE_CAPITALIZED
|
||||
|
||||
if IGNORE_MIXED_CASE then
|
||||
-- ignore words in mixed case
|
||||
if string.find (word, "[A-Z]") and
|
||||
string.find (word, "[a-z]") then
|
||||
return word, "ignore"
|
||||
end -- this round, ignore this word
|
||||
end -- if IGNORE_MIXED_CASE
|
||||
|
||||
if IGNORE_IMBEDDED_NUMBERS then
|
||||
-- ignore words with numbers in them
|
||||
if string.find (word, "%d") then
|
||||
return word, "ignore"
|
||||
end -- this round, ignore this word
|
||||
end -- if IGNORE_IMBEDDED_NUMBERS
|
||||
|
||||
uc_word = make_upper (word) -- convert to upper-case if wanted
|
||||
|
||||
-- if we already did "ignore all" on this particular word, ignore it again
|
||||
if ignore [word] then
|
||||
return word, "ignore"
|
||||
end -- this round, ignore this word
|
||||
|
||||
-- if we said change A to B, change it again
|
||||
if change [word] then
|
||||
return change [word], "change"
|
||||
end -- change to this word
|
||||
|
||||
-- table of suggestions, based on the metaphone
|
||||
local keyed_suggestions = {}
|
||||
|
||||
-- get both metaphones
|
||||
local m1, m2 = utils.metaphone (word, METAPHONE_LENGTH)
|
||||
|
||||
local function lookup_metaphone (m)
|
||||
local found = false
|
||||
for row in db:rows(string.format ("SELECT name FROM words WHERE metaphone = '%s'", m)) do
|
||||
local word = row [1]
|
||||
if make_upper (word) == uc_word then
|
||||
found = true -- found exact match
|
||||
break
|
||||
end -- found
|
||||
if utils.edit_distance (make_upper (word), uc_word) < EDIT_DISTANCE then
|
||||
keyed_suggestions [word] = true
|
||||
end -- close enough
|
||||
end
|
||||
return found
|
||||
end -- lookup_metaphone
|
||||
|
||||
-- look up first metaphone
|
||||
if lookup_metaphone (m1) then
|
||||
return word, "ok"
|
||||
end -- word found
|
||||
|
||||
-- try 2nd metaphone
|
||||
if m2 then
|
||||
if lookup_metaphone (m2) then
|
||||
return word, "ok"
|
||||
end -- word found
|
||||
end -- have alternate metaphone
|
||||
|
||||
-- pull into indexed table
|
||||
local suggestions = {}
|
||||
for k in pairs (keyed_suggestions) do
|
||||
table.insert (suggestions, k)
|
||||
end -- for
|
||||
|
||||
table.sort (suggestions, suggestions_compare (uc_word))
|
||||
|
||||
-- not found? do spell check dialog
|
||||
local action, replacement = utils.spellcheckdialog (word, suggestions)
|
||||
|
||||
-- they cancelled?
|
||||
if not action then
|
||||
error (cancelmessage) --> forces us out of gsub loop
|
||||
end -- cancelled
|
||||
|
||||
-- ignore this only - just return
|
||||
if action == "ignore" then
|
||||
return word, "ignore" -- use current word
|
||||
end -- ignore word
|
||||
|
||||
-- ignore all of this word? add to list
|
||||
if action == "ignoreall" then
|
||||
ignore [word] = true
|
||||
return word, "ignore" -- use current word
|
||||
end -- ignore word
|
||||
|
||||
-- add to user dictionary?
|
||||
-- add to metaphone table, and rewrite dictionary
|
||||
if action == "add" then
|
||||
insert_word (word, 1)
|
||||
return word, "ok"
|
||||
end -- adding
|
||||
|
||||
-- change word once? return replacement
|
||||
if action == "change" then
|
||||
return checkword_and_suggest (replacement) -- however, re-check it
|
||||
end -- changing
|
||||
|
||||
-- change all occurrences? add to table, return replacement
|
||||
if action == "changeall" then
|
||||
local newword, newaction = checkword_and_suggest (replacement) -- re-check it
|
||||
if newaction == "ok" then
|
||||
change [word] = newword
|
||||
end -- if approved
|
||||
return newword -- return the new word
|
||||
end -- changing
|
||||
|
||||
error "unexpected result from dialog"
|
||||
end -- checkword_and_suggest
|
||||
|
||||
-- exported function to do the spellcheck
|
||||
function spellcheck (line)
|
||||
change = {} -- words to change
|
||||
ignore = {} -- words to ignore
|
||||
|
||||
-- we raise an error if they cancel the spell check dialog
|
||||
ok, result = xpcall (function ()
|
||||
return string.gsub (line, pattern, checkword_and_suggest)
|
||||
end, debug.traceback)
|
||||
|
||||
if ok then
|
||||
return result
|
||||
end -- not cancelled spell check
|
||||
|
||||
-- whoops! syntax error?
|
||||
if not string.find (result, cancelmessage, 1, true) then
|
||||
error (result)
|
||||
end -- some syntax error
|
||||
|
||||
return nil --> shows they cancelled
|
||||
end -- spellchecker
|
||||
|
||||
local notfound -- table of not-found words, for spellcheck_string
|
||||
|
||||
-- check for one word, called by spellcheck_string
|
||||
local function checkword (word)
|
||||
uc_word = make_upper (word) -- convert to upper-case if wanted
|
||||
|
||||
-- get first metaphone
|
||||
local m = utils.metaphone (word, METAPHONE_LENGTH)
|
||||
|
||||
local found = false
|
||||
for row in db:rows(string.format ("SELECT name FROM words WHERE metaphone = '%s'", m)) do
|
||||
local word = row [1]
|
||||
if make_upper (word) == uc_word then
|
||||
found = true -- found exact match
|
||||
break
|
||||
end -- found
|
||||
end
|
||||
|
||||
if found then return end -- do nothing if word found
|
||||
|
||||
-- otherwise insert our word
|
||||
table.insert (notfound, word)
|
||||
end -- function checkword
|
||||
|
||||
-- exported function to spellcheck a string
|
||||
function spellcheck_string (text)
|
||||
notfound = {}
|
||||
string.gsub (text, pattern, checkword)
|
||||
return notfound
|
||||
end -- spellcheck_string
|
||||
|
||||
-- exported function to add a word to the user dictionary
|
||||
function spellcheck_add_word (word, action, replacement)
|
||||
assert (action == "i", "Can only use action 'i' in user dictionary") -- only "i" supported right now
|
||||
insert_word (word, 1)
|
||||
end -- spellcheck_string
|
||||
|
||||
-- read one of the dictionaries
|
||||
local function read_dict (dlg, name)
|
||||
if SHOW_PROGRESS_BAR then
|
||||
dlg:step ()
|
||||
dlg:status (directory .. name)
|
||||
if dlg:checkcancel () then
|
||||
error "Dictionary loading cancelled"
|
||||
end -- if cancelled
|
||||
end -- if SHOW_PROGRESS_BAR
|
||||
|
||||
for line in io.lines (directory .. name) do
|
||||
insert_word (line, 0)
|
||||
end
|
||||
end -- read_dict
|
||||
|
||||
local function init ()
|
||||
|
||||
-- make a suitable function depending on whether they want case-sensitive or not
|
||||
if CASE_SENSITIVE then
|
||||
make_upper = function (s) return s end -- return original
|
||||
else
|
||||
make_upper = function (s) return s:upper () end -- make upper case
|
||||
end -- case-sensitivity test
|
||||
|
||||
-- if no user dictionary, create it
|
||||
local f = io.open (directory .. userdict, "r")
|
||||
if not f then
|
||||
f = io.output (directory .. userdict)
|
||||
f:close ()
|
||||
else
|
||||
f:close ()
|
||||
end -- checking for user dictionary
|
||||
|
||||
-- open database on disk
|
||||
db = assert (sqlite3.open( directory .. "spell.sqlite"))
|
||||
local words_table = false
|
||||
local count = 0
|
||||
|
||||
-- if database just created, there won't be a words table
|
||||
for row in db:nrows("SELECT * FROM sqlite_master WHERE type = 'table' AND name = 'words'") do
|
||||
if string.match (row.sql, "word_id") then -- better be newer version
|
||||
words_table = true
|
||||
end -- if
|
||||
end
|
||||
|
||||
-- enable WAL (Write-Ahead Logging)
|
||||
assert (db:execute "PRAGMA journal_mode=WAL;")
|
||||
|
||||
-- if no words table, make one
|
||||
if not words_table then
|
||||
-- create a table to hold the words
|
||||
assert (db:execute[[
|
||||
DROP TABLE IF EXISTS words;
|
||||
CREATE TABLE words(
|
||||
word_id INTEGER NOT NULL PRIMARY KEY autoincrement,
|
||||
name VARCHAR(10) NOT NULL,
|
||||
metaphone VARCHAR(10) NOT NULL,
|
||||
user INT(1)
|
||||
);
|
||||
CREATE INDEX metaphone_index ON words (metaphone);
|
||||
CREATE INDEX name_index ON words (name);
|
||||
]])
|
||||
end -- if
|
||||
|
||||
-- check if table empty
|
||||
for row in db:rows('SELECT COUNT(*) FROM words') do
|
||||
count = row [1]
|
||||
end
|
||||
|
||||
-- if empty, populate it
|
||||
if count == 0 then
|
||||
|
||||
local dlg
|
||||
|
||||
if SHOW_PROGRESS_BAR then
|
||||
dlg = progress.new ("Loading dictionaries into SQLite database ...")
|
||||
|
||||
dlg:range (0, #files)
|
||||
dlg:setstep (1)
|
||||
end -- if SHOW_PROGRESS_BAR
|
||||
|
||||
assert (db:execute "BEGIN TRANSACTION");
|
||||
|
||||
for k, v in ipairs (files) do
|
||||
ok, result = pcall (function ()
|
||||
read_dict (dlg, v)
|
||||
end)
|
||||
if not ok then
|
||||
if SHOW_PROGRESS_BAR then
|
||||
dlg:close ()
|
||||
end -- if SHOW_PROGRESS_BAR
|
||||
error (result)
|
||||
end -- not ok
|
||||
end -- reading each file
|
||||
|
||||
if SHOW_PROGRESS_BAR then
|
||||
dlg:close ()
|
||||
end -- if SHOW_PROGRESS_BAR
|
||||
|
||||
assert (db:execute "COMMIT");
|
||||
|
||||
end -- if nothing in database
|
||||
|
||||
|
||||
end -- init
|
||||
|
||||
-- when script is loaded, do initialization stuff
|
||||
|
||||
init ()
|
||||
|
||||
Reference in New Issue
Block a user