-- File viewer metadata captured with this source: 382 lines, 9.8 KiB, Lua.
-- Ordered list of token type names. Lexer:addToken stores the 1-based
-- position of a name in this list as the token's type, and the name is looked
-- up again by that position when tokens are printed or highlighted.
local tokenMap = {
    "identifier", "newline", "whitespace", "symbol",
    "type", "control", "comment", "number",
    "string", "char", "specialValue"
}
|
|
|
|
-- Color name for each token type name. Token types with no entry here
-- ("newline" and "whitespace") are emitted without a color wrapper by
-- Lexer:highlight.
local colorMap = {
    identifier   = "Gray",
    symbol       = "Gray",
    type         = "Yellow",
    control      = "Yellow",
    comment      = "SkyBlue",
    number       = "Dandelion",
    string       = "Magenta",
    char         = "WildStrawberry",
    specialValue = ""
}
|
|
|
|
--- Find the position of a value in a sequence.
-- @param l sequence table to search (scanned from index 1 up to the first nil)
-- @param v value to look for, compared with ==
-- @return 1-based position of the first match, or nil when v is absent
function indexOf(l, v)
    local pos = 1
    local item = l[pos]
    while item ~= nil do
        if item == v then
            return pos
        end
        pos = pos + 1
        item = l[pos]
    end
    return nil
end
|
|
|
|
--- Count how often each character occurs in a string.
-- @param s string to scan (one byte at a time)
-- @return table mapping each single-character string found in s to its
--         number of occurrences; characters that do not occur have no entry
function frequency(s)
    local counts = {}
    for pos = 1, #s do
        local ch = string.sub(s, pos, pos)
        counts[ch] = (counts[ch] or 0) + 1
    end
    return counts
end
|
|
|
|
--- Return the single character at a given position of a string.
-- @param s string to read from
-- @param i 1-based position
-- @return one-character string, or "" when i lies outside the string
function getc(s, i)
    local ch = string.sub(s, i, i)
    return ch
end
|
|
|
|
--- Wrap text in a color group of the form {$color{NAME}TEXT}.
-- @param s text to wrap
-- @param colorName color name placed inside the inner braces
-- @return the wrapped string
function wrapInColor(s, colorName)
    local opener = "{$color{" .. colorName .. "}"
    local wrapped = opener .. s .. "}"
    return wrapped
end
|
|
|
|
--- Token prototype: one lexed fragment (a type plus its text).
Token = {}

--- Create a new token.
-- @param t token type (Lexer:addToken passes a position in tokenMap)
-- @param c token text
-- @return table with fields tokType and tokContent whose metatable is the
--         receiver, so fields missing on the token are looked up there
function Token:new(t, c)
    local instance = {}
    instance.tokType = t
    instance.tokContent = c

    -- Make the receiver usable as a metatable that delegates lookups to itself.
    self.__index = self
    setmetatable(instance, self)
    return instance
end
|
|
|
|
--- Lexer prototype: holds the source text and the tokens produced from it.
Lexer = {}

--- Create a lexer over a source string.
-- @param src source text to tokenize
-- @return table whose metatable is the receiver, initialised with:
--         source (src), currStrPos (1), currTokPos (1), tokens (empty list),
--         buffer ("") and isStrAtEnd (false)
function Lexer:new(src)
    local instance = {}
    instance.source = src
    instance.currStrPos = 1
    instance.currTokPos = 1
    instance.tokens = {}
    instance.buffer = ""
    instance.isStrAtEnd = false

    -- Make the receiver usable as a metatable that delegates lookups to itself.
    self.__index = self
    setmetatable(instance, self)
    return instance
end
|
|
|
|
--- Append a token to the lexer's token list.
-- @param tokType token type name; it is converted to its position in
--        tokenMap (nil when the name is not listed there)
-- @param tokContent token text
function Lexer:addToken(tokType, tokContent)
    local typeIndex = indexOf(tokenMap, tokType)
    local token = Token:new(typeIndex, tokContent)
    table.insert(self.tokens, token)
end
|
|
|
|
--- Print every token's type name and content to standard output.
-- The token list is a sequence, so it is walked with ipairs to guarantee the
-- tokens are printed in the order they were lexed (pairs gives no ordering
-- guarantee).
function Lexer:printTokens()
    for _, token in ipairs(self.tokens) do
        print("Token Type: ", tokenMap[token.tokType], "\nToken Content: ", token.tokContent)
    end
end
|
|
|
|
--- Tell whether a word is one of the recognised type / storage keywords.
-- @param s word to test
-- @return true when s equals one of the listed keywords, false otherwise
function isKeywordType(s)
    local typeWords = {
        "int", "long", "short", "char", "bool", "void", "signed",
        "unsigned", "float", "double", "size_t", "static", "extern"
    }
    for _, word in ipairs(typeWords) do
        if s == word then
            return true
        end
    end
    return false
end
|
|
|
|
--- Tell whether a word is one of the recognised control-flow keywords.
-- @param s word to test
-- @return true when s equals one of the listed keywords, false otherwise
function isKeywordControl(s)
    local controlWords = {
        "do", "while", "for", "switch", "case",
        "default", "if", "else", "break", "return"
    }
    for _, word in ipairs(controlWords) do
        if s == word then
            return true
        end
    end
    return false
end
|
|
|
|
--- Tell whether a string is a single symbol character.
-- NOTE: the global isSymbol is defined a second time further down in this
-- file; that later definition replaces this one when the file is loaded.
-- This body previously was an empty placeholder (it returned nil for every
-- input); it now performs the same check over the same symbol set so the two
-- definitions do not disagree.
-- @param s string to test
-- @return true when s is exactly one character from the symbol set,
--         false otherwise (including non-string or multi-character input)
function isSymbol(s)
    local symbols = "!%^&*()-+=/?,<>{}[];:"
    if type(s) ~= "string" or #s ~= 1 then
        return false
    end
    -- Plain search (4th argument true) so characters such as % ( ) [ are
    -- matched literally rather than as pattern magic.
    return string.find(symbols, s, 1, true) ~= nil
end
|
|
|
|
-- Integer suffixes accepted after the digits of a decimal/octal literal,
-- keyed in lower case: none, unsigned, long, long long and their combinations.
local decimalIntegerSuffixes = {
    [""] = true,
    u = true, l = true,
    ul = true, lu = true, ll = true,
    ull = true, llu = true
}

--- Tell whether a string is a decimal or octal C integer literal.
-- The string must consist of one or more digits 0-9 followed by an optional
-- integer suffix (u, l, ul, lu, ll, ull, llu; letter case is ignored).
-- Octal digit ranges are not validated, and floating-point literals are not
-- recognised.
--
-- The previous implementation only counted characters, so strings without any
-- digit ("u", "ll", "ull") and strings with letters in front of the digits
-- ("a1ll") were reported as numbers. Anchoring the match at both ends and
-- requiring at least one digit rules those out.
-- @param s string to test
-- @return true when s is such a literal, false otherwise (including "")
function isDecimalOrOctal(s)
    local digits, suffix = string.match(s, "^([0-9]+)([uUlL]*)$")
    if digits == nil then
        return false
    end
    return decimalIntegerSuffixes[string.lower(suffix)] == true
end
|
|
|
|
-- Integer suffixes accepted after the digits of a hexadecimal literal,
-- keyed in lower case: none, unsigned, long, long long and their combinations.
local hexIntegerSuffixes = {
    [""] = true,
    u = true, l = true,
    ul = true, lu = true, ll = true,
    ull = true, llu = true
}

--- Tell whether a string is a hexadecimal C integer literal.
-- The string must start with "0x" or "0X", continue with one or more hex
-- digits and may end with an integer suffix (u, l, ul, lu, ll, ull, llu;
-- letter case is ignored).
--
-- The previous implementation counted the "x" of the prefix as a stray
-- non-digit character, which made it reject ordinary literals such as "0xff"
-- and "0xffu" while accepting prefix-less strings such as "0abc". Matching
-- the prefix explicitly fixes both.
-- @param s string to test
-- @return true when s is such a literal, false otherwise (including "")
function isHexadecimal(s)
    local digits, suffix = string.match(s, "^0[xX]([0-9a-fA-F]+)([uUlL]*)$")
    if digits == nil then
        return false
    end
    return hexIntegerSuffixes[string.lower(suffix)] == true
end
|
|
|
|
--- Tell whether a string is an integer literal in any supported base.
-- @param s string to test
-- @return the result of isDecimalOrOctal(s) when it is truthy, otherwise the
--         result of isHexadecimal(s)
function isNumber(s)
    local decimalOrOctal = isDecimalOrOctal(s)
    if decimalOrOctal then
        return decimalOrOctal
    end
    return isHexadecimal(s)
end
|
|
|
|
--- Tell whether a string is a single symbol (operator / punctuation) character.
-- The set now also contains "|" and "~": without them an expression such as
-- `flags|0x10` contained no word boundary and was lexed as one identifier.
-- @param s string to test
-- @return true when s is exactly one character from the symbol set,
--         false otherwise (including "", multi-character and non-string input)
function isSymbol(s)
    local symList = "!%^&*()-+=/?,<>{}[];:|~"
    if type(s) ~= "string" or #s ~= 1 then
        return false
    end
    -- Plain search (4th argument true) so characters such as % ( ) [ are
    -- matched literally rather than as pattern magic.
    return string.find(symList, s, 1, true) ~= nil
end
|
|
|
|
--- Tell whether a string is exactly one line-feed character.
-- @param s string to test
-- @return true when s == "\n", false otherwise
function isNewLine(s)
    local lineFeed = "\n"
    return s == lineFeed
end
|
|
|
|
--- Tell whether a string is exactly one space character.
-- Only the space character counts; tabs and other blanks do not.
-- @param s string to test
-- @return true when s == " ", false otherwise
function isWhitespace(s)
    local space = " "
    return s == space
end
|
|
|
|
--- Tell whether a string is exactly one double-quote character.
-- @param s string to test
-- @return true when s is the double-quote character, false otherwise
function isDoubleQuote(s)
    local doubleQuote = "\""
    return s == doubleQuote
end
|
|
|
|
--- Tell whether a string is exactly one single-quote character.
-- @param s string to test
-- @return true when s is the single-quote character, false otherwise
function isSingleQuote(s)
    local singleQuote = "\'"
    return s == singleQuote
end
|
|
|
|
--- Tell whether a character terminates the word that precedes it.
-- @param s the character following the word ("" stands for end of input)
-- @return true when s is a space, a symbol, the empty string or a line feed;
--         false otherwise
function doesWordEnd(s)
    if isWhitespace(s) then
        return true
    end
    if isSymbol(s) then
        return true
    end
    return s == "" or s == "\n"
end
|
|
|
|
--- Read a double-quoted string literal and add it as a "string" token.
-- Expects self.currStrPos to be on the opening quote. On return
-- self.currStrPos is on the last character consumed (the closing quote when
-- there is one) and self.buffer is empty.
--
-- Fixes over the previous version:
--   * an unterminated string no longer loops forever: reaching the end of
--     the source ends the token;
--   * a backslash escapes the following character, so `\"` inside the
--     literal no longer terminates it early.
-- As before, a literal may span several lines.
function Lexer:readString()
    local source = self.source
    local pos = self.currStrPos + 1 -- first character after the opening quote
    local pieces = { self.buffer, "\"" }

    while true do
        local curr = getc(source, pos)
        if curr == "" then
            -- End of input without a closing quote: step back onto the last
            -- real character and stop.
            pos = pos - 1
            break
        end

        pieces[#pieces + 1] = curr
        if curr == "\"" then
            break
        end

        if curr == "\\" then
            -- Take the escaped character along so it cannot close the literal.
            local escaped = getc(source, pos + 1)
            if escaped ~= "" then
                pieces[#pieces + 1] = escaped
                pos = pos + 1
            end
        end
        pos = pos + 1
    end

    self.currStrPos = pos
    self:addToken("string", table.concat(pieces))
    self.buffer = ""
end
|
|
|
|
--- Read a single-quoted character literal and add it as a "char" token.
-- Expects self.currStrPos to be on the opening quote. On return
-- self.currStrPos is on the last character consumed (the closing quote when
-- there is one) and self.buffer is empty.
--
-- The previous version always consumed exactly one character and then
-- appended a quote without looking at the source. An escaped literal such as
-- '\n' or '\'' was therefore split in the middle, and whatever character
-- stood where the closing quote was expected was silently replaced by a
-- quote. The literal is now scanned up to its real closing quote, with a
-- backslash escaping the following character. An unterminated literal ends at
-- the end of the line (the line feed is left for the caller) or at the end
-- of the source, and only text actually present in the source is emitted.
function Lexer:readChar()
    local source = self.source
    local pos = self.currStrPos + 1 -- first character after the opening quote
    local pieces = { self.buffer, "\'" }

    while true do
        local curr = getc(source, pos)
        if curr == "" or curr == "\n" then
            -- No closing quote on this line: step back onto the last
            -- consumed character and stop.
            pos = pos - 1
            break
        end

        pieces[#pieces + 1] = curr
        if curr == "\'" then
            break
        end

        if curr == "\\" then
            -- Take the escaped character along so it cannot close the literal.
            local escaped = getc(source, pos + 1)
            if escaped ~= "" and escaped ~= "\n" then
                pieces[#pieces + 1] = escaped
                pos = pos + 1
            end
        end
        pos = pos + 1
    end

    self.currStrPos = pos
    self:addToken("char", table.concat(pieces))
    self.buffer = ""
end
|
|
|
|
--- Read a `//` line comment and add it as a "comment" token.
-- Expects self.currStrPos to be on the first slash. The comment runs up to,
-- but not including, the next line feed or the end of the source. When the
-- comment is ended by a line feed, a "newline" token is added as well and
-- self.currStrPos is left on that line feed; otherwise it is left just past
-- the end of the source. self.buffer is empty on return.
function Lexer:readComment()
    local pieces = { self.buffer, "//" }
    local pos = self.currStrPos + 2 -- skip the two slashes
    local curr = getc(self.source, pos)

    while not (isNewLine(curr) or curr == "") do
        pieces[#pieces + 1] = curr
        pos = pos + 1
        curr = getc(self.source, pos)
    end

    self.currStrPos = pos
    self:addToken("comment", table.concat(pieces))

    if curr == "\n" then
        self:addToken("newline", "\n")
    end

    self.buffer = ""
end
|
|
|
|
|
|
-- Emit the word accumulated in lexer.buffer as one token and clear the buffer.
-- The word is classified as "number", "type", "control" or, failing those,
-- "identifier". Does nothing when the buffer is empty.
local function flushWord(lexer)
    local word = lexer.buffer
    if word == "" then
        return
    end

    if isNumber(word) then
        lexer:addToken("number", word)
    elseif isKeywordType(word) then
        lexer:addToken("type", word)
    elseif isKeywordControl(word) then
        lexer:addToken("control", word)
    else
        lexer:addToken("identifier", word)
    end
    lexer.buffer = ""
end

--- Tokenize self.source into self.tokens.
-- Characters are accumulated into self.buffer until a delimiter (space,
-- quote, `//`, symbol or line feed) is reached; the accumulated word is then
-- classified and emitted, followed by the token for the delimiter itself.
--
-- Fixes over the previous version:
--   * words are classified only once they are complete. Keywords used to be
--     emitted as soon as the buffer equalled one, so "double" was split into
--     the control keyword "do" plus "uble", "integer" into "int" plus "eger",
--     and so on;
--   * a word still in the buffer when the source ends is emitted instead of
--     being dropped (e.g. the input "foo" used to produce no tokens);
--   * the loop is driven by the read position rather than a fixed iteration
--     count, so it stops as soon as the sub-readers have consumed the input.
function Lexer:read()
    local source = self.source
    local length = #source

    while self.currStrPos <= length do
        local curr = getc(source, self.currStrPos)
        local nextChar = getc(source, self.currStrPos + 1)

        if isWhitespace(curr) then
            flushWord(self)
            self:addToken("whitespace", curr)
        elseif isDoubleQuote(curr) then
            flushWord(self)
            self:readString()
        elseif isSingleQuote(curr) then
            flushWord(self)
            self:readChar()
        elseif curr == "/" and nextChar == "/" then
            flushWord(self)
            self:readComment()
        elseif isSymbol(curr) then
            flushWord(self)
            self:addToken("symbol", curr)
        elseif isNewLine(curr) then
            flushWord(self)
            self:addToken("newline", curr)
        else
            self.buffer = self.buffer .. curr
        end

        -- The sub-readers leave currStrPos on the last character they
        -- consumed, so one step forward is correct for every branch.
        self.currStrPos = self.currStrPos + 1
    end

    -- Emit a word that runs up to the very end of the source.
    flushWord(self)
end
|
|
|
|
--- Render the token list as one string with color wrappers.
-- Each token whose type name has an entry in colorMap is passed through
-- wrapInColor with that color; every other token is emitted unchanged.
-- @return the concatenation of all rendered tokens, in token order
function Lexer:highlight()
    local tokens = self.tokens
    local count = #tokens
    local pieces = {}

    for i = 1, count do
        local token = tokens[i]
        local typeName = tokenMap[token.tokType]
        local color = colorMap[typeName]
        if color then
            pieces[i] = wrapInColor(token.tokContent, color)
        else
            pieces[i] = token.tokContent
        end
    end

    return table.concat(pieces, "", 1, count)
end
|
|
|
|
--- Tokenize a source string and return its highlighted rendering.
-- @param src source text
-- @return the string produced by Lexer:highlight after lexing src
function exec(src)
    local lexer = Lexer:new(src)
    lexer:read()
    local highlighted = lexer:highlight()
    return highlighted
end
|
|
|
|
-- Evaluating this file yields the exec function as the chunk's result.
return exec
|