Files
information-processing-1_12…/script/c-syntax-highlight.lua
2025-07-03 09:08:23 +09:00

382 lines
9.8 KiB
Lua

-- Ordered list of token type names.
-- A name's 1-based position in this list is the numeric id stored in a
-- token's `tokType` field: Lexer:addToken converts a name to its position
-- with indexOf, and Lexer:printTokens / Lexer:highlight convert the position
-- back to the name by indexing this table.
local tokenMap = {
"identifier",
"newline",
"whitespace",
"symbol",
"type",
"control",
"comment",
"number",
"string",
"char",
"specialValue"
}
-- Color name used for each token type when producing highlighted output.
-- Token types without an entry here ("newline", "whitespace") are emitted
-- without any color wrapper by Lexer:highlight.
-- NOTE(review): "specialValue" maps to an empty color name; nothing in this
-- file emits that token type, so the intended color is unknown.
local colorMap = {
  identifier = "Gray",
  symbol = "Gray",
  type = "Yellow",
  control = "Yellow",
  comment = "SkyBlue",
  number = "Dandelion",
  string = "Magenta",
  char = "WildStrawberry",
  specialValue = "",
}
--- Find the position of a value in a list.
-- @param l list (sequence table) to search
-- @param v value to look for, compared with `==`
-- @return 1-based index of the first match, or nil when `v` is absent
function indexOf(l, v)
  local position = 1
  local item = l[position]
  -- Walk the sequence until the first nil entry.
  while item ~= nil do
    if item == v then
      return position
    end
    position = position + 1
    item = l[position]
  end
  return nil
end
--- Count how many times each single-byte character occurs in a string.
-- @param s string to scan
-- @return table mapping each character found in `s` to its occurrence count
function frequency(s)
  local counts = {}
  local position = 1
  local total = #s
  while position <= total do
    local ch = string.sub(s, position, position)
    -- A character not seen before starts from zero.
    counts[ch] = (counts[ch] or 0) + 1
    position = position + 1
  end
  return counts
end
--- Fetch the single character at a given position of a string.
-- @param s source string
-- @param i 1-based position
-- @return the one-character string at `i`, or "" when `i` is out of range
function getc(s, i)
  local first, last = i, i
  return string.sub(s, first, last)
end
--- Wrap text in the color markup emitted by the highlighter.
-- @param s text to wrap
-- @param colorName name of the color to apply
-- @return the string "{$color{<colorName>}<s>}"
function wrapInColor(s, colorName)
  local opening = "{$color{" .. colorName .. "}"
  local closing = "}"
  return opening .. s .. closing
end
--- Prototype table for lexer tokens.
Token = {}

--- Create a token.
-- @param t value stored in the `tokType` field
-- @param c value stored in the `tokContent` field
-- @return a new table with those two fields whose metatable is the receiver
function Token:new(t, c)
  local instance = {}
  instance.tokType = t
  instance.tokContent = c
  -- Lookups that miss on the instance fall back to the receiver.
  self.__index = self
  setmetatable(instance, self)
  return instance
end
--- Prototype table for the lexer.
Lexer = {}

--- Create a lexer over a source string.
-- The new object starts with both position counters at 1, an empty token
-- list, an empty word buffer, and `isStrAtEnd` set to false.
-- @param src text stored in the `source` field
-- @return a new lexer table whose metatable is the receiver
function Lexer:new(src)
  local instance = {}
  instance.source = src
  instance.currStrPos = 1
  instance.currTokPos = 1
  instance.tokens = {}
  instance.buffer = ""
  instance.isStrAtEnd = false
  -- Methods are resolved through the receiver.
  self.__index = self
  setmetatable(instance, self)
  return instance
end
--- Append a token to this lexer's token list.
-- The type name is converted to its position in `tokenMap` before being
-- stored; a name that is not in `tokenMap` is stored as nil.
-- @param tokType token type name (an entry of `tokenMap`)
-- @param tokContent text of the token
function Lexer:addToken(tokType, tokContent)
  local typeId = indexOf(tokenMap, tokType)
  local token = Token:new(typeId, tokContent)
  self.tokens[#self.tokens + 1] = token
end
--- Print every token's type name and content to standard output.
-- Tokens are printed in the order they were added. `ipairs` is used because
-- `self.tokens` is a sequence and the traversal order of `pairs` is not
-- specified by the language.
function Lexer:printTokens()
  for _, token in ipairs(self.tokens) do
    print("Token Type: ", tokenMap[token.tokType], "\nToken Content: ", token.tokContent)
  end
end
--- Test whether a word is one of the type-like keywords this highlighter
-- colors as "type" (C type names plus `static` and `extern`).
-- @param s word to test
-- @return true when `s` equals one of the listed keywords, false otherwise
function isKeywordType(s)
  local typeKeywords = {
    "int", "long", "short", "char", "bool", "void", "signed", "unsigned",
    "float", "double", "size_t", "static", "extern",
  }
  for _, keyword in ipairs(typeKeywords) do
    if keyword == s then
      return true
    end
  end
  return false
end
--- Test whether a word is one of the control-flow keywords this highlighter
-- colors as "control".
-- @param s word to test
-- @return true when `s` equals one of the listed keywords, false otherwise
function isKeywordControl(s)
  -- Bracket keys are required because several entries are Lua reserved words.
  local controlSet = {
    ["do"] = true,
    ["while"] = true,
    ["for"] = true,
    ["switch"] = true,
    ["case"] = true,
    ["default"] = true,
    ["if"] = true,
    ["else"] = true,
    ["break"] = true,
    ["return"] = true,
  }
  return controlSet[s] == true
end
--- Placeholder definition of isSymbol; it produces no value for any input.
-- A second `function isSymbol` further down this file assigns the same
-- global name, so this version is replaced as soon as the file has loaded.
function isSymbol(s)
  return
end
--- Test whether a word is a decimal or octal C integer literal.
-- Accepted form: one or more decimal digits followed by an optional integer
-- suffix. A suffix is at most one `u`/`U` combined, in either order, with at
-- most one of `l`, `L`, `ll`, `LL`.
--
-- The previous implementation counted characters without checking their
-- positions, so words with no digits at all ("u", "l", "ll", "ull") and
-- malformed suffixes ("12lul") were reported as numbers.
-- @param s word to test
-- @return true when `s` is such a literal, false otherwise (including for
--   the empty string and non-string values)
function isDecimalOrOctal(s)
  if type(s) ~= "string" then
    return false
  end
  -- Anchored at both ends: digits first, then only suffix letters.
  local suffix = string.match(s, "^%d+([uUlL]*)$")
  if suffix == nil then
    return false
  end
  -- Strip a single unsigned marker from the front or the back of the suffix.
  local lengthPart = suffix
  local first = string.sub(lengthPart, 1, 1)
  local last = string.sub(lengthPart, -1)
  if first == "u" or first == "U" then
    lengthPart = string.sub(lengthPart, 2)
  elseif last == "u" or last == "U" then
    lengthPart = string.sub(lengthPart, 1, -2)
  end
  -- What remains must be a valid long / long long marker, or nothing.
  return lengthPart == "" or lengthPart == "l" or lengthPart == "L"
    or lengthPart == "ll" or lengthPart == "LL"
end
--- Test whether a word is a hexadecimal C integer literal.
-- Accepted form: `0x` or `0X`, one or more hexadecimal digits, then an
-- optional integer suffix. A suffix is at most one `u`/`U` combined, in
-- either order, with at most one of `l`, `L`, `ll`, `LL`.
--
-- The previous implementation counted hex-digit characters but never
-- accounted for the `x`, so ordinary literals such as "0xFF" were rejected,
-- while words such as "0abc" and "xll" were accepted.
-- @param s word to test
-- @return true when `s` is such a literal, false otherwise (including for
--   the empty string and non-string values)
function isHexadecimal(s)
  if type(s) ~= "string" then
    return false
  end
  -- Anchored at both ends: prefix, hex digits, then only suffix letters.
  local suffix = string.match(s, "^0[xX]%x+([uUlL]*)$")
  if suffix == nil then
    return false
  end
  -- Strip a single unsigned marker from the front or the back of the suffix.
  local lengthPart = suffix
  local first = string.sub(lengthPart, 1, 1)
  local last = string.sub(lengthPart, -1)
  if first == "u" or first == "U" then
    lengthPart = string.sub(lengthPart, 2)
  elseif last == "u" or last == "U" then
    lengthPart = string.sub(lengthPart, 1, -2)
  end
  -- What remains must be a valid long / long long marker, or nothing.
  return lengthPart == "" or lengthPart == "l" or lengthPart == "L"
    or lengthPart == "ll" or lengthPart == "LL"
end
--- Test whether a word is an integer literal of either supported kind.
-- @param s word to test
-- @return the result of isDecimalOrOctal(s) when that is truthy, otherwise
--   the result of isHexadecimal(s)
function isNumber(s)
  local decimalResult = isDecimalOrOctal(s)
  if decimalResult then
    return decimalResult
  end
  -- Parentheses keep the result to a single value, like the `or` expression.
  return (isHexadecimal(s))
end
--- Test whether a value is one of the single-character symbols
-- `! % ^ & * ( ) - + = / ? , < > { } [ ] ; :`.
-- @param s value to test
-- @return true when `s` is exactly one of those characters, false otherwise
function isSymbol(s)
  -- Only a one-character string can equal a single symbol.
  if type(s) ~= "string" or #s ~= 1 then
    return false
  end
  -- Plain search (4th argument) so that %, ^, [ etc. are taken literally.
  local found = string.find("!%^&*()-+=/?,<>{}[];:", s, 1, true)
  return found ~= nil
end
--- Test whether a value is exactly the line-feed string "\n".
-- @param s value to test
-- @return true or false
function isNewLine(s)
  if s == "\n" then
    return true
  end
  return false
end
--- Test whether a value is exactly a single space character.
-- Tabs and other blank characters do not count.
-- @param s value to test
-- @return true or false
function isWhitespace(s)
  if s == " " then
    return true
  end
  return false
end
--- Test whether a value is exactly the double-quote character.
-- @param s value to test
-- @return true or false
function isDoubleQuote(s)
  if s == "\"" then
    return true
  end
  return false
end
--- Test whether a value is exactly the single-quote character.
-- @param s value to test
-- @return true or false
function isSingleQuote(s)
  if s == "\'" then
    return true
  end
  return false
end
--- Test whether a character terminates the word before it.
-- A word ends at a space, at a symbol character, at a newline, or at the end
-- of input (represented by the empty string).
-- @param s the character following the word, or "" at end of input
-- @return true when `s` terminates a word, false otherwise
function doesWordEnd(s)
  if isWhitespace(s) then
    return true
  end
  if isSymbol(s) then
    return true
  end
  return s == "" or s == "\n"
end
--- Consume a double-quoted string literal and emit it as a "string" token.
-- On entry `self.currStrPos` is the index of the opening quote. On exit it is
-- the index of the closing quote (the caller advances past it), or one past
-- the end of the source when the literal is never closed.
--
-- Two defects of the earlier version are fixed here:
--   * an unterminated literal made the loop spin forever, because reading
--     past the end of the source yields "" and never a quote;
--   * a backslash-escaped quote (`\"`) was taken as the end of the literal.
-- The token content is always text copied from the source; nothing is
-- invented for a missing closing quote.
function Lexer:readString()
  local pieces = { self.buffer, "\"" }
  local pos = self.currStrPos + 1
  while true do
    local curr = getc(self.source, pos)
    if curr == "" then
      -- End of input before the closing quote: stop instead of looping.
      break
    end
    pieces[#pieces + 1] = curr
    if curr == "\"" then
      break
    end
    if curr == "\\" then
      -- Copy the escaped character verbatim so it cannot end the literal.
      local escaped = getc(self.source, pos + 1)
      if escaped ~= "" then
        pieces[#pieces + 1] = escaped
        pos = pos + 1
      end
    end
    pos = pos + 1
  end
  self.currStrPos = pos
  self:addToken("string", table.concat(pieces))
  self.buffer = ""
end
--- Consume a single-quoted character literal and emit it as a "char" token.
-- On entry `self.currStrPos` is the index of the opening quote. On exit it is
-- the index of the last character that belongs to the literal (normally the
-- closing quote); the caller advances past it.
--
-- The earlier version copied exactly one character and then appended a
-- closing quote without looking at the source. Escape sequences such as
-- '\n' or '\'' were therefore split into two bogus literals, and the emitted
-- text could differ from the source. This version scans to the real closing
-- quote, copies escape pairs verbatim, and stops at a newline or at the end
-- of input when the literal is never closed.
function Lexer:readChar()
  local pieces = { self.buffer, "\'" }
  local pos = self.currStrPos + 1
  while true do
    local curr = getc(self.source, pos)
    if curr == "" or curr == "\n" then
      -- Unterminated literal: step back so the caller's increment lands on
      -- the newline (or just past the end) and it is lexed normally.
      pos = pos - 1
      break
    end
    pieces[#pieces + 1] = curr
    if curr == "\'" then
      break
    end
    if curr == "\\" then
      -- Copy the escaped character so an escaped quote does not end the
      -- literal.
      local escaped = getc(self.source, pos + 1)
      if escaped ~= "" and escaped ~= "\n" then
        pieces[#pieces + 1] = escaped
        pos = pos + 1
      end
    end
    pos = pos + 1
  end
  self.currStrPos = pos
  self:addToken("char", table.concat(pieces))
  self.buffer = ""
end
--- Consume a `//` line comment and emit it as a "comment" token.
-- On entry `self.currStrPos` is the index of the first slash. The comment
-- text runs up to, but not including, the next newline or the end of input.
-- When a newline ends the comment, a "newline" token is emitted as well and
-- `self.currStrPos` is left on that newline for the caller to advance past.
function Lexer:readComment()
  local bodyStart = self.currStrPos + 2
  -- Plain search for the line feed that ends the comment, if there is one.
  local newlinePos = string.find(self.source, "\n", bodyStart, true)
  local stopPos = newlinePos or math.max(bodyStart, #self.source + 1)
  local body = string.sub(self.source, bodyStart, stopPos - 1)
  local text = self.buffer .. "//" .. body
  self.currStrPos = stopPos
  self:addToken("comment", text)
  if newlinePos ~= nil then
    self:addToken("newline", "\n")
  end
  self.buffer = ""
end
--- Decide which token type a completed word belongs to.
-- @param word non-empty text accumulated between two delimiters
-- @return "number", "type", "control" or "identifier"
local function classifyWord(word)
  if isNumber(word) then
    return "number"
  elseif isKeywordType(word) then
    return "type"
  elseif isKeywordControl(word) then
    return "control"
  end
  return "identifier"
end

--- Tokenize `self.source` into `self.tokens`.
-- Characters are accumulated into `self.buffer` until a delimiter is reached
-- (space, other blank character, newline, quote, `//`, symbol, or end of
-- input); the completed word is then classified and emitted, followed by the
-- token for the delimiter itself.
--
-- Differences from the earlier version:
--   * Keywords are recognized only as whole words. Previously a keyword was
--     emitted as soon as the buffer equaled it, so "double" became control
--     "do" + "uble" and "integer" became type "int" + "eger".
--   * A word still in the buffer at end of input is emitted instead of being
--     dropped.
--   * Tabs, carriage returns and other blank characters end the current word
--     and are emitted unchanged as "whitespace" tokens instead of being glued
--     into the neighbouring word.
--   * The loop is driven by the cursor position rather than by a fixed
--     character count, so it stops as soon as the input is exhausted.
function Lexer:read()
  -- Emit the pending word, if any, with its proper classification.
  local function flushWord()
    if self.buffer ~= "" then
      self:addToken(classifyWord(self.buffer), self.buffer)
      self.buffer = ""
    end
  end

  while self.currStrPos <= #self.source do
    local curr = getc(self.source, self.currStrPos)
    local nextChar = getc(self.source, self.currStrPos + 1)
    if isWhitespace(curr) then
      flushWord()
      self:addToken("whitespace", " ")
    elseif isDoubleQuote(curr) then
      flushWord()
      self:readString()
    elseif isSingleQuote(curr) then
      flushWord()
      self:readChar()
    elseif curr == "/" and nextChar == "/" then
      -- Must be tested before isSymbol, which would take a lone "/".
      flushWord()
      self:readComment()
    elseif isSymbol(curr) then
      flushWord()
      self:addToken("symbol", curr)
    elseif isNewLine(curr) then
      flushWord()
      self:addToken("newline", curr)
    elseif string.find(curr, "^%s$") then
      -- Remaining blank characters (tab, carriage return, ...): keep the
      -- original character so the output reproduces the source.
      flushWord()
      self:addToken("whitespace", curr)
    else
      self.buffer = self.buffer .. curr
    end
    -- The sub-readers leave the cursor on the last character they consumed.
    self.currStrPos = self.currStrPos + 1
  end
  flushWord()
end
--- Render the token list as one string with color markup.
-- Each token whose type has an entry in `colorMap` is wrapped with
-- wrapInColor; every other token is copied through unchanged.
-- @return the concatenation of all rendered tokens, in order
function Lexer:highlight()
  local pieces = {}
  local count = #self.tokens
  for index = 1, count do
    local token = self.tokens[index]
    local typeName = tokenMap[token.tokType]
    local color = colorMap[typeName]
    if color ~= nil then
      pieces[index] = wrapInColor(token.tokContent, color)
    else
      pieces[index] = token.tokContent
    end
  end
  -- Explicit bounds so every slot from 1 to count is required to be present.
  return table.concat(pieces, "", 1, count)
end
--- Highlight a piece of source text.
-- Builds a Lexer over `src`, tokenizes it, and renders the tokens.
-- @param src source text to highlight
-- @return the string produced by the lexer's highlight method
function exec(src)
  local lexer = Lexer:new(src)
  lexer:read()
  local highlighted = lexer:highlight()
  return highlighted
end
-- The chunk evaluates to the `exec` function, so whatever loads this file
-- receives the highlighter entry point as the chunk's result.
return exec