--- Minimal C syntax highlighter.
-- Splits C source text into tokens and renders every coloured token as
-- `{$color{<ColorName>}<text>}`. The module returns the `exec` function.
--
-- The helpers, `Token` and `Lexer` are intentionally left global, exactly as
-- they were before, in case other code relies on them after loading this file.

-- Token type names; a token stores the 1-based index into this list.
local tokenMap = {
  "identifier", "newline", "whitespace", "symbol", "type", "control",
  "comment", "number", "string", "char", "specialValue",
}

-- Colour name per token type. Types without an entry ("newline",
-- "whitespace") are emitted verbatim by Lexer:highlight.
local colorMap = {
  ["identifier"] = "Gray",
  ["symbol"] = "Gray",
  ["type"] = "Yellow",
  ["control"] = "Yellow",
  ["comment"] = "SkyBlue",
  ["number"] = "Dandelion",
  ["string"] = "Magenta",
  ["char"] = "WildStrawberry",
  ["specialValue"] = "",
}

-- Words highlighted as types / storage classes.
local TYPE_KEYWORDS = {
  ["int"] = true, ["long"] = true, ["short"] = true, ["char"] = true,
  ["bool"] = true, ["void"] = true, ["signed"] = true, ["unsigned"] = true,
  ["float"] = true, ["double"] = true, ["size_t"] = true, ["static"] = true,
  ["extern"] = true,
}

-- Words highlighted as control-flow keywords.
local CONTROL_KEYWORDS = {
  ["do"] = true, ["while"] = true, ["for"] = true, ["switch"] = true,
  ["case"] = true, ["default"] = true, ["if"] = true, ["else"] = true,
  ["break"] = true, ["return"] = true,
}

-- Single-character symbols. "|" and "~" were missing from the original list.
local SYMBOLS = "!%^&*()-+=/?,<>{}[];:|~"

-- Valid C integer-literal suffixes: an optional "u"/"U" combined, in either
-- order, with an optional "l"/"L" or same-case "ll"/"LL".
local INTEGER_SUFFIXES = {
  [""] = true,
  ["u"] = true, ["U"] = true, ["l"] = true, ["L"] = true,
  ["ul"] = true, ["uL"] = true, ["Ul"] = true, ["UL"] = true,
  ["lu"] = true, ["lU"] = true, ["Lu"] = true, ["LU"] = true,
  ["ll"] = true, ["LL"] = true,
  ["ull"] = true, ["uLL"] = true, ["Ull"] = true, ["ULL"] = true,
  ["llu"] = true, ["llU"] = true, ["LLu"] = true, ["LLU"] = true,
}

--- Return the 1-based position of `v` in the sequence `l`, or nil if absent.
function indexOf(l, v)
  for j, k in ipairs(l) do
    if k == v then
      return j
    end
  end
  return nil
end

--- Count how often each single-byte character occurs in `s`.
-- @return table mapping character -> occurrence count
function frequency(s)
  local res = {}
  for i = 1, #s do
    local curChar = string.sub(s, i, i)
    res[curChar] = (res[curChar] or 0) + 1
  end
  return res
end

--- Return the character of `s` at position `i` ("" when out of range).
function getc(s, i)
  return string.sub(s, i, i)
end

--- Wrap `s` in the colour markup for `colorName`.
function wrapInColor(s, colorName)
  return "{$color{" .. colorName .. "}" .. s .. "}"
end

Token = {}

--- Create a token.
-- @param t numeric token type (index into the token type list)
-- @param c token text
function Token:new(t, c)
  local res = { tokType = t, tokContent = c }
  self.__index = self
  return setmetatable(res, self)
end

Lexer = {}

--- Create a lexer over the source string `src`.
function Lexer:new(src)
  local res = {
    source = src,
    currStrPos = 1,
    currTokPos = 1,
    tokens = {},
    buffer = "",
    isStrAtEnd = false,
  }
  self.__index = self
  return setmetatable(res, self)
end

--- Append a token of the named type to the token list.
function Lexer:addToken(tokType, tokContent)
  table.insert(self.tokens, Token:new(indexOf(tokenMap, tokType), tokContent))
end

--- Print every token, in order, for debugging.
function Lexer:printTokens()
  -- ipairs: the token list is a sequence and must be printed in order.
  for _, tok in ipairs(self.tokens) do
    print("Token Type: ", tokenMap[tok.tokType], "\nToken Content: ", tok.tokContent)
  end
end

--- True when `s` is one of the type / storage-class keywords.
function isKeywordType(s)
  return TYPE_KEYWORDS[s] == true
end

--- True when `s` is one of the control-flow keywords.
function isKeywordControl(s)
  return CONTROL_KEYWORDS[s] == true
end

--- True when `s` is a decimal or octal integer literal: one or more digits
-- followed by an optional valid integer suffix (e.g. "42", "017", "10UL").
function isDecimalOrOctal(s)
  if type(s) ~= "string" then
    return false
  end
  local suffix = string.match(s, "^%d+(%a*)$")
  return suffix ~= nil and INTEGER_SUFFIXES[suffix] == true
end

--- True when `s` is a hexadecimal integer literal: "0x"/"0X", one or more hex
-- digits, then an optional valid integer suffix (e.g. "0xFF", "0x1Fu").
function isHexadecimal(s)
  if type(s) ~= "string" then
    return false
  end
  local suffix = string.match(s, "^0[xX]%x+(%a*)$")
  return suffix ~= nil and INTEGER_SUFFIXES[suffix] == true
end

--- True when `s` is a decimal, octal or hexadecimal integer literal.
function isNumber(s)
  return isDecimalOrOctal(s) or isHexadecimal(s)
end

--- True when `s` is exactly one of the recognised symbol characters.
function isSymbol(s)
  -- The length guard matters: a plain find of "" would match at position 1.
  return type(s) == "string" and #s == 1
    and string.find(SYMBOLS, s, 1, true) ~= nil
end

--- True when `s` is a line feed.
function isNewLine(s)
  return s == "\n"
end

--- True when `s` is a space, tab or carriage return.
function isWhitespace(s)
  return s == " " or s == "\t" or s == "\r"
end

--- True when `s` is a double quote.
function isDoubleQuote(s)
  return s == "\""
end

--- True when `s` is a single quote.
function isSingleQuote(s)
  return s == "\'"
end

--- True when `s` terminates a word (whitespace, symbol, newline or end of input).
function doesWordEnd(s)
  return (isWhitespace(s) or isSymbol(s) or (s == "") or (s == "\n"))
end

-- Scan a quoted literal whose opening quote sits at `startPos`.
-- A backslash escapes the following character, so an escaped quote does not
-- end the literal. Scanning stops after the closing quote, at end of input,
-- or (when `stopAtNewline` is set) just before a newline.
-- Returns the literal text and the position of the last consumed character.
local function scanQuoted(src, startPos, quote, stopAtNewline)
  local parts = { quote }
  local len = #src
  local pos = startPos + 1
  while pos <= len do
    local curr = getc(src, pos)
    if stopAtNewline and curr == "\n" then
      break
    end
    if curr == "\\" and pos < len then
      parts[#parts + 1] = string.sub(src, pos, pos + 1)
      pos = pos + 2
    else
      parts[#parts + 1] = curr
      pos = pos + 1
      if curr == quote then
        break
      end
    end
  end
  return table.concat(parts), pos - 1
end

-- Emit the pending word (if any) as a number, type, control or identifier
-- token and clear the buffer. Classifying the complete word, rather than each
-- growing prefix, keeps "double" from being split into "do" + "uble".
function Lexer:_flushBuffer()
  local word = self.buffer
  if word == "" then
    return
  end
  self.buffer = ""
  local kind = "identifier"
  if isNumber(word) then
    kind = "number"
  elseif isKeywordType(word) then
    kind = "type"
  elseif isKeywordControl(word) then
    kind = "control"
  end
  self:addToken(kind, word)
end

--- Read a string literal starting at the current position (the opening quote).
-- Leaves currStrPos on the last consumed character. An unterminated string
-- runs to the end of the input instead of looping forever.
function Lexer:readString()
  local text, lastPos = scanQuoted(self.source, self.currStrPos, "\"", false)
  self.currStrPos = lastPos
  self:addToken("string", self.buffer .. text)
  self.buffer = ""
end

--- Read a character literal starting at the current position (the opening quote).
-- Handles escapes such as '\n' and '\''. An unterminated literal stops before
-- the next newline so it cannot swallow the following lines.
function Lexer:readChar()
  local text, lastPos = scanQuoted(self.source, self.currStrPos, "\'", true)
  self.currStrPos = lastPos
  self:addToken("char", self.buffer .. text)
  self.buffer = ""
end

--- Read a `//` comment starting at the current position, up to (not including)
-- the end of the line. A terminating newline is consumed and emitted as its
-- own "newline" token.
function Lexer:readComment()
  local src = self.source
  local start = self.currStrPos
  local nl = string.find(src, "\n", start, true)
  local stop = nl and (nl - 1) or #src
  self:addToken("comment", self.buffer .. string.sub(src, start, stop))
  if nl then
    self:addToken("newline", "\n")
    self.currStrPos = nl
  else
    self.currStrPos = #src
  end
  self.buffer = ""
end

--- Tokenize the whole source into self.tokens.
function Lexer:read()
  local src = self.source
  local len = #src
  -- Drive the loop by position: the literal/comment readers advance
  -- currStrPos by more than one character.
  while self.currStrPos <= len do
    local curr = getc(src, self.currStrPos)
    local nextChar = getc(src, self.currStrPos + 1)
    if isWhitespace(curr) then
      self:_flushBuffer()
      -- Emit the actual character so tabs and carriage returns survive.
      self:addToken("whitespace", curr)
    elseif isDoubleQuote(curr) then
      self:_flushBuffer()
      self:readString()
    elseif isSingleQuote(curr) then
      self:_flushBuffer()
      self:readChar()
    elseif curr == "/" and nextChar == "/" then
      self:_flushBuffer()
      self:readComment()
    elseif isSymbol(curr) then
      self:_flushBuffer()
      self:addToken("symbol", curr)
    elseif isNewLine(curr) then
      self:_flushBuffer()
      self:addToken("newline", curr)
    else
      self.buffer = self.buffer .. curr
    end
    self.currStrPos = self.currStrPos + 1
  end
  -- A word that runs up to the end of the input has no delimiter after it.
  self:_flushBuffer()
end

--- Render the token list as colour markup.
-- @return string with every coloured token wrapped via wrapInColor
function Lexer:highlight()
  local parts = {}
  for i = 1, #self.tokens do
    local tok = self.tokens[i]
    local hcolor = colorMap[tokenMap[tok.tokType]]
    if hcolor ~= nil then
      parts[#parts + 1] = wrapInColor(tok.tokContent, hcolor)
    else
      parts[#parts + 1] = tok.tokContent
    end
  end
  return table.concat(parts)
end

--- Highlight C source text.
-- @param src C source code as a string
-- @return the source with colour markup applied
function exec(src)
  local lex = Lexer:new(src)
  lex:read()
  return lex:highlight()
end

return exec