-- unicode4d.lua
-- https://code.google.com/p/lua-files/source/browse/utf8.lua
local unicode4d = {}

local ffi = require "mffi"
local utf = require "utf16"
local util = require "util"
local c4d = util.c4d()
-- local icu = require "unicode/unicodeIcu"
-- Scratch buffers shared by the conversion helpers below; both are arrays of
-- uint8_t, so both lengths are byte counts.
-- UTF-8 side: 32768 * 2 = 65536 bytes (output space for fromUtf16).
local utf8bufferLen = 32768 * 2
local utf8buffer = ffi.newAnchor("uint8_t[?]", utf8bufferLen)
-- UTF-16 side: twice the UTF-8 capacity (output space for toUtf16).
local utf16bufferLen = utf8bufferLen * 2
local utf16buffer = ffi.newAnchor("uint8_t[?]", utf16bufferLen)
-- Constructor for the "PA_Unichar*" pointer type. It is only resolved when
-- util.from4d() is truthy; otherwise it stays nil and toUnichar cannot call it.
local pa_unichar_t
if util.from4d() then
	pa_unichar_t = ffi.typeof("PA_Unichar*")
end

-- Returns the fString field of a unistring record (its character buffer).
local function getUnistring(ustr)
	local chars = ustr.fString
	return chars
end

-- Returns the fLength field of a unistring record: the number of unicode
-- characters in the string, not counting the terminating NULL character.
local function getUnistringLength(ustr)
	local charCount = ustr.fLength
	return charCount
end

-- Returns the byte length of a NUL-terminated UTF-16 string, excluding the
-- two-byte terminator. The result is always even.
--
-- The buffer is walked one 16-bit code unit (two bytes) at a time. Looking for
-- two consecutive zero bytes at arbitrary offsets is not enough: a code unit
-- whose high byte is 0 followed by one whose low byte is 0 (e.g. "A" then
-- U+0100, bytes 41 00 00 01) contains a zero pair that straddles two
-- characters and would cut the string short.
--
-- @param uchar pointer to the first code unit; must be non-NULL and terminated
--              by a 0x0000 code unit
-- @return number of bytes in front of the terminating code unit
local function unicharByteCount(uchar)
	local bytes = ffi.cast("uint8_t*", uchar) -- read the buffer byte by byte
	local offset = 0
	-- a code unit is the terminator only when both of its bytes are zero
	while bytes[offset] ~= 0 or bytes[offset + 1] ~= 0 do
		offset = offset + 2
	end
	return offset
end

--[[
local function toUtf16Char(str)
	-- local utf = icu.convert (str, "UTF-8", "UTF-16LE")
	local buffer8 = ffi.cast("uint8_t*", str)
	local len = utf.utf8ToUtf16le(buffer8, #str, utf16buffer, utf16bufferLen)
	return utf16buffer, len
end
]]

-- Converts a UTF-8 Lua string to UTF-16LE via utf.utf8ToUtf16le and returns
-- the converted bytes as a Lua string. The shared utf16buffer is the
-- conversion target, and the value returned by the converter is used as the
-- byte count handed to ffi.string.
local function toUtf16(str)
	local source = ffi.cast("uint8_t*", str)
	local written = utf.utf8ToUtf16le(source, #str, utf16buffer, utf16bufferLen)
	local utf16str = ffi.string(utf16buffer, written)
	return utf16str
end

--[[
local function toUnichar(str) -- , unichar)
	if str == "" then
		local unichar = ffi.newNoAnchor("PA_Unichar*") --pa_unichar_t()  --ffi.cast("PA_Unichar*", 0) --pa_unichar_t(0) is this correct?, use pa_unichar_t()?
		return unichar
	end
	-- local bytelen = #utf16
	local utf16 = toUtf16(str)
	-- unichar = unichar or ffi.newNoAnchor("PA_Unichar[?]", #utf16)
	ffi.copy(unichar, utf16, #utf16)
	-- return ffi.cast("PA_Unichar*", unichar)
	return pa_unichar_t(unichar)
end
]]

-- local defaultUnicharSize = 1024
-- local defaultUnichar
-- Converts a UTF-8 Lua string into a newly allocated, NUL-terminated UTF-16
-- buffer and returns it as a PA_Unichar* pointer.
--
-- @param str    UTF-8 Lua string
-- @param option unused; kept so existing callers (toUnistring passes
--               "create") keep working
-- @return PA_Unichar* cdata; for "" a PA_Unichar* created without an
--         initialiser (presumably NULL -- confirm against mffi)
--
-- NOTE(review): the buffer is created with newNoAnchorNoTrace and only a
-- pointer to it is returned; presumably nothing else keeps the buffer alive,
-- so callers should consume the pointer immediately -- confirm against mffi.
local function toUnichar(str, option)
	if str == "" then
		-- the empty string gets no buffer at all, only a bare pointer
		local emptyPtr = ffi.newNoAnchor("PA_Unichar*")
		return emptyPtr
	end
	local utf16 = toUtf16(str)
	-- "PA_Unichar[?]" is sized in code units, not bytes. This module treats a
	-- PA_Unichar as 2 bytes (see fromUnichar), so the unit count is half the
	-- byte length; one extra unit holds the terminator.
	local unitCount = math.floor(#utf16 / 2)
	local unichar = ffi.newNoAnchorNoTrace("PA_Unichar[?]", unitCount + 1) -- prevent mffi trace infinite loop
	ffi.copy(unichar, utf16, unitCount * 2)
	-- terminate explicitly instead of relying on surplus zero-filled memory
	unichar[unitCount] = 0
	return pa_unichar_t(unichar)
end
unicode4d.toUnichar = toUnichar

-- Converts bytelen bytes of UTF-16LE data starting at uchar to UTF-8 through
-- utf.utf16leToUtf8, with the shared utf8buffer as output space. Only the
-- first value returned by the converter is passed on.
local function fromUtf16(uchar, bytelen)
	local source = ffi.cast("uint8_t*", uchar)
	-- the parentheses truncate the call to a single return value
	return (utf.utf16leToUtf8(source, bytelen, utf8buffer, utf8bufferLen))
end

-- Converts a PA_Unichar (UTF-16) buffer to a UTF-8 Lua string.
--
-- @param uchar    pointer to the UTF-16 data; nil or a NULL pointer yields ""
-- @param ucharLen optional number of 16-bit code units to convert; when
--                 omitted the buffer is scanned for its NUL terminator
-- @return the value produced by fromUtf16, or "" when there is nothing to
--         convert
local function fromUnichar(uchar, ucharLen)
	-- A NULL pointer (toUnichar("") returns one) must never reach the
	-- terminator scan, which would dereference it. With LuaJIT cdata a NULL
	-- pointer compares equal to nil.
	if uchar == nil then
		return ""
	end
	if ucharLen and ucharLen < 1 then
		return ""
	end
	local bytelen
	if ucharLen then
		bytelen = ucharLen * 2 -- two bytes per UTF-16 code unit
	else
		bytelen = unicharByteCount(uchar)
	end
	if bytelen < 1 then
		-- a scanned empty string gives the same result as an explicit length of 0
		return ""
	end
	return fromUtf16(uchar, bytelen)
end
unicode4d.fromUnichar = fromUnichar

-- Builds a unistring from a UTF-8 Lua string: the text is converted with
-- toUnichar (option "create") and the resulting pointer is handed to
-- c4d.PA_CreateUnistring, whose result is returned.
local function toUnistring(str)
	local unicharPtr = toUnichar(str, "create")
	local created = c4d.PA_CreateUnistring
	return created(unicharPtr)
end
unicode4d.toUnistring = toUnistring

-- Converts a PA_Unistring to a UTF-8 Lua string.
--
-- The character count stored in the unistring is passed to fromUnichar, so
-- the buffer does not have to be scanned for its terminator. An unreachable
-- alternative branch that was guarded by `if true` has been removed.
--
-- @param ustr PA_Unistring record (fields fString and fLength are read)
-- @return "" when the stored length is below 1, otherwise the result of
--         fromUnichar
local function fromUnistring(ustr) -- PA_Unistring
	local ucharLen = getUnistringLength(ustr)
	if ucharLen < 1 then
		return ""
	end
	local uchar = getUnistring(ustr) -- a NULL terminated PA_Unichar UTF16 string
	return fromUnichar(uchar, ucharLen)
end
unicode4d.fromUnistring = fromUnistring

return unicode4d
