• UTF82SJIS

#N88BASIC

Function utf8ToSjis(utf8 As *Byte) As *Byte
	Dim utf16 As *Word
	Dim wlen As Long
	Dim sjis As *Byte

	utf8ToSjis = NULL
	If utf8 = NULL Then
		Exit Function
	End If
	
	'=============================='
	'	UTF8=>UTF16変換	    '
	'=============================='
	'	まずは、wlenの長さをとる
   wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0)
    If wlen = 0 Then
			Exit Function
	End If

	'	utf16の領域確保
	utf16 = calloc(wlen + 1)
	If utf16 = NULL Then
		Exit Function
	End If

	'	utf16変換
	If MultiByteToWideChar(CP_UTF8, 0, utf8, -1, utf16, wlen) <= 0 Then
		free(utf16)
		Exit Function
	End If

	'=============================='
	'	UTF16=>ShiftJIS変換   '
	'=============================='
	'	まずは、長さを取得
	Dim mlen As Long
	
	mlen = WideCharToMultiByte(CP_ACP,0,utf16,-1,NULL,0," ",NULL)
	If mlen = 0 Then
		free(utf16)
		Exit Function
	End If

	'	領域確保
	sjis = calloc(mlen + 1)
	If sjis = NULL Then
		free(utf16)
		Exit Function
	End If

	'	utf16変換
	If WideCharToMultiByte(CP_ACP,0,utf16,-1,sjis,mlen," ",NULL) <= 0 Then
		free(utf16)
		free(sjis)
		Exit Function
	End If

	free(utf16)
	utf8ToSjis = sjis
End Function

Dim utf8[1000] As Byte
Dim sjis As *Byte
Dim hF As HANDLE
Dim r As DWord
hF = CreateFile("utf8.txt", GENERIC_READ,0,ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,0)
ReadFile(hF, utf8,1000, Varptr(r),ByVal 0)
sjis = utf8ToSjis(utf8)
Print MakeStr(sjis)
CloseHandle(hF)
free(sjis)
Input sjis



  • WinAPIを使うべき処理
ShiftJIS→Unicode(UCS-2)変換
MultiByteToWideChar関数を使う。
MultiByteToWideChar(外部ページ) に詳しい。誰かコピーして。

Unicode(UCS-2)→ShiftJIS変換
WideCharToMultiByte関数を使う。
WideCharToMultiByte(外部ページ) に詳しい。誰かコピーして。

  • WinAPIを使わなくてもプログラム側で簡単に変換できるもの
ShiftJIS→EUC-JP変換
ShiftJISとEUC-JPはエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。

Sub SJIS2EUCJP(src As *Byte,dst As *Byte)
    Dim is=0 As Long,id=0 As Long
    Do
        If src[is]=0 then
            dst[id]=0
            Exit Do
        Else If src[is]<&H80 then
            dst[id]=src[is]
            is++
            id++
        Else If (&HA1<=src[is]) And (src[is]<=&HDF) then
            dst[id]=&H8E
            dst[id+1]=src[is]
            is++
            id+=2
        Else
            Dim a As Long,b As Long
            b=src[is+1]-&H3F
            If b=0 then
                dst[id]=0
                Exit Do
            End If
            If b>63 then b--
            a=src[is]*2-&H101
            If a>62 then a-=128
            If b>94 then
                b-=94
                a++
            End If
            If (a<1) Or (94<a) Or (b<1) Or (94<b) then
                dst[id]=0
                Exit Do
            End If
            dst[id]=a+&HA0
            dst[id+1]=b+&HA0
            is+=2
            id+=2
        End If
    Loop
End Sub

EUC-JP→ShiftJIS変換
Sub EUCJP2SJIS(src As *Byte,dst As *Byte)
    Dim is=0 As Long,id=0 As Long
    Do
        If src[is]=0 then
            dst[id]=0
            Exit Do
        Else If src[is]<&H80 then
            dst[id]=src[is]
            is++
            id++
        Else If src[is]=&H8E then
            dst[id]=src[is+1]
            is+=2
            id++
        Else If src[is]=&H8F then'JIS X 0212-1990文字集合だが、Shift-JISには無いため?で代用する。
            dst[id]=Asc("?")
            is+=3
            id++
        Else
            Dim a As Long,b As Long
            a=src[is]-&HA0
            b=src[is+1]-&HA0
            If (a<1) Or (94<a) Or (b<1) Or (94<b) then
                dst[id]=0
                Exit Do
            End If
            a--
            If a and 1 then
                a--
                b+=94
            End If
            a=(a>>1)+&H81
            If b>63 then b++
            If a>&H9F then a+=64
            dst[id]=a
            dst[id+1]=b+&H3F
            is+=2
            id+=2
        End If
    Loop
End Sub

Unicode(UCS-2)→Unicode(UTF-8)変換
UCS-2とUTF-8はエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。
Sub UCS22UTF8(src As *Byte,dst As *Byte)
    Dim is=2 As Long,id=0 As Long
    Dim fBE As Long
    If (src[0]=&HFF) and (src[1]=&HFE) then'BOM
        fBE=0
    Else If (src[0]=&HFE) and (src[1]=&HFF) then
        fBE=1
    Else'default=Big Endian(RFC 2781)
        fBE=1
        is-=2
    End If
    Do
        If src[is]=0 And src[is+1]=0 then
            dst[id]=0
            Exit Do
        Else
            Dim c As Long
            c=src[is+(1-fBE)] As Long*256+src[is+fBE]
            If c<&H80 then
                dst[id]=c
                id++
            Else If c<&H800 then
                dst[id]=&HC0 Or (c>>6)
                dst[id+1]=&H80 Or (c and &H3F)
                id+=2
            Else If c<&H10000 then
                dst[id]=&HE0 Or (c>>12)
                dst[id+1]=&H80 Or ((c>>6) and &H3F)
                dst[id+2]=&H80 Or (c and &H3F)
                id+=3
            Else
                dst[id]=0
                Exit Do
            End If
            is+=2
        End If
    Loop
End Sub

Unicode(UTF-8)→Unicode(UCS-2)
Sub UTF82UCS2(src As *Byte,dst As *Byte)
    Dim is=0 As Long,id=2 As Long
    If (src[0]=&HEF) And (src[1]=&HBB) And (src[2]=&HBF) then is+=3
    dst[0]=&HFE
    dst[1]=&HFF
    Do
        If src[is]=0 then
            dst[id]=0
            Exit Do
        Else
            Dim c As Long
            If src[is]<&H80 then
                c=src[is]
                is++
            Else If src[is]<&HE0 then
                If src[is+1]=0 then
                    dst[id]=0
                    Exit Do
                Else
                    c=((src[is] And &H1F)<<6) Or (src[is+1] And &H3F)
                    is+=2
                End If
            Else If src[is]<&HF0 then
                If src[is+1]=0 then
                    dst[id]=0
                    Exit Do
                Else If src[is+2]=0 then
                    dst[id]=0
                    Exit Do
                Else
                    c=((src[is] And &H0F)<<12) Or ((src[is+1] And &H3F)<<6) Or (src[is+2] And &H3F)
                    is+=3
                End If
            Else If src[is]<&HF8 then
                If src[is+1]=0 then
                    dst[id]=0
                    Exit Do
                Else If src[is+2]=0 then
                    dst[id]=0
                    Exit Do
                Else If src[is+3]=0 then
                    dst[id]=0
                    Exit Do
                Else
                    c=((src[is] And &H03)<<18) Or ((src[is+1] And &H3F)<<12) Or ((src[is+2] And &H3F)<<6) Or (src[is+3] And &H3F)
                    is+=4
                End If
            Else
                dst[id]=0
                Exit Do
            End If
            If c>&H10000 then'UCS2には拡張領域が無いため表現できない。
                dst[id]=0
                dst[id+1]=Asc("?")
                id+=2
            Else
                dst[id]=c>>8
                dst[id+1]=c And &HFF
                id+=2
            End If
        End If
    Loop
End Sub


  • nkf32を使って
nkf32は文字コードの各種変換を行う便利ライブラリ。
DLLとして提供されていて、使い易くなってる。
ABも実はこれを使っている。SubOperationフォルダーを参照。

使い方は結構簡単で、例えば次のようにするだけで、何らかの文字コードのファイルをEUCに変換してくれる。

#N88BASIC

'宣言
Declare Sub GetNkfVersion Lib "Nkf32.dll" (verStr As *Byte)
Declare Function SetNkfOption Lib "Nkf32.dll" (optStr As *Byte) As Long
Declare Sub NkfConvert Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte)
Declare Sub ToHankaku Lib "Nkf32.dll" (inStr As *Byte)
Declare Sub ToZenkakuKana Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte)
Declare Sub ToMime Lib "Nkf32.dll" Alias "EncodeSubject" (outStr As *Byte, inStr As *Byte)
Declare Sub EncodeSubject Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte)

Dim vstr[478] As Byte
Dim in As *Byte
Dim out As *Byte
Dim size As DWord
GetNkfVersion(vstr)
Print "nkf32のバージョン→" & MakeStr(vstr)
in = LoadFile(VarPtr(size))
If in And size Then Print "読み取りOK" Else End
If SetNkfOption("-e") = 0 Then Print "EUCに変換します"
out = calloc(size+1)
NkfConvert(out, in)
SaveFile(out, lstrlen(out))
Print "おわり"
End

'ファイルを開いて中身を返す
Function LoadFile(sss As *Long) As *Byte
Dim ofn As OPENFILENAME
Dim buffer[333] As Byte
ofn.lStructSize=SizeOf(OPENFILENAME)
ofn.hwndOwner=GetActiveWindow()
ofn.lpstrFilter=Ex"テキスト ファイル(*.txt)\0*.txt\0すべてのファイル(*.*)\0*\0"
ofn.nFilterIndex=1
ofn.lpstrFile=buffer
ofn.nMaxFile=333
ofn.lpstrTitle="ファイルを開く"
ofn.Flags=OFN_FILEMUSTEXIST or OFN_PATHMUSTEXIST
If GetOpenFileName(ofn)=0 Then Exit Function

'Open
Dim hF As HANDLE
Dim r As Dword
Dim x As *Byte
hF = CreateFile( buffer, GENERIC_READ, 0, ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0)
SetDWord(sss, GetFileSize(hF, 0))
x = calloc(GetDword(sss)+1)
ReadFile(hF, x, GetDword(sss), sss, ByVal 0)
CloseHandle(hF)
LoadFile = x
End Function

'oooの内容を保存
Function SaveFile(ooo As *Byte, sss As Long) As Long
Dim ofn As OPENFILENAME
Dim buffer[333] As Byte
ofn.lStructSize=SizeOf(OPENFILENAME)
ofn.hwndOwner=GetActiveWindow()
ofn.lpstrFilter=Ex"テキスト ファイル(*.txt)\0*.txt\0すべてのファイル(*.*)\0*\0"
ofn.nFilterIndex=1
ofn.lpstrFile=buffer
ofn.nMaxFile=333
ofn.lpstrTitle="ファイルを保存"
ofn.Flags=OFN_FILEMUSTEXIST or OFN_PATHMUSTEXIST
If GetSaveFileName(ofn)=0 Then Exit Function

'Open
Dim hF As HANDLE
Dim w As Dword
hF = CreateFile(buffer, GENERIC_WRITE, 0, ByVal 0, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, 0)
WriteFile(hF, ooo, sss, VarPtr(w), ByVal 0)
CloseHandle(hF)
SaveFile = 44
End Function

タグ:

+ タグ編集
  • タグ:

このサイトはreCAPTCHAによって保護されており、Googleの プライバシーポリシー利用規約 が適用されます。

最終更新:2010年05月26日 14:21