「文字コードの変換」の編集履歴(バックアップ)一覧はこちら
「文字コードの変換」(2010/05/26 (水) 14:21:09) の最新版変更点
追加された行は緑色になります。
削除された行は赤色になります。
-UTF82SJIS
[[http://blog.goo.ne.jp/xmldtp/e/7eaeeb3dabfe975dbc57f73aefb1c059]]からコピペ
#N88BASIC
Function utf8ToSjis(utf8 As *Byte) As *Byte
Dim utf16 As *Word
Dim wlen As Long
Dim sjis As *Byte
utf8ToSjis = NULL
If utf8 = NULL Then
Exit Function
End If
'=============================='
' UTF8=>UTF16変換 '
'=============================='
' まずは、wlenの長さをとる
wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0)
If wlen = 0 Then
Exit Function
End If
' utf16の領域確保
utf16 = calloc(wlen + 1)
If utf16 = NULL Then
Exit Function
End If
' utf16変換
If MultiByteToWideChar(CP_UTF8, 0, utf8, -1, utf16, wlen) <= 0 Then
free(utf16)
Exit Function
End If
'=============================='
' UTF16=>ShiftJIS変換 '
'=============================='
' まずは、長さを取得
Dim mlen As Long
mlen = WideCharToMultiByte(CP_ACP,0,utf16,-1,NULL,0," ",NULL)
If mlen = 0 Then
free(utf16)
Exit Function
End If
' 領域確保
sjis = calloc(mlen + 1)
If sjis = NULL Then
free(utf16)
Exit Function
End If
' utf16変換
If WideCharToMultiByte(CP_ACP,0,utf16,-1,sjis,mlen," ",NULL) <= 0 Then
free(utf16)
free(sjis)
Exit Function
End If
free(utf16)
utf8ToSjis = sjis
End Function
Dim utf8[1000] As Byte
Dim sjis As *Byte
Dim hF As HANDLE
Dim r As DWord
hF = CreateFile("utf8.txt", GENERIC_READ,0,ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,0)
ReadFile(hF, utf8,1000, Varptr(r),ByVal 0)
sjis = utf8ToSjis(utf8)
Print MakeStr(sjis)
CloseHandle(hF)
free(sjis)
Input sjis
-WinAPIを使うべき処理
ShiftJIS→Unicode(UCS-2)変換
MultiByteToWideChar関数を使う。
[[MultiByteToWideChar(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/MultiByteToWideChar.html]]に詳しい。誰かコピーして。
Unicode(UCS-2)→ShiftJIS変換
WideCharToMultiByte関数を使う。
[[WideCharToMultiByte(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/WideCharToMultiByte.html]]に詳しい。誰かコピーして。
-WinAPIを使わなくてもプログラム側で簡単に変換できるもの
ShiftJIS→EUC-JP変換
ShiftJISとEUC-JPはエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。
Sub SJIS2EUCJP(src As *Byte,dst As *Byte)
Dim is=0 As Long,id=0 As Long
Do
If src[is]=0 then
dst[id]=0
Exit Do
Else If src[is]<&H80 then
dst[id]=src[is]
is++
id++
Else If (&HA1<=src[is]) And (src[is]<=&HDF) then
dst[id]=&H8E
dst[id+1]=src[is]
is++
id+=2
Else
Dim a As Long,b As Long
b=src[is+1]-&H3F
If b=0 then
dst[id]=0
Exit Do
End If
If b>63 then b--
a=src[is]*2-&H101
If a>62 then a-=128
If b>94 then
b-=94
a++
End If
If (a<1) Or (94<a) Or (b<1) Or (94<b) then
dst[id]=0
Exit Do
End If
dst[id]=a+&HA0
dst[id+1]=b+&HA0
is+=2
id+=2
End If
Loop
End Sub
EUC-JP→ShiftJIS変換
Sub EUCJP2SJIS(src As *Byte,dst As *Byte)
Dim is=0 As Long,id=0 As Long
Do
If src[is]=0 then
dst[id]=0
Exit Do
Else If src[is]<&H80 then
dst[id]=src[is]
is++
id++
Else If src[is]=&H8E then
dst[id]=src[is+1]
is+=2
id++
Else If src[is]=&H8F then'JIS X 0212-1990文字集合だが、Shift-JISには無いため?で代用する。
dst[id]=Asc("?")
is+=3
id++
Else
Dim a As Long,b As Long
a=src[is]-&HA0
b=src[is+1]-&HA0
If (a<1) Or (94<a) Or (b<1) Or (94<b) then
dst[id]=0
Exit Do
End If
a--
If a and 1 then
a--
b+=94
End If
a=(a>>1)+&H81
If b>63 then b++
If a>&H9F then a+=64
dst[id]=a
dst[id+1]=b+&H3F
is+=2
id+=2
End If
Loop
End Sub
Unicode(UCS-2)→Unicode(UTF-8)変換
UCS-2とUTF-8はエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。
Sub UCS22UTF8(src As *Byte,dst As *Byte)
Dim is=2 As Long,id=0 As Long
Dim fBE As Long
If (src[0]=&HFF) and (src[1]=&HFE) then'BOM
fBE=0
Else If (src[0]=&HFE) and (src[1]=&HFF) then
fBE=1
Else'default=Big Endian(RFC 2781)
fBE=1
is-=2
End If
Do
If src[is]=0 And src[is+1]=0 then
dst[id]=0
Exit Do
Else
Dim c As Long
c=src[is+(1-fBE)] As Long*256+src[is+fBE]
If c<&H80 then
dst[id]=c
id++
Else If c<&H800 then
dst[id]=&HC0 Or (c>>6)
dst[id+1]=&H80 Or (c and &H3F)
id+=2
Else If c<&H10000 then
dst[id]=&HE0 Or (c>>12)
dst[id+1]=&H80 Or ((c>>6) and &H3F)
dst[id+2]=&H80 Or (c and &H3F)
id+=3
Else
dst[id]=0
Exit Do
End If
is+=2
End If
Loop
End Sub
Unicode(UTF-8)→Unicode(UCS-2)
Sub UTF82UCS2(src As *Byte,dst As *Byte)
Dim is=0 As Long,id=2 As Long
If (src[0]=&HEF) And (src[1]=&HBB) And (src[2]=&HBF) then is+=3
dst[0]=&HFE
dst[1]=&HFF
Do
If src[is]=0 then
dst[id]=0
Exit Do
Else
Dim c As Long
If src[is]<&H80 then
c=src[is]
is++
Else If src[is]<&HE0 then
If src[is+1]=0 then
dst[id]=0
Exit Do
Else
c=((src[is] And &H1F)<<6) Or (src[is+1] And &H3F)
is+=2
End If
Else If src[is]<&HF0 then
If src[is+1]=0 then
dst[id]=0
Exit Do
Else If src[is+2]=0 then
dst[id]=0
Exit Do
Else
c=((src[is] And &H0F)<<12) Or ((src[is+1] And &H3F)<<6) Or (src[is+2] And &H3F)
is+=3
End If
Else If src[is]<&HF8 then
If src[is+1]=0 then
dst[id]=0
Exit Do
Else If src[is+2]=0 then
dst[id]=0
Exit Do
Else If src[is+3]=0 then
dst[id]=0
Exit Do
Else
c=((src[is] And &H03)<<18) Or ((src[is+1] And &H3F)<<12) Or ((src[is+2] And &H3F)<<6) Or (src[is+3] And &H3F)
is+=4
End If
Else
dst[id]=0
Exit Do
End If
If c>&H10000 then'UCS2には拡張領域が無いため表現できない。
dst[id]=0
dst[id+1]=Asc("?")
id+=2
Else
dst[id]=c>>8
dst[id+1]=c And &HFF
id+=2
End If
End If
Loop
End Sub
-UTF82SJIS
[[http://blog.goo.ne.jp/xmldtp/e/7eaeeb3dabfe975dbc57f73aefb1c059]]からコピペ
#N88BASIC
Function utf8ToSjis(utf8 As *Byte) As *Byte
Dim utf16 As *Word
Dim wlen As Long
Dim sjis As *Byte
utf8ToSjis = NULL
If utf8 = NULL Then
Exit Function
End If
'=============================='
' UTF8=>UTF16変換 '
'=============================='
' まずは、wlenの長さをとる
wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0)
If wlen = 0 Then
Exit Function
End If
' utf16の領域確保
utf16 = calloc(wlen + 1)
If utf16 = NULL Then
Exit Function
End If
' utf16変換
If MultiByteToWideChar(CP_UTF8, 0, utf8, -1, utf16, wlen) <= 0 Then
free(utf16)
Exit Function
End If
'=============================='
' UTF16=>ShiftJIS変換 '
'=============================='
' まずは、長さを取得
Dim mlen As Long
mlen = WideCharToMultiByte(CP_ACP,0,utf16,-1,NULL,0," ",NULL)
If mlen = 0 Then
free(utf16)
Exit Function
End If
' 領域確保
sjis = calloc(mlen + 1)
If sjis = NULL Then
free(utf16)
Exit Function
End If
' utf16変換
If WideCharToMultiByte(CP_ACP,0,utf16,-1,sjis,mlen," ",NULL) <= 0 Then
free(utf16)
free(sjis)
Exit Function
End If
free(utf16)
utf8ToSjis = sjis
End Function
Dim utf8[1000] As Byte
Dim sjis As *Byte
Dim hF As HANDLE
Dim r As DWord
hF = CreateFile("utf8.txt", GENERIC_READ,0,ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,0)
ReadFile(hF, utf8,1000, Varptr(r),ByVal 0)
sjis = utf8ToSjis(utf8)
Print MakeStr(sjis)
CloseHandle(hF)
free(sjis)
Input sjis
-WinAPIを使うべき処理
ShiftJIS→Unicode(UCS-2)変換
MultiByteToWideChar関数を使う。
[[MultiByteToWideChar(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/MultiByteToWideChar.html]]に詳しい。誰かコピーして。
Unicode(UCS-2)→ShiftJIS変換
WideCharToMultiByte関数を使う。
[[WideCharToMultiByte(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/WideCharToMultiByte.html]]に詳しい。誰かコピーして。
-WinAPIを使わなくてもプログラム側で簡単に変換できるもの
ShiftJIS→EUC-JP変換
ShiftJISとEUC-JPはエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。
Sub SJIS2EUCJP(src As *Byte,dst As *Byte)
Dim is=0 As Long,id=0 As Long
Do
If src[is]=0 then
dst[id]=0
Exit Do
Else If src[is]<&H80 then
dst[id]=src[is]
is++
id++
Else If (&HA1<=src[is]) And (src[is]<=&HDF) then
dst[id]=&H8E
dst[id+1]=src[is]
is++
id+=2
Else
Dim a As Long,b As Long
b=src[is+1]-&H3F
If b=0 then
dst[id]=0
Exit Do
End If
If b>63 then b--
a=src[is]*2-&H101
If a>62 then a-=128
If b>94 then
b-=94
a++
End If
If (a<1) Or (94<a) Or (b<1) Or (94<b) then
dst[id]=0
Exit Do
End If
dst[id]=a+&HA0
dst[id+1]=b+&HA0
is+=2
id+=2
End If
Loop
End Sub
EUC-JP→ShiftJIS変換
Sub EUCJP2SJIS(src As *Byte,dst As *Byte)
Dim is=0 As Long,id=0 As Long
Do
If src[is]=0 then
dst[id]=0
Exit Do
Else If src[is]<&H80 then
dst[id]=src[is]
is++
id++
Else If src[is]=&H8E then
dst[id]=src[is+1]
is+=2
id++
Else If src[is]=&H8F then'JIS X 0212-1990文字集合だが、Shift-JISには無いため?で代用する。
dst[id]=Asc("?")
is+=3
id++
Else
Dim a As Long,b As Long
a=src[is]-&HA0
b=src[is+1]-&HA0
If (a<1) Or (94<a) Or (b<1) Or (94<b) then
dst[id]=0
Exit Do
End If
a--
If a and 1 then
a--
b+=94
End If
a=(a>>1)+&H81
If b>63 then b++
If a>&H9F then a+=64
dst[id]=a
dst[id+1]=b+&H3F
is+=2
id+=2
End If
Loop
End Sub
Unicode(UCS-2)→Unicode(UTF-8)変換
UCS-2とUTF-8はエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。
Sub UCS22UTF8(src As *Byte,dst As *Byte)
Dim is=2 As Long,id=0 As Long
Dim fBE As Long
If (src[0]=&HFF) and (src[1]=&HFE) then'BOM
fBE=0
Else If (src[0]=&HFE) and (src[1]=&HFF) then
fBE=1
Else'default=Big Endian(RFC 2781)
fBE=1
is-=2
End If
Do
If src[is]=0 And src[is+1]=0 then
dst[id]=0
Exit Do
Else
Dim c As Long
c=src[is+(1-fBE)] As Long*256+src[is+fBE]
If c<&H80 then
dst[id]=c
id++
Else If c<&H800 then
dst[id]=&HC0 Or (c>>6)
dst[id+1]=&H80 Or (c and &H3F)
id+=2
Else If c<&H10000 then
dst[id]=&HE0 Or (c>>12)
dst[id+1]=&H80 Or ((c>>6) and &H3F)
dst[id+2]=&H80 Or (c and &H3F)
id+=3
Else
dst[id]=0
Exit Do
End If
is+=2
End If
Loop
End Sub
Unicode(UTF-8)→Unicode(UCS-2)
Sub UTF82UCS2(src As *Byte,dst As *Byte)
Dim is=0 As Long,id=2 As Long
If (src[0]=&HEF) And (src[1]=&HBB) And (src[2]=&HBF) then is+=3
dst[0]=&HFE
dst[1]=&HFF
Do
If src[is]=0 then
dst[id]=0
Exit Do
Else
Dim c As Long
If src[is]<&H80 then
c=src[is]
is++
Else If src[is]<&HE0 then
If src[is+1]=0 then
dst[id]=0
Exit Do
Else
c=((src[is] And &H1F)<<6) Or (src[is+1] And &H3F)
is+=2
End If
Else If src[is]<&HF0 then
If src[is+1]=0 then
dst[id]=0
Exit Do
Else If src[is+2]=0 then
dst[id]=0
Exit Do
Else
c=((src[is] And &H0F)<<12) Or ((src[is+1] And &H3F)<<6) Or (src[is+2] And &H3F)
is+=3
End If
Else If src[is]<&HF8 then
If src[is+1]=0 then
dst[id]=0
Exit Do
Else If src[is+2]=0 then
dst[id]=0
Exit Do
Else If src[is+3]=0 then
dst[id]=0
Exit Do
Else
c=((src[is] And &H03)<<18) Or ((src[is+1] And &H3F)<<12) Or ((src[is+2] And &H3F)<<6) Or (src[is+3] And &H3F)
is+=4
End If
Else
dst[id]=0
Exit Do
End If
If c>&H10000 then'UCS2には拡張領域が無いため表現できない。
dst[id]=0
dst[id+1]=Asc("?")
id+=2
Else
dst[id]=c>>8
dst[id+1]=c And &HFF
id+=2
End If
End If
Loop
End Sub
-nkf32を使って
nkf32は文字コードの各種変換を行う便利ライブラリ。
DLLとして提供されていて、使い易くなってる。
ABも実はこれを使っている。SubOperationフォルダーを参照。
使い方は結構簡単で、例えば次のようにするだけで、何らかの文字コードのファイルをEUCに変換してくれる。
#asciiart(blockquote){
#N88BASIC
'宣言
Declare Sub GetNkfVersion Lib "Nkf32.dll" (verStr As *Byte)
Declare Function SetNkfOption Lib "Nkf32.dll" (optStr As *Byte) As Long
Declare Sub NkfConvert Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte)
Declare Sub ToHankaku Lib "Nkf32.dll" (inStr As *Byte)
Declare Sub ToZenkakuKana Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte)
Declare Sub ToMime Lib "Nkf32.dll" Alias "EncodeSubject" (outStr As *Byte, inStr As *Byte)
Declare Sub EncodeSubject Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte)
Dim vstr[478] As Byte
Dim in As *Byte
Dim out As *Byte
Dim size As DWord
GetNkfVersion(vstr)
Print "nkf32のバージョン→" & MakeStr(vstr)
in = LoadFile(VarPtr(size))
If in And size Then Print "読み取りOK" Else End
If SetNkfOption("-e") = 0 Then Print "EUCに変換します"
out = calloc(size+1)
NkfConvert(out, in)
SaveFile(out, lstrlen(out))
Print "おわり"
End
'ファイルを開いて中身を返す
Function LoadFile(sss As *Long) As *Byte
Dim ofn As OPENFILENAME
Dim buffer[333] As Byte
ofn.lStructSize=SizeOf(OPENFILENAME)
ofn.hwndOwner=GetActiveWindow()
ofn.lpstrFilter=Ex"テキスト ファイル(*.txt)\0*.txt\0すべてのファイル(*.*)\0*\0"
ofn.nFilterIndex=1
ofn.lpstrFile=buffer
ofn.nMaxFile=333
ofn.lpstrTitle="ファイルを開く"
ofn.Flags=OFN_FILEMUSTEXIST or OFN_PATHMUSTEXIST
If GetOpenFileName(ofn)=0 Then Exit Function
'Open
Dim hF As HANDLE
Dim r As Dword
Dim x As *Byte
hF = CreateFile( buffer, GENERIC_READ, 0, ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0)
SetDWord(sss, GetFileSize(hF, 0))
x = calloc(GetDword(sss)+1)
ReadFile(hF, x, GetDword(sss), sss, ByVal 0)
CloseHandle(hF)
LoadFile = x
End Function
'oooの内容を保存
Function SaveFile(ooo As *Byte, sss As Long) As Long
Dim ofn As OPENFILENAME
Dim buffer[333] As Byte
ofn.lStructSize=SizeOf(OPENFILENAME)
ofn.hwndOwner=GetActiveWindow()
ofn.lpstrFilter=Ex"テキスト ファイル(*.txt)\0*.txt\0すべてのファイル(*.*)\0*\0"
ofn.nFilterIndex=1
ofn.lpstrFile=buffer
ofn.nMaxFile=333
ofn.lpstrTitle="ファイルを保存"
ofn.Flags=OFN_FILEMUSTEXIST or OFN_PATHMUSTEXIST
If GetSaveFileName(ofn)=0 Then Exit Function
'Open
Dim hF As HANDLE
Dim w As Dword
hF = CreateFile(buffer, GENERIC_WRITE, 0, ByVal 0, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, 0)
WriteFile(hF, ooo, sss, VarPtr(w), ByVal 0)
CloseHandle(hF)
SaveFile = 44
End Function
}