「文字コードの変換」の編集履歴(バックアップ)一覧はこちら

文字コードの変換」(2010/05/26 (水) 14:21:09) の最新版変更点

追加された行は緑色になります。

削除された行は赤色になります。

-UTF82SJIS [[http://blog.goo.ne.jp/xmldtp/e/7eaeeb3dabfe975dbc57f73aefb1c059]]からコピペ #N88BASIC Function utf8ToSjis(utf8 As *Byte) As *Byte Dim utf16 As *Word Dim wlen As Long Dim sjis As *Byte utf8ToSjis = NULL If utf8 = NULL Then Exit Function End If '==============================' ' UTF8=>UTF16変換 ' '==============================' ' まずは、wlenの長さをとる wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0) If wlen = 0 Then Exit Function End If ' utf16の領域確保 utf16 = calloc(wlen + 1) If utf16 = NULL Then Exit Function End If ' utf16変換 If MultiByteToWideChar(CP_UTF8, 0, utf8, -1, utf16, wlen) <= 0 Then free(utf16) Exit Function End If '==============================' ' UTF16=>ShiftJIS変換 ' '==============================' ' まずは、長さを取得 Dim mlen As Long mlen = WideCharToMultiByte(CP_ACP,0,utf16,-1,NULL,0," ",NULL) If mlen = 0 Then free(utf16) Exit Function End If ' 領域確保 sjis = calloc(mlen + 1) If sjis = NULL Then free(utf16) Exit Function End If ' utf16変換 If WideCharToMultiByte(CP_ACP,0,utf16,-1,sjis,mlen," ",NULL) <= 0 Then free(utf16) free(sjis) Exit Function End If free(utf16) utf8ToSjis = sjis End Function Dim utf8[1000] As Byte Dim sjis As *Byte Dim hF As HANDLE Dim r As DWord hF = CreateFile("utf8.txt", GENERIC_READ,0,ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,0) ReadFile(hF, utf8,1000, Varptr(r),ByVal 0) sjis = utf8ToSjis(utf8) Print MakeStr(sjis) CloseHandle(hF) free(sjis) Input sjis -WinAPIを使うべき処理 ShiftJIS→Unicode(UCS-2)変換 MultiByteToWideChar関数を使う。 [[MultiByteToWideChar(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/MultiByteToWideChar.html]]に詳しい。誰かコピーして。 Unicode(UCS-2)→ShiftJIS変換 WideCharToMultiByte関数を使う。 [[WideCharToMultiByte(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/WideCharToMultiByte.html]]に詳しい。誰かコピーして。 -WinAPIを使わなくてもプログラム側で簡単に変換できるもの ShiftJIS→EUC-JP変換 ShiftJISとEUC-JPはエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。 Sub SJIS2EUCJP(src As *Byte,dst As *Byte) Dim is=0 As Long,id=0 As Long Do If src[is]=0 then dst[id]=0 Exit Do Else If src[is]<&H80 then dst[id]=src[is] is++ id++ Else If (&HA1<=src[is]) And (src[is]<=&HDF) then dst[id]=&H8E dst[id+1]=src[is] is++ id+=2 Else Dim a As Long,b As Long b=src[is+1]-&H3F If b=0 then dst[id]=0 Exit Do End If If b>63 then b-- a=src[is]*2-&H101 If a>62 then a-=128 If b>94 then b-=94 a++ End If If (a<1) Or (94<a) Or (b<1) Or (94<b) then dst[id]=0 Exit Do End If dst[id]=a+&HA0 dst[id+1]=b+&HA0 is+=2 id+=2 End If Loop End Sub EUC-JP→ShiftJIS変換 Sub EUCJP2SJIS(src As *Byte,dst As *Byte) Dim is=0 As Long,id=0 As Long Do If src[is]=0 then dst[id]=0 Exit Do Else If src[is]<&H80 then dst[id]=src[is] is++ id++ Else If src[is]=&H8E then dst[id]=src[is+1] is+=2 id++ Else If src[is]=&H8F then'JIS X 0212-1990文字集合だが、Shift-JISには無いため?で代用する。 dst[id]=Asc("?") is+=3 id++ Else Dim a As Long,b As Long a=src[is]-&HA0 b=src[is+1]-&HA0 If (a<1) Or (94<a) Or (b<1) Or (94<b) then dst[id]=0 Exit Do End If a-- If a and 1 then a-- b+=94 End If a=(a>>1)+&H81 If b>63 then b++ If a>&H9F then a+=64 dst[id]=a dst[id+1]=b+&H3F is+=2 id+=2 End If Loop End Sub Unicode(UCS-2)→Unicode(UTF-8)変換 UCS-2とUTF-8はエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。 Sub UCS22UTF8(src As *Byte,dst As *Byte) Dim is=2 As Long,id=0 As Long Dim fBE As Long If (src[0]=&HFF) and (src[1]=&HFE) then'BOM fBE=0 Else If (src[0]=&HFE) and (src[1]=&HFF) then fBE=1 Else'default=Big Endian(RFC 2781) fBE=1 is-=2 End If Do If src[is]=0 And src[is+1]=0 then dst[id]=0 Exit Do Else Dim c As Long c=src[is+(1-fBE)] As Long*256+src[is+fBE] If c<&H80 then dst[id]=c id++ Else If c<&H800 then dst[id]=&HC0 Or (c>>6) dst[id+1]=&H80 Or (c and &H3F) id+=2 Else If c<&H10000 then dst[id]=&HE0 Or (c>>12) dst[id+1]=&H80 Or ((c>>6) and &H3F) dst[id+2]=&H80 Or (c and &H3F) id+=3 Else dst[id]=0 Exit Do End If is+=2 End If Loop End Sub Unicode(UTF-8)→Unicode(UCS-2) Sub UTF82UCS2(src As *Byte,dst As *Byte) Dim is=0 As Long,id=2 As Long If (src[0]=&HEF) And (src[1]=&HBB) And (src[2]=&HBF) then is+=3 dst[0]=&HFE dst[1]=&HFF Do If src[is]=0 then dst[id]=0 Exit Do Else Dim c As Long If src[is]<&H80 then c=src[is] is++ Else If src[is]<&HE0 then If src[is+1]=0 then dst[id]=0 Exit Do Else c=((src[is] And &H1F)<<6) Or (src[is+1] And &H3F) is+=2 End If Else If src[is]<&HF0 then If src[is+1]=0 then dst[id]=0 Exit Do Else If src[is+2]=0 then dst[id]=0 Exit Do Else c=((src[is] And &H0F)<<12) Or ((src[is+1] And &H3F)<<6) Or (src[is+2] And &H3F) is+=3 End If Else If src[is]<&HF8 then If src[is+1]=0 then dst[id]=0 Exit Do Else If src[is+2]=0 then dst[id]=0 Exit Do Else If src[is+3]=0 then dst[id]=0 Exit Do Else c=((src[is] And &H03)<<18) Or ((src[is+1] And &H3F)<<12) Or ((src[is+2] And &H3F)<<6) Or (src[is+3] And &H3F) is+=4 End If Else dst[id]=0 Exit Do End If If c>&H10000 then'UCS2には拡張領域が無いため表現できない。 dst[id]=0 dst[id+1]=Asc("?") id+=2 Else dst[id]=c>>8 dst[id+1]=c And &HFF id+=2 End If End If Loop End Sub
-UTF82SJIS [[http://blog.goo.ne.jp/xmldtp/e/7eaeeb3dabfe975dbc57f73aefb1c059]]からコピペ #N88BASIC Function utf8ToSjis(utf8 As *Byte) As *Byte Dim utf16 As *Word Dim wlen As Long Dim sjis As *Byte utf8ToSjis = NULL If utf8 = NULL Then Exit Function End If '==============================' ' UTF8=>UTF16変換 ' '==============================' ' まずは、wlenの長さをとる wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0) If wlen = 0 Then Exit Function End If ' utf16の領域確保 utf16 = calloc(wlen + 1) If utf16 = NULL Then Exit Function End If ' utf16変換 If MultiByteToWideChar(CP_UTF8, 0, utf8, -1, utf16, wlen) <= 0 Then free(utf16) Exit Function End If '==============================' ' UTF16=>ShiftJIS変換 ' '==============================' ' まずは、長さを取得 Dim mlen As Long mlen = WideCharToMultiByte(CP_ACP,0,utf16,-1,NULL,0," ",NULL) If mlen = 0 Then free(utf16) Exit Function End If ' 領域確保 sjis = calloc(mlen + 1) If sjis = NULL Then free(utf16) Exit Function End If ' utf16変換 If WideCharToMultiByte(CP_ACP,0,utf16,-1,sjis,mlen," ",NULL) <= 0 Then free(utf16) free(sjis) Exit Function End If free(utf16) utf8ToSjis = sjis End Function Dim utf8[1000] As Byte Dim sjis As *Byte Dim hF As HANDLE Dim r As DWord hF = CreateFile("utf8.txt", GENERIC_READ,0,ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,0) ReadFile(hF, utf8,1000, Varptr(r),ByVal 0) sjis = utf8ToSjis(utf8) Print MakeStr(sjis) CloseHandle(hF) free(sjis) Input sjis -WinAPIを使うべき処理 ShiftJIS→Unicode(UCS-2)変換 MultiByteToWideChar関数を使う。 [[MultiByteToWideChar(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/MultiByteToWideChar.html]]に詳しい。誰かコピーして。 Unicode(UCS-2)→ShiftJIS変換 WideCharToMultiByte関数を使う。 [[WideCharToMultiByte(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/WideCharToMultiByte.html]]に詳しい。誰かコピーして。 -WinAPIを使わなくてもプログラム側で簡単に変換できるもの ShiftJIS→EUC-JP変換 ShiftJISとEUC-JPはエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。 Sub SJIS2EUCJP(src As *Byte,dst As *Byte) Dim is=0 As Long,id=0 As Long Do If src[is]=0 then dst[id]=0 Exit Do Else If src[is]<&H80 then dst[id]=src[is] is++ id++ Else If (&HA1<=src[is]) And (src[is]<=&HDF) then dst[id]=&H8E dst[id+1]=src[is] is++ id+=2 Else Dim a As Long,b As Long b=src[is+1]-&H3F If b=0 then dst[id]=0 Exit Do End If If b>63 then b-- a=src[is]*2-&H101 If a>62 then a-=128 If b>94 then b-=94 a++ End If If (a<1) Or (94<a) Or (b<1) Or (94<b) then dst[id]=0 Exit Do End If dst[id]=a+&HA0 dst[id+1]=b+&HA0 is+=2 id+=2 End If Loop End Sub EUC-JP→ShiftJIS変換 Sub EUCJP2SJIS(src As *Byte,dst As *Byte) Dim is=0 As Long,id=0 As Long Do If src[is]=0 then dst[id]=0 Exit Do Else If src[is]<&H80 then dst[id]=src[is] is++ id++ Else If src[is]=&H8E then dst[id]=src[is+1] is+=2 id++ Else If src[is]=&H8F then'JIS X 0212-1990文字集合だが、Shift-JISには無いため?で代用する。 dst[id]=Asc("?") is+=3 id++ Else Dim a As Long,b As Long a=src[is]-&HA0 b=src[is+1]-&HA0 If (a<1) Or (94<a) Or (b<1) Or (94<b) then dst[id]=0 Exit Do End If a-- If a and 1 then a-- b+=94 End If a=(a>>1)+&H81 If b>63 then b++ If a>&H9F then a+=64 dst[id]=a dst[id+1]=b+&H3F is+=2 id+=2 End If Loop End Sub Unicode(UCS-2)→Unicode(UTF-8)変換 UCS-2とUTF-8はエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。 Sub UCS22UTF8(src As *Byte,dst As *Byte) Dim is=2 As Long,id=0 As Long Dim fBE As Long If (src[0]=&HFF) and (src[1]=&HFE) then'BOM fBE=0 Else If (src[0]=&HFE) and (src[1]=&HFF) then fBE=1 Else'default=Big Endian(RFC 2781) fBE=1 is-=2 End If Do If src[is]=0 And src[is+1]=0 then dst[id]=0 Exit Do Else Dim c As Long c=src[is+(1-fBE)] As Long*256+src[is+fBE] If c<&H80 then dst[id]=c id++ Else If c<&H800 then dst[id]=&HC0 Or (c>>6) dst[id+1]=&H80 Or (c and &H3F) id+=2 Else If c<&H10000 then dst[id]=&HE0 Or (c>>12) dst[id+1]=&H80 Or ((c>>6) and &H3F) dst[id+2]=&H80 Or (c and &H3F) id+=3 Else dst[id]=0 Exit Do End If is+=2 End If Loop End Sub Unicode(UTF-8)→Unicode(UCS-2) Sub UTF82UCS2(src As *Byte,dst As *Byte) Dim is=0 As Long,id=2 As Long If (src[0]=&HEF) And (src[1]=&HBB) And (src[2]=&HBF) then is+=3 dst[0]=&HFE dst[1]=&HFF Do If src[is]=0 then dst[id]=0 Exit Do Else Dim c As Long If src[is]<&H80 then c=src[is] is++ Else If src[is]<&HE0 then If src[is+1]=0 then dst[id]=0 Exit Do Else c=((src[is] And &H1F)<<6) Or (src[is+1] And &H3F) is+=2 End If Else If src[is]<&HF0 then If src[is+1]=0 then dst[id]=0 Exit Do Else If src[is+2]=0 then dst[id]=0 Exit Do Else c=((src[is] And &H0F)<<12) Or ((src[is+1] And &H3F)<<6) Or (src[is+2] And &H3F) is+=3 End If Else If src[is]<&HF8 then If src[is+1]=0 then dst[id]=0 Exit Do Else If src[is+2]=0 then dst[id]=0 Exit Do Else If src[is+3]=0 then dst[id]=0 Exit Do Else c=((src[is] And &H03)<<18) Or ((src[is+1] And &H3F)<<12) Or ((src[is+2] And &H3F)<<6) Or (src[is+3] And &H3F) is+=4 End If Else dst[id]=0 Exit Do End If If c>&H10000 then'UCS2には拡張領域が無いため表現できない。 dst[id]=0 dst[id+1]=Asc("?") id+=2 Else dst[id]=c>>8 dst[id+1]=c And &HFF id+=2 End If End If Loop End Sub -nkf32を使って nkf32は文字コードの各種変換を行う便利ライブラリ。 DLLとして提供されていて、使い易くなってる。 ABも実はこれを使っている。SubOperationフォルダーを参照。 使い方は結構簡単で、例えば次のようにするだけで、何らかの文字コードのファイルをEUCに変換してくれる。 #asciiart(blockquote){ #N88BASIC '宣言 Declare Sub GetNkfVersion Lib "Nkf32.dll" (verStr As *Byte) Declare Function SetNkfOption Lib "Nkf32.dll" (optStr As *Byte) As Long Declare Sub NkfConvert Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte) Declare Sub ToHankaku Lib "Nkf32.dll" (inStr As *Byte) Declare Sub ToZenkakuKana Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte) Declare Sub ToMime Lib "Nkf32.dll" Alias "EncodeSubject" (outStr As *Byte, inStr As *Byte) Declare Sub EncodeSubject Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte) Dim vstr[478] As Byte Dim in As *Byte Dim out As *Byte Dim size As DWord GetNkfVersion(vstr) Print "nkf32のバージョン→" & MakeStr(vstr) in = LoadFile(VarPtr(size)) If in And size Then Print "読み取りOK" Else End If SetNkfOption("-e") = 0 Then Print "EUCに変換します" out = calloc(size+1) NkfConvert(out, in) SaveFile(out, lstrlen(out)) Print "おわり" End 'ファイルを開いて中身を返す Function LoadFile(sss As *Long) As *Byte Dim ofn As OPENFILENAME Dim buffer[333] As Byte ofn.lStructSize=SizeOf(OPENFILENAME) ofn.hwndOwner=GetActiveWindow() ofn.lpstrFilter=Ex"テキスト ファイル(*.txt)\0*.txt\0すべてのファイル(*.*)\0*\0" ofn.nFilterIndex=1 ofn.lpstrFile=buffer ofn.nMaxFile=333 ofn.lpstrTitle="ファイルを開く" ofn.Flags=OFN_FILEMUSTEXIST or OFN_PATHMUSTEXIST If GetOpenFileName(ofn)=0 Then Exit Function 'Open Dim hF As HANDLE Dim r As Dword Dim x As *Byte hF = CreateFile( buffer, GENERIC_READ, 0, ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0) SetDWord(sss, GetFileSize(hF, 0)) x = calloc(GetDword(sss)+1) ReadFile(hF, x, GetDword(sss), sss, ByVal 0) CloseHandle(hF) LoadFile = x End Function 'oooの内容を保存 Function SaveFile(ooo As *Byte, sss As Long) As Long Dim ofn As OPENFILENAME Dim buffer[333] As Byte ofn.lStructSize=SizeOf(OPENFILENAME) ofn.hwndOwner=GetActiveWindow() ofn.lpstrFilter=Ex"テキスト ファイル(*.txt)\0*.txt\0すべてのファイル(*.*)\0*\0" ofn.nFilterIndex=1 ofn.lpstrFile=buffer ofn.nMaxFile=333 ofn.lpstrTitle="ファイルを保存" ofn.Flags=OFN_FILEMUSTEXIST or OFN_PATHMUSTEXIST If GetSaveFileName(ofn)=0 Then Exit Function 'Open Dim hF As HANDLE Dim w As Dword hF = CreateFile(buffer, GENERIC_WRITE, 0, ByVal 0, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, 0) WriteFile(hF, ooo, sss, VarPtr(w), ByVal 0) CloseHandle(hF) SaveFile = 44 End Function }

表示オプション

横に並べて表示:
変化行の前後のみ表示: