ファイルの文字コード解析

機能

Javaファイル、XMLファイル、プロパティファイルに対して、文字コードの解析とBOMの有無の判定をする。

入出力

入力

入力フォルダ内に解析対象のファイルを格納する。
入力画面

出力

出力フォルダ内に「Result_実行日時.txt」が出力される。
出力画面

Result_20180127135351.txt
C:\input\SJIS.java	SJIS
C:\input\UTF-8.java	UTF-8 BOM無
C:\input\UTF-8.properties	UTF-8 BOM無
C:\input\UTF-8.xml	UTF-8 BOM無
C:\input\UTF-8BOM.java	UTF-8 BOM有

前提

ソースコード

extractEncoding.js

// Folder that is scanned (recursively) for files to analyze.
var TARGET_FOLDER = "C:\\input";
// Folder that receives the "Result_<timestamp>.txt" report.
var RESULT_FOLDER = "C:\\output";
// File names to analyze. Every alternative is anchored with "$" so that only
// real extensions match (e.g. "foo.properties.bak" is not picked up).
var FILE_PATTERN = /\.java$|\.xml$|\.properties$/;

main();

/**
 * Entry point of the macro.
 *
 * Collects every file under TARGET_FOLDER whose name matches FILE_PATTERN,
 * writes one "path<TAB>encoding" line per file to a new
 * "Result_<timestamp>.txt" in RESULT_FOLDER, and finally opens that report
 * in the editor.
 */
function main() {
    var shell = new ActiveXObject("WScript.Shell");
    var fso = new ActiveXObject("Scripting.FileSystemObject");

    // Expand environment variables in both folder settings (the original only
    // expanded the output folder); strings without %VAR% pass through unchanged.
    var targetFolder = shell.ExpandEnvironmentStrings(TARGET_FOLDER);
    var resultFileFolder = shell.ExpandEnvironmentStrings(RESULT_FOLDER);
    var resultFileName = "Result_" + formatYyyymmddhhmmss(new Date()) + ".txt";

    // Collect the inputs first so a missing input folder does not leave an
    // empty report file behind.
    var filePaths = getFilePathsRecursive(fso.GetFolder(targetFolder), FILE_PATTERN);

    var resultTextStream = fso.GetFolder(resultFileFolder).CreateTextFile(resultFileName);
    try {
        // getFilePathsRecursive returns a plain array, so iterate by index.
        for (var i = 0; i < filePaths.length; i++) {
            resultTextStream.WriteLine(getFileCharCode(filePaths[i]));
        }
    } finally {
        // Always release the report file, even when analyzing a file fails.
        resultTextStream.Close();
    }
    Editor.FileOpen(resultFileFolder + "\\" + resultFileName);
}

/**
 * Formats a date as a 14-digit "yyyyMMddHHmmss" string using local time.
 *
 * @param {Date} date The date to format.
 * @return {string} The formatted timestamp, e.g. "20180127135351".
 */
function formatYyyymmddhhmmss(date) {
    // Left-pads a number with a zero and keeps the last two characters.
    function twoDigits(value) {
        var padded = "0" + value;
        return padded.slice(-2);
    }

    var parts = [
        date.getFullYear(),
        twoDigits(date.getMonth() + 1), // getMonth() is zero-based
        twoDigits(date.getDate()),
        twoDigits(date.getHours()),
        twoDigits(date.getMinutes()),
        twoDigits(date.getSeconds())
    ];
    return parts.join("");
}

/**
 * Lists the paths of all files under a folder whose names match a pattern.
 *
 * Files directly inside the folder come first, followed by the matches of
 * each subfolder in enumeration order (depth-first).
 *
 * @param {Object} folder Folder object exposing Path, Files and SubFolders.
 * @param {RegExp} pattern Pattern tested against each file name.
 * @return {Array} Paths built as "<folder path>\<file name>".
 */
function getFilePathsRecursive(folder, pattern) {
    var matched = [];

    // Files directly in this folder.
    var fileCursor = new Enumerator(folder.Files);
    while (!fileCursor.atEnd()) {
        var name = fileCursor.item().Name;
        if (pattern.test(name)) {
            matched.push(folder.Path + "\\" + name);
        }
        fileCursor.moveNext();
    }

    // Then descend into every subfolder and append its matches.
    var folderCursor = new Enumerator(folder.SubFolders);
    while (!folderCursor.atEnd()) {
        matched = matched.concat(getFilePathsRecursive(folderCursor.item(), pattern));
        folderCursor.moveNext();
    }

    return matched;
}

/**
 * Determines the character encoding of one file and formats a report line.
 *
 * The leading bytes are first compared against known byte order marks
 * (UTF-16 BE/LE, UTF-7, UTF-8). When none matches, the file is opened in the
 * editor and the encoding code reported by Editor.GetCharCode() is mapped to
 * a label.
 *
 * @param {string} filePath Full path of the file to inspect.
 * @return {string} filePath, a tab character, and the encoding label.
 */
function getFileCharCode(filePath) {
    var charCode = "";
    var stream = new ActiveXObject("ADODB.Stream");
    stream.Open();
    try {
        stream.Type = 1; // 1 = binary stream, so Read() returns raw bytes
        stream.LoadFromFile(filePath);
        // Two-byte BOMs.
        if (stream.Size > 1) {
            stream.Position = 0;
            switch (bytes2hex(stream.Read(2))) {
                case "feff":
                    charCode = "UTF-16 BOM有(BE)";
                    break;
                case "fffe":
                    charCode = "UTF-16 BOM有(LE)";
                    break;
            }
        }
        // Three-byte BOMs.
        if (charCode === "" && stream.Size > 2) {
            stream.Position = 0;
            switch (bytes2hex(stream.Read(3))) {
                case "2b2f76":
                    charCode = "UTF-7 BOM有";
                    break;
                case "efbbbf":
                    charCode = "UTF-8 BOM有";
                    break;
            }
        }
    } finally {
        // Release the stream even when loading or reading the file fails.
        stream.Close();
    }
    if (charCode === "") {
        // No BOM: let the editor detect the encoding. The file must be opened
        // first; otherwise GetCharCode() describes whatever document happens
        // to be current and FileClose() closes that document instead.
        Editor.FileOpen(filePath);
        // NOTE(review): presumably gives the editor time to finish loading
        // before the encoding is queried - confirm.
        Editor.Sleep(20);
        switch (Editor.GetCharCode()) {
            case 0:
                charCode = "SJIS";
                break;
            case 1:
                charCode = "JIS";
                break;
            case 2:
                charCode = "EUC";
                break;
            case 3:
                charCode = "UTF-16 BOM無";
                break;
            case 4:
                charCode = "UTF-8 BOM無";
                break;
            case 5:
                charCode = "UTF-7 BOM無";
                break;
            case 6:
                charCode = "UTF-16BE";
                break;
            case 7:
                charCode = "CESU-8";
                break;
            case 8:
                charCode = "Latin1";
                break;
            case 90:
                charCode = "CP_ACP";
                break;
            case 91:
                charCode = "CP_OEM";
                break;
            default:
                charCode = "Unknown";
                break;
        }
        Editor.FileClose();
    }
    return filePath + "\t" + charCode;
}

/**
 * Converts binary data to its hexadecimal text form.
 *
 * The bytes are assigned to an XML element typed as "bin.hex", and the
 * element's text is returned.
 *
 * @param {*} bytes Binary data, e.g. the value returned by ADODB.Stream.Read().
 * @return {string} The element text produced for the given bytes.
 */
function bytes2hex(bytes) {
    var node = new ActiveXObject("Msxml2.DOMDocument").createElement("hex");
    node.dataType = "bin.hex";
    node.nodeTypedValue = bytes;
    return node.text;
}

実行.bat

"C:\Program Files (x86)\sakura\sakura.exe" -M="C:\tool\extractEncoding.js"

インストール時の留意点

  • extractEncoding.jsの文字コードをSJISにする。
  • 入力フォルダパス、出力フォルダパスに日本語を使用しない。

参考文献

qiita.com