1,获取文件的流
InportStream importStream = file.getInputStream();
2,把文件流转化为字节流
//字节数组输出流
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
byte[] buffer = new byte[1024];
int len;
while ((len = inputStream.read(buffer)) > -1) {
baos.write(buffer, 0, len);
}
baos.flush();
}catch(IOException e){
e.printStackTrace();
}
return new ByteArrayInputStream(baos.toByteArray());
3,根据字节流的判断文件的编码
//默认编码
String charset = "GBK";
byte[] first3Bytes = new byte[3];
//缓存流
BufferedInputStream bis = new BufferedInputStream(inputStream1);
try {
boolean checked = false;
//标记
bis.mark(0);
//读取文件的前3个字符
int read = bis.read(first3Bytes, 0, 3);
if (read == -1) return charset;
if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {
charset = "UTF-16LE";
checked = true;
} else if (first3Bytes[0] == (byte) 0xFE && first3Bytes[1] == (byte) 0xFF) {
charset = "UTF-16BE";
checked = true;
} else if (first3Bytes[0] == (byte) 0xEF && first3Bytes[1] == (byte) 0xBB && first3Bytes[2] == (byte) 0xBF) {
charset = "UTF-8";
checked = true;
}
bis.reset();
if (!checked) {
int loc = 0;
while ((read = bis.read()) != -1) {
loc++;
if (read >= 0xF0) break;
if (0x80 <= read && read <= 0xBF) // 单独出现BF以下的,也算是GBK
break;
if (0xC0 <= read && read <= 0xDF) {
read = bis.read();
if (0x80 <= read && read <= 0xBF) // 双字节 (0xC0 - 0xDF) (0x80
continue;
else break;
} else if (0xE0 <= read && read <= 0xEF) {// 也有可能出错,但是几率较小
read = bis.read();
if (0x80 <= read && read <= 0xBF) {
read = bis.read();
if (0x80 <= read && read <= 0xBF) {
charset = "UTF-8";
break;
} else break;
} else break;
}
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
bis.close();
} catch (IOException e) {
e.printStackTrace();
}
}