Java读取文件字符集示例方法

时间:2023-11-09 12:35:39 


public static String getCharset(File file) {
        String charset = "GBK";
        byte[] first3Bytes = new byte[3];
        try {
            boolean checked = false;
            BufferedInputStream bis = new BufferedInputStream(
                  new FileInputStream(file));
            bis.mark(0);
            int read = bis.read(first3Bytes, 0, 3);
            if (read == -1)
                return charset;
            if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {
                charset = "UTF-16LE";
                checked = true;
            } else if (first3Bytes[0] == (byte) 0xFE && first3Bytes[1]
                == (byte) 0xFF) {
                charset = "UTF-16BE";
                checked = true;
            } else if (first3Bytes[0] == (byte) 0xEF && first3Bytes[1]
                    == (byte) 0xBB
                    && first3Bytes[2] == (byte) 0xBF) {
                charset = "UTF-8";
                checked = true;
            }
            bis.reset();
            if (!checked) {
                int loc = 0;
                while ((read = bis.read()) != -1) {
                    loc++;
                    if (read >= 0xF0)
                        break;
                    //单独出现BF以下的,也算是GBK
                    if (0x80 <= read && read <= 0xBF)
                        break;
                    if (0xC0 <= read && read <= 0xDF) {
                        read = bis.read();
                        if (0x80 <= read && read <= 0xBF)// 双字节 (0xC0 - 0xDF)
                            // (0x80 -
                            // 0xBF),也可能在GB编码内
                            continue;
                        else
                            break;
                     // 也有可能出错,但是几率较小
                    } else if (0xE0 <= read && read <= 0xEF) {
                        read = bis.read();
                        if (0x80 <= read && read <= 0xBF) {
                            read = bis.read();
                            if (0x80 <= read && read <= 0xBF) {
                                charset = "UTF-8";
                                break;
                            } else
                                break;
                        } else
                            break;
                    }
                }
                System.out.println(loc + " " + Integer.toHexString(read));
            }
            bis.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return charset;
    }

标签:java,文件,字符集
0
投稿

猜你喜欢

  • Android绘图常用方法汇总

    2023-11-19 20:06:43
  • 一文带你搞懂Java中Get和Post的使用

    2023-04-16 23:57:24
  • Android Retrofit的使用详解

    2022-12-11 01:28:37
  • C#将时间转成文件名使用方法

    2022-08-15 05:59:22
  • android实现滚动文本效果

    2022-02-02 16:49:36
  • Android recyclerview实现纵向虚线时间轴的示例代码

    2023-08-23 07:03:39
  • 基于Spring Boot不同的环境使用不同的配置方法

    2022-11-06 13:21:20
  • ReentrantLock获取锁释放锁的流程示例分析

    2021-08-05 20:51:10
  • Spring AOP实现打印HTTP接口出入参日志

    2021-10-09 13:38:37
  • Spring Boot和Thymeleaf整合结合JPA实现分页效果(实例代码)

    2023-11-25 07:05:15
  • C#将Excel中的数据转换成DataSet

    2021-10-29 18:20:55
  • C#子线程执行完后通知主线程的方法

    2022-02-26 20:15:40
  • Android App中使用ViewPager+Fragment实现滑动切换效果

    2023-01-12 19:51:50
  • Java 数组高频考点分析讲解

    2021-09-01 13:14:36
  • Springboot实现给前端返回一个tree结构方法

    2022-04-02 23:37:07
  • SpringMVC中Controller类数据响应的方法

    2021-10-07 21:00:08
  • C#使用Socket实现本地多人聊天室

    2022-01-06 12:47:27
  • 利用Android实现光影流动特效的方法详解

    2023-09-03 01:22:31
  • Java设计模式中的命令模式

    2023-11-20 04:26:46
  • 如何使用SpringSecurity保护程序安全

    2022-09-08 19:57:50
  • asp之家 软件编程 m.aspxhome.com