From d27b8d75fab2059f8c72fd0d0a04d5613354ee2b Mon Sep 17 00:00:00 2001 From: "HuYingzhuo(hugo/hyzboy)" Date: Thu, 13 Jul 2023 14:30:08 +0800 Subject: [PATCH] added TextInputStream --- inc/hgl/io/TextInputStream.h | 78 +++++++++++++++++++++ src/CMakeLists.txt | 2 + src/IO/TextInputStream.cpp | 132 +++++++++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+) create mode 100644 inc/hgl/io/TextInputStream.h create mode 100644 src/IO/TextInputStream.cpp diff --git a/inc/hgl/io/TextInputStream.h b/inc/hgl/io/TextInputStream.h new file mode 100644 index 0000000..0411e12 --- /dev/null +++ b/inc/hgl/io/TextInputStream.h @@ -0,0 +1,78 @@ +#pragma once +#include +#include + +namespace hgl +{ + namespace io + { + /** + * 文本输入流
+         * It is not a counterpart of TextOutputStream; its main purpose is to make reading and parsing very large text convenient.
+         */
+        class TextInputStream
+        {
+        private:
+
+            InputStream *input_stream;                                          ///<input stream (the destructor of this class does not release it)
+
+            uint8 *buffer;                                                      ///<buffer
+            int32 buffer_size;                                                  ///<size of the buffer
+            int32 cur_buf_size;                                                 ///<size of the data currently in the buffer
+
+            int64 stream_pos,stream_size;                                       ///<current position in the stream / size of the stream
+
+            ByteOrderMask bom;                                                  ///<byte order mark (NOTE(review): original comment text was lost from this patch)
+
+        private:                                                                // NOTE(review): access specifier reconstructed, the original text was lost - confirm private vs protected
+
+            template<typename T> int Parse(const T *);                          ///<parse the buffered text as code units of type T
+
+            int TextBlockParse();                                               ///<parse one block of text
+
+        public:
+
+            TextInputStream(InputStream *i,const int buf_size=HGL_SIZE_1MB);
+            virtual ~TextInputStream()
+            {
+                SAFE_CLEAR_ARRAY(buffer);
+            }
+
+            virtual bool OnBOM(const ByteOrderMask &){return true;}             ///<callback for a BOM header having been read
+
+            /**
+             * Callback for a line of text having been read (ansi/utf8)
+             * @param text      the text that was read
+             * @param len       length of the text, in characters
+             * @param line_end  whether the current line has ended
+             */
+            virtual bool OnLine(const char *text,const int len,const bool line_end){return true;}
+
+            /**
+             * Callback for a line of text having been read (utf16le/utf16be)
+             * @param text      the text that was read
+             * @param len       length of the text, in characters
+             * @param line_end  whether the current line has ended
+             */
+            virtual bool OnLine(const u16char *text,const int len,const bool line_end){return true;}
+
+            /**
+             * Callback for a line of text having been read (utf32le/utf32be)
+             * @param text      the text that was read
+             * @param len       length of the text, in characters
+             * @param line_end  whether the current line has ended
+             */
+            virtual bool OnLine(const u32char *text,const int len,const bool line_end){return true;}
+
+            virtual bool OnEnd(){return true;}                                  ///<callback for the end of reading
+            virtual bool OnReadError(){return true;}                            ///<callback for a read error
+            virtual bool OnParseError(){return true;}                           ///<callback for a parse error
+
+            /**
+             * Run and parse the text
+             * @return number of text lines parsed
+             */
+            virtual int Run();
+        };//class TextInputStream
+    }//namespace io
+}//namespace hgl
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3255d4f..2e1e7d0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -114,9 +114,11 @@ SET(IO_FILE_FILES ${IO_INCLUDE_PATH}/FileAccess.h
 ${IO_INCLUDE_PATH}/FileInputStream.h
 ${IO_INCLUDE_PATH}/FileOutputStream.h
 ${IO_INCLUDE_PATH}/RandomAccessFile.h
+ ${IO_INCLUDE_PATH}/TextInputStream.h
 ${IO_INCLUDE_PATH}/TextOutputStream.h
 IO/FileAccess.cpp
 IO/FileInputStream.cpp
+ IO/TextInputStream.cpp
 IO/FileOutputStream.cpp
IO/RandomAccessFile.cpp)
diff --git a/src/IO/TextInputStream.cpp b/src/IO/TextInputStream.cpp
new file mode 100644
index 0000000..efe15f9
--- /dev/null
+++ b/src/IO/TextInputStream.cpp
@@ -0,0 +1,152 @@
+#include<hgl/io/TextInputStream.h>
+
+namespace hgl
+{
+    namespace io
+    {
+        /**
+         * Bind the reader to an input stream and allocate its block buffer.
+         * @param i         stream to read from; may be null, in which case Run() returns -1
+         * @param buf_size  number of uint8 elements allocated for the block buffer
+         */
+        TextInputStream::TextInputStream(InputStream *i,const int buf_size)
+        {
+            input_stream=i;
+            buffer_size=buf_size;
+            buffer=new uint8[buffer_size];
+            cur_buf_size=0;
+
+            stream_pos=0;
+
+            // Run() accepts a null stream, so the constructor must not dereference one.
+            stream_size=input_stream?input_stream->Available():0;
+
+            bom=ByteOrderMask::NONE;
+        }
+
+        /**
+         * Split the data currently in the buffer into lines and report each one through OnLine().
+         * '\n', '\r' and the pair "\r\n" each end one line; the terminator is not part of the reported text.
+         * Text after the last terminator is reported with line_end=false and is not counted as a line.
+         * NOTE(review): everything after "while(p<end)" was lost from this patch text; the loop is a reconstruction - confirm against upstream.
+         * NOTE(review): a "\r\n" pair split across two blocks is reported as two line ends, and the results of OnLine() are ignored.
+         * @param p first code unit to parse
+         * @return number of terminated lines found in the buffer
+         */
+        template<typename T> int TextInputStream::Parse(const T *p)
+        {
+            // Scan whole code units only, so a trailing partial unit is never read past the valid data.
+            const int64 byte_count=(buffer+cur_buf_size)-(const uint8 *)p;
+
+            const T *sp=p;
+            const T *end=p+byte_count/int64(sizeof(T));
+
+            int line_count=0;
+
+            while(p<end)
+            {
+                if(*p!='\r'&&*p!='\n')
+                {
+                    ++p;
+                    continue;
+                }
+
+                OnLine(sp,int(p-sp),true);
+                ++line_count;
+
+                if(*p=='\r'&&p+1<end&&p[1]=='\n')      // "\r\n" counts as a single terminator
+                    ++p;
+
+                ++p;
+                sp=p;
+            }
+
+            if(sp<end)
+                OnLine(sp,int(end-sp),false);           // unterminated tail of the block
+
+            return line_count;
+        }
+
+        /**
+         * Parse the block currently held in the buffer.
+         * For the block read at stream position 0 the BOM is detected and skipped;
+         * the rest of the block is parsed with the code unit type selected by the BOM (char when there is none).
+         * NOTE(review): the head of this function was lost from this patch text; the declaration of p and the stream_pos==0 guard are reconstructed - confirm against upstream.
+         * @return number of lines found in the block
+         */
+        int TextInputStream::TextBlockParse()
+        {
+            uint8 *p=buffer;
+
+            if(stream_pos==0)
+            {
+                if(cur_buf_size>=2)
+                {
+                    bom=CheckBOM(p);
+
+                    const BOMFileHeader *bfh=GetBOM(bom);
+
+                    if(bfh)
+                    {
+                        if(bfh->size==cur_buf_size)     // the block holds nothing but the BOM
+                            return(0);
+
+                        p+=bfh->size;
+                    }
+                }
+            }
+
+            if(bom==ByteOrderMask::UTF16LE||bom==ByteOrderMask::UTF16BE)
+                return Parse((u16char *)p);
+            else
+            if(bom==ByteOrderMask::UTF32LE||bom==ByteOrderMask::UTF32BE)
+                return Parse((u32char *)p);
+            else
+                return Parse((char *)p);
+        }
+
+        /**
+         * Read the stream block by block and parse every block.
+         * @return total number of lines parsed; -1 without an input stream or after a short read; a negative parse result is passed through
+         */
+        int TextInputStream::Run()
+        {
+            if(!input_stream)return(-1);
+
+            int64 read_size;
+
+            int result;
+            int line_count=0;
+
+            while(stream_pos<stream_size)
+            {
+                read_size=stream_size-stream_pos;
+
+                if(read_size>buffer_size)
+                    read_size=buffer_size;
+
+                cur_buf_size=input_stream->Read(buffer,read_size);
+
+                if(cur_buf_size!=read_size)
+                {
+                    OnReadError();
+                    return(-1);
+                }
+
+                result=TextBlockParse();
+
+                if(result<0)
+                {
+                    OnParseError();                     // the read succeeded, it is the parse that failed
+                    return(result);
+                }
+
+                line_count+=result;
+
+                stream_pos+=cur_buf_size;
+            }
+
+            return line_count;
+        }
+    }//namespace io
+}//namespace hgl