(整理二)读取大日志文件

Posted elivn

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了(整理二)读取大日志文件相关的知识,希望对你有一定的参考价值。

  一般读取文件有三种方式:

  1.读取到内存中;

  2.分块读取;

  3.采用内存映射技术。

  一.读取到内存中

  此种方式比较适合小文件,可以通过文件流的方式直接读取到内存中进行处理。

  二.分块读取

  当文件很大时(特别是文件大小大于内存大小),读取到内存中就很不合理。这种时候,我们可以将文件进行分块,然后进行分块读取。

 1 /// <summary>
 2 /// 读取大文件方法
 3 /// </summary>
 4 /// <param name="initialPath">原文件绝对地址</param>
 5 /// <param name="aimPath">新文件绝对地址</param>
 6 private static void CopyFile(string initialPath, string aimPath)
 7 {
 8     //1,创建一个读取文件的文件流
 9     using (FileStream fsRead = new FileStream(initialPath, FileMode.Open))
10     {
11         //4,创建一个写文件的文件流
12         using (FileStream fsWrite = new FileStream(aimPath, FileMode.Create))
13         {
14             //2,建立缓冲区
15             byte[] eByte = new byte[1024 * 1024 * 10];//每次读取的大小
16             while (true)
17             {
18                 int r = fsRead.Read(eByte, 0, eByte.Length);
19                 if (r <= 0)
20                 {
21                     break;
22                 }
23                 //3,r表示向流中写入,本次实际读取到文件的大小
24                 fsWrite.Write(eByte, 0, r);
25             }
26  
27  
28         }
29     }
30 }

  上面的代码只是简单地分块读取并复制,无法直接对文件内容进行分析。对于日志文件,我们不仅要读取,还要分析其内容,因此可以按文件中的特定字符(例如换行符 '\n')进行分块,具体使用哪个字符需要根据被读取的文件来决定。例如把文件分成 10 块:如果直接按字节数平均切分,必然会出现某一行被截断的情况。针对这种情况,可以先按平均大小初步定出 10 个位置,再从每个位置向前(倒着)查找 '\n';找到之后,'\n' 之前(包括 '\n')的内容就都是完整的行。这样就能准确地确定 10 个分块位置,然后用 Stream 的 Position 属性定位,即可分块读取。

  

string path = @"C:\\work\\project_log.sql";
byte[] arr = new byte[1024 * 4]; // maximum number of bytes to read
int total = 0;                   // number of bytes actually read into arr
using (var fs = File.OpenRead(path))
{
    // The key to reading part of a huge file: seek straight to the wanted offset.
    fs.Position = 5418579000;

    // Read may return fewer bytes than requested, and 0 at end of file,
    // so keep reading until the buffer is full or the file ends.
    int n;
    while (total < arr.Length && (n = fs.Read(arr, total, arr.Length - total)) > 0)
    {
        total += n;
    }
}
// Decode only the bytes that were read; the rest of the buffer is still zero-filled.
var str = Encoding.UTF8.GetString(arr, 0, total);
Console.WriteLine(str);
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.IO;
namespace CommonLib.Threading.IO.ReadFile
{
    /// <summary>
    /// Base class for reading one file with several threads.
    /// <see cref="Create"/> splits the file into consecutive ranges and
    /// <see cref="StartRead"/> hands every range to its own background thread.
    /// </summary>
    public abstract class FileReader
    {
        private string filePath;
        private List<FileReadPoint> readPoint = new List<FileReadPoint>();
        private bool isStart;
        private int threadCompleteCount;

        /// <summary>
        /// Raised by the worker thread that finishes last, once every range has been processed.
        /// </summary>
        public event EventHandler FileReadEnd;

        /// <summary>True once <see cref="StartRead"/> has started the worker threads.</summary>
        public bool IsStart
        {
            get { return isStart; }
        }

        /// <summary>Path of the file being read; replaced by the full path after <see cref="Create"/>.</summary>
        public string FilePath
        {
            get { return filePath; }
        }

        private FileReader()
        {
        }

        /// <summary>Creates a reader for the given file.</summary>
        /// <param name="filePath">Path of the file to read.</param>
        public FileReader(string filePath)
        {
            this.filePath = filePath;
        }

        /// <summary>
        /// Decides where one range starts and how long it is.
        /// On entry <paramref name="point"/> already carries the proposed start offset and the
        /// stream is positioned there; the implementation sets the range length (and may adjust the start).
        /// </summary>
        /// <param name="point">Range to fill in.</param>
        /// <param name="stream">Open stream over the file.</param>
        /// <param name="length">Total length of the file in bytes.</param>
        protected abstract void GetPoint(FileReadPoint point, FileStream stream, long length);

        /// <summary>
        /// Returns the offset at which reading should begin. The default is 0 (start of file).
        /// </summary>
        /// <param name="stream">Open stream over the file.</param>
        /// <returns>Offset of the first byte to read.</returns>
        protected virtual int SetStartPoint(FileStream stream)
        {
            return 0;
        }

        /// <summary>
        /// Processes one range of the file. Called on a worker thread, once per range.
        /// </summary>
        /// <param name="threadStream">Reader limited to the range assigned to this thread.</param>
        protected abstract void DoFileRead(ThreadStream threadStream);

        /// <summary>
        /// Builds the list of ranges that <see cref="StartRead"/> will process.
        /// </summary>
        /// <returns>True when the file exists and the ranges were built; false when the file does not exist.</returns>
        /// <exception cref="InvalidOperationException">
        /// <see cref="GetPoint"/> produced a range that does not move forward in the file.
        /// </exception>
        public bool Create()
        {
            FileInfo fileInfo = new FileInfo(filePath);
            if (!fileInfo.Exists)
            {
                return false;
            }

            filePath = fileInfo.FullName;

            // Read access is all that is needed; FileShare.ReadWrite lets another process keep writing the file.
            using (FileStream stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                readPoint.Clear();

                long startPoint = SetStartPoint(stream);
                long length = stream.Length;
                while (startPoint < length)
                {
                    stream.Position = startPoint;
                    FileReadPoint fPoint = new FileReadPoint();
                    fPoint.StartPoint = startPoint;
                    GetPoint(fPoint, stream, length);

                    // Clip the last range so it never runs past the end of the file.
                    if (fPoint.StartPoint + fPoint.ReadCount > length)
                    {
                        fPoint.ReadCount = length - fPoint.StartPoint;
                    }

                    long next = fPoint.StartPoint + fPoint.ReadCount;
                    if (next <= startPoint)
                    {
                        // Without forward progress this loop would never terminate.
                        throw new InvalidOperationException("GetPoint must return a range that advances past its start offset.");
                    }

                    readPoint.Add(fPoint);
                    startPoint = next;
                }
            }
            return true;
        }

        /// <summary>
        /// Starts one background thread per range created by <see cref="Create"/>.
        /// Does nothing when reading has already been started.
        /// </summary>
        public void StartRead()
        {
            if (isStart)
            {
                return;
            }

            threadCompleteCount = 0;
            foreach (FileReadPoint fp in readPoint)
            {
                Thread thread = new Thread(OnReadFile);
                thread.IsBackground = true;
                thread.SetApartmentState(ApartmentState.MTA);
                thread.Start(fp);
            }
            isStart = true;
        }

        // Thread entry point: opens a private stream, positions it at the range start and
        // lets the subclass process the range, then records completion.
        private void OnReadFile(object obj)
        {
            FileReadPoint fp = obj as FileReadPoint;
            if (fp != null)
            {
                using (FileStream stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                {
                    stream.Position = fp.StartPoint;
                    DoFileRead(new ThreadStream(stream, fp));
                }
            }

            // Always count the finished thread, whether or not a handler is subscribed right now;
            // otherwise the counter can never reach the number of ranges.
            bool allDone;
            lock (readPoint)
            {
                threadCompleteCount++;
                allDone = threadCompleteCount == readPoint.Count;
            }

            if (allDone)
            {
                // Copy the delegate so an unsubscribe on another thread cannot null it between
                // the check and the call, and invoke it outside the lock.
                EventHandler handler = FileReadEnd;
                if (handler != null)
                {
                    handler(this, EventArgs.Empty);
                }
            }
        }

    }

    /// <summary>
    /// Describes one contiguous range of a file: the offset where it starts and the number of bytes it covers.
    /// </summary>
    public class FileReadPoint
    {
        // Backing store for ReadCount; a new instance starts with a length of 1.
        private long readCount = 1L;

        /// <summary>Byte offset at which the range starts (0 for a new instance).</summary>
        public long StartPoint { get; set; }

        /// <summary>
        /// Number of bytes in the range. Assigning a value smaller than 1 is ignored
        /// and the previous value is kept.
        /// </summary>
        public long ReadCount
        {
            get { return readCount; }
            set
            {
                if (value < 1)
                {
                    return;
                }
                readCount = value;
            }
        }
    }

    /// <summary>
    /// Sequential reader limited to the range described by a <see cref="FileReadPoint"/>.
    /// Reading starts at the stream's current position; this class never seeks.
    /// </summary>
    public sealed class ThreadStream
    {
        private FileStream fileStream;
        private FileReadPoint fPoint;

        // Number of bytes of the range already handed out.
        private long currentCount = 0L;

        /// <summary>The range this reader is limited to.</summary>
        public FileReadPoint FPoint
        {
            get { return fPoint; }
        }

        private ThreadStream()
        {
        }

        /// <summary>Creates a reader over <paramref name="point"/> using an already positioned stream.</summary>
        /// <param name="stream">Stream to read from.</param>
        /// <param name="point">Range that limits how many bytes may be read.</param>
        public ThreadStream(FileStream stream, FileReadPoint point)
        {
            this.fileStream = stream;
            this.fPoint = point;
        }

        /// <summary>
        /// Reads every byte of the range that has not been read yet.
        /// </summary>
        /// <returns>
        /// The remaining bytes, or null when the range is already exhausted. The array is shorter
        /// than the remaining range only if the file ends early.
        /// </returns>
        public byte[] ReadAll()
        {
            long remaining = fPoint.ReadCount - currentCount;
            if (remaining <= 0)
            {
                return null;
            }

            byte[] data = new byte[remaining];
            int filled = Fill(data, data.Length);
            if (filled < data.Length)
            {
                // The file ended before the range did: return only real data, not zero padding.
                Array.Resize(ref data, filled);
            }
            currentCount = fPoint.ReadCount;
            return data;
        }

        /// <summary>
        /// Reads the next chunk of the range.
        /// </summary>
        /// <param name="block">Maximum number of bytes to read.</param>
        /// <returns>
        /// Up to <paramref name="block"/> bytes (fewer at the end of the range or of the file),
        /// or null when the range is already exhausted.
        /// </returns>
        public byte[] Read(int block)
        {
            long remaining = fPoint.ReadCount - currentCount;
            if (remaining <= 0)
            {
                return null;
            }

            int wanted = block > remaining ? (int)remaining : block;
            byte[] data = new byte[wanted];
            int filled = Fill(data, wanted);
            if (filled < wanted)
            {
                // End of file inside the range: trim the result and mark the range as finished
                // so the next call returns null instead of reading nothing forever.
                Array.Resize(ref data, filled);
                currentCount = fPoint.ReadCount;
            }
            else
            {
                currentCount += wanted;
            }
            return data;
        }

        // Stream.Read may return fewer bytes than requested, so keep reading until
        // 'count' bytes are stored or the stream reports end of file. Returns the bytes stored.
        private int Fill(byte[] buffer, int count)
        {
            int total = 0;
            while (total < count)
            {
                int n = fileStream.Read(buffer, total, count - total);
                if (n <= 0)
                {
                    break;
                }
                total += n;
            }
            return total;
        }

    }
}

  三.内存映射

  内存映射文件是利用虚拟内存把文件映射到进程的地址空间中去,在此之后进程操作文件,就像操作进程空间里的地址一样了,比如使用c语言的 memcpy等内存操作的函数。这种方法能够很好的应用在需要频繁处理一个文件或者是一个大文件的场合,这种方式处理IO效率比普通IO效率要高。

  内存映射文件包含虚拟内存中文件的内容。 借助文件和内存空间之间的这种映射,应用(包括多个进程)可以直接对内存执行读取和写入操作,从而修改文件。 自 .NET Framework 4 起,可以使用托管代码访问内存映射文件,就像本机 Windows 函数访问内存映射文件一样。

  内存映射文件分为两种类型:

  1.持久化内存映射文件

  持久化文件是与磁盘上的源文件相关联的内存映射文件。 当最后一个进程处理完文件时,数据保存到磁盘上的源文件中。 此类内存映射文件适用于处理非常大的源文件。

  2.非持久化内存映射文件

  非持久化文件是不与磁盘上的文件相关联的内存映射文件。 当最后一个进程处理完文件时,数据会丢失,且文件被垃圾回收器回收。 此类文件适合创建共享内存,以进行进程间通信 (IPC)。

  详情可看:https://docs.microsoft.com/zh-cn/dotnet/standard/io/memory-mapped-files

  

using System;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Text;

namespace ConsoleDemo
{
    /// <summary>
    /// Demo: looks up one line of a very large text file through a memory-mapped file
    /// and prints the line together with the time the lookup took.
    /// </summary>
    class Program
    {
        private const string TXT_FILE_PATH = @"E:\\work\\超大文本文件读取\\Filea.txt";

        // Number of bytes mapped per view. Mapping the file piece by piece keeps the
        // address-space usage bounded no matter how large the file is.
        private const long VIEW_SIZE = 64L * 1024 * 1024;

        private static long FILE_SIZE = 0;

        static void Main(string[] args)
        {
            long ttargetRowNum = 10000000;
            System.Diagnostics.Stopwatch watch = System.Diagnostics.Stopwatch.StartNew();
            string line = CreateMemoryMapFile(ttargetRowNum);
            double totalSeconds = watch.Elapsed.TotalSeconds;
            Console.WriteLine(line);
            Console.WriteLine(string.Format("查找第{0}行,共耗时:{1}s", ttargetRowNum, totalSeconds));
            Console.ReadLine();
        }

        /// <summary>
        /// Maps the text file into memory and returns the requested line.
        /// </summary>
        /// <param name="ttargetRowNum">1-based number of the line to return.</param>
        /// <returns>
        /// The line without its line terminator, decoded as UTF-8; an empty string when the
        /// line number is below 1, the file has fewer lines, or the file cannot be mapped.
        /// </returns>
        private static string CreateMemoryMapFile(long ttargetRowNum)
        {
            if (ttargetRowNum < 1)
            {
                return string.Empty;
            }

            FILE_SIZE = new FileInfo(TXT_FILE_PATH).Length;
            if (FILE_SIZE == 0)
            {
                // An empty file cannot be memory-mapped and has no lines anyway.
                return string.Empty;
            }

            try
            {
                // Capacity 0 maps the file at its current size; read access is sufficient.
                using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(TXT_FILE_PATH, FileMode.Open, null, 0, MemoryMappedFileAccess.Read))
                {
                    return FindLine(mmf, FILE_SIZE, ttargetRowNum);
                }
            }
            catch (IOException e)
            {
                Console.WriteLine(e.Message);
            }
            catch (UnauthorizedAccessException e)
            {
                Console.WriteLine(e.Message);
            }
            return string.Empty;
        }

        // Scans the mapped file view by view, counting '\n' bytes, and collects the bytes of
        // the target line. Working on bytes is safe for UTF-8 because the byte 0x0A never
        // occurs inside a multi-byte sequence, so view boundaries cannot corrupt characters.
        private static string FindLine(MemoryMappedFile mmf, long fileSize, long targetRow)
        {
            byte[] buffer = new byte[64 * 1024];
            long currentRow = 1;

            using (MemoryStream lineBytes = new MemoryStream())
            {
                for (long offset = 0; offset < fileSize; offset += VIEW_SIZE)
                {
                    long viewSize = Math.Min(VIEW_SIZE, fileSize - offset);
                    using (MemoryMappedViewStream view = mmf.CreateViewStream(offset, viewSize, MemoryMappedFileAccess.Read))
                    {
                        int read;
                        while ((read = view.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            for (int i = 0; i < read; i++)
                            {
                                byte current = buffer[i];
                                if (current == (byte)'\n')
                                {
                                    if (currentRow == targetRow)
                                    {
                                        return DecodeLine(lineBytes);
                                    }
                                    currentRow++;
                                }
                                else if (currentRow == targetRow)
                                {
                                    lineBytes.WriteByte(current);
                                }
                            }
                        }
                    }
                }

                // The last line of a file does not have to end with a newline.
                return currentRow == targetRow ? DecodeLine(lineBytes) : string.Empty;
            }
        }

        // Decodes the collected bytes as UTF-8 and strips a leading byte-order mark
        // (first line only) and the '\r' of a Windows line ending.
        private static string DecodeLine(MemoryStream lineBytes)
        {
            string text = Encoding.UTF8.GetString(lineBytes.GetBuffer(), 0, (int)lineBytes.Length);
            return text.TrimStart('\uFEFF').TrimEnd('\r');
        }
    }
}

测试截图:

技术分享图片

  下面的示例为极大文件的一部分创建内存映射视图,并控制其中一部分。

using System;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Runtime.InteropServices;

/// <summary>
/// Sample: maps part of a very large file into memory and brightens every colour value in that part.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        const long viewOffset = 0x10000000; // 256 megabytes
        const long viewLength = 0x20000000; // 512 megabytes

        // Create the memory-mapped file, then a random-access view that runs from the
        // 256th megabyte (the offset) to the 768th megabyte (offset plus length).
        using (MemoryMappedFile mappedImage = MemoryMappedFile.CreateFromFile(@"c:\\ExtremelyLargeImage.data", FileMode.Open, "ImgA"))
        using (MemoryMappedViewAccessor view = mappedImage.CreateViewAccessor(viewOffset, viewLength))
        {
            int stride = Marshal.SizeOf(typeof(MyColor));

            // Walk the view one MyColor at a time: read, modify, write back in place.
            long position = 0;
            while (position < viewLength)
            {
                MyColor pixel;
                view.Read(position, out pixel);
                pixel.Brighten(10);
                view.Write(position, ref pixel);
                position += stride;
            }
        }
    }
}

/// <summary>
/// Colour value made of four 16-bit signed channels.
/// </summary>
public struct MyColor
{
    public short Red;
    public short Green;
    public short Blue;
    public short Alpha;

    /// <summary>
    /// Adds <paramref name="value"/> to every channel. Each result is clamped to the range of
    /// <see cref="short"/>, so neither a large positive nor a negative amount wraps around.
    /// </summary>
    /// <param name="value">Amount added to each channel; may be negative.</param>
    public void Brighten(short value)
    {
        Red = Shift(Red, value);
        Green = Shift(Green, value);
        Blue = Shift(Blue, value);
        Alpha = Shift(Alpha, value);
    }

    // Adds in int (the sum of two shorts always fits) and clamps on both ends before narrowing.
    private static short Shift(short channel, short delta)
    {
        int sum = channel + delta;
        return (short)Math.Max(short.MinValue, Math.Min(short.MaxValue, sum));
    }
}

  下面的示例为另一个进程打开相同的内存映射文件。

 
using System;
using System.IO.MemoryMappedFiles;
using System.Runtime.InteropServices;


/// <summary>
/// Sample: a second process opens the memory-mapped file created elsewhere under the
/// name "ImgA" and brightens part of it.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Assumes another process has created the memory-mapped file.
        using (MemoryMappedFile sharedImage = MemoryMappedFile.OpenExisting("ImgA"))
        using (MemoryMappedViewAccessor view = sharedImage.CreateViewAccessor(4000000, 2000000))
        {
            int stride = Marshal.SizeOf(typeof(MyColor));

            // Only the first 1,500,000 bytes of the view are modified.
            long position = 0;
            while (position < 1500000)
            {
                MyColor pixel;
                view.Read(position, out pixel);
                pixel.Brighten(20);
                view.Write(position, ref pixel);
                position += stride;
            }
        }
    }
}

/// <summary>
/// Colour value made of four 16-bit signed channels.
/// </summary>
public struct MyColor
{
    public short Red;
    public short Green;
    public short Blue;
    public short Alpha;

    /// <summary>
    /// Adds <paramref name="value"/> to every channel. Each result is clamped to the range of
    /// <see cref="short"/>, so neither a large positive nor a negative amount wraps around.
    /// </summary>
    /// <param name="value">Amount added to each channel; may be negative.</param>
    public void Brighten(short value)
    {
        Red = Shift(Red, value);
        Green = Shift(Green, value);
        Blue = Shift(Blue, value);
        Alpha = Shift(Alpha, value);
    }

    // Adds in int (the sum of two shorts always fits) and clamps on both ends before narrowing.
    private static short Shift(short channel, short delta)
    {
        int sum = channel + delta;
        return (short)Math.Max(short.MinValue, Math.Min(short.MaxValue, sum));
    }
}

 

  共享内存是内存映射文件的一种特殊情况,内存映射的是一块内存,而非磁盘上的文件。共享内存的主语是进程(Process),操作系统默认会给每一个进程分配一个内存空间,每一个进程只允许访问操作系统分配给它的那一段内存,而不能访问其他进程的。而有时候需要在不同进程之间访问同一段内存,怎么办呢?操作系统给出了创建、访问共享内存的 API,需要共享内存的进程可以通过这一组定义好的 API 来访问多个进程之间共有的内存,各个进程访问这一段内存就像访问硬盘上的一个文件一样。.NET 4.0 中引入了 System.IO.MemoryMappedFiles 命名空间,这个命名空间中的类对 Windows 共享内存相关 API 做了封装,使 .NET 程序员可以更方便地使用内存映射文件。

  在C#中使用共享内存。以下App1的代码让用户输入一行文本到共享内存中;App2不停的刷新控制台,输出最新的共享内存内容;App3实现的功能和App2相同,但读取方法不同。

App1代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

using System.IO;

//引用内存映射文件命名空间
using System.IO.MemoryMappedFiles;

namespace App1
{
    /// <summary>
    /// Writer side of the shared-memory demo: every line typed on the console is stored in the
    /// shared memory block "testMmf". Layout: an Int32 character count at offset 0, followed by
    /// the characters (2 bytes each) starting at offset 4.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            long capacity = 1<<10<<10;

            // Largest text that fits behind the 4-byte length header.
            int maxChars = (int)((capacity - sizeof(int)) / sizeof(char));

            // Create or open the shared memory and a view over all of it; both are
            // released when the loop ends.
            using (var mmf = MemoryMappedFile.CreateOrOpen("testMmf", capacity, MemoryMappedFileAccess.ReadWrite))
            using (var viewAccessor = mmf.CreateViewAccessor(0, capacity))
            {
                // Keep writing so this process can publish different strings over time.
                while (true)
                {
                    Console.WriteLine("请输入一行要写入共享内存的文字:");

                    string input = Console.ReadLine();
                    if (input == null)
                    {
                        // Standard input was closed: there is nothing more to write.
                        break;
                    }

                    // Never write past the end of the shared block.
                    if (input.Length > maxChars)
                    {
                        input = input.Substring(0, maxChars);
                    }

                    // Length first (offset 0), then the characters (offset 4).
                    viewAccessor.Write(0, input.Length);
                    viewAccessor.WriteArray<char>(4, input.ToCharArray(), 0, input.Length);
                }
            }
        }
    }
}
App2代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;

//引用使用内存映射文件需要的命名空间
using System.IO.MemoryMappedFiles;

namespace App2
{
    /// <summary>
    /// Reader side of the shared-memory demo: every 200 ms the text stored in the shared
    /// memory block "testMmf" is read through a view accessor and shown on the console.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            long capacity = 1<<10<<10;

            // Largest character count that can follow the 4-byte length header.
            int maxChars = (int)((capacity - sizeof(int)) / sizeof(char));

            using (var mmf = MemoryMappedFile.OpenExisting("testMmf"))
            using (MemoryMappedViewAccessor viewAccessor = mmf.CreateViewAccessor(0, capacity))
            {
                // Refresh the displayed string in a loop.
                while (true)
                {
                    // Character count written at offset 0. The block is shared with another
                    // process, so clamp the value instead of trusting it.
                    int strLength = viewAccessor.ReadInt32(0);
                    if (strLength < 0)
                    {
                        strLength = 0;
                    }
                    else if (strLength > maxChars)
                    {
                        strLength = maxChars;
                    }

                    // Characters start at offset 4.
                    char[] charsInMMf = new char[strLength];
                    viewAccessor.ReadArray<char>(4, charsInMMf, 0, strLength);

                    Console.Clear();
                    Console.Write(charsInMMf);
                    // Carriage return: move the cursor back to the start of the line.
                    Console.Write("\r");
                    Thread.Sleep(200);
                }
            }
        }
    }
}
App3代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

using System.IO.MemoryMappedFiles;
using System.IO;

namespace App3
{
    /// <summary>
    /// Reader side of the shared-memory demo, stream variant: every 200 ms the text stored in the
    /// shared memory block "testMmf" is read through a view stream and shown on the console.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            long capacity = 1 << 10 << 10;

            // Largest character count that can follow the 4-byte length header.
            int maxChars = (int)((capacity - sizeof(int)) / sizeof(char));

            // Open the shared memory and expose it as a stream.
            using (var mmf = MemoryMappedFile.OpenExisting("testMmf"))
            using (var mmViewStream = mmf.CreateViewStream(0, capacity))
            // The text is stored as 2-byte chars, so the reader must decode with Encoding.Unicode.
            using (BinaryReader rdr = new BinaryReader(mmViewStream, Encoding.Unicode))
            {
                while (true)
                {
                    // Each refresh starts again at the length header.
                    mmViewStream.Seek(0, SeekOrigin.Begin);

                    // The block is shared with another process, so clamp the length
                    // instead of trusting it.
                    int length = rdr.ReadInt32();
                    if (length < 0)
                    {
                        length = 0;
                    }
                    else if (length > maxChars)
                    {
                        length = maxChars;
                    }

                    char[] chars = rdr.ReadChars(length);

                    Console.Write(chars);
                    // Carriage return: move the cursor back to the start of the line.
                    Console.Write("\r");

                    System.Threading.Thread.Sleep(200);
                    Console.Clear();
                }
            }
        }
    }
}

在读数据时用了2种方法。

因为之前很少用到进程之间的通信,所以这里只是想对此方法做一个初步的认识。此程序写得过于简陋,有很多情况都没有判断,比如共享内存是如何创建的、又该如何删除它等等。

 

  附:.NET 4.0 之前通过调用 WinAPI 实现内存映射的代码

using System;
using System.Collections.Generic;
using System.Text;
using System.Runtime.InteropServices;

namespace BlueVision.SaYuan.FileMapping
{
    /// <summary>
    /// Block-by-block, read-only access to a large file through the Win32 file-mapping API.
    /// Usage: construct the object, call <see cref="GetNextblock"/> until it returns 0, copy data
    /// out of the current block with <see cref="ReadBytes"/> or Read, and finally call <see cref="Close"/>.
    /// </summary>
    public class ShareMemory
    {
        [DllImport( "user32.dll", CharSet = CharSet.Auto )]
        public static extern IntPtr SendMessage( IntPtr hWnd, int Msg, int wParam, IntPtr lParam );

        // SetLastError = true makes the Win32 error code available through Marshal.GetLastWin32Error().
        [DllImport( "Kernel32.dll", CharSet = CharSet.Auto, SetLastError = true )]
        public static extern IntPtr CreateFileMapping( IntPtr hFile, IntPtr lpAttributes, uint flProtect, uint dwMaxSizeHi, uint dwMaxSizeLow, string lpName );

        [DllImport( "Kernel32.dll", CharSet = CharSet.Auto, SetLastError = true )]
        public static extern IntPtr OpenFileMapping( int dwDesiredAccess, [MarshalAs( UnmanagedType.Bool )] bool bInheritHandle, string lpName );

        [DllImport( "Kernel32.dll", CharSet = CharSet.Auto, SetLastError = true )]
        public static extern IntPtr MapViewOfFile( IntPtr hFileMapping, uint dwDesiredAccess, uint dwFileOffsetHigh, uint dwFileOffsetLow, uint dwNumberOfBytesToMap );

        [DllImport( "Kernel32.dll", CharSet = CharSet.Auto, SetLastError = true )]
        public static extern bool UnmapViewOfFile( IntPtr pvBaseAddress );

        [DllImport( "Kernel32.dll", CharSet = CharSet.Auto, SetLastError = true )]
        public static extern bool CloseHandle( IntPtr handle );

        // Kept for existing callers. This class itself uses Marshal.GetLastWin32Error(), because the
        // runtime may call other Win32 functions between the failing call and a P/Invoke of GetLastError.
        [DllImport( "kernel32", EntryPoint = "GetLastError" )]
        public static extern int GetLastError();

        [DllImport( "kernel32.dll" )]
        static extern void GetSystemInfo( out SYSTEM_INFO lpSystemInfo );

        /// <summary>Managed layout of the Win32 SYSTEM_INFO structure filled in by GetSystemInfo.</summary>
        [StructLayout( LayoutKind.Sequential )]
        public struct SYSTEM_INFO
        {
            public ushort processorArchitecture;
            ushort reserved;
            public uint pageSize;
            public IntPtr minimumApplicationAddress;
            public IntPtr maximumApplicationAddress;
            public IntPtr activeProcessorMask;
            public uint numberOfProcessors;
            public uint processorType;
            public uint allocationGranularity;
            public ushort processorLevel;
            public ushort processorRevision;
        }

        /// <summary>
        /// Returns the system allocation granularity reported by GetSystemInfo.
        /// </summary>
        /// <returns>The allocation granularity in bytes.</returns>
        public static uint GetPartitionsize()
        {
            SYSTEM_INFO sysInfo;
            GetSystemInfo( out sysInfo );
            return sysInfo.allocationGranularity;
        }

        const int ERROR_ALREADY_EXISTS = 183;

        // Desired-access flags for MapViewOfFile.
        const int FILE_MAP_COPY = 0x0001;
        const int FILE_MAP_WRITE = 0x0002;
        const int FILE_MAP_READ = 0x0004;
        const int FILE_MAP_ALL_ACCESS = 0x0002 | 0x0004;

        // Page-protection flags for CreateFileMapping.
        const int PAGE_READONLY = 0x02;
        const int PAGE_READWRITE = 0x04;
        const int PAGE_WRITECOPY = 0x08;
        const int PAGE_EXECUTE = 0x10;
        const int PAGE_EXECUTE_READ = 0x20;
        const int PAGE_EXECUTE_READWRITE = 0x40;

        // Section attributes for CreateFileMapping.
        const int SEC_COMMIT = 0x8000000;
        const int SEC_IMAGE = 0x1000000;
        const int SEC_NOCACHE = 0x10000000;
        const int SEC_RESERVE = 0x4000000;

        // Raw handle of the opened file.
        IntPtr m_fHandle;

        // Handle of the file-mapping object.
        IntPtr m_hSharedMemoryFile = IntPtr.Zero;
        // Base address of the currently mapped block; IntPtr.Zero when no block is mapped.
        IntPtr m_pwData = IntPtr.Zero;
        // True when a mapping object with the same name already existed.
        bool m_bAlreadyExist = false;
        bool m_bInit = false;
        // Block size in bytes; replaced by the actual size of the current block once one is mapped.
        uint m_MemSize = 0x1400000;//20M
        // File offset at which the next block starts.
        long m_offsetBegin = 0;
        long m_FileSize = 0;
        FileReader File = new FileReader();


        /// <summary>
        /// Opens the file and creates its mapping object with a caller-chosen block size.
        /// </summary>
        /// <param name="filename">Path of the file to map.</param>
        /// <param name="memSize">
        /// Block size in bytes. Every block must start at a multiple of the system allocation
        /// granularity (64 KB on Windows), so this value has to be a multiple of it whenever the
        /// file is read in more than one block.
        /// </param>
        public ShareMemory( string filename, uint memSize )
        {
            this.m_MemSize = memSize;
            Init( filename );
        }


        /// <summary>
        /// Opens the file and creates its mapping object with the default block size of 20 MB.
        /// </summary>
        /// <param name="filename">Path of the file to map.</param>
        public ShareMemory( string filename )
            : this( filename, 0x1400000 )
        {
        }

        ~ShareMemory()
        {
            Close();
        }

        /// <summary>
        /// Opens the file and creates the read-only file-mapping object for it.
        /// </summary>
        /// <param name="strName">Path of the file to map.</param>
        /// <exception cref="Exception">The file does not exist or the mapping could not be created.</exception>
        protected void Init( string strName )
        {
            if ( !System.IO.File.Exists( strName ) ) throw new Exception( "未找到文件" );

            System.IO.FileInfo f = new System.IO.FileInfo( strName );

            m_FileSize = f.Length;

            m_fHandle = File.Open( strName );

            // Pass the size as a full 64-bit value split into two words; passing only the low
            // word would truncate the mapping for files of 4 GB and more.
            // NOTE(review): the mapping name "mdata" is fixed, so two instances that are alive at
            // the same time share one mapping object — confirm this is intended.
            m_hSharedMemoryFile = CreateFileMapping( m_fHandle, IntPtr.Zero, ( uint )PAGE_READONLY, GetHighWord( ( UInt64 )m_FileSize ), GetLowWord( ( UInt64 )m_FileSize ), "mdata" );
            int lastError = Marshal.GetLastWin32Error();
            if ( m_hSharedMemoryFile == IntPtr.Zero )
            {
                m_bAlreadyExist = false;
                m_bInit = false;

                // Do not leak the file handle when the mapping cannot be created.
                File.Close();
                m_fHandle = IntPtr.Zero;
                throw new Exception( "CreateFileMapping失败LastError=" + lastError.ToString() );
            }

            // CreateFileMapping succeeds with ERROR_ALREADY_EXISTS when it returns an existing named mapping.
            m_bAlreadyExist = lastError == ERROR_ALREADY_EXISTS;
            m_bInit = true;
        }

        /// <summary>
        /// Returns the upper 32 bits of a 64-bit value.
        /// </summary>
        /// <param name="intValue">Value to split.</param>
        /// <returns>Bits 32 to 63.</returns>
        private static uint GetHighWord( UInt64 intValue )
        {
            return ( uint )( intValue >> 32 );
        }

        /// <summary>
        /// Returns the lower 32 bits of a 64-bit value.
        /// </summary>
        /// <param name="intValue">Value to split.</param>
        /// <returns>Bits 0 to 31.</returns>
        private static uint GetLowWord( UInt64 intValue )
        {
            return ( uint )( intValue & 0x00000000FFFFFFFF );
        }

        /// <summary>
        /// Maps the next block of the file and makes it the current block. The previously
        /// mapped block, if any, is unmapped first.
        /// </summary>
        /// <returns>Size of the mapped block in bytes; 0 when the end of the file has been reached.</returns>
        /// <exception cref="Exception">The object is not initialized or the block could not be mapped.</exception>
        public uint GetNextblock()
        {
            if ( !this.m_bInit ) throw new Exception( "文件未初始化。" );

            uint m_Size = GetMemberSize();
            if ( m_Size == 0 ) return m_Size;

            // From here on the block size is the size of the block being mapped.
            m_MemSize = m_Size;

            // Release the previous view; otherwise every call keeps another block of address space mapped.
            ReleaseView();

            m_pwData = MapViewOfFile( m_hSharedMemoryFile, FILE_MAP_READ, GetHighWord( ( UInt64 )m_offsetBegin ), GetLowWord( ( UInt64 )m_offsetBegin ), m_Size );
            if ( m_pwData == IntPtr.Zero )
            {
                int lastError = Marshal.GetLastWin32Error();
                m_bInit = false;
                throw new Exception( "映射文件块失败" + lastError.ToString() );
            }
            m_offsetBegin = m_offsetBegin + m_Size;

            return m_Size;
        }

        /// <summary>
        /// Computes the size of the next block.
        /// </summary>
        /// <returns>The block size, the remaining byte count for the last block, or 0 at end of file.</returns>
        private uint GetMemberSize()
        {
            if ( m_offsetBegin >= m_FileSize )
            {
                return 0;
            }

            long remaining = m_FileSize - m_offsetBegin;
            return remaining <= m_MemSize ? ( uint )remaining : m_MemSize;
        }

        // Unmaps the current block, if there is one.
        private void ReleaseView()
        {
            if ( m_pwData != IntPtr.Zero )
            {
                UnmapViewOfFile( m_pwData );
                m_pwData = IntPtr.Zero;
            }
        }

        /// <summary>
        /// Unmaps the current block and closes the mapping object and the file.
        /// Safe to call more than once, and also after a failed mapping.
        /// </summary>
        public void Close()
        {
            ReleaseView();

            if ( m_hSharedMemoryFile != IntPtr.Zero )
            {
                CloseHandle( m_hSharedMemoryFile );
                m_hSharedMemoryFile = IntPtr.Zero;
            }

            if ( m_fHandle != IntPtr.Zero )
            {
                File.Close();
                m_fHandle = IntPtr.Zero;
            }

            m_bInit = false;

            // Everything is released; the finalizer has nothing left to do.
            GC.SuppressFinalize( this );
        }

        /// <summary>
        /// Copies bytes from the start of the current block into <paramref name="bytData"/> and
        /// optionally unmaps the block afterwards.
        /// </summary>
        /// <param name="bytData">Destination array.</param>
        /// <param name="lngAddr">Index in <paramref name="bytData"/> at which the copied bytes are stored.</param>
        /// <param name="lngSize">Number of bytes to copy; at most the block size.</param>
        /// <param name="Unmap">True to unmap the current block once the copy is done.</param>
        /// <exception cref="Exception">The request exceeds the block or the object is not initialized.</exception>
        /// <exception cref="InvalidOperationException">No block is currently mapped.</exception>
        public void Read( ref byte[] bytData, int lngAddr, int lngSize, bool Unmap )
        {
            Read( ref bytData, lngAddr, lngSize );

            if ( Unmap )
            {
                ReleaseView();
            }
        }

        /// <summary>
        /// Copies bytes from the start of the current block into <paramref name="bytData"/>.
        /// The copy always begins at the first byte of the block; use <see cref="ReadBytes"/>
        /// to read from an offset inside the block.
        /// </summary>
        /// <param name="bytData">Destination array.</param>
        /// <param name="lngAddr">Index in <paramref name="bytData"/> at which the copied bytes are stored.</param>
        /// <param name="lngSize">Number of bytes to copy; at most the block size.</param>
        /// <exception cref="Exception">The request exceeds the block or the object is not initialized.</exception>
        /// <exception cref="InvalidOperationException">No block is currently mapped.</exception>
        public void Read( ref byte[] bytData, int lngAddr, int lngSize )
        {
            // Add in 64 bits so that two large ints cannot overflow past the check.
            if ( ( long )lngAddr + lngSize > m_MemSize )
                throw new Exception( "Read操作超出数据区" );
            if ( !m_bInit )
                throw new Exception( "文件未初始化" );
            if ( m_pwData == IntPtr.Zero )
                throw new InvalidOperationException( "No block is mapped; call GetNextblock first." );

            Marshal.Copy( m_pwData, bytData, lngAddr, lngSize );
        }

        /// <summary>
        /// Copies bytes from an offset inside the current block into <paramref name="byteData"/>.
        /// </summary>
        /// <param name="lngAddr">Offset inside the current block at which reading starts.</param>
        /// <param name="byteData">Destination array.</param>
        /// <param name="StartIndex">Index in <paramref name="byteData"/> at which the copied bytes are stored.</param>
        /// <param name="intSize">Number of bytes wanted; shortened when it would run past the end of the block.</param>
        /// <returns>Number of bytes actually copied.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="lngAddr"/> is negative.</exception>
        /// <exception cref="Exception">The offset lies outside the block or the object is not initialized.</exception>
        /// <exception cref="InvalidOperationException">No block is currently mapped.</exception>
        public uint ReadBytes( int lngAddr, ref byte[] byteData, int StartIndex, uint intSize )
        {
            if ( lngAddr < 0 )
                throw new ArgumentOutOfRangeException( "lngAddr" );

            if ( lngAddr >= m_MemSize )
                throw new Exception( "起始数据超过缓冲区长度" );

            // Shorten the request so it ends at the end of the block.
            if ( lngAddr + intSize > m_MemSize )
                intSize = m_MemSize - ( uint )lngAddr;

            if ( !m_bInit )
                throw new Exception( "文件未初始化" );
            if ( m_pwData == IntPtr.Zero )
                throw new InvalidOperationException( "No block is mapped; call GetNextblock first." );

            IntPtr s = new IntPtr( m_pwData.ToInt64() + lngAddr ); // address of the first byte to copy
            Marshal.Copy( s, byteData, StartIndex, ( int )intSize );

            return intSize;
        }

        /// <summary>
        /// Copies bytes from <paramref name="bytData"/> to the start of the current block.
        /// NOTE(review): blocks are mapped with FILE_MAP_READ on a PAGE_READONLY mapping, so this
        /// copy is not expected to succeed; the method is not called anywhere in this class.
        /// </summary>
        /// <param name="bytData">Source array.</param>
        /// <param name="lngAddr">Index in <paramref name="bytData"/> of the first byte to copy.</param>
        /// <param name="lngSize">Number of bytes to copy.</param>
        /// <returns>0 on success, 1 when no block is available, 2 when the request exceeds the block.</returns>
        private int Write( byte[] bytData, int lngAddr, int lngSize )
        {
            if ( ( long )lngAddr + lngSize > m_MemSize ) return 2;
            if ( !m_bInit || m_pwData == IntPtr.Zero ) return 1;

            Marshal.Copy( bytData, lngAddr, m_pwData, lngSize );
            return 0;
        }
    }
    /// <summary>
    /// Thin wrapper around the Win32 CreateFile and CloseHandle functions: opens an existing
    /// file for reading and keeps the raw handle until <see cref="Close"/> is called.
    /// </summary>
    internal class FileReader
    {
        const uint GENERIC_READ = 0x80000000;
        const uint OPEN_EXISTING = 3;

        // CreateFile signals failure with INVALID_HANDLE_VALUE (-1), not with a null handle.
        static readonly System.IntPtr INVALID_HANDLE_VALUE = new System.IntPtr( -1 );

        // Handle of the opened file; IntPtr.Zero while no file is open.
        System.IntPtr handle;

        // CharSet.Unicode selects CreateFileW so that paths with non-ANSI characters work.
        [DllImport( "kernel32", SetLastError = true, CharSet = CharSet.Unicode )]
        public static extern System.IntPtr CreateFile(
            string FileName,          // file name
            uint DesiredAccess,       // access mode
            uint ShareMode,           // share mode
            uint SecurityAttributes,  // Security Attributes
            uint CreationDisposition, // how to create
            uint FlagsAndAttributes,  // file attributes
            int hTemplateFile         // handle to template file
        );

        [System.Runtime.InteropServices.DllImport( "kernel32", SetLastError = true )]
        static extern bool CloseHandle
        (
            System.IntPtr hObject // handle to object
        );

        /// <summary>
        /// Opens an existing file for reading.
        /// </summary>
        /// <param name="FileName">Path of the file to open.</param>
        /// <returns>The raw Win32 file handle.</returns>
        /// <exception cref="Exception">The file could not be opened.</exception>
        public IntPtr Open( string FileName )
        {
            // Open the existing file for reading; share mode 0 means no sharing.
            System.IntPtr opened = CreateFile
            (
                FileName,
                GENERIC_READ,
                0,
                0,
                OPEN_EXISTING,
                0,
                0
            );

            if ( opened == INVALID_HANDLE_VALUE || opened == System.IntPtr.Zero )
            {
                // Remember that nothing is open so Close does not touch an invalid handle.
                handle = System.IntPtr.Zero;
                throw new Exception( "打开文件失败" );
            }

            handle = opened;
            return handle;
        }

        /// <summary>
        /// Closes the file handle if one is open.
        /// </summary>
        /// <returns>True when a handle was open and was closed successfully; otherwise false.</returns>
        public bool Close()
        {
            if ( handle == System.IntPtr.Zero || handle == INVALID_HANDLE_VALUE )
            {
                return false;
            }

            bool closed = CloseHandle( handle );

            // Forget the handle so a second Close cannot close it again.
            handle = System.IntPtr.Zero;
            return closed;
        }
    }
}

 

  注:以上部分转自其他人博客的代码。

  https://www.cnblogs.com/elivn/p/9105741.html

以上是关于(整理二)读取大日志文件的主要内容,如果未能解决你的问题,请参考以下文章

优化Scala代码以读取不适合内存的大文件的有效方法

我的Android进阶之旅NDK开发之在C++代码中使用Android Log打印日志,打印出C++的函数耗时以及代码片段耗时详情

如何有效地读取非常大的 gzip 压缩日志文件的最后一行?

php读取大文件如日志文件

python中使用pyspark 读取和整理日志数据并将数据写入到es中去

运维小分享整理Apache日志