cpp: read and write utf-8 text file

Posted ®Geovin Du Dream Park™

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了cpp: read and write utf-8 text file相关的知识,希望对你有一定的参考价值。

 

/*****************************************************************//**
 * \file   geovindu.h
 * \brief  业务操作方法
 *
 * \author geovindu,Geovin Du
 * \date   2023-04-22
***********************************************************************/
/**
 * https://learn.microsoft.com/zh-cn/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8?view=msvc-170
 * 
 * .
 */


#pragma once

#define _UNICODE

#ifndef GEOVINDU_H
#define GEOVINDU_H

#include <iostream>
#include <windows.h>
#include<string>
#include<string.h>
#include<fstream>
#include<stdio.h>
#include<cstdlib>
#include<cstring>
#include<iomanip>
#include <iostream>
#include <windows.h>


namespace geovindu
{

	/// <summary>
	/// Groups the UTF-8 text-file demo operations.
	/// </summary>
	class Geovin
	{
	public:
		// Helper declarations kept from the original header for reference.
		// They were already disabled there and are not members of this class:
		//   string to_utf8(const wchar_t* buffer, int len);
		//   string to_utf8(const wstring& str);
		//   void createFile(wstring& str);

		/// <summary>
		/// Writes text out as a UTF-8 text file.
		/// </summary>
		void createFile();
	};

} // namespace geovindu


#endif
#define UNICODE

  

#define _UNICODE

#include <iostream>
#include <windows.h>
#include<string>
#include<string.h>
#include<fstream>
#include<stdio.h>
#include<cstdlib>
#include<cstring>
#include<iomanip>
#include "geovindu.h"


using namespace std;

namespace geovindu


	/// <summary>
	/// 
	/// </summary>
	/// <param name="buffer"></param>
	/// <param name="len"></param>
	/// <returns></returns>
	string to_utf8(const wchar_t* buffer, int len)
	
		int nChars = ::WideCharToMultiByte(
			CP_UTF8,
			0,
			buffer,
			len,
			NULL,
			0,
			NULL,
			NULL);
		if (nChars == 0) return "";
		string newbuffer;
		newbuffer.resize(nChars);
		::WideCharToMultiByte(
			CP_UTF8,
			0,
			buffer,
			len,
			const_cast<char*>(newbuffer.c_str()),
			nChars,
			NULL,
			NULL);

		return newbuffer;
	
	/// <summary>
	/// 
	/// </summary>
	/// <param name="str"></param>
	/// <returns></returns>
	string to_utf8(const wstring& str)
	
		return to_utf8(str.c_str(), (int)str.size());
	
	/// <summary>
	/// 
	/// </summary>
	void createFile(wstring& strchinese)
	

		ofstream testFile;

		testFile.open("demoinput.txt", std::ios::out | std::ios::binary);

		//std::wstring text = strchinese;			

		std::string outtext = to_utf8(strchinese);

		testFile << outtext;

		testFile.close();

	
	///<summary>
	/// 现有的文本写成UTF-8文本文件
	///</summary>
	void Geovin::createFile()
	

		ofstream testFile;

		testFile.open("geovindudemo.txt", std::ios::out | std::ios::binary);

		std::wstring text =
			L"涂聚文,你好,世界欢迎你!동생은 점수를 많이 땄어요\\t geovindu\\n Geovin Du \\nНематериальное наследие водной рифмы\\n"
			L"奇松・怪石・雲海と温泉\\t大黄河を望む炳霊寺、驚異の張掖丹霞とシルクロードの要所9日間\\n"
			L"Tours más solicitados\\tParaíso en la Tierra - 13 Días\\n"
			L"Entdecken Sie die schönsten Reiseziele von China mit unseren empfohlenen Touren.\\n"
			L"Explorez les destinations les plus étonnantes de la Chine avec les visites recommandées.\\n"
			L"\\n";

		std::string outtext = to_utf8(text);

		testFile << outtext;

		testFile.close();

	

;

#define UNICODE

  

// ConsoleTextFileDemoApp.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//geovindu Geovin Du
#define _UNICODE
#define _CRT_SECURE_NO_WARNINGS


#include <iostream>
#include <windows.h>
#include <string>
#include <string.h>
#include <fstream>
#include <stdio.h>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <cstdio>
#include <codecvt>
#include <assert.h>
#include <windows.h>
#include <iostream>
#include <fstream>
#include <io.h>
#include <vector>

#include "ConvertEncode.h"
#include "geovindu.h"
#include "FileHelper.h"


using namespace std;
using namespace geovindu;





/// <summary>
/// 写成UTF-8文本文件
/// </summary>
/// <summary>
/// Writes the given wide string as UTF-8 to two files: "geovinduinput.txt"
/// (opened in binary mode) and "geovinduinput2.txt" (opened in text mode).
/// </summary>
/// <param name="strchinese">Text to write; it is only read, never modified.</param>
void createFile(std::wstring& strchinese)
{
	// Convert once and reuse the bytes for both files.
	std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> convert;
	const std::string utf8Text = convert.to_bytes(strchinese);

	{
		std::ofstream binaryFile("geovinduinput.txt", std::ios::out | std::ios::binary);
		if (binaryFile)
			binaryFile << utf8Text;
		else
			std::cerr << "Fail to open geovinduinput.txt for writing!" << std::endl;
	}

	{
		// Text mode, as in the original second copy; the content is still UTF-8.
		std::ofstream textFile("geovinduinput2.txt");
		if (textFile)
			textFile << utf8Text;
		else
			std::cerr << "Fail to open geovinduinput2.txt for writing!" << std::endl;
	}
}


/// <summary>
/// 读文写文件 utf-8的文本文件
/// </summary>
void readfile()

	ConvertEncode encode;
	char sname[50];
	string stuID;//学号
	int num;//编号
	double english;//英语成绩
	double math;//数学成绩
	double cpp;//C++成绩
	vector<string> lines;
	string line;
	ifstream fin;
	fin.open("geovinduinput.txt", ios::in); //utf-8文件读
	if (!fin)
	
		cout << "Fail to open the file!" << endl;
		exit(0);
	

	//创建链表,并保存数据
	while (1)
	
		if (!(fin >> sname >> stuID >> english >> math >> cpp))//从文件中读取数据 中文没有读出来
		
			break;
		
		else
		
			cout << encode.UTF8ToGBDu(sname) << "\\t" << stuID << "\\t" << english << "\\t" << math << "\\t" << cpp << endl;
		
	

	while (getline(fin, line)) 
		lines.push_back(line);
	
	fin.close();
	//cout << encode.UTF8ToGBDu(sname) << "\\t" << stuID << "\\t" << english << "\\t" << math << "\\t" << cpp << endl;
	



// Presumably the buffer size for block-wise file reads (the name suggests
// "file block max bytes"); no active code in this file references it.
const int FBLOCK_MAX_BYTES = 256;
/*
// File Type.
typedef enum FileType

	ANSI = 0,
	unicode,
	UTF8,
FILETYPE;

FILETYPE GetTextFileType(const std::string& strFileName);

int UnicodeToANSI(char* pDes, const wchar_t* pSrc);
*/

int main(void)

    std::cout << "Hello World! 涂聚文\\n";

	




	/*代码无用
			FileHelper helper;
			// file test.
			std::string strFileANSI = "studentANSI.txt";
			std::string strFileUNICODE = "student.txt";
			std::string strFileUTF8 = "geovindudemo.txt";

			// please change the file name to test.
			std::string strFileName = strFileUTF8;
			//文件类型没有读对
			TEXTFILETYPE fileType = helper.GetTextFileType(strFileName);

			if (TextFileType_UNICODE == fileType)
			
				wchar_t szBuf[FBLOCK_MAX_BYTES];
				memset(szBuf, 0, sizeof(wchar_t) * FBLOCK_MAX_BYTES);

				std::string strMessage;

				FILE* fp = NULL;
				fp = fopen(strFileName.c_str(), "rb");
				if (fp != NULL)
				
					// Unicode file should offset wchar_t bits(2 byte) from start.
					fseek(fp, sizeof(wchar_t), 0);
					while (fread(szBuf, sizeof(wchar_t), FBLOCK_MAX_BYTES, fp) > 0)
					
						char szTemp[FBLOCK_MAX_BYTES] =  0 ;

						helper.UnicodeToANSI(szTemp, szBuf);
						strMessage += szTemp;
						memset(szBuf, 0, sizeof(wchar_t) * FBLOCK_MAX_BYTES);
					
				
				cout << "UNICODE" << endl;
				std::cout << strMessage << std::endl;

				fclose(fp);
			
			else if (TextFileType_UTF8 == fileType)
			
				char szBuf[FBLOCK_MAX_BYTES];
				memset(szBuf, 0, sizeof(char) * FBLOCK_MAX_BYTES);

				std::string strMessage;

				FILE* fp = NULL;
				fp = fopen(strFileName.c_str(), "rb");
				if (fp != NULL)
				
					// UTF-8 file should offset 3 byte from start position.
					fseek(fp, sizeof(char) * 3, 0);
					while (fread(szBuf, sizeof(char), FBLOCK_MAX_BYTES, fp) > 0)
					
						strMessage += szBuf;
						memset(szBuf, 0, sizeof(char) * FBLOCK_MAX_BYTES);
					
				
				cout << "utf-8" << endl;
				std::cout << strMessage << std::endl;

				fclose(fp);
			
			else
			
				char szBuf[FBLOCK_MAX_BYTES];
				memset(szBuf, 0, sizeof(char) * FBLOCK_MAX_BYTES);

				std::string strMessage;

				FILE* fp = NULL;
				fp = fopen(strFileName.c_str(), "rb");
				if (fp != NULL)
				
					// common file do not offset.
					while (fread(szBuf, sizeof(char), FBLOCK_MAX_BYTES, fp) > 0)
					
						strMessage += szBuf;
						memset(szBuf, 0, sizeof(char) * FBLOCK_MAX_BYTES);
					
				
				cout << "ANSI" << endl;
				std::cout << strMessage << std::endl;

				fclose(fp);


			


	*/




	readfile();
	//读内容
	//std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
	//std::ifstream ifs(L"geovinduinput.txt");
	//while (!ifs.eof())
	//
	//	string line;
	//	getline(ifs, line);
	//	wstring wb = conv.from_bytes(line);
	//	wcout.imbue(locale("chs"));			//更改区域设置 只为控制台输出显示 其他语言显示不了,中文可以
	//	wcout << wb << endl;
	//
	//ifs.close();


    Geovin geovin;
    geovin.createFile();
	wstring allstr;
    wstring sname;
	wstring stuID;//学号
	int num;//编号
	double english;//英语成绩
	double math;//数学成绩
	double cpp;//C++成绩
	int location = 0;//位置编号
	int flag = 0;//标记是否有对应的编号

	wcout << "请输入新增学生的信息" << endl;
	wcout << "姓名\\t" << "学号\\t" << "英语\\t" << "数学\\t" << "C++\\t" << endl;
	wcin.imbue(locale("chs"));//获取的是中文
   
    wcin >> sname >> stuID >> english >> math >> cpp;

	//allstr = sname + \' \' + stuID;
	allstr.append(sname); //C++ wstring::append
	allstr.append(L"\\t");
	allstr.append(stuID);
	allstr.append(L"\\t");
	allstr.append(to_wstring(english));
	allstr.append(L"\\t");
	allstr.append(to_wstring(math));
	allstr.append(L"\\t");
	allstr.append(to_wstring(cpp));
   // createFile(allstr);


	system("pause");
	return 0;




// 运行程序: Ctrl + F5 或调试 >“开始执行(不调试)”菜单
// 调试程序: F5 或调试 >“开始调试”菜单

// 入门使用技巧: 
//   1. 使用解决方案资源管理器窗口添加/管理文件
//   2. 使用团队资源管理器窗口连接到源代码管理
//   3. 使用输出窗口查看生成输出和其他消息
//   4. 使用错误列表窗口查看错误
//   5. 转到“项目”>“添加新项”以创建新的代码文件,或转到“项目”>“添加现有项”以将现有代码文件添加到项目
//   6. 将来,若要再次打开此项目,请转到“文件”>“打开”>“项目”并选择 .sln 文件





#define UNICODE

  

 

 

read and write in C

can use chmod to change the permission of a file
chmod a+r myfile: "a" means all users, "+" means add the permission, "r" means read


write:

fwrite:

http://www.tutorialspoint.com/c_standard_library/c_function_fwrite.htm
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) writes data from the array pointed to, by ptr to the given stream.
Parameters

    ptr This is the pointer to the array of elements to be written.

    size This is the size in bytes of each element to be written.

    nmemb This is the number of elements, each one with a size of size bytes.

    stream This is the pointer to a FILE object that specifies an output stream.

Return Value

This function returns the total number of elements successfully written as a size_t object, which is an integral data type. If this number differs from the nmemb parameter, an error has occurred.
Example

The following example shows the usage of fwrite() function.

#include<stdio.h>

/* Writes a fixed string to "file.txt" with fwrite(). Returns 0 on success, 1 on any I/O failure. */
int main ()
{
   FILE *fp;
   char str[] = "This is tutorialspoint.com";
   size_t len = sizeof(str) - 1;   /* leave out the terminating '\0': this is a text file */

   fp = fopen( "file.txt" , "w" );
   if( fp == NULL )
   {
      perror("fopen");
      return 1;
   }

   /* fwrite returns the number of elements written; fewer than requested means an error */
   if( fwrite(str , 1 , len , fp ) != len )
   {
      perror("fwrite");
      fclose(fp);
      return 1;
   }

   /* buffered data is flushed on close, so a write error can surface here */
   if( fclose(fp) != 0 )
   {
      perror("fclose");
      return 1;
   }

   return 0;
}


feof():
Now let's see the content of the above file using the following program

#include <stdio.h>

/* Prints the content of "file.txt" character by character. Returns 0 on success, 1 on any I/O failure. */
int main ()
{
   FILE *fp;
   int c;
   int status = 0;

   fp = fopen("file.txt","r");
   if( fp == NULL )
   {
      perror("fopen");
      return 1;
   }

   while(1)
   {
      c = fgetc(fp);
      /* fgetc returns EOF for end-of-file and for read errors alike;
         checking only feof() would loop forever on a read error */
      if( feof(fp) || ferror(fp) )
      {
         break ;
      }
      printf("%c", c);
   }

   if( ferror(fp) )
   {
      perror("fgetc");
      status = 1;
   }

   fclose(fp);
   return status;
}


fgets and fscanf

The function fgets reads until a newline (and also stores it). fscanf with the %s specifier reads until any whitespace and doesn't store it.

It is better not to use fscanf to read a binary file.

As a side note, you're not specifying the size of the buffer in scanf, and that is unsafe. Try:

fscanf(ptr, "%9s", str)

Upon successful completion, these functions shall return the number of successfully matched and assigned input items; this number can be zero in the event of an early matching failure. If the input ends before the first matching failure or conversion, EOF shall be returned. If a read error occurs, the error indicator for the stream is set, EOF shall be returned, and errno shall be set to indicate the error


write to text file

#include <stdio.h>

int printf(const char *format, ...);
int fprintf(FILE *stream, const char *format, ...);

char c=' ';//for separating numbers by blank space

for(i=0;i<n;i++)

{

fprintf(fp1,"%d",v1[i]);

}

to see content in shell:cat filename

write to binary file

int *v;

v=malloc(n * sizeof(int));//define vector with dimension of n

//because v holds ints, the element size must be sizeof(int); otherwise the data is not stored correctly

fwrite(v,sizeof(int),n,fp);

The C library function size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) writes data from the array pointed to, by ptr to the given stream.

Parameters

  • ptr This is the pointer to the array of elements to be written.

  • size This is the size in bytes of each element to be written.

  • nmemb This is the number of elements, each one with a size of size bytes.

  • stream This is the pointer to a FILE object that specifies an output stream.

Return Value

This function returns the total number of elements successfully written as a size_t object, which is an integral data type. If this number differs from the nmemb parameter, an error has occurred.

fread

like fwrite(they two are binary input/output)

size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream);

On success, fread() and fwrite() return the number of items read or written. This number equals the number of bytes transferred only when size is 1. If an error occurs, or the end of the file is reached, the return value is a short item count (or zero).

fread() does not distinguish between end-of-file and error, and callers must use feof(3) and ferror(3) to determine which occurred.

while((fread(&c,sizeof(int),1,fp2)))//when not reach the end,it is >0

or use if(feof(fp2))
            break;


Rewind

if we want to restart reading the file from the beginning:

rewind(fp);

or

fseek(fp,0,SEEK_SET);

Make sure you clearly understand each parameter: passing wrong values can cause a segmentation fault (core dump) by accessing forbidden memory.


以上是关于cpp: read and write utf-8 text file的主要内容,如果未能解决你的问题,请参考以下文章

c_cpp read_write_data_as_raw.cpp

C: read/write

write之后为啥read不出来 为啥?

read and write in C

read( ),readln( ),writeln() ,write( )有啥区别?

read()与readLine()有啥不同; write()与writeLine()有啥不同