ParserError: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead

Posted 2021-11-24 Data+Science+Insight

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了“ParserError: NULL byte detected. This byte cannot be processed in Python's native csv library”相关的知识，希望对你有一定的参考价值。

ParserError: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead

问题：

解决：

完整错误：

问题：

file_name = os.listdir(base_dir)[0]

col_list = [feature list]
col = col_list
#encoding
#data = pd.read_csv("D:\\\\test\\\\repo\\\\data.csv",sep = ',',encoding="GBK",usecols=range(len(col)))
    
data = pd.read_csv("D:\\\\test\\\\repo\\\\data.csv",sep = ',',encoding = 'unicode_escape', engine ='python')


#data = pd.read_csv("D:\\\\test\\\\repo\\\\data.csv",sep = ',',encoding = 'utf-8', engine ='python')

path = "D:\\\\test\\\\repo\\\\data.csv"

解决：

按照报错信息的提示，把 pd.read_csv 的参数 engine ='python' 改为 engine ='c'（Python 原生 csv 库无法处理文件中的 NULL 字节）：

file_name = os.listdir(base_dir)[0]

#encoding
#data = pd.read_csv("D:\\\\test\\\\repo\\\\data.csv",sep = ',',encoding="GBK",usecols=range(len(col)))
    
data = pd.read_csv("D:\\\\test\\\\repo\\\\data.csv",sep = ',',encoding = 'unicode_escape', engine ='c')


#data = pd.read_csv("D:\\\\test\\\\repo\\\\data.csv",sep = ',',encoding = 'utf-8', engine ='python')

path = "D:\\\\test\\\\repo\\\\data.csv"

完整错误：

---------------------------------------------------------------------------
Error Traceback (most recent call last)
D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in _next_iter_line(self, row_num)
2967 assert self.data is not None
-> 2968 return next(self.data)
2969 except csv.Error as e:

Error: line contains NULL byte

During handling of the above exception, another exception occurred:

ParserError Traceback (most recent call last)
<ipython-input-12-c5d0c651c50e> in <module>
85 ]
86
---> 87 data = inference_process(data_dir)
88 #print(data.head())
89 f=open("break_model1.pkl",'rb')

<ipython-input-12-c5d0c651c50e> in inference_process(base_dir)
18 #encoding
19 # data = pd.read_csv("D:\\\\test\\\\repo\\\\data.csv",sep = ',',encoding="GBK",usecols=range(len(col)))
---> 20 data = pd.read_csv("D:\\\\test\\\\repo\\\\data.csv",sep = ',',encoding = 'unicode_escape', engine ='python')
21 # data = pd.read_csv("D:\\\\test\\\\repo\\\\data.csv",sep = ',',encoding = 'utf-8', engine ='python')
22

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
608 kwds.update(kwds_defaults)
609
--> 610 return _read(filepath_or_buffer, kwds)
611
612

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in _read(filepath_or_buffer, kwds)
460
461 # Create the parser.
--> 462 parser = TextFileReader(filepath_or_buffer, **kwds)
463
464 if chunksize or iterator:

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in __init__(self, f, engine, **kwds)
817 self.options["has_index_names"] = kwds["has_index_names"]
818
--> 819 self._engine = self._make_engine(self.engine)
820
821 def close(self):

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in _make_engine(self, engine)
1048 )
1049 # error: Too many arguments for "ParserBase"
-> 1050 return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
1051
1052 def _failover_to_python(self):

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in __init__(self, f, **kwds)
2308 self.num_original_columns,
2309 self.unnamed_cols,
-> 2310 ) = self._infer_columns()
2311 except (TypeError, ValueError):
2312 self.close()

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in _infer_columns(self)
2615 for level, hr in enumerate(header):
2616 try:
-> 2617 line = self._buffered_line()
2618
2619 while self.line_pos <= hr:

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in _buffered_line(self)
2809 return self.buf[0]
2810 else:
-> 2811 return self._next_line()
2812
2813 def _check_for_bom(self, first_row):

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in _next_line(self)
2906
2907 while True:
-> 2908 orig_line = self._next_iter_line(row_num=self.pos + 1)
2909 self.pos += 1
2910

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in _next_iter_line(self, row_num)
2989 msg += ". " + reason
2990
-> 2991 self._alert_malformed(msg, row_num)
2992 return None
2993

D:\\anaconda\\lib\\site-packages\\pandas\\io\\parsers.py in _alert_malformed(self, msg, row_num)
2946 """
2947 if self.error_bad_lines:
-> 2948 raise ParserError(msg)
2949 elif self.warn_bad_lines:
2950 base = f"Skipping line {row_num}: "

ParserError: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead

以上是关于“ParserError: NULL byte detected. This byte cannot be processed in Python's native csv library”的主要内容，如果未能解决你的问题，请参考以下文章

java Estudo de leitura e escrita em arquivos em Java - bytes e strings

JQuery ajax请求一直返回Error（parsererror）

JSON::ParserError - 416: '"#define RSAPrivateKey_dup GRPC_SHADOW_' 处出现意外标记

TypeError: Unrecognized value type: ＜class ‘str‘＞ ParserError: Unknown string format

JQuery ajax请求返回（parsererror）异常处理

对 Spring Rest Web 服务的 jQuery corss 域 ajax 请求的 Parsererror