Python Sql 代码错误 - sqlite3.OperationalError: SQL 变量太多

Posted

技术标签:

【中文标题】Python Sql 代码错误 - sqlite3.OperationalError: SQL 变量太多【英文标题】:Python Sql code error - sqlite3.OperationalError: too many SQL variables 【发布时间】:2020-07-13 01:25:28 【问题描述】:

我正在尝试以下代码,但出现错误

if not os.path.isfile('train.db'):
    disk_engine = create_engine('sqlite:///train.db')
    start = dt.datetime.now()
    chunksize = 15000
    j = 0
    index_start = 1
    for df in pd.read_csv('final_features.csv', names=['Unnamed: 0','id','is_duplicate','cwc_min','cwc_max','csc_min','csc_max','ctc_min','ctc_max','last_word_eq','first_word_eq','abs_len_diff','mean_len','token_set_ratio','token_sort_ratio','fuzz_ratio','fuzz_partial_ratio','longest_substr_ratio','freq_qid1','freq_qid2','q1len','q2len','q1_n_words','q2_n_words','word_Common','word_Total','word_share','freq_q1+q2','freq_q1-q2','0_x','1_x','2_x','3_x','4_x','5_x','6_x','7_x','8_x','9_x','10_x','11_x','12_x','13_x','14_x','15_x','16_x','17_x','18_x','19_x','20_x','21_x','22_x','23_x','24_x','25_x','26_x','27_x','28_x','29_x','30_x','31_x','32_x','33_x','34_x','35_x','36_x','37_x','38_x','39_x','40_x','41_x','42_x','43_x','44_x','45_x','46_x','47_x','48_x','49_x','50_x','51_x','52_x','53_x','54_x','55_x','56_x','57_x','58_x','59_x','60_x','61_x','62_x','63_x','64_x','65_x','66_x','67_x','68_x','69_x','70_x','71_x','72_x','73_x','74_x','75_x','76_x','77_x','78_x','79_x','80_x','81_x','82_x','83_x','84_x','85_x','86_x','87_x','88_x','89_x','90_x','91_x','92_x','93_x','94_x','95_x','0_y','1_y','2_y','3_y','4_y','5_y','6_y','7_y','8_y','9_y','10_y','11_y','12_y','13_y','14_y','15_y','16_y','17_y','18_y','19_y','20_y','21_y','22_y','23_y','24_y','25_y','26_y','27_y','28_y','29_y','30_y','31_y','32_y','33_y','34_y','35_y','36_y','37_y','38_y','39_y','40_y','41_y','42_y','43_y','44_y','45_y','46_y','47_y','48_y','49_y','50_y','51_y','52_y','53_y','54_y','55_y','56_y','57_y','58_y','59_y','60_y','61_y','62_y','63_y','64_y','65_y','66_y','67_y','68_y','69_y','70_y','71_y','72_y','73_y','74_y','75_y','76_y','77_y','78_y','79_y','80_y','81_y','82_y','83_y','84_y','85_y','86_y','87_y','88_y','89_y','90_y','91_y','92_y','93_y','94_y','95_y'], chunksize=chunksize, iterator=True, encoding='utf-8', ):
        df.index += index_start
        j+=1
        print(' rows'.format(j*chunksize))
        df.to_sql('data', disk_engine, if_exists='append')
        index_start = df.index[-1] + 1

这是我得到的 o/p

    15000 rows
---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1192                         parameters,
-> 1193                         context)
   1194         except BaseException as e:

~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
    506     def do_execute(self, cursor, statement, parameters, context=None):
--> 507         cursor.execute(statement, parameters)
    508 

OperationalError: too many SQL variables

The above exception was the direct cause of the following exception:

OperationalError                          Traceback (most recent call last)
<ipython-input-83-b376654c990a> in <module>()
     14         j+=1
     15         print(' rows'.format(j*chunksize))
---> 16         df.to_sql('data', disk_engine, if_exists='append')
     17         index_start = df.index[-1] + 1

~\Anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
   2125     ...     df2.to_excel(writer, sheet_name='Sheet_name_2')
   2126 
-> 2127     ExcelWriter can also be used to append to an existing Excel file:
   2128 
   2129     >>> with pd.ExcelWriter('output.xlsx',

~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype)
    448     index=True,
    449     index_label=None,
--> 450     chunksize=None,
    451     dtype=None,
    452     method=None,

~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
   1147 
   1148     @staticmethod
-> 1149     def _query_iterator(
   1150         result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
   1151     ):

~\Anaconda3\lib\site-packages\pandas\io\sql.py in insert(self, chunksize)
    661         ----------
    662         conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
--> 663         keys : list of str
    664            Column names
    665         data_iter : generator of list

~\Anaconda3\lib\site-packages\pandas\io\sql.py in _execute_insert(self, conn, keys, data_iter)
    636         return str(CreateTable(self.table).compile(self.pd_sql.connectable))
    637 
--> 638     def _execute_create(self):
    639         # Inserting table into database, add to MetaData object
    640         self.table = self.table.tometadata(self.pd_sql.meta)

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object, *multiparams, **params)
    946             raise exc.ObjectNotExecutableError(object)
    947         else:
--> 948             return meth(self, multiparams, params)
    949 
    950     def _execute_function(self, func, multiparams, params):

~\Anaconda3\lib\site-packages\sqlalchemy\sql\elements.py in _execute_on_connection(self, connection, multiparams, params)
    267     def _execute_on_connection(self, connection, multiparams, params):
    268         if self.supports_execution:
--> 269             return connection._execute_clauseelement(self, multiparams, params)
    270         else:
    271             raise exc.ObjectNotExecutableError(self)

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_clauseelement(self, elem, multiparams, params)
   1058             compiled_sql,
   1059             distilled_params,
-> 1060             compiled_sql, distilled_params
   1061         )
   1062         if self._has_events or self.engine._has_events:

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1198                 parameters,
   1199                 cursor,
-> 1200                 context)
   1201 
   1202         if self._has_events or self.engine._has_events:

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1411                 util.raise_from_cause(
   1412                     sqlalchemy_exception,
-> 1413                     exc_info
   1414                 )
   1415             else:

~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
    201     exc_type, exc_value, exc_tb = exc_info
    202     cause = exc_value if exc_value is not exception else None
--> 203     reraise(type(exception), exception, tb=exc_tb, cause=cause)
    204 
    205 if py3k:

~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
    184             value.__cause__ = cause
    185         if value.__traceback__ is not tb:
--> 186             raise value.with_traceback(tb)
    187         raise value
    188 

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1191                         statement,
   1192                         parameters,
-> 1193                         context)
   1194         except BaseException as e:
   1195             self._handle_dbapi_exception(

~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
    505 
    506     def do_execute(self, cursor, statement, parameters, context=None):
--> 507         cursor.execute(statement, parameters)
    508 
    509     def do_execute_no_params(self, cursor, statement, context=None):

我已经尝试过各种 chunksize 值,但它不起作用。谁能建议我修复这个错误。我在 jupyter notebook 中运行这段代码。我已经更新了 pandas 和其他库的版本,因此不存在兼容性问题。

【问题讨论】:

【参考方案1】:

此错误与传递给 sqlite3 的参数数量有关。本质上,幕后发生的事情是向数据库引擎发出了一个 SQL 查询:INSERT INTO myTable (col1, col2, col3,..., col_n) VALUES (?, ?, ?,..., ?),其中? 是您的数据帧中传递给数据库的值。

发生此错误是因为您的数据框非常宽(有很多列),因此在插入期间,许多参数被传递到 SQL 语句中。您实际上可以在证明我的解释的错误堆栈中看到:

--> 507         cursor.execute(statement, parameters)

简单地说,SQLite 只能处理有限数量的正在传递的参数。这只是 SQLite 的一个限制。您可以在page 中向下滚动到 #9,了解更多信息。

设置 chunksize 不会解决您的问题。我的建议是使用另一个数据库,如 postgres 或 mysql

【讨论】:

以上是关于Python Sql 代码错误 - sqlite3.OperationalError: SQL 变量太多的主要内容,如果未能解决你的问题,请参考以下文章

这个 Python 代码是不是容易受到 SQL 注入的影响? (SQLite3)

当涉及到以注释开头的 SQL 查询时,Python sqlite3 模块是不是存在错误和缓慢?

Python Flask SQLalchemy sqlite3 防止数据库搜索中的 SQL 注入

python安装sqlite3模块

使用 sqlite3 在 django 中出现“太多 SQL 变量”错误

写入数据库时​​,Python 3 中 SQLite3 的磁盘 I/O 错误