python migrator.sh

Posted 2021-05-10

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了python migrator.sh相关的知识，希望对你有一定的参考价值。

#! /bin/sh

usage_error () {
    echo 'Usage: sh migrator.sh <path to sqlite_to_postgres.py> <path to sqlite db file> <an empty dir to output dump files>'
    echo
    echo 'Example:'
    echo '>sh migrator.sh sqlite_to_postgres.py ~/reviewboard.db /tmp/dumps'
    echo
    echo 'Tested on:'
    echo 'Python 2.7.3'
    echo 'SQLite 3.7.9'
}
if [ ! $# -eq 3 ]
then
  usage_error
  exit 1
fi

if [ ! -r $1 ]
then
  echo $1' is not readable.'
  echo 'Please give the correct path to sqlite_to_postgres.py'
  exit 1
fi

if [ ! -r $2 ]
then
  echo $2' is not readable'
  exit 1
fi

if [ ! -d $3 ]
then
  echo $3' is not a valid directory'
  exit 1
fi


#Get the list of tables
echo .tables | sqlite3 $2 > $3/lsoftbls

#Get dumps from sqlite
for i in `cat $3/lsoftbls`
do
  echo 'Generating sqlite dumps for '$i
  echo '.output '$3'/'$i'.dump' > $3/dumper
  echo 'pragma table_info('$i');' >> $3/dumper
  echo '.dump '$i >> $3/dumper
  echo '.quit'  >> $3/dumper
  cat $3/dumper | sqlite3 $2
done

#Use the python script to convert the sqlite dumps to psql dumps
echo
echo 'Now converting the sqlite dumps into psql format...'
echo
for i in `ls -1 $3/*.dump`
do
  python $1 $i
done


#Remove the sqlite3 dumps and the file 'lsoftbls'
echo
echo 'Removing temporary files..'
rm $3/*.dump
rm $3/lsoftbls
rm $3/dumper

echo 'Removing empty dump files..'
wc -l $3/*.psql | grep -w 0 | awk '{ print $NF }' | xargs rm

echo ; echo 'Done.'; echo
echo 'Please find the psql dumps at '$3

0|display_name|varchar(64)|1||0
1|name|varchar(64)|1||0
2|local_site_id|integer|0||0
3|incoming_request_count|integer|0||0
4|invite_only|bool|1||0
5|id|integer|1||1
6|mailing_list|varchar(75)|1||0
7|visible|bool|1||0
PRAGMA foreign_keys=OFF;
BEGIN TRANSACTION; 
CREATE TABLE "reviews_group"("display_name" varchar(64) NOT NULL, "name" varchar(64) NOT NULL, "local_site_id" integer NULL, "incoming_request_count" integer NULL, "invite_only" bool NOT NULL, "id" integer NOT NULL UNIQUE PRIMARY KEY, "mailing_list" varchar(75) NOT NULL, "visible" bool NOT NULL);
INSERT INTO "reviews_group" VALUES('Developers','developers',NULL,127,0,1,'',1);
INSERT INTO "reviews_group" VALUES('Testers','testers',NULL,2,0,2,'',1);
INSERT INTO "reviews_group" VALUES('QA','qa',NULL,1,0,3,'',1);
INSERT INTO "reviews_group" VALUES('Release Engineers','releng',NULL,7,0,4,'',1);
INSERT INTO "reviews_group" VALUES('Managers','mgrs',NULL,1,0,5,'',1);
COMMIT;

INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Developers','developers',NULL,127,FALSE,1,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Testers','testers',NULL,2,FALSE,2,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('QA','qa',NULL,1,FALSE,3,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Release Engineers','releng',NULL,7,FALSE,4,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Managers','mgrs',NULL,1,FALSE,5,'',TRUE);

#! /usr/bin/python
# SQLite3 uses 1 and 0 whereas PostgreSQL uses TRUE and FALSE for booleans
# This python script serves a single purpose of converting the sqlite dumps
# into postres-compatible dumps by converting the boolean values.

import random
import sys
import os.path

BOUNDARY = '%$#@!~R@ND0M^&*()_B0UND@RY<>?:'+str(int(random.random()*(10**10)))
COLUMNS = []
COLUMN_NAMES = ""
COLUMN_TYPES = ()

def usage():
  '''
  Print usage and exit
  '''
  print "Usage: ./bool_changer.py <filename.dump>"
  sys.exit()

def fix_column_names(first_line):
  '''
  The insert statement from sqlite3 dump is as follows:
    INSERT INTO "test" VALUES(1,'Hello');

  We need to add the column information to the statements like this:
    INSERT INTO "test" (id,name) VALUES(1,'Wibble');

  This is necessary because the column orders may be different in psql db.
  '''
  global COLUMN_NAMES
  index = first_line.index(' VALUES')
  return first_line[:index] + COLUMN_NAMES + first_line[index:]

def fix_bool(stmt):
  from_here = 'VALUES('
  start_pos = stmt.index(from_here) + len(from_here)
  cur_pos = start_pos
  newstmt = stmt[:start_pos]  #  [INSERT ... VALUES(]
  stmtlen = len(stmt)
  no_of_cols = len(COLUMN_TYPES)

  for i in range(0,no_of_cols):

    if COLUMN_TYPES[i] == 'bool':
      newstmt += stmt[start_pos:cur_pos] #nothing happens if both are same
      if stmt[cur_pos] == '1': newstmt += 'TRUE'
      elif stmt[cur_pos] == '0': newstmt += 'FALSE'
      if i == no_of_cols-1: #i.e. last column
        newstmt += ');\n'
        break
      newstmt += ','        #not last column
      cur_pos += 2
      start_pos = cur_pos
    else:
      if i == no_of_cols-1:         #if it's the last non-bool column, then
        newstmt += stmt[start_pos:] #simply insert everything that's left
        break                       #and leave

      if stmt[cur_pos] != "'":
        for cur_pos in range(cur_pos+1,stmtlen):
          if stmt[cur_pos] == ',':
            cur_pos += 1
            break #the inner loop and go to next column
      else: # the 'problematic' place. cur_pos in "'"
        cur_pos += 1 #what's next after "'"?
        while cur_pos < stmtlen:
          if stmt[cur_pos] == "'":
            if stmt[cur_pos+1] == "'": #ignore escaped quote ('')
              cur_pos += 2
              continue #searching
            elif stmt[cur_pos+1] == ",": #end of string
              cur_pos += 2
              break #to next column
          cur_pos += 1
  return newstmt

def get_psql_inserts(insert_lines):
  '''
  This method will get a list of one or more lines that together constitute
  a single insert statement from the sqlite dump, manipulates it and 
  returns the list containing the psql compatible insert statement.
  '''
  global BOUNDARY

  #First fix the column name issue.
  insert_lines[0] = fix_column_names(insert_lines[0])
  
  if 'bool' in COLUMN_TYPES:
    insert_stmt = BOUNDARY.join(insert_lines) 
    insert_stmt = fix_bool(insert_stmt)
    insert_lines = insert_stmt.split(BOUNDARY)

  return insert_lines


def process_dump(input_file,output_file):
  '''
  Process the file lazily line by line
  '''
  def process_insert(insert_lines):
    '''
    Helper method to write psql commands into output_file
    '''
    psql_inserts = get_psql_inserts(insert_lines)
    output_file.writelines(psql_inserts)

  global COLUMNS
  global COLUMN_NAMES
  global COLUMN_TYPES
  after_pragma = False     #The first few lines will be schema info upto the
                           #line that starts with "PRAGMA"
  insert_started = False   
  insert_lines = []
  insert_stmt_start = 'INSERT'

  for line in input_file:
    #Get the schema info from the head of the dump file
    if not after_pragma:
      if line[0].isdigit():
        COLUMNS.append(tuple(line.split('|')[1:3]))
      elif line.startswith('PRAGMA'):
        after_pragma = True
        COLUMN_NAMES = str(tuple([name for name,datatype in COLUMNS]))
        COLUMN_TYPES = tuple([datatype for name,datatype in COLUMNS])
        #Python uses single quotes for enclosing a string.
        #But psql uses double quotes on "column names" and
        #single quotes on strings inside VALUES(..)
        COLUMN_NAMES = ' ' + COLUMN_NAMES.replace("'",'"') 
      continue

    #Ignore the lines from PRAGMA and before INSERT. 
    if not insert_started:
      if line.startswith('CREATE TABLE'):
        table_name = line[line.index('"'):]
        table_name = table_name[:table_name.index('"',1)+1] # '"table_name"'
        insert_stmt_start = 'INSERT INTO ' + table_name
      elif line.startswith('INSERT'): 
        insert_started = True
      else: continue
      
    #If the control reaches here, it must mean that the first insert statement
    #has appeared. But the insert statements may span multiple lines. So, we
    #collect those lines and process them.

    if line.startswith(insert_stmt_start):
      if insert_lines:               #True from 2nd insert statement
        process_insert(insert_lines) #Insert the previous insert statement
      insert_lines = [line]          #and append the current one
    elif insert_lines: 
      insert_lines.append(line)

  if not insert_lines: return
  while insert_lines[-1].endswith(';\n') and \
        (insert_lines[-1].startswith('CREATE INDEX') or \
         insert_lines[-1].startswith('COMMIT')):
    insert_lines.pop()    #remove the create index and commit lines at the end
  process_insert(insert_lines) #fix the last insert statement

      

  
if __name__ == '__main__':
  if len(sys.argv) != 2:
    usage()
  
  filename = sys.argv[1]
  output_filename = filename + '.psql'
  
  if not os.path.isfile(filename):
    print "FATAL: Not a valid filename"
    usage()

  print sys.argv[0], ': Trying to convert', sys.argv[1]
  try:
    input_file = open(filename,'r')
    output_file = open(output_filename,'w')
    process_dump(input_file,output_file)
  finally:
    input_file.close()
    output_file.close()
  print sys.argv[0], ': Converted to', output_filename
  print

以上是关于python migrator.sh的主要内容，如果未能解决你的问题，请参考以下文章