水平分割表:一种是当多个过程频繁访问数据表的不同行时,水平分割表,并消除新表中的冗余数据列;若个别过程要访问整个数据,则要用连接*作,这也无妨分割表;典型案例是电信话单按月分割存放。
垂直分割表(不破坏第三范式),一种是当多个过程频繁访问表的不同列时,可将表垂直分成几个表,减少磁盘I/O(每行的数据列少,每页存的数据行就多,相应占用的页就少),更新时不必考虑锁,没有冗余数据。缺点是要在插入或删除数据时要考虑数据的完整性,用存储过程维护。
# -*- coding: UTF-8 -*-
import MySQLdb
import datetime
import time
# Mysql数据库按照存储日期水平分割的脚本 # Au:Claves # Email:claves@claves.me # Date:2016/04/24
def check_db_data_count(db_handle, db_cursor_handle, field_name, source_table_name, new_table_name, start_time, end_time): # 校验原表数据和新表数据
source_sql =" select count(*) from "+ source_table_name+" where ("+field_name+" >= '"+start_time+"') and ( "+field_name+" < '"+end_time+"')"
new_table_sql = "select count(*) from "+new_table_name
try:
# db_handle.commit()
db_cursor_handle.execute(source_sql)
source_count = db_cursor_handle.fetchone()
db_cursor_handle.execute(new_table_sql)
new_count = db_cursor_handle.fetchone()
db_handle.commit()
except:
db_handle.rollback()
print start_time+"-"+end_time+"::::"+"source talbe count:"+str(source_count[0])+" new talbe count:"+str(new_count[0])
return (source_count[0],new_count[0])
def copy_db_struct(db_cursor_handle, source_table_name, new_table_name): # 从原表中拷贝表结构到新表中
sql = "create table if not EXISTS " + new_table_name + " like " + source_table_name
db_cursor_handle.execute(sql)
def copy_db_data(db_handle,db_cursor_handle,field_name,source_table_name,new_table_name,start_time,end_time):#拷贝表中数据
sql = " insert into " + new_table_name + " select * from " + source_table_name+" where ("+field_name+" >= '" + start_time + "') and ( "+field_name+" < '"+end_time+"')";
try:
db_handle.commit()
db_cursor_handle.execute(sql)
db_handle.commit()
except:
db_handle.rollback()
start_date = datetime.date(2015, 12, 1) # 开始时间
end_date = datetime.date(2016, 4, 30) # 结束日期
source_table_name = "historydata" # 原数据库中表名
field_name_date = "storetime" # 元数据库中,保存时间的字段名
space = 1 # 水平分割的时间跨度(已天为单位)
source_count = 0 # 临时变量,保存原表中总数据量
new_count = 0 # 临时变量,保存所有创建新表中总数据量
db = MySQLdb.connect("localhost", "root", "password", "dbname");
cursor = db.cursor() #mysql cursor handle
while(start_date <= end_date):
tmp_start_time = start_date.strftime("%Y-%m-%d 00:00:00")
tmp_nextday_time = datetime.date.fromordinal(start_date.toordinal()+space)
tmp_nextday_time =tmp_nextday_time.strftime("%Y-%m-%d 00:00:00")
tmp_new_table_name = source_table_name+"_" + start_date.strftime("%Y%m%d")
copy_db_struct(cursor, source_table_name ,tmp_new_table_name)
copy_db_data(db, cursor, field_name_date, source_table_name, tmp_new_table_name, tmp_start_time, tmp_nextday_time)
tmp_count = check_db_data_count(db, cursor, field_name_date, source_table_name, tmp_new_table_name, tmp_start_time, tmp_nextday_time)
source_count += tmp_count[0]
new_count += tmp_count[1]
start_date = datetime.date.fromordinal(start_date.toordinal()+space)
print "source_count:"+str(source_count)+"----"+"new_count:"+str(new_count)