Green Mole
Join Date: Dec 2004
Posts: 10
|
THanks for the quick response.
Thanks for the point to the previous post. I was unable to find it as I was not doing a search for merge.
Here is what I come up with this morning:
<?
include "connect.php";
//this is a class I use to do inserts and queries in mysql*********************************************
include "sequl.inc";
/* ******************************************
1) get sites already in sites and set new_site_id to its number
****************************************** */
$query ="select import_sites.*, sites.site_id as new from import_sites ";
$query.="right join sites ";
$query.="on import_sites.site_url=sites.site_url ";
$results=sequl::getit($query);
print_r($results);
foreach ($results as $result){
$old=$result['site_id'];
$new=$result['new'];
$update ="update import_sites set new_site_id=$new ";
$update.="where site_id=$old";
print "$update\n";
sequl::insert($update);
//update import_spider
$update="update import_spider set new_site_id=$new ";
$update.="where site_id=$old";
sequl::insert($update);
}
/* ******************************************
2) get new sites; they dont have new_site_id yet
****************************************** */
$query="select * from import_sites where new_site_id = 0";
$results=sequl::getit($query);
print_r($results);
foreach ($results as $result){
$old=$result['site_id'];
$url=$result['site_url'];
$insert="insert into sites (site_url) values ('$url')";
$new=sequl::insert($insert,1);
$update ="update import_sites set new_site_id=$new ";
$update.="where site_id=$old";
print "$update\n";
sequl::insert($update);
//update import_spider
$update="update import_spider set new_site_id=$new ";
$update.="where site_id=$old";
sequl::insert($update);
}
//get spiders where they are the same
$query="SELECT spider.spider_id AS new, import_spider.spider_id AS old
FROM import_spider
RIGHT JOIN spider ON import_spider.file = spider.file
WHERE import_spider.new_site_id = spider.site_id ";
$results=sequl::getit($query);
foreach($results as $result){
$old=$result['old'];
$new=$result['new'];
$update="update import_spider set new_spider_id=$new
where spider_id=$old";
sequl::insert($update);
}
//get import_spiders that need to be inserted into spider
$query="select * from import_spider where new_spider_id=0";
$results=sequl::getit($query);
foreach ($results as $result){
$old=$result['spider_id'];
$insert="insert into spider (file, first_words, upddate, md5,
site_id, path, num_words, last_modified, filesize)
select 'file', first_words, upddate, md5, site_id, path,
num_words, last_modified, filesize from import_spider
where spider_id=$old";
print "\n$insert\n";
$new=sequl::insert($insert,1);
$update="update import_spider set new_spider_id=$new
where spider_id=$old";
print "$update\n";
sequl::insert($update);
}//end foreach
$query="select spider_id as old, new_spider_id as new from import_spider";
$results=sequl::getit($query);
foreach ($results as $result){
$old= $result['old'];
$new= $result['new'];
$update="update import_engine
set new_spider_id=$new
where spider_id=$old";
sequl::insert($update);
}
$query="select import_keywords.key_id as old, keywords.key_id as new,
keywords.keyword
from import_keywords
right join keywords
on import_keywords.keyword=keywords.keyword";
$results=sequl::getit($query);
foreach ($results as $result){
$old= $result['old'];
$new= $result['new'];
$update="update import_keywords set new_key_id =$new
where key_id=$old";
sequl::insert($update);
}
$query="select key_id as old from import_keywords where new_key_id=0";
print "$query\n";
$results=sequl::getit($query);
foreach ($results as $result){
$old= $result['old'];
$insert="insert into keywords (key_id,twoletters,keyword)
select key_id,twoletters,keyword from import_keywords
where key_id=$old";
print "\n$insert\n";
$new=sequl::insert($insert,1);
/* $update="update import_keywords set new_key_id=$new
where key_id=$old";
print "$update\n"; */
sequl::insert($update);
}//end foreach
?>
#
# Table structure for table `import_engine`
#
CREATE TABLE import_engine (
spider_id mediumint(9) NOT NULL default '0',
key_id mediumint(9) NOT NULL default '0',
weight smallint(4) NOT NULL default '0',
new_spider_id mediumint(9) NOT NULL default '0',
KEY key_id (key_id)
) TYPE=MyISAM;
# --------------------------------------------------------
#
# Table structure for table `import_keywords`
#
CREATE TABLE import_keywords (
key_id int(9) NOT NULL auto_increment,
twoletters char(2) NOT NULL default '',
keyword varchar(64) NOT NULL default '',
new_key_id int(9) NOT NULL default '0',
PRIMARY KEY (key_id),
UNIQUE KEY keyword (keyword),
UNIQUE KEY key_id (key_id),
KEY twoletters (twoletters)
) TYPE=MyISAM;
# --------------------------------------------------------
#
# Table structure for table `import_sites`
#
CREATE TABLE import_sites (
site_id mediumint(9) NOT NULL auto_increment,
site_url varchar(127) NOT NULL default '',
upddate timestamp(14) NOT NULL,
username varchar(32) default NULL,
password varchar(32) default NULL,
port smallint(6) default NULL,
locked tinyint(1) NOT NULL default '0',
new_site_id mediumint(9) NOT NULL default '0',
PRIMARY KEY (site_id),
UNIQUE KEY site_id (site_id)
) TYPE=MyISAM;
# --------------------------------------------------------
#
# Table structure for table `import_spider`
#
CREATE TABLE import_spider (
spider_id mediumint(9) NOT NULL auto_increment,
file varchar(127) NOT NULL default '',
first_words text NOT NULL,
upddate timestamp(14) NOT NULL,
md5 varchar(50) default NULL,
site_id mediumint(9) NOT NULL default '0',
path varchar(127) NOT NULL default '',
num_words int(11) NOT NULL default '1',
last_modified timestamp(14) NOT NULL,
filesize int(11) NOT NULL default '0',
new_spider_id mediumint(9) NOT NULL default '0',
new_site_id mediumint(9) NOT NULL default '0',
PRIMARY KEY (spider_id),
UNIQUE KEY spider_id (spider_id),
KEY site_id (site_id)
) TYPE=MyISAM;
*********************************************
*********************************************
*********************************************
*********************************************
In the demo where I tried this it seems to be working, I am sure there are some pitfalls but I have not uncovered them yet. What do you think?
|