Skip to content
GitLab
Menu
Why GitLab
Pricing
Contact Sales
Explore
Why GitLab
Pricing
Contact Sales
Explore
Sign in
Get free trial
Changes
Page history
Update Database conversion
authored
Aug 27, 2022
by
Émilie Pagé-Perron
Hide whitespace changes
Inline
Side-by-side
Database-conversion.md
View page @
fc5794cd
...
...
@@ -1032,60 +1032,52 @@ SET
# 3 Bibliography Migration
Can't find all notes but this is part of it:
```
sql
UPDATE
publications
t1
,
publications
t2
SET
t1
.
bibtexkey
=
concat
(
year
,
t2
.
id
)
WHERE
t1
.
id
=
t2
.
id
AND
t1
.
bibtexkey
is
null
Update mapping for primary authors
`UPDATE cdli_db_tmp.primary_pub_authors join cdli_db.authors ON cdli_db_tmp.primary_pub_authors.`
author
` = cdli_db.authors.author SET cdli_db_tmp.primary_pub_authors.author_id = cdli_db.authors.id WHERE cdli_db_tmp.primary_pub_authors.`
author
` = cdli_db.authors.author`
UPDATE
authors
t1
,
authors
t2
SET
t1
.
last
=
SUBSTRING_INDEX
(
t2
.
author
,
","
,
1
)
WHERE
t1
.
id
=
t2
.
id
AND
t1
.
last
=
""
add primary publications
```
INSERT INTO cdli_db.publications (year, designation)
SELECT distinct publication_date, primary_publication FROM cdli_db_old.cataloguesnew;
```
Remove designations which are not publications
```
Delete from publications where designation like "%CDLI%";
Delete from publications where designation like "%unpublished%";
```
Fill the contribution fields
```
update cdli_db.publications Set accepted_by = 820;
update cdli_db.publications Set accepted = 1;
```
```
sql
Add publications to artifacts_publications
```
INSERT INTO cdli_db.artifacts_publications (artifact_id, publication_id, publication_comments)
Select cdli_db_old.cataloguesnew.id_text, cdli_db.publications.id, cdli_db_old.cataloguesnew.author_remarks
from cdli_db_old.cataloguesnew
join cdli_db.publications ON cdli_db_old.cataloguesnew.primary_publication = cdli_db.publications.designation
where cdli_db_old.cataloguesnew.primary_publication = cdli_db.publications.designation;
UPDATE
`artifacts_publications`
SET
`publication_type`
=
"primary"
WHERE
`publication_type`
IS
NULL
;
UPDATE
`primary_pub_authors`
join
cdli_db
.
authors
ON
`primary_pub_authors`
.
`author`
=
cdli_db
.
authors
.
author
SET
primary_pub_authors
.
author_id
=
cdli_db
.
authors
.
id
WHERE
`primary_pub_authors`
.
`author`
=
cdli_db
.
authors
.
author
```
### Set authors for primary publications
```
sql
CREATE
VIEW
cdli_db_tmp
.
primary_pubs
AS
SELECT
cdli_db_tmp
.
primary_pub_authors
.
author
,
cdli_db_tmp
.
primary_pub_authors
.
author_id
,
cdli_db_tmp
.
primary_pub_authors
.
author_id_2
,
cdli_db_tmp
.
primary_pub_authors
.
author_id_3
,
cdli_db_old
.
cataloguesnew
.
primary_publication
FROM
cdli_db_old
.
cataloguesnew
join
cdli_db_tmp
.
primary_pub_authors
on
cdli_db_tmp
.
primary_pub_authors
.
author
=
cdli_db_old
.
cataloguesnew
.
author
Where
cdli_db_old
.
cataloguesnew
.
author
!=
'CDLI'
or
cdli_db_old
.
cataloguesnew
.
author
!=
'nn'
Update publication type before inserting other types
`UPDATE `
artifacts_publications
` SET `
publication_type
`="primary" WHERE `
publication_type
` IS NULL;`
Put
back
fk
keys
add authors to primary publications
```
INSERT INTO cdli_db.authors_publications (author_id, publication_id)
SELECT cdli_db_tmp.primary_pubs.author_id, cdli_db.publications.id
FROM cdli_db.publications
Join
cdli_db_tmp
.
primary_pubs
on
cdli_db_tmp
.
primary_pubs
.
primary_publication
=
cdli_db
.
publications
.
designation
Join cdli_db_tmp.primary_pubs on cdli_db_tmp.primary_pubs.primary_publication = cdli_db.publications.designation
;
INSERT INTO cdli_db.authors_publications (author_id, publication_id)
SELECT cdli_db_tmp.primary_pubs.author_id_2, cdli_db.publications.id
...
...
@@ -1098,22 +1090,205 @@ FROM cdli_db.publications
Join cdli_db_tmp.primary_pubs on cdli_db_tmp.primary_pubs.primary_publication = cdli_db.publications.designation where cdli_db_tmp.primary_pubs.author_id_3 is not null;
```
```
sql
insert
into
cdli_db
.
publications
(
id
,
designation
,
bibtexkey
)
VALUES
(
(
SELECT
cdli_db_tmp
.
pub_history
.
id
),
(
SELECT
cdli_db_tmp
.
pub_history
.
pub
),
(
SELECT
cdli_db_tmp
.
pub_history
.
id
cdli_db_tmp
.
pub_history
));
### publication history, citation and collation
if no ";"(344 000 entries, 17 800 left)
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select id_text, cdli_db_old.cataloguesnew.publication_history from cdli_db_old.cataloguesnew
where cdli_db_old.cataloguesnew.publication_history not like "%;%";
```
before first ";"
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select id_text,SUBSTRING_INDEX(cdli_db_old.cataloguesnew.publication_history, ";", 1) from cdli_db_old.cataloguesnew
where cdli_db_old.cataloguesnew.publication_history like "%;%";
```
create a table to work the rest
`INSERT INTO pub_history_full (artifact_id, pub) Select id, publication_history from cdli_db_old.cataloguesnew;`
remove entries with only one pub
`delete FROM pub_history_full where pub not like "%;%";`
update to remove first pub from all entries
`UPDATE pub_history_full SET pub= SUBSTR(REPLACE(pub,SUBSTRING_INDEX(pub,';',1),''),2);`
add all the new single entries
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select artifact_id, pub from pub_history_full
where pub_history_full.pub not like "%;%";
```
remove the single entries
publication_history
`delete FROM pub_history_full where pub not like "%;%";`
insert
each
value
into
publications
table
SELECT
LAST_INSERT_ID
(),
cdli_db_old
.
cataloguesmew
.
text_id
,
and
"history"
in
artifacts_publications
insert
into
cdli_db
.
publications
(
id
,
designation
,
bibtexkey
)
SELECT
cdli_db_tmp
.
pub_history
.
id
,
cdli_db_tmp
.
pub_history
.
pub
,
SELECT
cdli_db_tmp
.
pub_history
.
id
from
from
cdli_db_tmp
.
pub_history
insert the first
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select artifact_id,SUBSTRING_INDEX(pub_history_full.pub, ";", 1) from pub_history_full
where pub_history_full.pub like "%;%";
```
update to remove first pub from all entries
`UPDATE pub_history_full SET pub= SUBSTR(REPLACE(pub,SUBSTRING_INDEX(pub,';',1),''),2);`
add all the new single entries
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select artifact_id, pub from pub_history_full
where pub_history_full.pub not like "%;%";
```
remove the single entries
`delete FROM pub_history_full where pub not like "%;%";`
insert the first
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select artifact_id,SUBSTRING_INDEX(pub_history_full.pub, ";", 1) from pub_history_full
where pub_history_full.pub like "%;%";
```
update to remove first pub from all entries
`UPDATE pub_history_full SET pub= SUBSTR(REPLACE(pub,SUBSTRING_INDEX(pub,';',1),''),2);`
add all the new single entries
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select artifact_id, pub from pub_history_full
where pub_history_full.pub not like "%;%";
```
remove the single entries
`delete FROM pub_history_full where pub not like "%;%";`
again a dozen times.
set type for all entries to secondary
```
update cdli_db_tmp.pub_history
set type = "secondary";
```
fill full to work citations
```
INSERT INTO pub_history_full (artifact_id, pub) Select id, citation from cdli_db_old.cataloguesnew;
delete from pub_history where pub = "";
delete from pub_history_full where pub = "";
```
add all the new single entries
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select artifact_id, pub from pub_history_full
where pub_history_full.pub not like "%;%";
```
remove the single entries
`delete FROM pub_history_full where pub not like "%;%";`
insert the first
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select artifact_id,SUBSTRING_INDEX(pub_history_full.pub, ";", 1) from pub_history_full
where pub_history_full.pub like "%;%";
```
update to remove first pub from all entries
`UPDATE pub_history_full SET pub= SUBSTR(REPLACE(pub,SUBSTRING_INDEX(pub,';',1),''),2);`
as many times as needed
set type for all null entries to citation
`update pub_history set type = "citation" where cdli_db_tmp.pub_history.type is null`
fill full to work collations
```
INSERT INTO pub_history_full (artifact_id, pub) Select id, published_collation from cdli_db_old.cataloguesnew;
delete from pub_history_full where pub = "";
```
fill pub_history with data ( roll through as many times as needed)
```
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select artifact_id, pub from pub_history_full
where pub_history_full.pub not like "%;%";
delete FROM pub_history_full where pub not like "%;%";
insert into cdli_db_tmp.pub_history
(artifact_id, pub)
select artifact_id,SUBSTRING_INDEX(pub_history_full.pub, ";", 1) from pub_history_full
where pub_history_full.pub like "%;%";
UPDATE pub_history_full SET pub= SUBSTR(REPLACE(pub,SUBSTRING_INDEX(pub,';',1),''),2);
```
set type for all null entries to collation
`update pub_history set type = "collation" where cdli_db_tmp.pub_history.type is null`
fill the publications table with pub data
`ALTER TABLE `
pub_history
` ADD `
pub_id
` INT NOT NULL AFTER `
type
`;`
```
insert into cdli_db.publications (designation)
select distinct pub from cdli_db_tmp.pub_history
update cdli_db_tmp.pub_history
join cdli_db.publications on cdli_db_tmp.pub_history.pub = cdli_db.publications.designation
set pub_id = cdli_db.publications.id where
cdli_db_tmp.pub_history.pub = cdli_db.publications.designation;
```
fill the atifacts_publications table with pub data
```
insert into cdli_db.artifacts_publications (artifact_id, publication_id, publication_type)
select artifact_id, publication_id, type from cdli_db_tmp.pub_history
UPDATE `publications` SET designation = bibtexkey WHERE designation is null;
```
# 4 Artifacts updates / update events & cat credits
check for missing new fields before running the query
...
...
...
...