Commit 2d3e5bd1 authored by Dmitry Mozzherin
Browse files

Fix #76 Non ASCII apostrophe shows in canonical

parent 39fb1117
Pipeline #117725546 passed with stages
in 4 minutes and 18 seconds
......@@ -2,6 +2,8 @@
## Unreleased
- Fix [#75]: Non ASCII apostrophe does not show up in canonical.
## [v0.13.0]
- Add [#74]: Simple format output is now in CSV format.
......
This diff is collapsed.
......@@ -919,6 +919,7 @@ func (p *Engine) newWordNode(n *node32, wt WordType) *wordNode {
pos := Pos{Type: wt, Start: int(t.begin), End: int(t.end)}
wrd := wordNode{Value: val, NormValue: val, Pos: pos}
children := n.flatChildren()
var canApostrophe bool
for _, v := range children {
switch v.token32.pegRule {
case ruleAuthorEtAl:
......@@ -927,17 +928,20 @@ func (p *Engine) newWordNode(n *node32, wt WordType) *wordNode {
}
case ruleUpperCharExtended, ruleLowerCharExtended:
p.AddWarn(CharBadWarn)
wrd.normalize()
_ = wrd.normalize()
case ruleWordApostr:
p.AddWarn(CanonicalApostropheWarn)
wrd.normalize()
canApostrophe = true
_ = wrd.normalize()
case ruleWordStartsWithDigit:
p.AddWarn(SpeciesNumericWarn)
wrd.normalizeNums()
case ruleApostrOther:
p.AddWarn(ApostrOtherWarn)
nv, _ := str.ToASCII([]byte(wrd.Value), str.GlobalTransliterations)
wrd.NormValue = string(nv)
if !canApostrophe {
nv, _ := str.ToASCII([]byte(wrd.Value), str.GlobalTransliterations)
wrd.NormValue = string(nv)
}
}
}
if wt == GenusType || wt == UninomialType {
......
......@@ -198,7 +198,7 @@ Filius <- 'f.' / 'fil.' / 'filius'
AuthorSuffix <- 'bis'
AuthorPrefixGlued <- ('d' / 'O' / 'L') Apostrophe
AuthorPrefixGlued <- ('d' / 'O' / 'L' / 'Mc' / 'M') Apostrophe
AuthorPrefix <- AuthorPrefix1 / AuthorPrefix2
......
This diff is collapsed.
......@@ -355,6 +355,21 @@ Architectonica offlexa Iredale, 1931
{"parsed":true,"quality":1,"verbatim":"Architectonica offlexa Iredale, 1931","normalized":"Architectonica offlexa Iredale 1931","canonicalName":{"full":"Architectonica offlexa","simple":"Architectonica offlexa","stem":"Architectonica offlex"},"authorship":"Iredale 1931","details":[{"genus":{"value":"Architectonica"},"specificEpithet":{"value":"offlexa","authorship":{"value":"Iredale 1931","basionymAuthorship":{"authors":["Iredale"],"year":{"value":"1931"}}}}}],"positions":[["genus",0,14],["specificEpithet",15,22],["authorWord",23,30],["year",32,36]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"d8088d2a-6d20-5ef6-9ec8-68753e2e6da0","parserVersion":"test_version"}
d8088d2a-6d20-5ef6-9ec8-68753e2e6da0,"Architectonica offlexa Iredale, 1931",Architectonica offlexa,Architectonica offlexa,Architectonica offlex,Iredale 1931,1931,1
Maracanda amoena Mc'Lach
Maracanda amoena Mc'Lach
{"parsed":true,"quality":1,"verbatim":"Maracanda amoena Mc'Lach","normalized":"Maracanda amoena Mc'Lach","canonicalName":{"full":"Maracanda amoena","simple":"Maracanda amoena","stem":"Maracanda amoen"},"authorship":"Mc'Lach","details":[{"genus":{"value":"Maracanda"},"specificEpithet":{"value":"amoena","authorship":{"value":"Mc'Lach","basionymAuthorship":{"authors":["Mc'Lach"]}}}}],"positions":[["genus",0,9],["specificEpithet",10,16],["authorWord",17,24]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"b561edfc-29e8-5e8d-8849-60899356be0d","parserVersion":"test_version"}
b561edfc-29e8-5e8d-8849-60899356be0d,Maracanda amoena Mc'Lach,Maracanda amoena,Maracanda amoena,Maracanda amoen,Mc'Lach,,1
Maracanda amoena Mc’Lach
Maracanda amoena Mc’Lach
{"parsed":true,"quality":3,"qualityWarnings":[[3,"Not an ASCII apostrophe"]],"verbatim":"Maracanda amoena Mc’Lach","normalized":"Maracanda amoena Mc'Lach","canonicalName":{"full":"Maracanda amoena","simple":"Maracanda amoena","stem":"Maracanda amoen"},"authorship":"Mc'Lach","details":[{"genus":{"value":"Maracanda"},"specificEpithet":{"value":"amoena","authorship":{"value":"Mc'Lach","basionymAuthorship":{"authors":["Mc'Lach"]}}}}],"positions":[["genus",0,9],["specificEpithet",10,16],["authorWord",17,24]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"98ddd2f7-2f78-5970-adac-677273dc3caf","parserVersion":"test_version"}
98ddd2f7-2f78-5970-adac-677273dc3caf,Maracanda amoena Mc’Lach,Maracanda amoena,Maracanda amoena,Maracanda amoen,Mc'Lach,,3
Zanthopsis bispinosa M'Coy, 1849
Zanthopsis bispinosa M'Coy, 1849
{"parsed":true,"quality":1,"verbatim":"Zanthopsis bispinosa M'Coy, 1849","normalized":"Zanthopsis bispinosa M'Coy 1849","canonicalName":{"full":"Zanthopsis bispinosa","simple":"Zanthopsis bispinosa","stem":"Zanthopsis bispinos"},"authorship":"M'Coy 1849","details":[{"genus":{"value":"Zanthopsis"},"specificEpithet":{"value":"bispinosa","authorship":{"value":"M'Coy 1849","basionymAuthorship":{"authors":["M'Coy"],"year":{"value":"1849"}}}}}],"positions":[["genus",0,10],["specificEpithet",11,20],["authorWord",21,26],["year",28,32]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"88b58b88-d8fd-55d9-a9c4-ddd11459820e","parserVersion":"test_version"}
88b58b88-d8fd-55d9-a9c4-ddd11459820e,"Zanthopsis bispinosa M'Coy, 1849",Zanthopsis bispinosa,Zanthopsis bispinosa,Zanthopsis bispinos,M'Coy 1849,1849,1
Scilla rupestris v.d. Merwe
Scilla rupestris v.d. Merwe
{"parsed":true,"quality":1,"verbatim":"Scilla rupestris v.d. Merwe","normalized":"Scilla rupestris v.d. Merwe","canonicalName":{"full":"Scilla rupestris","simple":"Scilla rupestris","stem":"Scilla rupestr"},"authorship":"v.d. Merwe","details":[{"genus":{"value":"Scilla"},"specificEpithet":{"value":"rupestris","authorship":{"value":"v.d. Merwe","basionymAuthorship":{"authors":["v.d. Merwe"]}}}}],"positions":[["genus",0,6],["specificEpithet",7,16],["authorWord",17,21],["authorWord",22,27]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"72ec3a37-8a80-5a82-97dd-b6a67a52d209","parserVersion":"test_version"}
......@@ -1662,6 +1677,11 @@ Trophon d'orbignyi Carcelles, 1946
{"parsed":true,"quality":3,"qualityWarnings":[[3,"Apostrophe is not allowed in canonical"]],"verbatim":"Trophon d'orbignyi Carcelles, 1946","normalized":"Trophon dorbignyi Carcelles 1946","canonicalName":{"full":"Trophon dorbignyi","simple":"Trophon dorbignyi","stem":"Trophon dorbigny"},"authorship":"Carcelles 1946","details":[{"genus":{"value":"Trophon"},"specificEpithet":{"value":"dorbignyi","authorship":{"value":"Carcelles 1946","basionymAuthorship":{"authors":["Carcelles"],"year":{"value":"1946"}}}}}],"positions":[["genus",0,7],["specificEpithet",8,18],["authorWord",19,28],["year",30,34]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"935d4414-05d4-5c16-be30-466f6144b666","parserVersion":"test_version"}
935d4414-05d4-5c16-be30-466f6144b666,"Trophon d'orbignyi Carcelles, 1946",Trophon dorbignyi,Trophon dorbignyi,Trophon dorbigny,Carcelles 1946,1946,3
Phrynosoma m’callii
Phrynosoma m’callii
{"parsed":true,"quality":3,"qualityWarnings":[[3,"Apostrophe is not allowed in canonical"],[3,"Not an ASCII apostrophe"]],"verbatim":"Phrynosoma m’callii","normalized":"Phrynosoma mcallii","canonicalName":{"full":"Phrynosoma mcallii","simple":"Phrynosoma mcallii","stem":"Phrynosoma mcalli"},"details":[{"genus":{"value":"Phrynosoma"},"specificEpithet":{"value":"mcallii"}}],"positions":[["genus",0,10],["specificEpithet",11,19]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"7907df5c-50f2-532c-a8fe-e5b75f924f73","parserVersion":"test_version"}
7907df5c-50f2-532c-a8fe-e5b75f924f73,Phrynosoma m’callii,Phrynosoma mcallii,Phrynosoma mcallii,Phrynosoma mcalli,,,3
Arca m'coyi Tenison-Woods, 1878
Arca m'coyi Tenison-Woods, 1878
{"parsed":true,"quality":3,"qualityWarnings":[[3,"Apostrophe is not allowed in canonical"]],"verbatim":"Arca m'coyi Tenison-Woods, 1878","normalized":"Arca mcoyi Tenison-Woods 1878","canonicalName":{"full":"Arca mcoyi","simple":"Arca mcoyi","stem":"Arca mcoy"},"authorship":"Tenison-Woods 1878","details":[{"genus":{"value":"Arca"},"specificEpithet":{"value":"mcoyi","authorship":{"value":"Tenison-Woods 1878","basionymAuthorship":{"authors":["Tenison-Woods"],"year":{"value":"1878"}}}}}],"positions":[["genus",0,4],["specificEpithet",5,11],["authorWord",12,25],["year",27,31]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"fa855178-bdde-5ebf-b6b1-c1a1aa60bffa","parserVersion":"test_version"}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment