Commit a74e259c authored by Dmitry Mozzherin
Browse files

Close #93 parse 'y' as author separator

Sometimes 'y' is used instead of '&' as an author separator.
We parse it with a warning of level 2.
parent 8a01b701
Pipeline #230192036 passed with stages
in 3 minutes and 33 seconds
......@@ -2,11 +2,18 @@
## Unreleased
## [v.0.14.2]
## [v.0.14.4]
- Add [#96]: Do not parse names starting with "Candidatus".
- Add [#93]: Parse 'y' (Spanish '&') as an author separator.
## [v.0.14.3]
- Add [#95]: Remove make dependency on gRPC tooling.
- Add [#94]: Do not parse names with "bacterium" epithet.
## [v.0.14.2]
- Add [#90]: Allow `ß` in names.
- Add [#89]: Support `subspec.` as a rank.
- Add [#82]: Support authors with prefix `zu`.
......@@ -140,6 +147,7 @@ array of names instead of a stream.
This document follows [changelog guidelines]
[v0.14.4]: https://gitlab.com/gogna/gnparser/compare/v0.14.3...v0.14.4
[v0.14.3]: https://gitlab.com/gogna/gnparser/compare/v0.14.2...v0.14.3
[v0.14.2]: https://gitlab.com/gogna/gnparser/compare/v0.14.1...v0.14.2
[v0.14.1]: https://gitlab.com/gogna/gnparser/compare/v0.14.0...v0.14.1
......
......@@ -106,6 +106,8 @@ func (p *Engine) newNode(t token32) (*node32, bool) {
p.AddWarn(GenusUpperCharAfterDash)
case ruleLowerGreek:
p.AddWarn(GreekLetterInRank)
case ruleAuthorSepSpanish:
p.AddWarn(SpanishAndAsSeparator)
}
if _, ok := nodeRules[t.pegRule]; ok {
node := &node32{token32: t}
......
......@@ -163,10 +163,12 @@ AuthorsTeam <- Author (AuthorSep Author)* (_? ','? _? Year)?
AuthorSep <- AuthorSep1 / AuthorSep2
AuthorSep1 <- _? (',' _)? ( '&' / 'et' / 'and' / 'apud') _?
AuthorSep1 <- _? (',' _)? ( '&' / AuthorSepSpanish / 'et' / 'and' / 'apud') _?
AuthorSep2 <- _? ',' _?
AuthorSepSpanish <- _? 'y' _?
AuthorEx <- ('ex' '.'? / 'in') _
AuthorEmend <- 'emend' '.'? _
......
This diff is collapsed.
......@@ -35,6 +35,7 @@ const (
RankUncommonWarn
SpaceMultipleWarn
SpaceNonStandardWarn
SpanishAndAsSeparator
SpeciesNumericWarn
SuperSpeciesWarn
UTF8ConvBadWarn
......
......@@ -13,183 +13,187 @@ type Warning struct {
}
var warningMap = map[grm.Warning]Warning{
grm.TailWarn: Warning{
grm.TailWarn: {
Quality: 3,
Message: "Unparsed tail",
},
grm.ApostrOtherWarn: Warning{
grm.ApostrOtherWarn: {
Quality: 3,
Message: "Not an ASCII apostrophe",
},
grm.AuthAmbiguousFiliusWarn: Warning{
grm.AuthAmbiguousFiliusWarn: {
Quality: 2,
Message: "Ambiguous f. (filius or forma)",
},
grm.AuthDoubleParensWarn: Warning{
grm.AuthDoubleParensWarn: {
Quality: 3,
Message: "Authorship in double parentheses",
},
grm.AuthExWarn: Warning{
grm.AuthExWarn: {
Quality: 2,
Message: "Ex authors are not required",
},
grm.AuthExWithDotWarn: Warning{
grm.AuthExWithDotWarn: {
Quality: 3,
Message: "`ex` ends with dot",
},
grm.AuthEmendWarn: Warning{
grm.AuthEmendWarn: {
Quality: 2,
Message: "Emend authors are not required",
},
grm.AuthEmendWithoutDotWarn: Warning{
grm.AuthEmendWithoutDotWarn: {
Quality: 3,
Message: "`emend` without a period",
},
grm.AuthMissingOneParensWarn: Warning{
grm.AuthMissingOneParensWarn: {
Quality: 3,
Message: "Authorship is missing one parenthesis",
},
grm.AuthQuestionWarn: Warning{
grm.AuthQuestionWarn: {
Quality: 3,
Message: "Author as a question mark",
},
grm.AuthShortWarn: Warning{
grm.AuthShortWarn: {
Quality: 3,
Message: "Author is too short",
},
grm.AuthUnknownWarn: Warning{
grm.AuthUnknownWarn: {
Quality: 2,
Message: "Author is unknown",
},
grm.AuthUpperCaseWarn: Warning{
grm.AuthUpperCaseWarn: {
Quality: 2,
Message: "Author in upper case",
},
grm.BacteriaMaybeWarn: Warning{
grm.BacteriaMaybeWarn: {
Quality: 1,
Message: "The genus is a homonym of a bacterial genus",
},
grm.BotanyAuthorNotSubgenWarn: Warning{
grm.BotanyAuthorNotSubgenWarn: {
Quality: 2,
Message: "Possible ICN author instead of subgenus",
},
grm.CanonicalApostropheWarn: Warning{
grm.CanonicalApostropheWarn: {
Quality: 3,
Message: "Apostrophe is not allowed in canonical",
},
grm.CapWordQuestionWarn: Warning{
grm.CapWordQuestionWarn: {
Quality: 3,
Message: "Uninomial word with question mark",
},
grm.CharBadWarn: Warning{
grm.CharBadWarn: {
Quality: 2,
Message: "Non-standard characters in canonical",
},
grm.GenusAbbrWarn: Warning{
grm.GenusAbbrWarn: {
Quality: 3,
Message: "Abbreviated uninomial word",
},
grm.GenusUpperCharAfterDash: Warning{
grm.GenusUpperCharAfterDash: {
Quality: 2,
Message: "Apparent genus with capital character after hyphen",
},
grm.GreekLetterInRank: Warning{
grm.GreekLetterInRank: {
Quality: 2,
Message: "Deprecated Greek letter enumeration in rank",
},
grm.HTMLTagsEntitiesWarn: Warning{
grm.HTMLTagsEntitiesWarn: {
Quality: 3,
Message: "HTML tags or entities in the name",
},
grm.HybridCharNoSpaceWarn: Warning{
grm.HybridCharNoSpaceWarn: {
Quality: 3,
Message: "Hybrid char not separated by space",
},
grm.HybridFormulaWarn: Warning{
grm.HybridFormulaWarn: {
Quality: 2,
Message: "Hybrid formula",
},
grm.HybridFormulaIncompleteWarn: Warning{
grm.HybridFormulaIncompleteWarn: {
Quality: 3,
Message: "Incomplete hybrid formula",
},
grm.HybridFormulaProbIncompleteWarn: Warning{
grm.HybridFormulaProbIncompleteWarn: {
Quality: 2,
Message: "Probably incomplete hybrid formula",
},
grm.HybridNamedWarn: Warning{
grm.HybridNamedWarn: {
Quality: 2,
Message: "Named hybrid",
},
grm.NameApproxWarn: Warning{
grm.NameApproxWarn: {
Quality: 3,
Message: "Name is approximate",
},
grm.NameComparisonWarn: Warning{
grm.NameComparisonWarn: {
Quality: 3,
Message: "Name comparison",
},
grm.RankUncommonWarn: Warning{
grm.RankUncommonWarn: {
Quality: 3,
Message: "Uncommon rank",
},
grm.SpaceMultipleWarn: Warning{
grm.SpaceMultipleWarn: {
Quality: 2,
Message: "Multiple adjacent space characters",
},
grm.SpaceNonStandardWarn: Warning{
grm.SpaceNonStandardWarn: {
Quality: 3,
Message: "Non-standard space characters",
},
grm.SpeciesNumericWarn: Warning{
grm.SpanishAndAsSeparator: {
Quality: 2,
Message: "Spanish 'y' is used instead of '&'",
},
grm.SpeciesNumericWarn: {
Quality: 3,
Message: "Numeric prefix",
},
grm.SuperSpeciesWarn: Warning{
grm.SuperSpeciesWarn: {
Quality: 2,
Message: "Ambiguity: subgenus or superspecies found",
},
grm.UTF8ConvBadWarn: Warning{
grm.UTF8ConvBadWarn: {
Quality: 3,
Message: "Incorrect conversion to UTF-8",
},
grm.UninomialComboWarn: Warning{
grm.UninomialComboWarn: {
Quality: 2,
Message: "Combination of two uninomials",
},
grm.WhiteSpaceTrailWarn: Warning{
grm.WhiteSpaceTrailWarn: {
Quality: 2,
Message: "Trailing whitespace",
},
grm.YearCharWarn: Warning{
grm.YearCharWarn: {
Quality: 2,
Message: "Year with latin character",
},
grm.YearDotWarn: Warning{
grm.YearDotWarn: {
Quality: 2,
Message: "Year with period",
},
grm.YearOrigMisplacedWarn: Warning{
grm.YearOrigMisplacedWarn: {
Quality: 2,
Message: "Misplaced basionym year",
},
grm.YearPageWarn: Warning{
grm.YearPageWarn: {
Quality: 3,
Message: "Year with page info",
},
grm.YearParensWarn: Warning{
grm.YearParensWarn: {
Quality: 2,
Message: "Year with parentheses",
},
grm.YearQuestionWarn: Warning{
grm.YearQuestionWarn: {
Quality: 2,
Message: "Year with question mark",
},
grm.YearRangeWarn: Warning{
grm.YearRangeWarn: {
Quality: 3,
Message: "Years range",
},
grm.YearSqBraketsWarn: Warning{
grm.YearSqBraketsWarn: {
Quality: 3,
Message: "Year with square brakets",
},
......
......@@ -2498,6 +2498,23 @@ Acanthochiton
00392ae2-1bd9-5a14-bea9-9d26f1107892,Acanthochiton ex quisitus,1,Acanthochiton,Acanthochiton,Acanthochiton,,,3
#>
#SECTION names with Spanish 'y' instead of '&'
Caloptenopsis crassiusculus (Martínez y Fernández-Castillo, 1896)
Caloptenopsis crassiusculus (Martínez y Fernández-Castillo, 1896)
{"parsed":true,"quality":2,"qualityWarnings":[[2,"Spanish 'y' is used instead of '\u0026'"]],"verbatim":"Caloptenopsis crassiusculus (Martínez y Fernández-Castillo, 1896)","normalized":"Caloptenopsis crassiusculus (Martínez \u0026 Fernández-Castillo 1896)","cardinality":2,"canonicalName":{"full":"Caloptenopsis crassiusculus","simple":"Caloptenopsis crassiusculus","stem":"Caloptenopsis crassiuscul"},"authorship":"(Martínez \u0026 Fernández-Castillo 1896)","details":[{"genus":{"value":"Caloptenopsis"},"specificEpithet":{"value":"crassiusculus","authorship":{"value":"(Martínez \u0026 Fernández-Castillo 1896)","basionymAuthorship":{"authors":["Martínez","Fernández-Castillo"],"year":{"value":"1896"}}}}}],"positions":[["genus",0,13],["specificEpithet",14,27],["authorWord",29,37],["authorWord",40,58],["year",60,64]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"0080ce8d-aba5-512d-8e33-8ee3914e386a","parserVersion":"test_version"}
0080ce8d-aba5-512d-8e33-8ee3914e386a,"Caloptenopsis crassiusculus (Martínez y Fernández-Castillo, 1896)",2,Caloptenopsis crassiusculus,Caloptenopsis crassiusculus,Caloptenopsis crassiuscul,(Martínez & Fernández-Castillo 1896),1896,2
Dicranum saxatile Lagasca y Segura, García & Clemente y Rubio, 1802
Dicranum saxatile Lagasca y Segura, García & Clemente y Rubio, 1802
{"parsed":true,"quality":2,"qualityWarnings":[[2,"Spanish 'y' is used instead of '\u0026'"]],"verbatim":"Dicranum saxatile Lagasca y Segura, García \u0026 Clemente y Rubio, 1802","normalized":"Dicranum saxatile Lagasca, Segura, García, Clemente \u0026 Rubio 1802","cardinality":2,"canonicalName":{"full":"Dicranum saxatile","simple":"Dicranum saxatile","stem":"Dicranum saxatil"},"authorship":"Lagasca, Segura, García, Clemente \u0026 Rubio 1802","details":[{"genus":{"value":"Dicranum"},"specificEpithet":{"value":"saxatile","authorship":{"value":"Lagasca, Segura, García, Clemente \u0026 Rubio 1802","basionymAuthorship":{"authors":["Lagasca","Segura","García","Clemente","Rubio"],"year":{"value":"1802"}}}}}],"positions":[["genus",0,8],["specificEpithet",9,17],["authorWord",18,25],["authorWord",28,34],["authorWord",36,42],["authorWord",45,53],["authorWord",56,61],["year",63,67]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"39054306-2722-5119-a040-f8671b5b31a0","parserVersion":"test_version"}
39054306-2722-5119-a040-f8671b5b31a0,"Dicranum saxatile Lagasca y Segura, García & Clemente y Rubio, 1802",2,Dicranum saxatile,Dicranum saxatile,Dicranum saxatil,"Lagasca, Segura, García, Clemente & Rubio 1802",1802,2
Carabus (Tanaocarabus) hendrichsi Bolvar y Pieltain, Rotger & Coronado 1967
Carabus (Tanaocarabus) hendrichsi Bolvar y Pieltain, Rotger & Coronado 1967
{"parsed":true,"quality":2,"qualityWarnings":[[2,"Spanish 'y' is used instead of '\u0026'"]],"verbatim":"Carabus (Tanaocarabus) hendrichsi Bolvar y Pieltain, Rotger \u0026 Coronado 1967","normalized":"Carabus (Tanaocarabus) hendrichsi Bolvar, Pieltain, Rotger \u0026 Coronado 1967","cardinality":2,"canonicalName":{"full":"Carabus hendrichsi","simple":"Carabus hendrichsi","stem":"Carabus hendrichs"},"authorship":"Bolvar, Pieltain, Rotger \u0026 Coronado 1967","details":[{"genus":{"value":"Carabus"},"specificEpithet":{"value":"hendrichsi","authorship":{"value":"Bolvar, Pieltain, Rotger \u0026 Coronado 1967","basionymAuthorship":{"authors":["Bolvar","Pieltain","Rotger","Coronado"],"year":{"value":"1967"}}}},"infragenericEpithet":{"value":"Tanaocarabus"}}],"positions":[["genus",0,7],["infragenericEpithet",9,21],["specificEpithet",23,33],["authorWord",34,40],["authorWord",43,51],["authorWord",53,59],["authorWord",62,70],["year",71,75]],"surrogate":false,"virus":false,"hybrid":false,"bacteria":false,"nameStringId":"519c0687-2303-5b8c-a69f-68e2bd055b5e","parserVersion":"test_version"}
519c0687-2303-5b8c-a69f-68e2bd055b5e,"Carabus (Tanaocarabus) hendrichsi Bolvar y Pieltain, Rotger & Coronado 1967",2,Carabus hendrichsi,Carabus hendrichsi,Carabus hendrichs,"Bolvar, Pieltain, Rotger & Coronado 1967",1967,2
#>
### Unparseable Tail
#SECTION: Names with unparsed_tail at the end<
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment