Commit e2bfd11e authored by George Nachman

Add an option for aggressive base character detection for issue 7788.

parent 2d006a9e
......@@ -21,6 +21,8 @@
// Zero-width spaces.
+ (instancetype)zeroWidthSpaceCharacterSetForUnicodeVersion:(NSInteger)version;
+ (instancetype)baseCharactersForUnicodeVersion:(NSInteger)version;
+ (NSCharacterSet *)urlCharacterSet;
+ (NSCharacterSet *)filenameCharacterSet;
......
This diff is collapsed.
......@@ -1526,8 +1526,25 @@ static TECObjectRef CreateTECConverterForUTF8Variants(TextEncodingVariant varian
static dispatch_once_t onceToken;
static NSCharacterSet *exceptions;
dispatch_once(&onceToken, ^{
// These characters are forced to be base characters.
exceptions = [[NSCharacterSet characterSetWithCharactersInString:@"\uff9e\uff9f"] retain];
// These characters are forced to be base characters. Apple's function
// is a bit overzealous in its definition of composed characters. For
// example, it treats U+0B95 U+0BCD U+0B95 U+0BC1 as a single composed
// character. In issue 7788 we see this violates user expectations:
// since U+0B95 is a base character, that grouping doesn't make sense.
// However Apple has decided to define a grapheme cluster, it doesn't
// match what we actually want, which is to segment on base characters.
// We can't just split on every base character, though, because
// combining marks can be picky about which preceding characters they'll
// combine with. For example, skin tone modifiers don't combine with all
// emoji. Apple's function does pick those out properly, so we use it as
// a starting point and then segment further where we're sure it's safe
// to do so.
// This also came up in issue 6048 for U+FF9E and U+FF9F.
if ([iTermAdvancedSettingsModel aggressiveBaseCharacterDetection]) {
exceptions = [NSCharacterSet baseCharactersForUnicodeVersion:12];
} else {
exceptions = [[NSCharacterSet characterSetWithCharactersInString:@"\uff9e\uff9f"] retain];
}
});
CFIndex index = 0;
NSInteger minimumLocation = 0;
......@@ -1545,14 +1562,10 @@ static TECObjectRef CreateTECConverterForUTF8Variants(TextEncodingVariant varian
}
range = NSMakeRange(tempRange.location, tempRange.length);
if (range.length > 0) {
// CFStringGetRangeOfComposedCharactersAtIndex thinks that U+FF9E and U+FF9F are
// combining marks. Terminal.app and the person in issue 6048 disagree. Prevent them
// from combining.
NSRange rangeOfFirstException = [self rangeOfCharacterFromSet:exceptions
options:NSLiteralSearch
range:range];
if (rangeOfFirstException.location != NSNotFound &&
rangeOfFirstException.location > range.location) {
range:NSMakeRange(range.location + 1, range.length - 1)];
if (rangeOfFirstException.location != NSNotFound) {
range.length = rangeOfFirstException.location - range.location;
minimumLocation = NSMaxRange(range);
}
......
......@@ -36,6 +36,7 @@ extern NSString *const iTermAdvancedSettingsDidChange;
+ (double)activeUpdateCadence;
+ (int)adaptiveFrameRateThroughputThreshold;
+ (BOOL)addNewTabAtEndOfTabs;
+ (BOOL)aggressiveBaseCharacterDetection;
+ (BOOL)aggressiveFocusFollowsMouse;
+ (BOOL)alertsIndicateShortcuts;
+ (BOOL)allowDragOfTabIntoNewWindow;
......
......@@ -517,6 +517,7 @@ DEFINE_BOOL(shouldSetLCTerminal, YES, SECTION_EXPERIMENTAL @"Set LC_TERMINAL=iTe
DEFINE_BOOL(clearBellIconAggressively, YES, SECTION_EXPERIMENTAL @"Clear bell icon when a session becomes active.\nWhen off, you must type in the session to clear the bell icon.");
DEFINE_BOOL(workAroundNumericKeypadBug, YES, SECTION_EXPERIMENTAL @"Treat the equals sign on the numeric keypad as a key on the numeric keypad.\nFor mysterious reasons, macOS does not treat this key as belonging to the numeric keypad. Enable this setting to work around the bug.");
DEFINE_BOOL(tmuxVariableWindowSizesSupported, NO, SECTION_EXPERIMENTAL @"Allow variable window sizes in tmux integration");
DEFINE_BOOL(aggressiveBaseCharacterDetection, NO, SECTION_EXPERIMENTAL @"Detect base unicode characters with lookup table.\nApple's algorithm for segmenting composed characters makes bad choices, such as for Tamil. Enable this to reduce text overlapping.");
#pragma mark - Scripting
#define SECTION_SCRIPTING @"Scripting: "
......
தமிழ் பெயர்.txt
திருக்குறள் தமிழ் உரை.txt
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment