ICU 77.1
messageformat2.h
Go to the documentation of this file.
1// © 2024 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#ifndef MESSAGEFORMAT2_H
7#define MESSAGEFORMAT2_H
8
9#if U_SHOW_CPLUSPLUS_API
10
11#if !UCONFIG_NO_NORMALIZATION
12
13#if !UCONFIG_NO_FORMATTING
14
15#if !UCONFIG_NO_MF2
16
21
23#include "unicode/messageformat2_data_model.h"
24#include "unicode/messageformat2_function_registry.h"
25#include "unicode/normalizer2.h"
26#include "unicode/unistr.h"
27
28#ifndef U_HIDE_DEPRECATED_API
29
30U_NAMESPACE_BEGIN
31
32namespace message2 {
33
34 class Environment;
35 class MessageContext;
36 class StaticErrors;
37 class InternalValue;
38
// Formatter for messages written in the draft MessageFormat 2.0 syntax.
// Instances are produced by MessageFormatter::Builder::build(); the default
// constructor and the copy assignment operator are deleted.
54 class U_I18N_API MessageFormatter : public UObject {
55 // Note: This class does not currently inherit from the existing
56 // `Format` class.
57 public:
// Move assignment operator: the source MessageFormatter is left in a
// valid but undefined state.
65 MessageFormatter& operator=(MessageFormatter&&) noexcept;
72 virtual ~MessageFormatter();
73
// Formats the message to a string, using the data model that was
// previously set or parsed.
88 UnicodeString formatToString(const MessageArguments& arguments, UErrorCode &status);
89
// Not yet implemented: `arguments` is ignored. If `status` indicates
// success it is set to U_UNSUPPORTED_ERROR; the returned FormattedMessage
// is constructed from `status`.
105 FormattedMessage format(const MessageArguments& arguments, UErrorCode &status) const {
106 (void) arguments;
107 if (U_SUCCESS(status)) {
108 status = U_UNSUPPORTED_ERROR;
109 }
110 return FormattedMessage(status);
111 }
112
// Accesses the locale that this MessageFormatter object was created with.
121 const Locale& getLocale() const { return locale; }
122
133
// Accesses the data model referred to by this MessageFormatter object.
143 const MFDataModel& getDataModel() const;
144
// Error handling behavior, used with Builder::setErrorHandlingBehavior().
// NOTE(review): the opening of this enum declaration and its enumerators
// are not present in this listing -- confirm against the header itself.
168 } UMFErrorHandlingBehavior;
169
// The mutable Builder class allows each part of the MessageFormatter to be
// initialized separately; build() then constructs the MessageFormatter.
// NOTE(review): only some of the Builder's public members appear in this
// listing -- confirm the full set against the header itself.
176 class U_I18N_API Builder : public UObject {
177 private:
178 friend class MessageFormatter;
179
180 // The pattern to be parsed to generate the formatted message
181 UnicodeString pattern;
182 bool hasPattern = false;
183 bool hasDataModel = false;
184 // The data model to be used to generate the formatted message
185 // Initialized either by `setDataModel()`, or by the parser
186 // through a call to `setPattern()`
187 MFDataModel dataModel;
188 // Normalized representation of the pattern;
189 // ignored if `setPattern()` wasn't called
190 UnicodeString normalizedInput;
191 // Errors (internal representation of parse errors)
192 // Ignored if `setPattern()` wasn't called
193 StaticErrors* errors;
194 Locale locale;
195 // Not owned
196 const MFFunctionRegistry* customMFFunctionRegistry;
197 // Error behavior; see comment in `MessageFormatter` class
198 bool signalErrors = false;
199
200 void clearState();
201 public:
// Sets the locale to use for formatting.
211 Builder& setLocale(const Locale& locale);
// Sets the pattern (contents of the message) and parses it into a data model.
227 Builder& setPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& status);
// Constructs a new MessageFormatter using the pattern or data model
// that was previously set.
296 MessageFormatter build(UErrorCode& status) const;
315 virtual ~Builder();
316 }; // class MessageFormatter::Builder
317
318 // TODO: Shouldn't be public; only used for testing
// Returns a string consisting of the input with optional spaces removed.
327 const UnicodeString& getNormalizedPattern() const { return normalizedInput; }
328
329 private:
330 friend class Builder;
331 friend class Checker;
332 friend class MessageArguments;
333 friend class MessageContext;
334
336
337 MessageFormatter() = delete; // default constructor not implemented
338
339 // Do not define default assignment operator
340 const MessageFormatter &operator=(const MessageFormatter &) = delete;
341
342 // Selection methods
343
344 // Takes a vector of FormattedPlaceholders
345 void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
346 // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
347 void filterVariants(const UVector&, UVector&, UErrorCode&) const;
348 // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
349 void sortVariants(const UVector&, UVector&, UErrorCode&) const;
350 // Takes a vector of strings (input) and a vector of strings (output)
351 void matchSelectorKeys(const UVector&, MessageContext&, InternalValue* rv, UVector&, UErrorCode&) const;
352 // Takes a vector of FormattedPlaceholders (input),
353 // and a vector of vectors of strings (output)
354 void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
355
356 // Formatting methods
357
358 // Used for normalizing variable names and keys for comparison
359 UnicodeString normalizeNFC(const UnicodeString&) const;
360 [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
361 void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
362 // Evaluates a function call
363 // Dispatches on argument type
364 [[nodiscard]] InternalValue* evalFunctionCall(FormattedPlaceholder&& argument,
365 MessageContext& context,
366 UErrorCode& status) const;
367 // Dispatches on function name
368 [[nodiscard]] InternalValue* evalFunctionCall(const FunctionName& functionName,
369 InternalValue* argument,
370 FunctionOptions&& options,
371 MessageContext& context,
372 UErrorCode& status) const;
373 // Formats an expression that appears in a pattern or as the definition of a local variable
// NOTE(review): the listing numbering skips from 374 to 376 here, so a
// parameter line appears to be missing -- confirm against the header itself.
374 [[nodiscard]] InternalValue* formatExpression(const Environment&,
376 MessageContext&,
377 UErrorCode&) const;
378 [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
379 [[nodiscard]] InternalValue* formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
380 [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
381 void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
382
383 // Function registry methods
384 bool hasCustomMFFunctionRegistry() const {
385 return (customMFFunctionRegistry != nullptr);
386 }
387
388 // Precondition: custom function registry exists
389 // Note: declared const and returns a const reference, even though the values
390 // in the MFFunctionRegistry are mutable (a FormatterFactory can have mutable state)
391 const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
392
393 bool isCustomFormatter(const FunctionName&) const;
394 FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
395 bool isBuiltInSelector(const FunctionName&) const;
396 bool isBuiltInFormatter(const FunctionName&) const;
397 bool isCustomSelector(const FunctionName&) const;
398 const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
399 bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
400 bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
401 const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;
402
403 Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
404 Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
405 bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
406
407 // Checking for resolution errors
408 void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
409 void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
410 void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
411 void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
412
413 void initErrors(UErrorCode&);
414 void clearErrors() const;
415 void cleanup() noexcept;
416
417 // The locale this MessageFormatter was created with
418 /* const */ Locale locale;
419
420 // Registry for built-in functions
421 MFFunctionRegistry standardMFFunctionRegistry;
422 // Registry for custom functions; may be null if no custom registry supplied
423 // Note: this is *not* owned by the MessageFormatter object
424 // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
425 // while also not requiring the function registry to be deeply-copyable. Making the
426 // function registry copyable would impose a requirement on any implementations
427 // of the FormatterFactory and SelectorFactory interfaces to implement a custom
428 // clone() method, which is necessary to avoid sharing between copies of the
429 // function registry (and thus double-frees)
430 // Not deeply immutable (the values in the function registry are mutable,
431 // as a FormatterFactory can have mutable state)
432 const MFFunctionRegistry* customMFFunctionRegistry;
433
434 // Data model, representing the parsed message
435 MFDataModel dataModel;
436
437 // Normalized version of the input string (optional whitespace removed)
438 UnicodeString normalizedInput;
439
440 // Errors -- only used while parsing and checking for data model errors; then
441 // the MessageContext keeps track of errors
442 // Must be a raw pointer to avoid including the internal header file
443 // defining StaticErrors
444 // Owned by `this`
445 StaticErrors* errors = nullptr;
446
447 // Error handling behavior.
448 // If true, then formatting methods set their UErrorCode arguments
449 // to signal MessageFormat errors, and no useful output is returned.
450 // If false, then MessageFormat errors are not signaled and the
451 // formatting methods return best-effort output.
452 // The default is false.
453 bool signalErrors = false;
454
455 // Used for implementing normalizeNFC()
456 const Normalizer2* nfcNormalizer = nullptr;
457
458 }; // class MessageFormatter
459
460} // namespace message2
461
462U_NAMESPACE_END
463
464#endif // U_HIDE_DEPRECATED_API
465
466#endif /* #if !UCONFIG_NO_MF2 */
467
468#endif /* #if !UCONFIG_NO_FORMATTING */
469
470#endif /* #if !UCONFIG_NO_NORMALIZATION */
471
472#endif /* U_SHOW_CPLUSPLUS_API */
473
474#endif // MESSAGEFORMAT2_H
475
476// eof
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:195
UObject is the common ICU "boilerplate" class.
Definition uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:296
Not yet implemented: The result of a message formatting operation.
A FormattedPlaceholder encapsulates an input value (a message2::Formattable) together with an optio...
Structure encapsulating named options passed to a custom selector or formatter.
The MFDataModel class describes a parsed representation of the text of a message.
Defines mappings from names of formatters and selectors to functions implementing them.
The MessageArguments class represents the named arguments to a message.
The mutable Builder class allows each part of the MessageFormatter to be initialized separately; call...
Builder & setFunctionRegistry(const MFFunctionRegistry &functionRegistry)
Sets a custom function registry.
MessageFormatter build(UErrorCode &status) const
Constructs a new immutable MessageFormatter using the pattern or data model that was previously set,...
Builder & setLocale(const Locale &locale)
Sets the locale to use for formatting.
Builder(UErrorCode &status)
Default constructor.
Builder & setErrorHandlingBehavior(UMFErrorHandlingBehavior type)
Set the error handling behavior for this formatter.
Builder & setPattern(const UnicodeString &pattern, UParseError &parseError, UErrorCode &status)
Sets the pattern (contents of the message) and parses it into a data model.
Builder & setDataModel(MFDataModel &&dataModel)
Sets a data model.
const Locale & getLocale() const
Accesses the locale that this MessageFormatter object was created with.
FormattedMessage format(const MessageArguments &arguments, UErrorCode &status) const
Not yet implemented; formats the message to a FormattedMessage object, using the data model that was ...
const UnicodeString & getNormalizedPattern() const
Returns a string consisting of the input with optional spaces removed.
UnicodeString getPattern() const
Serializes the data model as a string in MessageFormat 2.0 syntax.
MessageFormatter & operator=(MessageFormatter &&) noexcept
Move assignment operator: The source MessageFormatter will be left in a valid but undefined state.
UnicodeString formatToString(const MessageArguments &arguments, UErrorCode &status)
Formats the message to a string, using the data model that was previously set or parsed,...
UMFErrorHandlingBehavior
Used in conjunction with the MessageFormatter::Builder::setErrorHandlingBehavior() method.
@ U_MF_BEST_EFFORT
Suppress errors and return best-effort output.
@ U_MF_STRICT
Signal all MessageFormat errors using the UErrorCode argument.
const MFDataModel & getDataModel() const
Accesses the data model referred to by this MessageFormatter object.
The Expression class corresponds to the expression nonterminal in the MessageFormat 2 grammar and the...
The Literal class corresponds to the literal nonterminal in the MessageFormat 2 grammar,...
The Operand class corresponds to the operand nonterminal in the MessageFormat 2 grammar,...
A Pattern is a sequence of formattable parts.
C++ API: Formats messages using the draft MessageFormat 2.0.
C++ API: New API for Unicode Normalization.
A UParseError struct is used to return detailed information about parsing errors.
Definition parseerr.h:58
C++ API: Unicode String.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:430
@ U_UNSUPPORTED_ERROR
Requested operation not supported in current context.
Definition utypes.h:482
#define U_SUCCESS(x)
Does the error code indicate success?
Definition utypes.h:743
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition utypes.h:316