2025-09-07 22:09:54 +02:00
parent e1b817252c
commit 2fc0d000b6
7796 changed files with 2159515 additions and 933 deletions

babel/messages/__init__.py

@@ -0,0 +1,21 @@
"""
babel.messages
~~~~~~~~~~~~~~
Support for ``gettext`` message catalogs.
:copyright: (c) 2013-2025 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from babel.messages.catalog import (
Catalog,
Message,
TranslationError,
)
__all__ = [
"Catalog",
"Message",
"TranslationError",
]

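A minimal usage sketch of the re-exported public API (the locale and strings are example values):

# Sketch: the package-level names re-exported above.
from babel.messages import Catalog, Message

catalog = Catalog(locale='de_DE')
catalog.add('Hello, world!', 'Hallo, Welt!')
assert isinstance(catalog.get('Hello, world!'), Message)
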
babel/messages/_compat.py

@@ -0,0 +1,34 @@
import sys
from functools import partial
def find_entrypoints(group_name: str):
"""
Find entrypoints of a given group using either `importlib.metadata` or the
older `pkg_resources` mechanism.
Yields tuples of the entrypoint name and a callable function that will
load the actual entrypoint.
"""
if sys.version_info >= (3, 10):
# "Changed in version 3.10: importlib.metadata is no longer provisional."
try:
from importlib.metadata import entry_points
except ImportError:
pass
else:
eps = entry_points(group=group_name)
# Only do this if this implementation of `importlib.metadata` is
# modern enough to not return a dict.
if not isinstance(eps, dict):
for entry_point in eps:
yield (entry_point.name, entry_point.load)
return
try:
from pkg_resources import working_set
except ImportError:
pass
else:
for entry_point in working_set.iter_entry_points(group_name):
yield (entry_point.name, partial(entry_point.load, require=True))

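A minimal sketch of consuming `find_entrypoints`; the group name 'babel.checkers' is the one used by the checkers module below:

from babel.messages._compat import find_entrypoints

# Entry points are resolved lazily: calling `load` imports the target.
for name, load in find_entrypoints('babel.checkers'):
    checker = load()
    print(name, checker)
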
File diff suppressed because it is too large

babel/messages/checkers.py

@@ -0,0 +1,168 @@
"""
babel.messages.checkers
~~~~~~~~~~~~~~~~~~~~~~~
Various routines that help with validation of translations.
:since: version 0.9
:copyright: (c) 2013-2025 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from __future__ import annotations
from collections.abc import Callable
from babel.messages.catalog import PYTHON_FORMAT, Catalog, Message, TranslationError
#: list of format chars that are compatible with each other
_string_format_compatibilities = [
{'i', 'd', 'u'},
{'x', 'X'},
{'f', 'F', 'g', 'G'},
]
def num_plurals(catalog: Catalog | None, message: Message) -> None:
"""Verify the number of plurals in the translation."""
if not message.pluralizable:
if not isinstance(message.string, str):
raise TranslationError("Found plural forms for non-pluralizable "
"message")
return
# skip further tests if no catalog is provided.
elif catalog is None:
return
msgstrs = message.string
if not isinstance(msgstrs, (list, tuple)):
msgstrs = (msgstrs,)
if len(msgstrs) != catalog.num_plurals:
raise TranslationError("Wrong number of plural forms (expected %d)" %
catalog.num_plurals)
def python_format(catalog: Catalog | None, message: Message) -> None:
"""Verify the format string placeholders in the translation."""
if 'python-format' not in message.flags:
return
msgids = message.id
if not isinstance(msgids, (list, tuple)):
msgids = (msgids,)
msgstrs = message.string
if not isinstance(msgstrs, (list, tuple)):
msgstrs = (msgstrs,)
for msgid, msgstr in zip(msgids, msgstrs):
if msgstr:
_validate_format(msgid, msgstr)
def _validate_format(format: str, alternative: str) -> None:
"""Test format string `alternative` against `format`. `format` can be the
msgid of a message and `alternative` one of the `msgstr`\\s. The two
arguments are not interchangeable as `alternative` may contain less
placeholders if `format` uses named placeholders.
If the string formatting of `alternative` is compatible to `format` the
function returns `None`, otherwise a `TranslationError` is raised.
Examples for compatible format strings:
>>> _validate_format('Hello %s!', 'Hallo %s!')
>>> _validate_format('Hello %i!', 'Hallo %d!')
    Example of incompatible format strings:
>>> _validate_format('Hello %(name)s!', 'Hallo %s!')
Traceback (most recent call last):
...
TranslationError: the format strings are of different kinds
This function is used by the `python_format` checker.
:param format: The original format string
:param alternative: The alternative format string that should be checked
against format
:raises TranslationError: on formatting errors
"""
def _parse(string: str) -> list[tuple[str, str]]:
result: list[tuple[str, str]] = []
for match in PYTHON_FORMAT.finditer(string):
name, format, typechar = match.groups()
if typechar == '%' and name is None:
continue
result.append((name, str(typechar)))
return result
def _compatible(a: str, b: str) -> bool:
if a == b:
return True
        for compat_set in _string_format_compatibilities:
            if a in compat_set and b in compat_set:
                return True
return False
def _check_positional(results: list[tuple[str, str]]) -> bool:
positional = None
for name, _char in results:
if positional is None:
positional = name is None
else:
if (name is None) != positional:
raise TranslationError('format string mixes positional '
'and named placeholders')
return bool(positional)
a, b = map(_parse, (format, alternative))
if not a:
return
# now check if both strings are positional or named
a_positional, b_positional = map(_check_positional, (a, b))
if a_positional and not b_positional and not b:
raise TranslationError('placeholders are incompatible')
elif a_positional != b_positional:
raise TranslationError('the format strings are of different kinds')
# if we are operating on positional strings both must have the
# same number of format chars and those must be compatible
if a_positional:
if len(a) != len(b):
raise TranslationError('positional format placeholders are '
'unbalanced')
for idx, ((_, first), (_, second)) in enumerate(zip(a, b)):
if not _compatible(first, second):
raise TranslationError('incompatible format for placeholder '
'%d: %r and %r are not compatible' %
(idx + 1, first, second))
# otherwise the second string must not have names the first one
# doesn't have and the types of those included must be compatible
else:
type_map = dict(a)
for name, typechar in b:
if name not in type_map:
raise TranslationError(f'unknown named placeholder {name!r}')
elif not _compatible(typechar, type_map[name]):
raise TranslationError(
f'incompatible format for placeholder {name!r}: '
f'{typechar!r} and {type_map[name]!r} are not compatible',
)
def _find_checkers() -> list[Callable[[Catalog | None, Message], object]]:
from babel.messages._compat import find_entrypoints
checkers: list[Callable[[Catalog | None, Message], object]] = []
checkers.extend(load() for (name, load) in find_entrypoints('babel.checkers'))
if len(checkers) == 0:
# if entrypoints are not available or no usable egg-info was found
# (see #230), just resort to hard-coded checkers
return [num_plurals, python_format]
return checkers
checkers: list[Callable[[Catalog | None, Message], object]] = _find_checkers()

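A minimal sketch of running the resolved checkers against a single message; any failure surfaces as a `TranslationError`:

from babel.messages.catalog import Catalog, Message, TranslationError
from babel.messages.checkers import checkers

catalog = Catalog(locale='de_DE')
message = Message('Hello %s!', 'Hallo %s!', flags=['python-format'])
for check in checkers:
    try:
        check(catalog, message)
    except TranslationError as exc:
        print(f'check failed: {exc}')
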
babel/messages/extract.py

@@ -0,0 +1,852 @@
"""
babel.messages.extract
~~~~~~~~~~~~~~~~~~~~~~
Basic infrastructure for extracting localizable messages from source files.
This module defines an extensible system for collecting localizable message
strings from a variety of sources. A native extractor for Python source
files is built in; extractors for other sources can be added using very
simple plugins.
The main entry points into the extraction functionality are the functions
`extract_from_dir` and `extract_from_file`.
:copyright: (c) 2013-2025 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from __future__ import annotations
import ast
import io
import os
import sys
import tokenize
from collections.abc import (
Callable,
Collection,
Generator,
Iterable,
Mapping,
MutableSequence,
)
from functools import lru_cache
from os.path import relpath
from textwrap import dedent
from tokenize import COMMENT, NAME, NL, OP, STRING, generate_tokens
from typing import TYPE_CHECKING, Any, TypedDict
from babel.messages._compat import find_entrypoints
from babel.util import parse_encoding, parse_future_flags, pathmatch
if TYPE_CHECKING:
from typing import IO, Final, Protocol
from _typeshed import SupportsItems, SupportsRead, SupportsReadline
from typing_extensions import TypeAlias
class _PyOptions(TypedDict, total=False):
encoding: str
class _JSOptions(TypedDict, total=False):
encoding: str
jsx: bool
template_string: bool
parse_template_string: bool
class _FileObj(SupportsRead[bytes], SupportsReadline[bytes], Protocol):
def seek(self, __offset: int, __whence: int = ...) -> int: ...
def tell(self) -> int: ...
_SimpleKeyword: TypeAlias = tuple[int | tuple[int, int] | tuple[int, str], ...] | None
_Keyword: TypeAlias = dict[int | None, _SimpleKeyword] | _SimpleKeyword
# 5-tuple of (filename, lineno, messages, comments, context)
_FileExtractionResult: TypeAlias = tuple[str, int, str | tuple[str, ...], list[str], str | None]
# 4-tuple of (lineno, message, comments, context)
_ExtractionResult: TypeAlias = tuple[int, str | tuple[str, ...], list[str], str | None]
# Required arguments: fileobj, keywords, comment_tags, options
# Return value: Iterable of (lineno, message, comments, context)
_CallableExtractionMethod: TypeAlias = Callable[
[_FileObj | IO[bytes], Mapping[str, _Keyword], Collection[str], Mapping[str, Any]],
Iterable[_ExtractionResult],
]
_ExtractionMethod: TypeAlias = _CallableExtractionMethod | str
GROUP_NAME: Final[str] = 'babel.extractors'
DEFAULT_KEYWORDS: dict[str, _Keyword] = {
'_': None,
'gettext': None,
'ngettext': (1, 2),
'ugettext': None,
'ungettext': (1, 2),
'dgettext': (2,),
'dngettext': (2, 3),
'N_': None,
'pgettext': ((1, 'c'), 2),
'npgettext': ((1, 'c'), 2, 3),
}
DEFAULT_MAPPING: list[tuple[str, str]] = [('**.py', 'python')]
# New tokens in Python 3.12, or None on older versions
FSTRING_START = getattr(tokenize, "FSTRING_START", None)
FSTRING_MIDDLE = getattr(tokenize, "FSTRING_MIDDLE", None)
FSTRING_END = getattr(tokenize, "FSTRING_END", None)
def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]):
"""Helper function for `extract` that strips comment tags from strings
in a list of comment lines. This functions operates in-place.
"""
def _strip(line: str):
for tag in tags:
if line.startswith(tag):
return line[len(tag):].strip()
return line
comments[:] = map(_strip, comments)
def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
subdir = os.path.basename(dirpath)
# Legacy default behavior: ignore dot and underscore directories
return not (subdir.startswith('.') or subdir.startswith('_'))
def extract_from_dir(
dirname: str | os.PathLike[str] | None = None,
method_map: Iterable[tuple[str, str]] = DEFAULT_MAPPING,
options_map: SupportsItems[str, dict[str, Any]] | None = None,
keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS,
comment_tags: Collection[str] = (),
callback: Callable[[str, str, dict[str, Any]], object] | None = None,
strip_comment_tags: bool = False,
directory_filter: Callable[[str], bool] | None = None,
) -> Generator[_FileExtractionResult, None, None]:
"""Extract messages from any source files found in the given directory.
This function generates tuples of the form ``(filename, lineno, message,
comments, context)``.
Which extraction method is used per file is determined by the `method_map`
parameter, which maps extended glob patterns to extraction method names.
For example, the following is the default mapping:
>>> method_map = [
... ('**.py', 'python')
... ]
This basically says that files with the filename extension ".py" at any
level inside the directory should be processed by the "python" extraction
method. Files that don't match any of the mapping patterns are ignored. See
the documentation of the `pathmatch` function for details on the pattern
syntax.
    The following extended mapping would also use the "genshi" extraction
    method on any file in a "templates" subdirectory:
>>> method_map = [
... ('**/templates/**.*', 'genshi'),
... ('**.py', 'python')
... ]
The dictionary provided by the optional `options_map` parameter augments
these mappings. It uses extended glob patterns as keys, and the values are
dictionaries mapping options names to option values (both strings).
The glob patterns of the `options_map` do not necessarily need to be the
same as those used in the method mapping. For example, while all files in
    the ``templates`` folders in an application may be Genshi templates, the
options for those files may differ based on extension:
>>> options_map = {
... '**/templates/**.txt': {
... 'template_class': 'genshi.template:TextTemplate',
... 'encoding': 'latin-1'
... },
... '**/templates/**.html': {
... 'include_attrs': ''
... }
... }
:param dirname: the path to the directory to extract messages from. If
not given the current working directory is used.
    :param method_map: a list of ``(pattern, method)`` tuples that maps
                       extended glob patterns to extraction method names
:param options_map: a dictionary of additional options (optional)
:param keywords: a dictionary mapping keywords (i.e. names of functions
that should be recognized as translation functions) to
tuples that specify which of their arguments contain
localizable strings
:param comment_tags: a list of tags of translator comments to search for
and include in the results
    :param callback: a function that is called for every file that messages
                     are extracted from, just before the extraction itself is
                     performed; the function is passed the filename, the name
                     of the extraction method and the options dictionary as
                     positional arguments, in that order
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:param directory_filter: a callback to determine whether a directory should
be recursed into. Receives the full directory path;
should return True if the directory is valid.
:see: `pathmatch`
"""
if dirname is None:
dirname = os.getcwd()
if options_map is None:
options_map = {}
if directory_filter is None:
directory_filter = default_directory_filter
absname = os.path.abspath(dirname)
for root, dirnames, filenames in os.walk(absname):
dirnames[:] = [
subdir for subdir in dirnames
if directory_filter(os.path.join(root, subdir))
]
dirnames.sort()
filenames.sort()
for filename in filenames:
filepath = os.path.join(root, filename).replace(os.sep, '/')
yield from check_and_call_extract_file(
filepath,
method_map,
options_map,
callback,
keywords,
comment_tags,
strip_comment_tags,
dirpath=absname,
)
def check_and_call_extract_file(
filepath: str | os.PathLike[str],
method_map: Iterable[tuple[str, str]],
options_map: SupportsItems[str, dict[str, Any]],
callback: Callable[[str, str, dict[str, Any]], object] | None,
keywords: Mapping[str, _Keyword],
comment_tags: Collection[str],
strip_comment_tags: bool,
dirpath: str | os.PathLike[str] | None = None,
) -> Generator[_FileExtractionResult, None, None]:
"""Checks if the given file matches an extraction method mapping, and if so, calls extract_from_file.
    Note that the extraction method mappings are resolved relative to `dirpath`.
So, given an absolute path to a file `filepath`, we want to check using
just the relative path from `dirpath` to `filepath`.
Yields 5-tuples (filename, lineno, messages, comments, context).
:param filepath: An absolute path to a file that exists.
    :param method_map: a list of ``(pattern, method)`` tuples that maps
                       extended glob patterns to extraction method names
:param options_map: a dictionary of additional options (optional)
    :param callback: a function that is called for every file that messages
                     are extracted from, just before the extraction itself is
                     performed; the function is passed the filename, the name
                     of the extraction method and the options dictionary as
                     positional arguments, in that order
:param keywords: a dictionary mapping keywords (i.e. names of functions
that should be recognized as translation functions) to
tuples that specify which of their arguments contain
localizable strings
:param comment_tags: a list of tags of translator comments to search for
and include in the results
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:param dirpath: the path to the directory to extract messages from.
:return: iterable of 5-tuples (filename, lineno, messages, comments, context)
    :rtype: Iterable[tuple[str, int, str|tuple[str], list[str], str|None]]
"""
# filename is the relative path from dirpath to the actual file
filename = relpath(filepath, dirpath)
for pattern, method in method_map:
if not pathmatch(pattern, filename):
continue
options = {}
for opattern, odict in options_map.items():
if pathmatch(opattern, filename):
options = odict
break
if callback:
callback(filename, method, options)
for message_tuple in extract_from_file(
method, filepath,
keywords=keywords,
comment_tags=comment_tags,
options=options,
strip_comment_tags=strip_comment_tags,
):
yield (filename, *message_tuple)
break
def extract_from_file(
method: _ExtractionMethod,
filename: str | os.PathLike[str],
keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS,
comment_tags: Collection[str] = (),
options: Mapping[str, Any] | None = None,
strip_comment_tags: bool = False,
) -> list[_ExtractionResult]:
"""Extract messages from a specific file.
This function returns a list of tuples of the form ``(lineno, message, comments, context)``.
:param filename: the path to the file to extract messages from
    :param method: a string specifying the extraction method (e.g. "python")
:param keywords: a dictionary mapping keywords (i.e. names of functions
that should be recognized as translation functions) to
tuples that specify which of their arguments contain
localizable strings
:param comment_tags: a list of translator tags to search for and include
in the results
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:param options: a dictionary of additional options (optional)
:returns: list of tuples of the form ``(lineno, message, comments, context)``
    :rtype: list[tuple[int, str|tuple[str], list[str], str|None]]
"""
if method == 'ignore':
return []
with open(filename, 'rb') as fileobj:
return list(extract(method, fileobj, keywords, comment_tags,
options, strip_comment_tags))
def _match_messages_against_spec(
lineno: int,
messages: list[str | None],
comments: list[str],
fileobj: _FileObj,
spec: tuple[int | tuple[int, str], ...],
):
translatable = []
context = None
# last_index is 1 based like the keyword spec
last_index = len(messages)
for index in spec:
if isinstance(index, tuple): # (n, 'c')
context = messages[index[0] - 1]
continue
if last_index < index:
# Not enough arguments
return
message = messages[index - 1]
if message is None:
return
translatable.append(message)
# keyword spec indexes are 1 based, therefore '-1'
if isinstance(spec[0], tuple):
# context-aware *gettext method
first_msg_index = spec[1] - 1
else:
first_msg_index = spec[0] - 1
# An empty string msgid isn't valid, emit a warning
if not messages[first_msg_index]:
filename = (getattr(fileobj, "name", None) or "(unknown)")
sys.stderr.write(
f"{filename}:{lineno}: warning: Empty msgid. It is reserved by GNU gettext: gettext(\"\") "
f"returns the header entry with meta information, not the empty string.\n",
)
return
translatable = tuple(translatable)
if len(translatable) == 1:
translatable = translatable[0]
return lineno, translatable, comments, context
@lru_cache(maxsize=None)
def _find_extractor(name: str):
for ep_name, load in find_entrypoints(GROUP_NAME):
if ep_name == name:
return load()
return None
def extract(
method: _ExtractionMethod,
fileobj: _FileObj,
keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS,
comment_tags: Collection[str] = (),
options: Mapping[str, Any] | None = None,
strip_comment_tags: bool = False,
) -> Generator[_ExtractionResult, None, None]:
"""Extract messages from the given file-like object using the specified
extraction method.
This function returns tuples of the form ``(lineno, message, comments, context)``.
The implementation dispatches the actual extraction to plugins, based on the
value of the ``method`` parameter.
>>> source = b'''# foo module
... def run(argv):
... print(_('Hello, world!'))
... '''
>>> from io import BytesIO
>>> for message in extract('python', BytesIO(source)):
... print(message)
(3, u'Hello, world!', [], None)
:param method: an extraction method (a callable), or
                   a string specifying the extraction method (e.g. "python");
if this is a simple name, the extraction function will be
looked up by entry point; if it is an explicit reference
to a function (of the form ``package.module:funcname`` or
``package.module.funcname``), the corresponding function
will be imported and used
:param fileobj: the file-like object the messages should be extracted from
:param keywords: a dictionary mapping keywords (i.e. names of functions
that should be recognized as translation functions) to
tuples that specify which of their arguments contain
localizable strings
:param comment_tags: a list of translator tags to search for and include
in the results
:param options: a dictionary of additional options (optional)
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:raise ValueError: if the extraction method is not registered
:returns: iterable of tuples of the form ``(lineno, message, comments, context)``
    :rtype: Iterable[tuple[int, str|tuple[str], list[str], str|None]]
"""
if callable(method):
func = method
elif ':' in method or '.' in method:
if ':' not in method:
lastdot = method.rfind('.')
module, attrname = method[:lastdot], method[lastdot + 1:]
else:
module, attrname = method.split(':', 1)
func = getattr(__import__(module, {}, {}, [attrname]), attrname)
else:
func = _find_extractor(method)
if func is None:
# if no named entry point was found,
# we resort to looking up a builtin extractor
func = _BUILTIN_EXTRACTORS.get(method)
if func is None:
raise ValueError(f"Unknown extraction method {method!r}")
results = func(fileobj, keywords.keys(), comment_tags,
options=options or {})
for lineno, funcname, messages, comments in results:
if not isinstance(messages, (list, tuple)):
messages = [messages]
if not messages:
continue
specs = keywords[funcname] or None if funcname else None
# {None: x} may be collapsed into x for backwards compatibility.
if not isinstance(specs, dict):
specs = {None: specs}
if strip_comment_tags:
_strip_comment_tags(comments, comment_tags)
# None matches all arities.
for arity in (None, len(messages)):
try:
spec = specs[arity]
except KeyError:
continue
if spec is None:
spec = (1,)
result = _match_messages_against_spec(lineno, messages, comments, fileobj, spec)
if result is not None:
yield result
def extract_nothing(
fileobj: _FileObj,
keywords: Mapping[str, _Keyword],
comment_tags: Collection[str],
options: Mapping[str, Any],
) -> list[_ExtractionResult]:
"""Pseudo extractor that does not actually extract anything, but simply
returns an empty list.
"""
return []
def extract_python(
fileobj: IO[bytes],
keywords: Mapping[str, _Keyword],
comment_tags: Collection[str],
options: _PyOptions,
) -> Generator[_ExtractionResult, None, None]:
"""Extract messages from Python source code.
It returns an iterator yielding tuples in the following form ``(lineno,
funcname, message, comments)``.
:param fileobj: the seekable, file-like object the messages should be
extracted from
:param keywords: a list of keywords (i.e. function names) that should be
recognized as translation functions
:param comment_tags: a list of translator tags to search for and include
in the results
:param options: a dictionary of additional options (optional)
:rtype: ``iterator``
"""
funcname = lineno = message_lineno = None
call_stack = -1
buf = []
messages = []
translator_comments = []
in_def = in_translator_comments = False
comment_tag = None
encoding = parse_encoding(fileobj) or options.get('encoding', 'UTF-8')
future_flags = parse_future_flags(fileobj, encoding)
next_line = lambda: fileobj.readline().decode(encoding)
tokens = generate_tokens(next_line)
# Current prefix of a Python 3.12 (PEP 701) f-string, or None if we're not
# currently parsing one.
current_fstring_start = None
for tok, value, (lineno, _), _, _ in tokens:
if call_stack == -1 and tok == NAME and value in ('def', 'class'):
in_def = True
elif tok == OP and value == '(':
if in_def:
# Avoid false positives for declarations such as:
# def gettext(arg='message'):
in_def = False
continue
if funcname:
call_stack += 1
elif in_def and tok == OP and value == ':':
# End of a class definition without parens
in_def = False
continue
elif call_stack == -1 and tok == COMMENT:
# Strip the comment token from the line
value = value[1:].strip()
if in_translator_comments and \
translator_comments[-1][0] == lineno - 1:
# We're already inside a translator comment, continue appending
translator_comments.append((lineno, value))
continue
# If execution reaches this point, let's see if comment line
# starts with one of the comment tags
for comment_tag in comment_tags:
if value.startswith(comment_tag):
in_translator_comments = True
translator_comments.append((lineno, value))
break
elif funcname and call_stack == 0:
nested = (tok == NAME and value in keywords)
if (tok == OP and value == ')') or nested:
if buf:
messages.append(''.join(buf))
del buf[:]
else:
messages.append(None)
messages = tuple(messages) if len(messages) > 1 else messages[0]
# Comments don't apply unless they immediately
# precede the message
if translator_comments and \
translator_comments[-1][0] < message_lineno - 1:
translator_comments = []
yield (message_lineno, funcname, messages,
[comment[1] for comment in translator_comments])
funcname = lineno = message_lineno = None
call_stack = -1
messages = []
translator_comments = []
in_translator_comments = False
if nested:
funcname = value
elif tok == STRING:
val = _parse_python_string(value, encoding, future_flags)
if val is not None:
if not message_lineno:
message_lineno = lineno
buf.append(val)
# Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
elif tok == FSTRING_START:
current_fstring_start = value
if not message_lineno:
message_lineno = lineno
elif tok == FSTRING_MIDDLE:
if current_fstring_start is not None:
current_fstring_start += value
elif tok == FSTRING_END:
if current_fstring_start is not None:
fstring = current_fstring_start + value
val = _parse_python_string(fstring, encoding, future_flags)
if val is not None:
buf.append(val)
elif tok == OP and value == ',':
if buf:
messages.append(''.join(buf))
del buf[:]
else:
messages.append(None)
if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma (,), the user is allowed to break onto a new
                    # line. Let's increase the last comment's lineno in
                    # order for the comment to still be a valid one
old_lineno, old_comment = translator_comments.pop()
translator_comments.append((old_lineno + 1, old_comment))
elif tok != NL and not message_lineno:
message_lineno = lineno
elif call_stack > 0 and tok == OP and value == ')':
call_stack -= 1
elif funcname and call_stack == -1:
funcname = None
elif tok == NAME and value in keywords:
funcname = value
if current_fstring_start is not None and tok not in {FSTRING_START, FSTRING_MIDDLE}:
# In Python 3.12, tokens other than FSTRING_* mean the
            # f-string is dynamic, so we don't want to extract it.
# And if it's FSTRING_END, we've already handled it above.
# Let's forget that we're in an f-string.
current_fstring_start = None
def _parse_python_string(value: str, encoding: str, future_flags: int) -> str | None:
# Unwrap quotes in a safe manner, maintaining the string's encoding
# https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470
code = compile(
f'# coding={str(encoding)}\n{value}',
'<string>',
'eval',
ast.PyCF_ONLY_AST | future_flags,
)
if isinstance(code, ast.Expression):
body = code.body
if isinstance(body, ast.Constant):
return body.value
if isinstance(body, ast.JoinedStr): # f-string
if all(isinstance(node, ast.Constant) for node in body.values):
return ''.join(node.value for node in body.values)
# TODO: we could raise an error or warning when not all nodes are constants
return None
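# Illustrative usage sketch for extract_python (hypothetical example, not
# part of the module):
#
#     >>> from io import BytesIO
#     >>> src = BytesIO(b"msg = _('hello')\n")
#     >>> list(extract_python(src, DEFAULT_KEYWORDS.keys(), [], {}))
#     [(1, '_', 'hello', [])]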
def extract_javascript(
fileobj: _FileObj,
keywords: Mapping[str, _Keyword],
comment_tags: Collection[str],
options: _JSOptions,
lineno: int = 1,
) -> Generator[_ExtractionResult, None, None]:
"""Extract messages from JavaScript source code.
:param fileobj: the seekable, file-like object the messages should be
extracted from
:param keywords: a list of keywords (i.e. function names) that should be
recognized as translation functions
:param comment_tags: a list of translator tags to search for and include
in the results
:param options: a dictionary of additional options (optional)
Supported options are:
* `jsx` -- set to false to disable JSX/E4X support.
* `template_string` -- if `True`, supports gettext(`key`)
* `parse_template_string` -- if `True` will parse the
contents of javascript
template strings.
:param lineno: line number offset (for parsing embedded fragments)
"""
from babel.messages.jslexer import Token, tokenize, unquote_string
funcname = message_lineno = None
messages = []
last_argument = None
translator_comments = []
concatenate_next = False
encoding = options.get('encoding', 'utf-8')
last_token = None
call_stack = -1
dotted = any('.' in kw for kw in keywords)
for token in tokenize(
fileobj.read().decode(encoding),
jsx=options.get("jsx", True),
template_string=options.get("template_string", True),
dotted=dotted,
lineno=lineno,
):
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
funcname and # have a keyword...
(last_token and last_token.type == 'name') and # we've seen nothing after the keyword...
token.type == 'template_string' # this is a template string
):
message_lineno = token.lineno
messages = [unquote_string(token.value)]
call_stack = 0
token = Token('operator', ')', token.lineno)
if options.get('parse_template_string') and not funcname and token.type == 'template_string':
yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno)
elif token.type == 'operator' and token.value == '(':
if funcname:
message_lineno = token.lineno
call_stack += 1
elif call_stack == -1 and token.type == 'linecomment':
value = token.value[2:].strip()
if translator_comments and \
translator_comments[-1][0] == token.lineno - 1:
translator_comments.append((token.lineno, value))
continue
for comment_tag in comment_tags:
if value.startswith(comment_tag):
translator_comments.append((token.lineno, value.strip()))
break
elif token.type == 'multilinecomment':
# only one multi-line comment may precede a translation
translator_comments = []
value = token.value[2:-2].strip()
for comment_tag in comment_tags:
if value.startswith(comment_tag):
lines = value.splitlines()
if lines:
lines[0] = lines[0].strip()
lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
for offset, line in enumerate(lines):
translator_comments.append((token.lineno + offset,
line))
break
elif funcname and call_stack == 0:
if token.type == 'operator' and token.value == ')':
if last_argument is not None:
messages.append(last_argument)
if len(messages) > 1:
messages = tuple(messages)
elif messages:
messages = messages[0]
else:
messages = None
# Comments don't apply unless they immediately precede the
# message
if translator_comments and \
translator_comments[-1][0] < message_lineno - 1:
translator_comments = []
if messages is not None:
yield (message_lineno, funcname, messages,
[comment[1] for comment in translator_comments])
funcname = message_lineno = last_argument = None
concatenate_next = False
translator_comments = []
messages = []
call_stack = -1
elif token.type in ('string', 'template_string'):
new_value = unquote_string(token.value)
if concatenate_next:
last_argument = (last_argument or '') + new_value
concatenate_next = False
else:
last_argument = new_value
elif token.type == 'operator':
if token.value == ',':
if last_argument is not None:
messages.append(last_argument)
last_argument = None
else:
messages.append(None)
concatenate_next = False
elif token.value == '+':
concatenate_next = True
elif call_stack > 0 and token.type == 'operator' \
and token.value == ')':
call_stack -= 1
elif funcname and call_stack == -1:
funcname = None
elif call_stack == -1 and token.type == 'name' and \
token.value in keywords and \
(last_token is None or last_token.type != 'name' or
last_token.value != 'function'):
funcname = token.value
last_token = token
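# Illustrative usage sketch for extract_javascript (hypothetical example,
# not part of the module):
#
#     >>> from io import BytesIO
#     >>> src = BytesIO(b"gettext('Hello');")
#     >>> list(extract_javascript(src, ('gettext',), [], {}))
#     [(1, 'gettext', 'Hello', [])]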
def parse_template_string(
template_string: str,
keywords: Mapping[str, _Keyword],
comment_tags: Collection[str],
options: _JSOptions,
lineno: int = 1,
) -> Generator[_ExtractionResult, None, None]:
"""Parse JavaScript template string.
:param template_string: the template string to be parsed
:param keywords: a list of keywords (i.e. function names) that should be
recognized as translation functions
:param comment_tags: a list of translator tags to search for and include
in the results
:param options: a dictionary of additional options (optional)
:param lineno: starting line number (optional)
"""
from babel.messages.jslexer import line_re
prev_character = None
level = 0
inside_str = False
expression_contents = ''
for character in template_string[1:-1]:
if not inside_str and character in ('"', "'", '`'):
inside_str = character
elif inside_str == character and prev_character != r'\\':
inside_str = False
if level:
expression_contents += character
if not inside_str:
if character == '{' and prev_character == '$':
level += 1
elif level and character == '}':
level -= 1
if level == 0 and expression_contents:
expression_contents = expression_contents[0:-1]
fake_file_obj = io.BytesIO(expression_contents.encode())
yield from extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno)
lineno += len(line_re.findall(expression_contents))
expression_contents = ''
prev_character = character
_BUILTIN_EXTRACTORS = {
'ignore': extract_nothing,
'python': extract_python,
'javascript': extract_javascript,
}

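A short sketch of driving the directory-level API; the project path, mapping, and comment tag are example values:

from babel.messages.extract import extract_from_dir

method_map = [('**.py', 'python'), ('**.js', 'javascript')]
for filename, lineno, message, comments, context in extract_from_dir(
    'myproject',  # example path
    method_map=method_map,
    comment_tags=('NOTE:',),
):
    print(f'{filename}:{lineno}: {message!r} {comments}')
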
File diff suppressed because it is too large

babel/messages/jslexer.py

@@ -0,0 +1,204 @@
"""
babel.messages.jslexer
~~~~~~~~~~~~~~~~~~~~~~
A simple JavaScript 1.5 lexer which is used for the JavaScript
extractor.
:copyright: (c) 2013-2025 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from __future__ import annotations
import re
from collections.abc import Generator
from typing import NamedTuple
operators: list[str] = sorted([
'+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
'+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
'>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
'[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':',
], key=len, reverse=True)
escapes: dict[str, str] = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)
dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE)
division_re = re.compile(r'/=?')
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
line_re = re.compile(r'(\r\n|\n|\r)')
line_join_re = re.compile(r'\\' + line_re.pattern)
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
hex_escape_re = re.compile(r'[a-fA-F0-9]{1,2}')
class Token(NamedTuple):
type: str
value: str
lineno: int
_rules: list[tuple[str | None, re.Pattern[str]]] = [
(None, re.compile(r'\s+', re.UNICODE)),
(None, re.compile(r'<!--.*')),
('linecomment', re.compile(r'//.*')),
('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
('dotted_name', dotted_name_re),
('name', name_re),
('number', re.compile(r'''(
(?:0|[1-9]\d*)
(\.\d+)?
([eE][-+]?\d+)? |
(0x[a-fA-F0-9]+)
)''', re.VERBOSE)),
('jsx_tag', re.compile(r'(?:</?[^>\s]+|/>)', re.I)), # May be mangled in `get_rules`
('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)),
('string', re.compile(r'''(
'(?:[^'\\]*(?:\\.[^'\\]*)*)' |
"(?:[^"\\]*(?:\\.[^"\\]*)*)"
)''', re.VERBOSE | re.DOTALL)),
]
def get_rules(jsx: bool, dotted: bool, template_string: bool) -> list[tuple[str | None, re.Pattern[str]]]:
"""
Get a tokenization rule list given the passed syntax options.
Internal to this module.
"""
rules = []
for token_type, rule in _rules:
if not jsx and token_type and 'jsx' in token_type:
continue
if not template_string and token_type == 'template_string':
continue
if token_type == 'dotted_name':
if not dotted:
continue
token_type = 'name'
rules.append((token_type, rule))
return rules
def indicates_division(token: Token) -> bool:
"""A helper function that helps the tokenizer to decide if the current
token may be followed by a division operator.
"""
if token.type == 'operator':
return token.value in (')', ']', '}', '++', '--')
return token.type in ('name', 'number', 'string', 'regexp')
def unquote_string(string: str) -> str:
"""Unquote a string with JavaScript rules. The string has to start with
string delimiters (``'``, ``"`` or the back-tick/grave accent (for template strings).)
"""
assert string and string[0] == string[-1] and string[0] in '"\'`', \
'string provided is not properly delimited'
string = line_join_re.sub('\\1', string[1:-1])
result: list[str] = []
add = result.append
pos = 0
while True:
# scan for the next escape
escape_pos = string.find('\\', pos)
if escape_pos < 0:
break
add(string[pos:escape_pos])
# check which character is escaped
next_char = string[escape_pos + 1]
if next_char in escapes:
add(escapes[next_char])
        # unicode escapes. try to consume up to four hexadecimal
        # characters and interpret them as a Unicode code point. If
        # there is no such code point, put all the consumed characters
        # back into the string.
elif next_char in 'uU':
escaped = uni_escape_re.match(string, escape_pos + 2)
if escaped is not None:
escaped_value = escaped.group()
if len(escaped_value) == 4:
try:
add(chr(int(escaped_value, 16)))
except ValueError:
pass
else:
pos = escape_pos + 6
continue
add(next_char + escaped_value)
pos = escaped.end()
continue
else:
add(next_char)
        # hex escapes. conversion from 2-digit hex to char is infallible
elif next_char in 'xX':
escaped = hex_escape_re.match(string, escape_pos + 2)
if escaped is not None:
escaped_value = escaped.group()
add(chr(int(escaped_value, 16)))
pos = escape_pos + 2 + len(escaped_value)
continue
else:
add(next_char)
# bogus escape. Just remove the backslash.
else:
add(next_char)
pos = escape_pos + 2
if pos < len(string):
add(string[pos:])
return ''.join(result)
def tokenize(source: str, jsx: bool = True, dotted: bool = True, template_string: bool = True, lineno: int = 1) -> Generator[Token, None, None]:
"""
Tokenize JavaScript/JSX source. Returns a generator of tokens.
:param source: The JavaScript source to tokenize.
:param jsx: Enable (limited) JSX parsing.
:param dotted: Read dotted names as single name token.
:param template_string: Support ES6 template strings
:param lineno: starting line number (optional)
"""
may_divide = False
pos = 0
end = len(source)
rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)
while pos < end:
# handle regular rules first
for token_type, rule in rules: # noqa: B007
match = rule.match(source, pos)
if match is not None:
break
# if we don't have a match we don't give up yet, but check for
# division operators or regular expression literals, based on
# the status of `may_divide` which is determined by the last
# processed non-whitespace token using `indicates_division`.
else:
if may_divide:
match = division_re.match(source, pos)
token_type = 'operator'
else:
match = regex_re.match(source, pos)
token_type = 'regexp'
if match is None:
# woops. invalid syntax. jump one char ahead and try again.
pos += 1
continue
token_value = match.group()
if token_type is not None:
token = Token(token_type, token_value, lineno)
may_divide = indicates_division(token)
yield token
lineno += len(line_re.findall(token_value))
pos = match.end()

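A small sketch of the lexer in isolation, tokenizing one call and unquoting the string literal it finds:

from babel.messages.jslexer import tokenize, unquote_string

source = "gettext('Hello\\u0021')"
for token in tokenize(source):
    if token.type == 'string':
        # unquote_string resolves the \u0021 escape to '!'
        print(token.lineno, unquote_string(token.value))  # 1 Hello!
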
babel/messages/mofile.py

@@ -0,0 +1,210 @@
"""
babel.messages.mofile
~~~~~~~~~~~~~~~~~~~~~
Writing of files in the ``gettext`` MO (machine object) format.
:copyright: (c) 2013-2025 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from __future__ import annotations
import array
import struct
from typing import TYPE_CHECKING
from babel.messages.catalog import Catalog, Message
if TYPE_CHECKING:
from _typeshed import SupportsRead, SupportsWrite
LE_MAGIC: int = 0x950412de
BE_MAGIC: int = 0xde120495
def read_mo(fileobj: SupportsRead[bytes]) -> Catalog:
"""Read a binary MO file from the given file-like object and return a
corresponding `Catalog` object.
:param fileobj: the file-like object to read the MO file from
:note: The implementation of this function is heavily based on the
``GNUTranslations._parse`` method of the ``gettext`` module in the
standard library.
"""
catalog = Catalog()
headers = {}
filename = getattr(fileobj, 'name', '')
buf = fileobj.read()
buflen = len(buf)
unpack = struct.unpack
    # Parse the .mo file header, which consists of 5 32-bit words; their
    # byte order is determined by the magic number.
magic = unpack('<I', buf[:4])[0] # Are we big endian or little endian?
if magic == LE_MAGIC:
version, msgcount, origidx, transidx = unpack('<4I', buf[4:20])
ii = '<II'
elif magic == BE_MAGIC:
version, msgcount, origidx, transidx = unpack('>4I', buf[4:20])
ii = '>II'
else:
raise OSError(0, 'Bad magic number', filename)
# Now put all messages from the .mo file buffer into the catalog
# dictionary
for _i in range(msgcount):
mlen, moff = unpack(ii, buf[origidx:origidx + 8])
mend = moff + mlen
tlen, toff = unpack(ii, buf[transidx:transidx + 8])
tend = toff + tlen
if mend < buflen and tend < buflen:
msg = buf[moff:mend]
tmsg = buf[toff:tend]
else:
raise OSError(0, 'File is corrupt', filename)
# See if we're looking at GNU .mo conventions for metadata
if mlen == 0:
# Catalog description
lastkey = key = None
for item in tmsg.splitlines():
item = item.strip()
if not item:
continue
if b':' in item:
key, value = item.split(b':', 1)
lastkey = key = key.strip().lower()
headers[key] = value.strip()
elif lastkey:
headers[lastkey] += b'\n' + item
if b'\x04' in msg: # context
ctxt, msg = msg.split(b'\x04')
else:
ctxt = None
if b'\x00' in msg: # plural forms
msg = msg.split(b'\x00')
tmsg = tmsg.split(b'\x00')
msg = [x.decode(catalog.charset) for x in msg]
tmsg = [x.decode(catalog.charset) for x in tmsg]
else:
msg = msg.decode(catalog.charset)
tmsg = tmsg.decode(catalog.charset)
catalog[msg] = Message(msg, tmsg, context=ctxt)
# advance to next entry in the seek tables
origidx += 8
transidx += 8
catalog.mime_headers = headers.items()
return catalog
def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = False) -> None:
"""Write a catalog to the specified file-like object using the GNU MO file
format.
>>> import sys
>>> from babel.messages import Catalog
>>> from gettext import GNUTranslations
>>> from io import BytesIO
>>> catalog = Catalog(locale='en_US')
>>> catalog.add('foo', 'Voh')
<Message ...>
>>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz'))
<Message ...>
>>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
<Message ...>
>>> catalog.add('Fizz', '')
<Message ...>
>>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
<Message ...>
>>> buf = BytesIO()
>>> write_mo(buf, catalog)
>>> x = buf.seek(0)
>>> translations = GNUTranslations(fp=buf)
>>> if sys.version_info[0] >= 3:
... translations.ugettext = translations.gettext
... translations.ungettext = translations.ngettext
>>> translations.ugettext('foo')
u'Voh'
>>> translations.ungettext('bar', 'baz', 1)
u'Bahr'
>>> translations.ungettext('bar', 'baz', 2)
u'Batz'
>>> translations.ugettext('fuz')
u'fuz'
>>> translations.ugettext('Fizz')
u'Fizz'
>>> translations.ugettext('Fuzz')
u'Fuzz'
>>> translations.ugettext('Fuzzes')
u'Fuzzes'
:param fileobj: the file-like object to write to
:param catalog: the `Catalog` instance
:param use_fuzzy: whether translations marked as "fuzzy" should be included
in the output
"""
messages = list(catalog)
messages[1:] = [m for m in messages[1:]
if m.string and (use_fuzzy or not m.fuzzy)]
messages.sort()
ids = strs = b''
offsets = []
for message in messages:
# For each string, we need size and file offset. Each string is NUL
# terminated; the NUL does not count into the size.
if message.pluralizable:
msgid = b'\x00'.join([
msgid.encode(catalog.charset) for msgid in message.id
])
msgstrs = []
for idx, string in enumerate(message.string):
if not string:
msgstrs.append(message.id[min(int(idx), 1)])
else:
msgstrs.append(string)
msgstr = b'\x00'.join([
msgstr.encode(catalog.charset) for msgstr in msgstrs
])
else:
msgid = message.id.encode(catalog.charset)
msgstr = message.string.encode(catalog.charset)
if message.context:
msgid = b'\x04'.join([message.context.encode(catalog.charset),
msgid])
offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
ids += msgid + b'\x00'
strs += msgstr + b'\x00'
# The header is 7 32-bit unsigned integers. We don't use hash tables, so
# the keys start right after the index tables.
keystart = 7 * 4 + 16 * len(messages)
valuestart = keystart + len(ids)
# The string table first has the list of keys, then the list of values.
# Each entry has first the size of the string, then the file offset.
koffsets = []
voffsets = []
for o1, l1, o2, l2 in offsets:
koffsets += [l1, o1 + keystart]
voffsets += [l2, o2 + valuestart]
offsets = koffsets + voffsets
fileobj.write(struct.pack('Iiiiiii',
LE_MAGIC, # magic
0, # version
len(messages), # number of entries
7 * 4, # start of key index
7 * 4 + len(messages) * 8, # start of value index
0, 0, # size and offset of hash table
) + array.array.tobytes(array.array("i", offsets)) + ids + strs)

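A round-trip sketch pairing `write_mo` with `read_mo` through an in-memory buffer:

from io import BytesIO

from babel.messages.catalog import Catalog
from babel.messages.mofile import read_mo, write_mo

catalog = Catalog(locale='en_US')
catalog.add('foo', 'Voh')
buf = BytesIO()
write_mo(buf, catalog)
buf.seek(0)
print(read_mo(buf).get('foo').string)  # Voh
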
babel/messages/plurals.py

@@ -0,0 +1,266 @@
"""
babel.messages.plurals
~~~~~~~~~~~~~~~~~~~~~~
Plural form definitions.
:copyright: (c) 2013-2025 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from __future__ import annotations
from babel.core import Locale, default_locale
# XXX: remove this file, duplication with babel.plural
LC_CTYPE: str | None = default_locale('LC_CTYPE')
PLURALS: dict[str, tuple[int, str]] = {
# Afar
# 'aa': (),
# Abkhazian
# 'ab': (),
# Avestan
# 'ae': (),
# Afrikaans - From Pootle's PO's
'af': (2, '(n != 1)'),
# Akan
# 'ak': (),
# Amharic
# 'am': (),
# Aragonese
# 'an': (),
# Arabic - From Pootle's PO's
'ar': (6, '(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=0 && n%100<=2 ? 4 : 5)'),
# Assamese
# 'as': (),
# Avaric
# 'av': (),
# Aymara
# 'ay': (),
# Azerbaijani
# 'az': (),
# Bashkir
# 'ba': (),
# Belarusian
'be': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Bulgarian - From Pootle's PO's
'bg': (2, '(n != 1)'),
# Bihari
# 'bh': (),
# Bislama
# 'bi': (),
# Bambara
# 'bm': (),
# Bengali - From Pootle's PO's
'bn': (2, '(n != 1)'),
# Tibetan - as discussed in private with Andrew West
'bo': (1, '0'),
# Breton
'br': (
6,
'(n==1 ? 0 : n%10==1 && n%100!=11 && n%100!=71 && n%100!=91 ? 1 : n%10==2 && n%100!=12 && n%100!=72 && '
'n%100!=92 ? 2 : (n%10==3 || n%10==4 || n%10==9) && n%100!=13 && n%100!=14 && n%100!=19 && n%100!=73 && '
'n%100!=74 && n%100!=79 && n%100!=93 && n%100!=94 && n%100!=99 ? 3 : n%1000000==0 ? 4 : 5)',
),
# Bosnian
'bs': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Catalan - From Pootle's PO's
'ca': (2, '(n != 1)'),
# Chechen
# 'ce': (),
# Chamorro
# 'ch': (),
# Corsican
# 'co': (),
# Cree
# 'cr': (),
# Czech
'cs': (3, '((n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2)'),
# Church Slavic
# 'cu': (),
# Chuvash
'cv': (1, '0'),
# Welsh
'cy': (5, '(n==1 ? 1 : n==2 ? 2 : n==3 ? 3 : n==6 ? 4 : 0)'),
# Danish
'da': (2, '(n != 1)'),
# German
'de': (2, '(n != 1)'),
# Divehi
# 'dv': (),
# Dzongkha
'dz': (1, '0'),
# Greek
'el': (2, '(n != 1)'),
# English
'en': (2, '(n != 1)'),
# Esperanto
'eo': (2, '(n != 1)'),
# Spanish
'es': (2, '(n != 1)'),
# Estonian
'et': (2, '(n != 1)'),
# Basque - From Pootle's PO's
'eu': (2, '(n != 1)'),
# Persian - From Pootle's PO's
'fa': (1, '0'),
# Finnish
'fi': (2, '(n != 1)'),
# French
'fr': (2, '(n > 1)'),
# Friulian - From Pootle's PO's
'fur': (2, '(n > 1)'),
# Irish
'ga': (5, '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'),
# Galician - From Pootle's PO's
'gl': (2, '(n != 1)'),
# Hausa - From Pootle's PO's
'ha': (2, '(n != 1)'),
# Hebrew
'he': (2, '(n != 1)'),
# Hindi - From Pootle's PO's
'hi': (2, '(n != 1)'),
# Croatian
'hr': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Hungarian
'hu': (1, '0'),
# Armenian - From Pootle's PO's
'hy': (1, '0'),
# Icelandic - From Pootle's PO's
'is': (2, '(n%10==1 && n%100!=11 ? 0 : 1)'),
# Italian
'it': (2, '(n != 1)'),
# Japanese
'ja': (1, '0'),
# Georgian - From Pootle's PO's
'ka': (1, '0'),
# Kongo - From Pootle's PO's
'kg': (2, '(n != 1)'),
# Khmer - From Pootle's PO's
'km': (1, '0'),
# Korean
'ko': (1, '0'),
# Kurdish - From Pootle's PO's
'ku': (2, '(n != 1)'),
# Lao - Another member of the Tai language family, like Thai.
'lo': (1, '0'),
# Lithuanian
'lt': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Latvian
'lv': (3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
# Maltese - From Pootle's PO's
'mt': (4, '(n==1 ? 0 : n==0 || ( n%100>=1 && n%100<=10) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
# Norwegian Bokmål
'nb': (2, '(n != 1)'),
# Dutch
'nl': (2, '(n != 1)'),
# Norwegian Nynorsk
'nn': (2, '(n != 1)'),
# Norwegian
'no': (2, '(n != 1)'),
# Punjabi - From Pootle's PO's
'pa': (2, '(n != 1)'),
# Polish
'pl': (3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Portuguese
'pt': (2, '(n != 1)'),
# Brazilian
'pt_BR': (2, '(n > 1)'),
# Romanian - From Pootle's PO's
'ro': (3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
# Russian
'ru': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Slovak
'sk': (3, '((n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2)'),
# Slovenian
'sl': (4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
# Serbian - From Pootle's PO's
'sr': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Southern Sotho - From Pootle's PO's
'st': (2, '(n != 1)'),
# Swedish
'sv': (2, '(n != 1)'),
# Thai
'th': (1, '0'),
# Turkish
'tr': (1, '0'),
# Ukrainian
'uk': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Venda - From Pootle's PO's
've': (2, '(n != 1)'),
# Vietnamese - From Pootle's PO's
'vi': (1, '0'),
# Xhosa - From Pootle's PO's
'xh': (2, '(n != 1)'),
# Chinese - From Pootle's PO's (modified)
'zh': (1, '0'),
}
DEFAULT_PLURAL: tuple[int, str] = (2, '(n != 1)')
class _PluralTuple(tuple):
"""A tuple with plural information."""
__slots__ = ()
@property
def num_plurals(self) -> int:
"""The number of plurals used by the locale."""
return self[0]
@property
def plural_expr(self) -> str:
"""The plural expression used by the locale."""
return self[1]
@property
def plural_forms(self) -> str:
"""The plural expression used by the catalog or locale."""
return f'nplurals={self[0]}; plural={self[1]};'
def __str__(self) -> str:
return self.plural_forms
def get_plural(locale: Locale | str | None = None) -> _PluralTuple:
"""A tuple with the information catalogs need to perform proper
pluralization. The first item of the tuple is the number of plural
forms, the second the plural expression.
:param locale: the `Locale` object or locale identifier. Defaults to the system character type locale.
>>> get_plural(locale='en')
(2, '(n != 1)')
>>> get_plural(locale='ga')
(5, '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)')
The object returned is a special tuple with additional members:
>>> tup = get_plural("ja")
>>> tup.num_plurals
1
>>> tup.plural_expr
'0'
>>> tup.plural_forms
'nplurals=1; plural=0;'
Converting the tuple into a string prints the plural forms for a
gettext catalog:
>>> str(tup)
'nplurals=1; plural=0;'
"""
locale = Locale.parse(locale or LC_CTYPE)
try:
tup = PLURALS[str(locale)]
except KeyError:
try:
tup = PLURALS[locale.language]
except KeyError:
tup = DEFAULT_PLURAL
return _PluralTuple(tup)

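A sketch of the lookup order: exact locale string first, then bare language, then `DEFAULT_PLURAL` ('fil' is an example of a locale with no entry above):

from babel.messages.plurals import get_plural

print(get_plural('pt_BR'))  # exact match: nplurals=2; plural=(n > 1);
print(get_plural('pt'))     # language match: nplurals=2; plural=(n != 1);
print(get_plural('fil'))    # no entry, falls back to DEFAULT_PLURAL
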
babel/messages/pofile.py

@@ -0,0 +1,744 @@
"""
babel.messages.pofile
~~~~~~~~~~~~~~~~~~~~~
Reading and writing of files in the ``gettext`` PO (portable object)
format.
:copyright: (c) 2013-2025 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from __future__ import annotations
import os
import re
from collections.abc import Iterable
from typing import TYPE_CHECKING, Literal
from babel.core import Locale
from babel.messages.catalog import Catalog, Message
from babel.util import TextWrapper, _cmp
if TYPE_CHECKING:
from typing import IO, AnyStr
from _typeshed import SupportsWrite
def unescape(string: str) -> str:
r"""Reverse `escape` the given string.
>>> print(unescape('"Say:\\n \\"hello, world!\\"\\n"'))
Say:
"hello, world!"
<BLANKLINE>
:param string: the string to unescape
"""
def replace_escapes(match):
m = match.group(1)
if m == 'n':
return '\n'
elif m == 't':
return '\t'
elif m == 'r':
return '\r'
# m is \ or "
return m
return re.compile(r'\\([\\trn"])').sub(replace_escapes, string[1:-1])
def denormalize(string: str) -> str:
r"""Reverse the normalization done by the `normalize` function.
>>> print(denormalize(r'''""
... "Say:\n"
... " \"hello, world!\"\n"'''))
Say:
"hello, world!"
<BLANKLINE>
>>> print(denormalize(r'''""
... "Say:\n"
... " \"Lorem ipsum dolor sit "
... "amet, consectetur adipisicing"
... " elit, \"\n"'''))
Say:
"Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
<BLANKLINE>
:param string: the string to denormalize
"""
if '\n' in string:
escaped_lines = string.splitlines()
if string.startswith('""'):
escaped_lines = escaped_lines[1:]
lines = map(unescape, escaped_lines)
return ''.join(lines)
else:
return unescape(string)
def _extract_locations(line: str) -> list[str]:
"""Extract locations from location comments.
Locations are extracted while properly handling First Strong
Isolate (U+2068) and Pop Directional Isolate (U+2069), used by
gettext to enclose filenames with spaces and tabs in their names.
"""
if "\u2068" not in line and "\u2069" not in line:
return line.lstrip().split()
locations = []
location = ""
in_filename = False
for c in line:
if c == "\u2068":
if in_filename:
raise ValueError("location comment contains more First Strong Isolate "
"characters, than Pop Directional Isolate characters")
in_filename = True
continue
elif c == "\u2069":
if not in_filename:
raise ValueError("location comment contains more Pop Directional Isolate "
"characters, than First Strong Isolate characters")
in_filename = False
continue
elif c == " ":
if in_filename:
location += c
elif location:
locations.append(location)
location = ""
else:
location += c
else:
if location:
if in_filename:
raise ValueError("location comment contains more First Strong Isolate "
"characters, than Pop Directional Isolate characters")
locations.append(location)
return locations
class PoFileError(Exception):
"""Exception thrown by PoParser when an invalid po file is encountered."""
def __init__(self, message: str, catalog: Catalog, line: str, lineno: int) -> None:
super().__init__(f'{message} on {lineno}')
self.catalog = catalog
self.line = line
self.lineno = lineno
class _NormalizedString:
def __init__(self, *args: str) -> None:
self._strs: list[str] = []
for arg in args:
self.append(arg)
def append(self, s: str) -> None:
self._strs.append(s.strip())
def denormalize(self) -> str:
return ''.join(map(unescape, self._strs))
def __bool__(self) -> bool:
return bool(self._strs)
def __repr__(self) -> str:
return os.linesep.join(self._strs)
def __cmp__(self, other: object) -> int:
if not other:
return 1
return _cmp(str(self), str(other))
def __gt__(self, other: object) -> bool:
return self.__cmp__(other) > 0
def __lt__(self, other: object) -> bool:
return self.__cmp__(other) < 0
def __ge__(self, other: object) -> bool:
return self.__cmp__(other) >= 0
def __le__(self, other: object) -> bool:
return self.__cmp__(other) <= 0
def __eq__(self, other: object) -> bool:
return self.__cmp__(other) == 0
def __ne__(self, other: object) -> bool:
return self.__cmp__(other) != 0
class PoFileParser:
"""Support class to read messages from a ``gettext`` PO (portable object) file
    and add them to a `Catalog`.
See `read_po` for simple cases.
"""
_keywords = [
'msgid',
'msgstr',
'msgctxt',
'msgid_plural',
]
def __init__(self, catalog: Catalog, ignore_obsolete: bool = False, abort_invalid: bool = False) -> None:
self.catalog = catalog
self.ignore_obsolete = ignore_obsolete
self.counter = 0
self.offset = 0
self.abort_invalid = abort_invalid
self._reset_message_state()
def _reset_message_state(self) -> None:
self.messages = []
self.translations = []
self.locations = []
self.flags = []
self.user_comments = []
self.auto_comments = []
self.context = None
self.obsolete = False
self.in_msgid = False
self.in_msgstr = False
self.in_msgctxt = False

    def _add_message(self) -> None:
        """
        Add a message to the catalog based on the current parser state and
        clear the state ready to process the next message.
        """
        self.translations.sort()
        if len(self.messages) > 1:
            msgid = tuple(m.denormalize() for m in self.messages)
        else:
            msgid = self.messages[0].denormalize()
        if isinstance(msgid, (list, tuple)):
            string = ['' for _ in range(self.catalog.num_plurals)]
            for idx, translation in self.translations:
                if idx >= self.catalog.num_plurals:
                    self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog")
                    continue
                string[idx] = translation.denormalize()
            string = tuple(string)
        else:
            string = self.translations[0][1].denormalize()
        msgctxt = self.context.denormalize() if self.context else None
        message = Message(msgid, string, list(self.locations), set(self.flags),
                          self.auto_comments, self.user_comments, lineno=self.offset + 1,
                          context=msgctxt)
        if self.obsolete:
            if not self.ignore_obsolete:
                self.catalog.obsolete[self.catalog._key_for(msgid, msgctxt)] = message
        else:
            self.catalog[msgid] = message
        self.counter += 1
        self._reset_message_state()

    def _finish_current_message(self) -> None:
        if self.messages:
            if not self.translations:
                self._invalid_pofile("", self.offset, f"missing msgstr for msgid '{self.messages[0].denormalize()}'")
                self.translations.append([0, _NormalizedString("")])
            self._add_message()

    def _process_message_line(self, lineno, line, obsolete=False) -> None:
        if line.startswith('"'):
            self._process_string_continuation_line(line, lineno)
        else:
            self._process_keyword_line(lineno, line, obsolete)

    def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
        for keyword in self._keywords:
            try:
                if line.startswith(keyword) and line[len(keyword)] in [' ', '[']:
                    arg = line[len(keyword):]
                    break
            except IndexError:
                self._invalid_pofile(line, lineno, "Keyword must be followed by a string")
        else:
            self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.")
            return

        if keyword in ['msgid', 'msgctxt']:
            self._finish_current_message()

        self.obsolete = obsolete

        # The line that has the msgid is stored as the offset of the msg;
        # should this be the msgctxt if it has one?
        if keyword == 'msgid':
            self.offset = lineno

        if keyword in ['msgid', 'msgid_plural']:
            self.in_msgctxt = False
            self.in_msgid = True
            self.messages.append(_NormalizedString(arg))
        elif keyword == 'msgstr':
            self.in_msgid = False
            self.in_msgstr = True
            if arg.startswith('['):
                idx, msg = arg[1:].split(']', 1)
                self.translations.append([int(idx), _NormalizedString(msg)])
            else:
                self.translations.append([0, _NormalizedString(arg)])
        elif keyword == 'msgctxt':
            self.in_msgctxt = True
            self.context = _NormalizedString(arg)

    def _process_string_continuation_line(self, line, lineno) -> None:
        if self.in_msgid:
            s = self.messages[-1]
        elif self.in_msgstr:
            s = self.translations[-1][1]
        elif self.in_msgctxt:
            s = self.context
        else:
            self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
            return
        s.append(line)

    def _process_comment(self, line) -> None:
        self._finish_current_message()

        if line[1:].startswith(':'):
            for location in _extract_locations(line[2:]):
                pos = location.rfind(':')
                if pos >= 0:
                    try:
                        lineno = int(location[pos + 1:])
                    except ValueError:
                        continue
                    self.locations.append((location[:pos], lineno))
                else:
                    self.locations.append((location, None))
        elif line[1:].startswith(','):
            for flag in line[2:].lstrip().split(','):
                self.flags.append(flag.strip())
        elif line[1:].startswith('.'):
            # These are called auto-comments
            comment = line[2:].strip()
            if comment:  # Just check that we're not adding empty comments
                self.auto_comments.append(comment)
        else:
            # These are called user comments
            self.user_comments.append(line[1:].strip())
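
    # Comment prefixes dispatched above (illustrative summary):
    #     "#: main.py:1"  -> location        "#, fuzzy"  -> flag
    #     "#. generated"  -> auto comment    "# note"    -> user comment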

    def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
        """
        Reads from the file-like object `fileobj` and adds any po file
        units found in it to the `Catalog` supplied to the constructor.
        """
        for lineno, line in enumerate(fileobj):
            line = line.strip()
            if not isinstance(line, str):
                line = line.decode(self.catalog.charset)
            if not line:
                continue
            if line.startswith('#'):
                if line[1:].startswith('~'):
                    self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
                else:
                    try:
                        self._process_comment(line)
                    except ValueError as exc:
                        self._invalid_pofile(line, lineno, str(exc))
            else:
                self._process_message_line(lineno, line)

        self._finish_current_message()

        # No actual messages found, but there was some info in comments, from which
        # we'll construct an empty header message
        if not self.counter and (self.flags or self.user_comments or self.auto_comments):
            self.messages.append(_NormalizedString('""'))
            self.translations.append([0, _NormalizedString('""')])
            self._add_message()

    def _invalid_pofile(self, line, lineno, msg) -> None:
        assert isinstance(line, str)
        if self.abort_invalid:
            raise PoFileError(msg, self.catalog, line, lineno)
        print("WARNING:", msg)
        print(f"WARNING: Problem on line {lineno + 1}: {line!r}")


def read_po(
    fileobj: IO[AnyStr] | Iterable[AnyStr],
    locale: Locale | str | None = None,
    domain: str | None = None,
    ignore_obsolete: bool = False,
    charset: str | None = None,
    abort_invalid: bool = False,
) -> Catalog:
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object (or an iterable of lines) and return a `Catalog`.

    >>> from datetime import datetime
    >>> from io import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr "quux %(name)s"
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] "bar"
    ... msgstr[1] "baaz"
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print((message.id, message.string))
    ...         print(' ', (message.locations, sorted(list(message.flags))))
    ...         print(' ', (message.user_comments, message.auto_comments))
    (u'foo %(name)s', u'quux %(name)s')
      ([(u'main.py', 1)], [u'fuzzy', u'python-format'])
      ([], [])
    ((u'bar', u'baz'), (u'bar', u'baaz'))
      ([(u'main.py', 3)], [])
      ([u'A user comment'], [u'An auto comment'])

    .. versionadded:: 1.0
       Added support for explicit charset argument.

    :param fileobj: the file-like object (or iterable of lines) to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :param charset: the character set of the catalog.
    :param abort_invalid: abort read if po file is invalid
    """
    catalog = Catalog(locale=locale, domain=domain, charset=charset)
    parser = PoFileParser(catalog, ignore_obsolete, abort_invalid=abort_invalid)
    parser.parse(fileobj)
    return catalog
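
# Example (illustrative): PO files are usually opened in binary mode so the
# parser can decode each line itself using the catalog's charset:
#
#     with open("messages.po", "rb") as f:
#         catalog = read_po(f, locale="de_DE")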


WORD_SEP = re.compile('('
                      r'\s+|'                                  # any whitespace
                      r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'  # hyphenated words
                      r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'    # em-dash
                      ')')
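
# Example (illustrative): the capturing group keeps separators in the split
# result, so `normalize()` can re-join chunks without losing whitespace:
#
#     WORD_SEP.split("hello world")
#     # -> ['hello', ' ', 'world']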


def escape(string: str) -> str:
    r"""Escape the given string so that it can be included in double-quoted
    strings in ``PO`` files.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    """
    return '"%s"' % string.replace('\\', '\\\\') \
                          .replace('\t', '\\t') \
                          .replace('\r', '\\r') \
                          .replace('\n', '\\n') \
                          .replace('\"', '\\"')


def normalize(string: str, prefix: str = '', width: int = 76) -> str:
    r"""Convert a string into a format that is appropriate for .po files.

    >>> print(normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None))
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print(normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32))
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    """
    if width and width > 0:
        prefixlen = len(prefix)
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) + prefixlen > width:
                chunks = WORD_SEP.split(line)
                chunks.reverse()
                while chunks:
                    buf = []
                    size = 2
                    while chunks:
                        length = len(escape(chunks[-1])) - 2 + prefixlen
                        if size + length < width:
                            buf.append(chunks.pop())
                            size += length
                        else:
                            if not buf:
                                # handle long chunks by putting them on a
                                # separate line
                                buf.append(chunks.pop())
                            break
                    lines.append(''.join(buf))
            else:
                lines.append(line)
    else:
        lines = string.splitlines(True)

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    return '""\n' + '\n'.join([(prefix + escape(line)) for line in lines])


def _enclose_filename_if_necessary(filename: str) -> str:
    """Enclose filenames which include white spaces or tabs.

    Do the same as gettext and enclose filenames which contain white
    spaces or tabs with First Strong Isolate (U+2068) and Pop
    Directional Isolate (U+2069).
    """
    if " " not in filename and "\t" not in filename:
        return filename

    if not filename.startswith("\u2068"):
        filename = "\u2068" + filename
    if not filename.endswith("\u2069"):
        filename += "\u2069"
    return filename
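
# Examples (illustrative):
#
#     _enclose_filename_if_necessary("main.py")      # -> 'main.py'
#     _enclose_filename_if_necessary("my file.py")   # -> '\u2068my file.py\u2069'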


def write_po(
    fileobj: SupportsWrite[bytes],
    catalog: Catalog,
    width: int = 76,
    no_location: bool = False,
    omit_header: bool = False,
    sort_output: bool = False,
    sort_by_file: bool = False,
    ignore_obsolete: bool = False,
    include_previous: bool = False,
    include_lineno: bool = True,
) -> None:
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    <Message...>
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    <Message...>
    >>> from io import BytesIO
    >>> buf = BytesIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print(buf.getvalue().decode("utf8"))
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    :param include_lineno: include line number in the location comment
    """
    sort_by = None
    if sort_output:
        sort_by = "message"
    elif sort_by_file:
        sort_by = "location"

    for line in generate_po(
        catalog,
        ignore_obsolete=ignore_obsolete,
        include_lineno=include_lineno,
        include_previous=include_previous,
        no_location=no_location,
        omit_header=omit_header,
        sort_by=sort_by,
        width=width,
    ):
        if isinstance(line, str):
            line = line.encode(catalog.charset, 'backslashreplace')
        fileobj.write(line)


def generate_po(
    catalog: Catalog,
    *,
    ignore_obsolete: bool = False,
    include_lineno: bool = True,
    include_previous: bool = False,
    no_location: bool = False,
    omit_header: bool = False,
    sort_by: Literal["message", "location"] | None = None,
    width: int = 76,
) -> Iterable[str]:
    r"""Yield text strings representing a ``gettext`` PO (portable object) file.

    See `write_po()` for a more detailed description.
    """
    # xgettext always wraps comments even if --no-wrap is passed;
    # provide the same behaviour
    comment_width = width if width and width > 0 else 76

    comment_wrapper = TextWrapper(width=comment_width, break_long_words=False)
    header_wrapper = TextWrapper(width=width, subsequent_indent="# ", break_long_words=False)

    def _format_comment(comment, prefix=''):
        for line in comment_wrapper.wrap(comment):
            yield f"#{prefix} {line.strip()}\n"

    def _format_message(message, prefix=''):
        if isinstance(message.id, (list, tuple)):
            if message.context:
                yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n"
            yield f"{prefix}msgid {normalize(message.id[0], prefix=prefix, width=width)}\n"
            yield f"{prefix}msgid_plural {normalize(message.id[1], prefix=prefix, width=width)}\n"

            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    string = ''
                yield f"{prefix}msgstr[{idx:d}] {normalize(string, prefix=prefix, width=width)}\n"
        else:
            if message.context:
                yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n"
            yield f"{prefix}msgid {normalize(message.id, prefix=prefix, width=width)}\n"
            yield f"{prefix}msgstr {normalize(message.string or '', prefix=prefix, width=width)}\n"

    for message in _sort_messages(catalog, sort_by=sort_by):
        if not message.id:  # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if width and width > 0:
                lines = []
                for line in comment_header.splitlines():
                    lines += header_wrapper.wrap(line)
                comment_header = '\n'.join(lines)
            yield f"{comment_header}\n"

        for comment in message.user_comments:
            yield from _format_comment(comment)
        for comment in message.auto_comments:
            yield from _format_comment(comment, prefix='.')

        if not no_location:
            locs = []

            # sort locations by filename and lineno.
            # if there's no <int> as lineno, use `-1`.
            # if no sorting possible, leave unsorted.
            # (see issue #606)
            try:
                locations = sorted(message.locations,
                                   key=lambda x: (x[0], isinstance(x[1], int) and x[1] or -1))
            except TypeError:  # e.g. "TypeError: unorderable types: NoneType() < int()"
                locations = message.locations

            for filename, lineno in locations:
                location = filename.replace(os.sep, '/')
                location = _enclose_filename_if_necessary(location)
                if lineno and include_lineno:
                    location = f"{location}:{lineno:d}"
                if location not in locs:
                    locs.append(location)
            yield from _format_comment(' '.join(locs), prefix=':')

        if message.flags:
            yield f"#{', '.join(['', *sorted(message.flags)])}\n"

        if message.previous_id and include_previous:
            yield from _format_comment(
                f'msgid {normalize(message.previous_id[0], width=width)}',
                prefix='|',
            )
            if len(message.previous_id) > 1:
                norm_previous_id = normalize(message.previous_id[1], width=width)
                yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|')

        yield from _format_message(message)
        yield '\n'

    if not ignore_obsolete:
        for message in _sort_messages(
            catalog.obsolete.values(),
            sort_by=sort_by,
        ):
            for comment in message.user_comments:
                yield from _format_comment(comment)
            yield from _format_message(message, prefix='#~ ')
            yield '\n'
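
# Example (illustrative): generate_po() yields plain strings, so output can be
# streamed without building the whole file in memory:
#
#     with open("messages.pot", "w", encoding="utf-8") as f:
#         f.writelines(generate_po(catalog, width=76))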


def _sort_messages(messages: Iterable[Message], sort_by: Literal["message", "location"] | None) -> list[Message]:
    """
    Sort the given message iterable by the given criteria.

    Always returns a list.

    :param messages: An iterable of Messages.
    :param sort_by: Sort by which criteria? Options are `message` and `location`.
    :return: list[Message]
    """
    messages = list(messages)
    if sort_by == "message":
        messages.sort()
    elif sort_by == "location":
        messages.sort(key=lambda m: m.locations)
    return messages

View File

@ -0,0 +1,108 @@
from __future__ import annotations

from babel.messages import frontend

try:
    # See: https://setuptools.pypa.io/en/latest/deprecated/distutils-legacy.html
    from setuptools import Command

    try:
        from setuptools.errors import BaseError, OptionError, SetupError
    except ImportError:  # Error aliases only added in setuptools 59 (2021-11).
        OptionError = SetupError = BaseError = Exception
except ImportError:
    from distutils.cmd import Command
    from distutils.errors import (
        DistutilsError as BaseError,
        DistutilsOptionError as OptionError,
        DistutilsSetupError as SetupError,
    )


def check_message_extractors(dist, name, value):
    """Validate the ``message_extractors`` keyword argument to ``setup()``.

    :param dist: the distutils/setuptools ``Distribution`` object
    :param name: the name of the keyword argument (should always be
                 "message_extractors")
    :param value: the value of the keyword argument
    :raise `DistutilsSetupError`: if the value is not valid
    """
    assert name == "message_extractors"
    if not isinstance(value, dict):
        raise SetupError(
            'the value of the "message_extractors" parameter must be a dictionary',
        )


class compile_catalog(frontend.CompileCatalog, Command):
    """Catalog compilation command for use in ``setup.py`` scripts.

    If correctly installed, this command is available to Setuptools-using
    setup scripts automatically.  For projects using plain old ``distutils``,
    the command needs to be registered explicitly in ``setup.py``::

        from babel.messages.setuptools_frontend import compile_catalog

        setup(
            ...
            cmdclass = {'compile_catalog': compile_catalog}
        )

    .. versionadded:: 0.9
    """


class extract_messages(frontend.ExtractMessages, Command):
    """Message extraction command for use in ``setup.py`` scripts.

    If correctly installed, this command is available to Setuptools-using
    setup scripts automatically.  For projects using plain old ``distutils``,
    the command needs to be registered explicitly in ``setup.py``::

        from babel.messages.setuptools_frontend import extract_messages

        setup(
            ...
            cmdclass = {'extract_messages': extract_messages}
        )
    """


class init_catalog(frontend.InitCatalog, Command):
    """New catalog initialization command for use in ``setup.py`` scripts.

    If correctly installed, this command is available to Setuptools-using
    setup scripts automatically.  For projects using plain old ``distutils``,
    the command needs to be registered explicitly in ``setup.py``::

        from babel.messages.setuptools_frontend import init_catalog

        setup(
            ...
            cmdclass = {'init_catalog': init_catalog}
        )
    """


class update_catalog(frontend.UpdateCatalog, Command):
    """Catalog merging command for use in ``setup.py`` scripts.

    If correctly installed, this command is available to Setuptools-using
    setup scripts automatically.  For projects using plain old ``distutils``,
    the command needs to be registered explicitly in ``setup.py``::

        from babel.messages.setuptools_frontend import update_catalog

        setup(
            ...
            cmdclass = {'update_catalog': update_catalog}
        )

    .. versionadded:: 0.9
    """


COMMANDS = {
    "compile_catalog": compile_catalog,
    "extract_messages": extract_messages,
    "init_catalog": init_catalog,
    "update_catalog": update_catalog,
}
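

# Example setup.py wiring (illustrative sketch; the project name and source
# path are assumptions):
#
#     from setuptools import setup
#     from babel.messages.setuptools_frontend import COMMANDS
#
#     setup(
#         name="myproject",
#         cmdclass=COMMANDS,
#         message_extractors={"myproject": [("**.py", "python", None)]},
#     )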