test
This commit is contained in:
		| @ -0,0 +1,86 @@ | ||||
| """ | ||||
| Script which takes one or more file paths and reports on their detected | ||||
| encodings | ||||
|  | ||||
| Example:: | ||||
|  | ||||
|     % chardetect somefile someotherfile | ||||
|     somefile: windows-1252 with confidence 0.5 | ||||
|     someotherfile: ascii with confidence 1.0 | ||||
|  | ||||
| If no paths are provided, it takes its input from stdin. | ||||
|  | ||||
| """ | ||||
|  | ||||
|  | ||||
| import argparse | ||||
| import sys | ||||
|  | ||||
| from .. import __version__ | ||||
| from ..universaldetector import UniversalDetector | ||||
|  | ||||
|  | ||||
| def description_of(lines, name="stdin"): | ||||
|     """ | ||||
|     Return a string describing the probable encoding of a file or | ||||
|     list of strings. | ||||
|  | ||||
|     :param lines: The lines to get the encoding of. | ||||
|     :type lines: Iterable of bytes | ||||
|     :param name: Name of file or collection of lines | ||||
|     :type name: str | ||||
|     """ | ||||
|     u = UniversalDetector() | ||||
|     for line in lines: | ||||
|         line = bytearray(line) | ||||
|         u.feed(line) | ||||
|         # shortcut out of the loop to save reading further - particularly useful if we read a BOM. | ||||
|         if u.done: | ||||
|             break | ||||
|     u.close() | ||||
|     result = u.result | ||||
|     if result["encoding"]: | ||||
|         return f'{name}: {result["encoding"]} with confidence {result["confidence"]}' | ||||
|     return f"{name}: no result" | ||||
|  | ||||
|  | ||||
| def main(argv=None): | ||||
|     """ | ||||
|     Handles command line arguments and gets things started. | ||||
|  | ||||
|     :param argv: List of arguments, as if specified on the command-line. | ||||
|                  If None, ``sys.argv[1:]`` is used instead. | ||||
|     :type argv: list of str | ||||
|     """ | ||||
|     # Get command line arguments | ||||
|     parser = argparse.ArgumentParser( | ||||
|         description="Takes one or more file paths and reports their detected \ | ||||
|                      encodings" | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         "input", | ||||
|         help="File whose encoding we would like to determine. \ | ||||
|                               (default: stdin)", | ||||
|         type=argparse.FileType("rb"), | ||||
|         nargs="*", | ||||
|         default=[sys.stdin.buffer], | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         "--version", action="version", version=f"%(prog)s {__version__}" | ||||
|     ) | ||||
|     args = parser.parse_args(argv) | ||||
|  | ||||
|     for f in args.input: | ||||
|         if f.isatty(): | ||||
|             print( | ||||
|                 "You are running chardetect interactively. Press " | ||||
|                 "CTRL-D twice at the start of a blank line to signal the " | ||||
|                 "end of your input. If you want help, run chardetect " | ||||
|                 "--help\n", | ||||
|                 file=sys.stderr, | ||||
|             ) | ||||
|         print(description_of(f, f.name)) | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
		Reference in New Issue
	
	Block a user