scrapy / scrapy
1 7
import argparse
2 7
import warnings
3 7
from shlex import split
4 7
from http.cookies import SimpleCookie
5 7
from urllib.parse import urlparse
6

7 7
from w3lib.http import basic_auth_header
8

9

10 7
class CurlParser(argparse.ArgumentParser):
    """Argument parser that raises ``ValueError`` on invalid input.

    The stock ``argparse.ArgumentParser.error`` prints a usage message and
    exits the process; overriding it lets callers handle a malformed curl
    command as an ordinary exception.
    """

    def error(self, message):
        """Raise ``ValueError`` describing the parsing failure.

        :param str message: description of the problem, supplied by argparse
        :raises ValueError: always
        """
        error_msg = f'There was an error parsing the curl command: {message}'
        raise ValueError(error_msg)
14

15

16 7
# Module-level parser for the curl options that are translated into Request
# kwargs: the target URL, headers, HTTP method, body data and credentials.
curl_parser = CurlParser()
curl_parser.add_argument('url')
curl_parser.add_argument('-H', '--header', dest='headers', action='append')
curl_parser.add_argument('-X', '--request', dest='method')
curl_parser.add_argument('-d', '--data', '--data-raw', dest='data')
curl_parser.add_argument('-u', '--user', dest='auth')


# Options that the parser accepts and then discards.
safe_to_ignore_arguments = [
    # `--compressed` argument is not safe to ignore, but it's included here
    # because the `HttpCompressionMiddleware` is enabled by default
    ['--compressed'],
    ['-s', '--silent'],
    ['-v', '--verbose'],
    ['-#', '--progress-bar'],
]

# Register every ignorable option as a boolean flag so that it is recognized
# by the parser rather than being reported as an unknown option.
for argument in safe_to_ignore_arguments:
    curl_parser.add_argument(*argument, action='store_true')
35

36

37 7
def _parse_headers_and_cookies(parsed_args):
    """Split the parsed curl arguments into request headers and cookies.

    :param parsed_args: namespace with a ``headers`` attribute (list of raw
                        ``Name: value`` strings, or None) and an ``auth``
                        attribute (``user:password`` string, or None)
    :return: tuple ``(headers, cookies)``; ``headers`` is a list of
             ``(name, value)`` tuples in the order given, ``cookies`` is a
             dict mapping cookie names to their values
    :raises ValueError: if a header or the credentials lack the ``:``
                        separator
    """
    headers = []
    cookies = {}
    for header in parsed_args.headers or ():
        name, separator, val = header.partition(':')
        if not separator:
            # Fail with an explicit message instead of an opaque
            # tuple-unpacking error
            raise ValueError(
                f'Invalid header {header!r}: expected the "Name: value" format'
            )
        name = name.strip()
        val = val.strip()
        if name.title() == 'Cookie':
            # Cookies are returned separately from the other headers; a
            # distinct loop variable avoids rebinding the header name
            for cookie_name, morsel in SimpleCookie(val).items():
                cookies[cookie_name] = morsel.value
        else:
            headers.append((name, val))

    if parsed_args.auth:
        user, separator, password = parsed_args.auth.partition(':')
        if not separator:
            raise ValueError(
                'Invalid credentials: expected the "user:password" format'
            )
        headers.append(('Authorization', basic_auth_header(user, password)))

    return headers, cookies
55

56

57 7
def curl_to_request_kwargs(curl_command, ignore_unknown_options=True):
    """Convert a cURL command syntax to Request kwargs.

    :param str curl_command: string containing the curl command
    :param bool ignore_unknown_options: If true, only a warning is emitted when
                                        cURL options are unknown. Otherwise
                                        raises an error. (default: True)
    :return: dictionary of Request kwargs
    :raises ValueError: if the command is empty or does not start with
                        ``curl``, if it cannot be parsed, or if it contains
                        unknown options and ``ignore_unknown_options`` is
                        false
    """

    curl_args = split(curl_command)

    # An empty or whitespace-only command splits into an empty list, so check
    # for that before looking at the first token
    if not curl_args or curl_args[0] != 'curl':
        raise ValueError('A curl command must start with "curl"')

    parsed_args, argv = curl_parser.parse_known_args(curl_args[1:])

    if argv:
        msg = f'Unrecognized options: {", ".join(argv)}'
        if ignore_unknown_options:
            warnings.warn(msg)
        else:
            raise ValueError(msg)

    url = parsed_args.url

    # curl automatically prepends 'http' if the scheme is missing, but Request
    # needs the scheme to work
    parsed_url = urlparse(url)
    if not parsed_url.scheme:
        url = 'http://' + url

    method = parsed_args.method or 'GET'

    result = {'method': method.upper(), 'url': url}

    headers, cookies = _parse_headers_and_cookies(parsed_args)

    if headers:
        result['headers'] = headers
    if cookies:
        result['cookies'] = cookies
    if parsed_args.data:
        result['body'] = parsed_args.data
        if not parsed_args.method:
            # if the "data" is specified but the "method" is not specified,
            # the default method is 'POST'
            result['method'] = 'POST'

    return result

Read our documentation on viewing source code .

Loading