import
requests
import
subprocess
import
json
import
time
import
os
def call_js_x_s(func_name, *args, script_path='x_s.js', node_cmd=('node', '-e')):
    """Evaluate a function from a JavaScript file and return what it prints.

    The contents of ``script_path`` are loaded, a ``console.log(<call>)``
    statement is appended, and the combined source is handed to the
    JavaScript runtime as a single command-line argument.

    Args:
        func_name: Name of the JavaScript function to call.
        *args: Positional arguments for the call.  Each one is serialised
            with ``json.dumps`` so that quotes, backslashes and newlines in
            strings are escaped and ``True``/``False``/``None`` become
            ``true``/``false``/``null``.
        script_path: Path of the JavaScript source file (keyword-only).
        node_cmd: Command prefix that evaluates a source string passed as the
            following argument (keyword-only).

    Returns:
        The runtime's standard output with surrounding whitespace stripped.

    Raises:
        FileNotFoundError: If ``script_path`` or the runtime executable does
            not exist.
        TypeError: If an argument cannot be serialised by ``json.dumps``.
        RuntimeError: If the runtime exits with a non-zero status.
    """
    with open(script_path, 'r', encoding='utf-8') as f:
        js_code = f.read().strip()

    # A JSON document is also a valid JavaScript literal, so json.dumps gives
    # correct quoting/escaping instead of hand-wrapping strings in quotes.
    js_args = ', '.join(json.dumps(arg) for arg in args)
    js_complete_code = f"{js_code};\nconsole.log({func_name}({js_args}));"

    res = subprocess.run(
        [*node_cmd, js_complete_code],
        capture_output=True,
        text=True,
        encoding='utf-8',
    )
    if res.returncode != 0:
        # Surface the runtime's own error instead of returning an empty string
        # that only fails later, far away from the real cause.
        raise RuntimeError(
            f"{func_name} failed with exit code {res.returncode}: "
            f"{res.stderr.strip()}"
        )
    return res.stdout.strip()
def get_header(url):
    """Build the request headers, including the signature pair, for ``url``.

    The ``https://edith.xiaohongshu.com`` prefix is removed from ``url`` and
    the remainder is passed to the ``sign`` function via ``call_js_x_s``.
    The printed result is parsed as JSON after swapping single quotes for
    double quotes, and its ``X-s`` / ``X-t`` entries are added to a fixed set
    of headers.

    Args:
        url: Full request URL.

    Returns:
        dict: Header names mapped to string values.
    """
    api_path = url.replace('https://edith.xiaohongshu.com', '')
    raw_signature = call_js_x_s('sign', api_path, '')
    # The printed object uses single quotes; turn them into double quotes so
    # that the text can be read by the JSON parser.
    signature = json.loads(raw_signature.replace("'", '"'))

    # NOTE(review): the cookie below is a hard-coded session value committed
    # to source; presumably it expires and has to be refreshed by hand.
    cookie = (
        'xsecappid=xhs-pc-web; a1=186c96c5e2366amhap5lr1rtd8j877gagbvcz5t0f50000292212; '
        'webId=70ede759c64517a22acfe9f753189540; '
        'gid=yYKSjKSKq0kqyYKSjKS2dD29JqKK06x0U21Fh8iyF9fYVS28YWWk0u888JjJJyJ8fYq4qfy2; '
        'gid.sign=6YYYnPk0iBrNr1pTgoLmn+uI2XY=; web_session=040069796f614313dd5255296e364bc5fb95e4; '
        'webBuild=2.5.2; websectiga=7750c37de43b7be9de8ed9ff8ea0e576519e8cd2157322eb972ecb429a7735d4; '
        'sec_poison_id=4ba14929-e7b7-454e-9312-d2fc6965b5c3; extra_exp_ids=yamcha_0327_exp,h5_1208_exp3,'
        'ques_clt2; extra_exp_ids.sig=ETM51AFqVyLPOioG2x0qNaEzMLVwrEIN37uTpfkLqxc'
    )
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42'
    )

    # Insertion order is kept the same as before; the signature pair goes last.
    result = {
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7",
        "cache-control": "no-cache",
        "content-type": "application/json;charset=UTF-8",
        'authority': "www.xiaohongshu.com",
        'method': 'GET',
        'scheme': 'https',
        'accept': "application/json, text/plain, */*",
        'cookie': cookie,
        'User-Agent': user_agent,
        'origin': "https://www.xiaohongshu.com",
        'referer': "https://www.xiaohongshu.com",
    }
    result['X-s'] = signature['X-s']
    result['X-t'] = str(signature['X-t'])
    return result
def get_req(headers, url, timeout=10):
    """Fetch ``url`` with a GET request and return the response body.

    Args:
        headers: Mapping of HTTP headers to send.
        url: Address to request.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The response text when the status code is 200.  For any other status
        the code is printed and ``None`` is returned.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout elapses.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    print(response.status_code)
    return None
def start(my_url):
    """Download every comment page reachable from ``my_url``.

    Each page is requested with freshly signed headers.  When a page contains
    comments, its raw response text is appended to ``<note_id>.json``, where
    ``note_id`` is taken from the URL's ``note_id`` query parameter.  The
    ``cursor`` value of the response is then appended to the URL to request
    the next page, after a two second pause.  The walk stops when a request
    does not succeed, a page has no comments, or no cursor is returned.

    Args:
        my_url: Comment-page URL carrying a ``note_id`` query parameter and
            ending in ``cursor=`` (optionally followed by a cursor value).

    Raises:
        ValueError: If ``my_url`` has no ``note_id`` query parameter.
    """
    # Function-scope import keeps this block independent of the file header.
    from urllib.parse import parse_qs, urlsplit

    note_ids = parse_qs(urlsplit(my_url).query).get("note_id")
    if not note_ids:
        raise ValueError(f"no note_id query parameter in {my_url!r}")
    out_path = note_ids[0] + ".json"

    # Everything up to and including 'cursor=' stays fixed; only the cursor
    # value that follows it changes from page to page.
    base_url = my_url.split('cursor=')[0] + 'cursor='
    page_url = my_url

    # A loop rather than one recursive call per page, so that long comment
    # threads cannot run into the interpreter's recursion limit.
    while True:
        headers = get_header(page_url)
        comm = get_req(headers, page_url)
        if comm is None:
            # get_req already printed the status code of the failed request.
            return

        payload = json.loads(comm).get("data") or {}
        if not payload.get("comments"):
            return

        with open(out_path, "a", encoding='utf-8') as out:
            out.write(comm)

        cursor = payload.get("cursor")
        if not cursor:
            return

        page_url = base_url + cursor
        time.sleep(2)  # pause between consecutive page requests
def main(note_id=None):
    """Download all comments of one note.

    Args:
        note_id: Identifier of the note.  When omitted, the module-level
            variable ``id`` is used, which the script entry point sets.

    Raises:
        ValueError: If no usable note id is available.  This happens, for
            example, when the module is imported and ``main()`` is called
            without an argument, because ``id`` would otherwise resolve to
            the builtin function.
    """
    if note_id is None:
        # Look only at module globals so the builtin ``id`` is never picked up.
        note_id = globals().get("id")
    if not isinstance(note_id, str) or not note_id:
        raise ValueError(
            "note id required: pass note_id or set the module-level 'id'"
        )
    my_url = f'https://edith.xiaohongshu.com/api/sns/web/v2/comment/page?note_id={note_id}&cursor='
    start(my_url)
if __name__ == '__main__':
    mes = 'https://www.xiaohongshu.com/explore/646311810000000027010b9c'
    # The last path segment of the note URL is the note id.  It is stored
    # under the module-level name `id` because main() picks it up from there.
    id = mes.split('/')[-1]
    # Comment pages are appended to "<id>.json", so that is the file which
    # has to be cleared before a new run; otherwise re-runs duplicate data.
    file_path = id + ".json"
    try:
        os.remove(file_path)
    except FileNotFoundError:
        pass  # nothing left over from an earlier run
    main()