# Understanding JWT
# This is a little script to help understand JSON Web Tokens (JWT). Also see the references linked in the comments below.
# SPDX-FileCopyrightText: 2021-2023 Univention GmbH
#
# SPDX-License-Identifier: AGPL-3.0-only
# This little description is lifted from https://auth0.com/blog/how-to-handle-jwt-in-python/
import base64
from pprint import pprint
# pip install pyjwt[crypto]
import jwt
from cryptography.hazmat.primitives import serialization
from jwt_secrets import PRIVATE, PUBLIC, shared
from recursive_jwt import recursive_jwt
from icecream import ic
# A JWT is built around a payload: an object of arbitrary keys and values.
# Some keys ("claims") have a predefined meaning, see
# https://datatracker.ietf.org/doc/html/rfc7519#section-4.1 or, as a shorter
# table, https://www.iana.org/assignments/jwt/jwt.xhtml
payload1 = {'sub': 'the subject', 'foo': 'bar'}
# The payload gets encoded and signed with a key. The key can be a shared
# secret (as here) or the private half of an asymmetric key pair.
token1 = jwt.encode(payload=payload1, key=shared)
# Let's have a look at the token. Sample output:
# eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ0aGUgc3ViamVjdCIsImZvbyI6ImJhciJ9.Dw6fFM-Jk7oP1g7Iu9FKl9cO5PEF7ITXEzRqWaD3sIo
ic(token1)
# The token consists of three separately encoded parts joined by dots:
# header, payload, signature.
header, payload, signature = token1.split('.')


def _b64url_decode(part):
    """Decode one JWT part (base64url with the '=' padding stripped).

    JWT parts use the URL-safe alphabet ('-' and '_' instead of '+' and '/')
    and omit the trailing padding. Plain ``base64.b64decode`` silently drops
    '-' and '_' and therefore returns wrong bytes for parts containing them,
    so the URL-safe decoder is used and the padding is restored first.
    """
    # Pad up to the next multiple of four characters, as the decoder requires.
    return base64.urlsafe_b64decode(part + '=' * (-len(part) % 4))


ic(_b64url_decode(header))  # header, e.g. b'{"typ":"JWT","alg":"HS256"}'
ic(_b64url_decode(payload))  # payload, e.g. b'{"sub":"the subject","foo":"bar"}'
# The signature is raw binary, not JSON. Its encoded length is usually not a
# multiple of four, which is why the padding has to be added back.
ic(_b64url_decode(signature))
# The library can hand us the header without verifying anything. No key is
# required for that, which also means the header can't be trusted.
univerified_header = jwt.get_unverified_header(token1)
ic(univerified_header)  # {'typ': 'JWT', 'alg': 'HS256'}
# Decoding the token (a.k.a. getting the payload) without the key is rejected
# by the library: what use is the payload if we haven't checked its signature?
try:
    # Don't do this in real code: the algorithm is read from the unverified
    # header. Pass an explicit list of algorithms you are willing to accept.
    jwt.decode(token1, algorithms=[univerified_header['alg']])
except jwt.exceptions.InvalidSignatureError:
    ic('no secret, no verification, no decoding')
# So this time we supply the key.
payload_decoded = jwt.decode(token1, key=shared, algorithms=['HS256'])
# Now we hold a payload whose signature has been verified.
ic(payload_decoded)  # {'sub': 'the subject', 'foo': 'bar'}
# I lied above. If we really need the unverified payload we can get it, by
# switching signature verification off and leaving out the key.
payload_unverified = jwt.decode(
    token1,
    # no key
    # The algorithm can also be taken from the header -- but that is unverified!
    algorithms=[univerified_header['alg']],
    options=dict(verify_signature=False),
)
# In this case nobody tampered with the payload, so both results are equal.
assert payload_unverified == payload_decoded
# And now with asymmetric (public/private) keys.
# A key pair was created using ssh and the contents of the key files were
# stored in a separate module. The keys must be loaded into key objects before
# they can be used. The (empty) password has to be passed explicitly for the
# private key; it can't simply be left out.
private = serialization.load_ssh_private_key(PRIVATE.encode(), password=b"")
public = serialization.load_ssh_public_key(PUBLIC.encode())
# Encoding looks like before, only signed with the private key this time ...
token2 = jwt.encode(payload=payload1, key=private, algorithm="RS256")
# ... and decoding looks the same too, verified with the public key.
payload2_decoded = jwt.decode(token2, key=public, algorithms=["RS256"])
# Section separator on stdout: blank line, 40-character rule, blank line, title.
print('', '=' * 40, '', 'verification', sep='\n')
# Now we want to verify an access token using the public key from Keycloak.
# The key comes from http://10.200.41.167:8080/auth/realms/bettermarks, but in
# the form delivered there it is not directly usable as a verification key.
# access_token: a sample token, written as two adjacent string literals that
# Python concatenates (header first, then ".payload.signature").
access_token = \
"eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJMWVFSbjgxcXFCS196anVIcmZWcXU0QjZKejQweUlob1VtaGlmOWJPZDVzIn0" \
".eyJleHAiOjE2MjU3Mzk3ODgsImlhdCI6MTYyNTczOTQ4OCwiYXV0aF90aW1lIjoxNjI1NzM5NDg4LCJqdGkiOiJlNjY1NTY5OC05YzkyLTQ3M2UtYjk1Yy1kZmUyNWE2OGE5YWQiLCJpc3MiOiJodHRwOi8vMTAuMjAwLjQxLjE2Nzo4MDgwL2F1dGgvcmVhbG1zL2JldHRlcm1hcmtzIiwiYXVkIjoiYWNjb3VudCIsInN1YiI6ImY6YWFhMTIzNjMtNGE3YS00YTNkLThiZDEtYTUxNzAwYzY3MTc0OmZvbyIsInR5cCI6IkJlYXJlciIsImF6cCI6InB5dGhvbi1jbGllbnQtdGVzdCgxMC4yMDAuNDEuMTY0KSIsInNlc3Npb25fc3RhdGUiOiJhMGY4MmFiMy05NzMzLTQ1OGItOGM4ZS0zYzAxNTQzNmYyMjUiLCJhY3IiOiIxIiwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbImRlZmF1bHQtcm9sZXMtYmV0dGVybWFya3MiLCJvZmZsaW5lX2FjY2VzcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgYmV0dGVybWFya3NJRCBwcm9maWxlIGVtYWlsIiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJuYW1lIjoiZm9vIGZvbyBmb28iLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJmb28iLCJnaXZlbl9uYW1lIjoiZm9vIGZvbyIsImZhbWlseV9uYW1lIjoiZm9vIiwiZW1haWwiOiJmb283ODlAZXhhbXBsZS5jb20ifQ.VzwIj4uivol1fv1iO4JdVIYBeuHLqSuA60EoQ-Ntd0apZ0vsLKHs-fOEAhLcLU6yNci990ac40C0ZW1-20DBV2vqzNxUXlC7e9eXY9Fx1b32kQZCNZvcEdNQj7j1dAu0hnxycj9fXlFkmXsD0W_DcdALvjRBcKTLBM_JQDUKpqUrwbUVgXxw0YFCUtY5jLCPEHHm8lUC3OFWVVjOHUJ08xTEwn5PXrhEfDaeXBbXHfKEkUNK-AibrZ1Mm3-hFUV6pNzq97aoaGCvH_-BIlJeRr5VGKP5XIay939vXpM_SLEzsWNgvESow0Rgp-ld4wT-q3ygCmF93QqxU2axedg2gQ"
# keycloak_public_key: the public key as one bare base64 string (split over
# several literals for line length only), i.e. without any PEM header/footer.
keycloak_public_key = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAjxldobTiWegAsYhbVT+MdjNNCffFKkih1xt1nKaR" \
"+FGCOXV9zYpGnsBD+KCCG6atNTYgL8RDPsuYY/XdcmqpoYL4cXFJQVtyXUPEi38/AQ24uUgTifQURC1N1d5eGGPoJE4r9M" \
"/dGLmCsHTyVagXDQw1WG4Lbx/UNkDM72uk+rw9Phc+VZHvMXHcDpsIIc3fhf6g2Je4PLYu4" \
"/GPm86NCPyQlBNzYnmebxxzmAyLg9EI1YFhD2eP1paLag+difvF098a7ed+8sHnJUsLrn94E8RY28Oe5lpDh5g0wl" \
"+ph9AK5FIrWYw0HIYaHjq2S2rrUQeBynb2OwfokoKqLqmcrQIDAQAB"
# The bare key has to be wrapped in a PEM header and footer line before it can
# be used. So the internet tells us.
keycloak_with_headers = "\n".join((
    "-----BEGIN PUBLIC KEY-----",
    keycloak_public_key,
    "-----END PUBLIC KEY-----",
))
verified_token = jwt.decode(
    access_token,
    key=keycloak_with_headers,
    algorithms=['RS256'],
    # Again something the internet tells us.
    # NOTE(review): presumably this has to match the token's "aud" claim -- confirm.
    audience='account',
    # Skip the expiry check.
    # NOTE(review): presumably because the sample token has long expired -- confirm.
    options={'verify_exp': False},
)
# And now we have a verified token.
ic(verified_token)
# And now for a little helper to look at the complete token, which might be
# useful: it tries to recursively decode whatever it finds. You can also use
# jwt.io or a similar tool instead.
# Section separator on stdout: blank line, 40-character rule, blank line, title.
print('', '=' * 40, '', 'Access token', sep='\n')
ic(recursive_jwt(access_token))