diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 701105e0..493cf62d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -13,7 +13,7 @@ jobs: python-version: '3.11' - uses: actions/checkout@v4 - run: python -m pip install .[docs] - - run: python -m sphinx -W -b html docs/ build/html/ + - run: python docs/make_bql_doc.py - uses: actions/upload-pages-artifact@v3 with: path: build/html @@ -26,6 +26,6 @@ jobs: environment: name: github-pages runs-on: ubuntu-latest - if: github.ref == 'refs/heads/master' + if: github.ref == 'refs/heads/mlell' steps: - uses: actions/deploy-pages@v4 diff --git a/README.rst b/README.rst index 1db11c69..45a08dfd 100644 --- a/README.rst +++ b/README.rst @@ -5,3 +5,18 @@ beanquery is a customizable and extensible lightweight SQL query tool that works on tabular data, including `Beancount`__ ledger data. __ https://beancount.github.io/ + +With this tool you can write *queries* to extract information from your +Beancount ledger. This is the documentation of functions and field names +which are available for queries. + +Please read the Manual of the `Beancount Query Language (BQL)`__ if you +do not yet know how to write queries. + + +__ http://furius.ca/beancount/doc/query + +After you have learned the BQL, you can use the +`list and documentation of available functions and columns`__ + +__ https://beancount.github.io/beanquery diff --git a/beanquery/query_compile.py b/beanquery/query_compile.py index eda87ec7..d6519288 100644 --- a/beanquery/query_compile.py +++ b/beanquery/query_compile.py @@ -406,11 +406,25 @@ def __call__(self, context): class EvalFunction(EvalNode): + """Base class for function evaluation nodes. + + Class Attributes: + __intypes__: List of input parameter types for type checking. + __outtype__: Output type of the function. None means no type + annotation. To annotate that the function returns None, use + types.NoneType. 
def _extract_param_names(func):
    """Return the parameter names of a callable, in declaration order.

    Used to generate function documentation.

    Args:
        func: A callable whose signature can be introspected.

    Returns:
        A list with the name of every parameter of func. Nothing is
        filtered out: a caller that does not want a leading parameter
        (for example 'context') documented has to drop it itself.
    """
    return list(inspect.signature(func).parameters)


def _add_to_doc_groups(func_class, intypes, groups):
    """Add a function class to the appropriate documentation groups.

    If explicit groups are provided, use those. Otherwise, determine the
    group from the first input type using TYPE_CATEGORIES, falling back to
    'atomic' when there is no input type or no category lists it.

    Args:
        func_class: The function class to add to documentation groups.
        intypes: List of input types for the function.
        groups: Explicit group name(s) specified by the decorator, or None.
            A single string is accepted as a one-element list.

    Raises:
        ValueError: If a group is not a key of FUNCTION_DOC_GROUPS. Failing
            here surfaces a misspelled group name at registration time
            instead of silently leaving the function out of the docs.
    """
    # A bare string would otherwise be iterated character by character.
    if groups and isinstance(groups, str):
        groups = [groups]

    if groups:
        # dict.fromkeys() drops duplicates while keeping the given order, so
        # a class is never listed twice in the same group.
        target_groups = list(dict.fromkeys(groups))
    else:
        target_groups = ['atomic']
        if intypes:
            first_type = intypes[0]
            for category, members in TYPE_CATEGORIES.items():
                if first_type in members:
                    target_groups = [category]
                    break

    unknown = [group for group in target_groups
               if group not in FUNCTION_DOC_GROUPS]
    if unknown:
        raise ValueError(
            f'unknown documentation group(s) {unknown!r} for {func_class!r}; '
            f'valid groups are {sorted(FUNCTION_DOC_GROUPS)!r}')

    for group in target_groups:
        FUNCTION_DOC_GROUPS[group].append(func_class)
+ """ def decorator(func): + # Extract parameter names from the original function, excluding 'context' if present + param_names = _extract_param_names(func) + if pass_context: + param_names = param_names[1:] # Remove 'context' from param names + class Func(query_compile.EvalFunction): __intypes__ = intypes + __outtype__ = outtype + __param_names__ = param_names pure = not pass_context def __init__(self, context, operands): super().__init__(context, operands, outtype) @@ -63,22 +150,38 @@ def __call__(self, row): Func.__name__ = name if name is not None else func.__name__ Func.__doc__ = func.__doc__ query_compile.FUNCTIONS[Func.__name__].append(Func) + _add_to_doc_groups(Func, intypes, groups) + return func return decorator -def register(name=None): +def register(name=None, groups=None): + """Decorator to register a function in the query environment with + more fine-grained control. Expects to decorate a class that implements + the query_compile.EvalFunction interface. Setting __intypes__, + __outtype__ and __param_names__ is left to the decorated class. + + Args: + name: Name of the function. + groups: In which group(s) the function in the help output (list). See + TYPE_CATEGORIES for valid group names. 
+ """ def decorator(cls): if name is not None: cls.__name__ = name query_compile.FUNCTIONS[cls.__name__].append(cls) + _add_to_doc_groups(cls, cls.__intypes__, groups) + return cls return decorator @register('getitem') class GetItem2(query_compile.EvalFunction): + """Get one item from a dict object if it exists, otherwise a default value.""" __intypes__ = [dict, str] + __param_names__ = ['d', 'key'] def __init__(self, context, operands): super().__init__(context, operands, object) @@ -93,7 +196,9 @@ def __call__(self, row): @register('getitem') class GetItem3(query_compile.EvalFunction): + """Get one item from a dict object if it exists, otherwise a default value.""" __intypes__ = [dict, str, types.Any] + __param_names__ = ['d', 'key', 'default'] def __init__(self, context, operands): super().__init__(context, operands, object) @@ -128,6 +233,7 @@ def bool_(x): @function([str], int, name='int') @function([object], int, name='int') def int_(x): + """Convert the object to an integer number.""" try: return int(x) except (ValueError, TypeError): @@ -140,6 +246,7 @@ def int_(x): @function([str], Decimal, name='decimal') @function([object], Decimal, name='decimal') def decimal_(x): + """Convert the object to a decimal number.""" try: return Decimal(x) except (ValueError, TypeError, decimal.InvalidOperation): @@ -148,6 +255,7 @@ def decimal_(x): @function([types.Any], str, name='str') def str_(x): + """Convert any object to a string.""" if x is True: return 'TRUE' if x is False: @@ -155,10 +263,13 @@ def str_(x): return str(x) -@function([datetime.date], datetime.date, name='date') -@function([str], datetime.date, name='date') -@function([object], datetime.date, name='date') +@function([datetime.date], datetime.date, name = 'date', groups=['date']) +@function([str], datetime.date, name='date', groups=['atomic', 'date']) +@function([object], datetime.date, name='date', groups=['atomic', 'date']) def date_(x): + """Convert the argument to a date. 
The argument should be + a string in the format YYYY-MM-DD. Date objects are passed + unchanged. Objects are converted to None.""" if isinstance(x, datetime.date): return x if isinstance(x, str): @@ -169,7 +280,7 @@ def date_(x): return None -@function([int, int, int], datetime.date, name='date') +@function([int, int, int], datetime.date, name='date', groups=['date']) def date_from_ymd(year, month, day): """Construct a date with year, month, day arguments.""" try: @@ -287,7 +398,7 @@ def weekday_(x): return x.strftime('%a') -@function([], datetime.date) +@function([], datetime.date, groups=['date']) def today(): """Today's date""" return datetime.date.today() @@ -295,20 +406,20 @@ def today(): # Operations on accounts. -@function([str], str) -@function([str, int], str) +@function([str], str, groups=['account']) +@function([str, int], str, groups=['account']) def root(acc, n=1): """Get the root name(s) of the account.""" return account.root(n, acc) -@function([str], str) +@function([str], str, groups=['account']) def parent(acc): """Get the parent name of the account.""" return account.parent(acc) -@function([str], str) +@function([str], str, groups=['account']) def leaf(acc): """Get the name of the leaf subaccount.""" return account.leaf(acc) @@ -354,7 +465,7 @@ def lower(string): NONENONE = None, None -@function([str], datetime.date, pass_context=True) +@function([str], datetime.date, pass_context=True, groups=['account', 'metadata']) def open_date(context, acc): """Get the date of the open directive of the account.""" open_entry, _ = context.tables['accounts'].accounts.get(acc, NONENONE) @@ -363,7 +474,7 @@ def open_date(context, acc): return open_entry.date -@function([str], datetime.date, pass_context=True) +@function([str], datetime.date, pass_context=True, groups=['account', 'metadata']) def close_date(context, acc): """Get the date of the close directive of the account.""" _, close_entry = context.tables['accounts'].accounts.get(acc, NONENONE) @@ -372,10 
+483,12 @@ def close_date(context, acc): return close_entry.date -@function([str], dict, pass_context=True) -@function([str, str], object, pass_context=True) +@function([str], dict, pass_context=True, groups=['account', 'metadata']) +@function([str, str], object, pass_context=True, groups=['account', 'metadata']) def open_meta(context, account, key=None): - """Get the metadata dict of the open directive of the account.""" + """Get the metadata dict of the open directive of the account. + With one argument, returns all metadata as a dict object. With two + arguments, returns the value of a specific metadata key.""" open_entry, _ = context.tables['accounts'].accounts.get(account, NONENONE) if open_entry is None: return None @@ -385,30 +498,30 @@ def open_meta(context, account, key=None): # Stub kept only for function type checking and for generating documentation. -@function([str], object) +@function([str], object, pass_context = True, groups = ['metadata']) def meta(context, key): """Get some metadata key of the posting.""" raise NotImplementedError # Stub kept only for function type checking and for generating documentation. -@function([str], object) +@function([str], object, pass_context = True, groups = ['metadata']) def entry_meta(context, key): """Get some metadata key of the transaction.""" raise NotImplementedError # Stub kept only for function type checking and for generating documentation. 
# Stub kept only for function type checking and for generating documentation.
# pass_context=True matches the signature below: the leading 'context'
# parameter is supplied by the evaluator and must not be listed as a
# user-facing argument (the single declared input type belongs to 'pattern').
@function([str], bool, pass_context=True, groups=['account'])
def has_account(context, pattern):
    """True if the transaction has at least one posting matching the regular expression argument."""
    raise NotImplementedError
Returns the amount, stripping cost.""" return convert.get_units(pos) @function([inventory.Inventory], inventory.Inventory, name='units') def inventory_units(inv): - """Get the number of units of an inventory (stripping cost).""" + """For all position in the inventory, strip the information about + at which cost they were acquired. The result is another inventory.""" return inv.reduce(convert.get_units) @@ -471,7 +585,9 @@ def position_cost(pos): @function([inventory.Inventory], inventory.Inventory, name='cost') def inventory_cost(inv): - """Get the cost of an inventory.""" + """Get the cost of all positions in an inventory. Returns an + inventory with as many positions as there were currencies by which + the positions in the original inventory were acquired.""" return inv.reduce(convert.get_cost) @@ -515,10 +631,11 @@ def inventory_value(context, inv, date=None): return inv.reduce(convert.get_value, price_map, date) -@function([str, str], Decimal, pass_context=True) -@function([str, str, datetime.date], Decimal, pass_context=True, name='getprice') +@function([str, str], Decimal, pass_context=True, groups = ['position']) +@function([str, str, datetime.date], Decimal, pass_context=True, name='getprice', groups = ['position']) def getprice(context, base, quote, date=None): - """Fetch a price.""" + """Fetch a price. Arguments: Base currency, e.g. 'EUR'; Commodity name (string); + Date: Price as of this date. 
Default: Latest price.""" price_map = context.tables['prices'].price_map pair = (base.upper(), quote.upper()) _, price = prices.get_price(price_map, pair, date) @@ -555,7 +672,8 @@ def joinstr(values): return ','.join(values) -@function([str, inventory.Inventory], amount.Amount, name='only') +@function([str, inventory.Inventory], amount.Amount, name='only', + groups=['amount']) def only_inventory(currency, inventory_): """Get one currency's amount from the inventory.""" return inventory_.get_currency_units(currency) @@ -611,10 +729,14 @@ def day(x): types.ALIASES[datetime.date] = Date -@function([str], datetime.date) -@function([str, str], datetime.date) +@function([str], datetime.date, groups=['atomic', 'date']) +@function([str, str], datetime.date, groups=['atomic', 'date']) def parse_date(string, frmt=None): - """Parse date from string.""" + """Parse date from string (first argument). Without second argument, + the 'dateutil' library is used to parse the string, and can deal with + several time stamp formats. The optional second argument specifies the + format as in the 'datetime' library, for example: + '%Y-%m-%d' to parse '2022-01-20'.""" if frmt is None: return dateutil.parser.parse(string).date() return datetime.datetime.strptime(string, frmt).date() @@ -632,9 +754,12 @@ def date_add(x, y): return x + datetime.timedelta(days=y) -@function([str, datetime.date], datetime.date) +@function([str, datetime.date], datetime.date, groups = ['date']) def date_trunc(field, x): - """Truncate a date to the specified precision.""" + """Truncate a date to the specified precision. Example: date_trunc('month', + date). 
Make sure to use single quotes in the first argument, as + double-quoted strings are parsed as column names for backwards compatibility + reasons.""" if field == 'week': return x - relativedelta(weekday=weekday(0, -1)) if field == 'month': @@ -652,9 +777,27 @@ def date_trunc(field, x): return None -@function([str, datetime.date], int) +@function([str, datetime.date], int, groups = ['date']) def date_part(field, x): - """Extract the specified field from a date.""" + """Extract the specified field from a date. + + Arguments: + + field: Date part to extract, for example, 'year', 'month', 'week', 'day'. Details below. + since the UNIX epoch. + x: The date to extract the field from. + + Details: + The 'field' argument can be any of + + * 'weekday'/'dow', 'week', 'month', 'quarter', 'year', 'decade', 'century', 'millennium', or + * 'epoch': returns the number of seconds since the UNIX epoch. + * 'isoweekday'/'isodow', 'isoyear': The ISO 8601 week number or year, which + might differ from the conventional understanding around New Year's eve. + + Make sure to use single quotes for 'field', as double-quoted strings are parsed + as column names for backwards compatibility reasons. + """ if field == 'weekday' or field == 'dow': return x.weekday() if field == 'isoweekday' or field == 'isodow': @@ -682,9 +825,14 @@ def date_part(field, x): return None -@function([str], relativedelta) +@function([str], relativedelta, groups = ['date']) def interval(x): - """Construct a relative time interval.""" + """Construct a relative time interval. + + Arguments: + x: A string of the form 'N unit' where unit is one of 'day', 'month', 'year' + (Plural forms are also accepted). Examples: '1 month', '-20 days'. 
def aggregator(intypes, outtype=None, name=None, groups=None):
    """Decorator to register an aggregator function.

    Expects to decorate a class. The class is annotated with its input
    types, output type and placeholder parameter names, appended to
    query_compile.FUNCTIONS under its name, and added to the documentation
    groups.

    Args:
        intypes: A list of types that the aggregator can accept.
        outtype: The output type of the aggregator function. None means no
            type annotation. To indicate that the function returns None,
            use types.NoneType.
        name: The name of the aggregator function. Defaults to the name of
            the decorated class.
        groups: A list of groups that the aggregator belongs to. See
            TYPE_CATEGORIES for valid group names.

    Returns:
        A class decorator that returns the decorated class itself.
    """
    def decorator(cls):
        cls.__intypes__ = intypes
        cls.__outtype__ = outtype
        # The decorated classes do not have explicit parameter names in a
        # signature, so single lowercase letters a, b, c, ... are used as
        # placeholders. Stored as a list, like every other __param_names__.
        cls.__param_names__ = list('abcdefghijklmnopqrstuvwxyz'[:len(intypes)])

        if name is not None:
            cls.__name__ = name
        query_compile.FUNCTIONS[cls.__name__].append(cls)
        _add_to_doc_groups(cls, intypes, groups)

        return cls
    return decorator
SumAmount(query_compile.EvalAggregator): """Calculate the sum of the amount. The result is an Inventory.""" def __init__(self, context, operands): @@ -815,7 +990,7 @@ def update(self, store, context): store[self.handle].add_amount(value) -@aggregator([position.Position], name='sum') +@aggregator([position.Position], inventory.Inventory, name='sum', groups = ['position']) class SumPosition(query_compile.EvalAggregator): """Calculate the sum of the position. The result is an Inventory.""" def __init__(self, context, operands): @@ -827,7 +1002,7 @@ def update(self, store, context): store[self.handle].add_position(value) -@aggregator([inventory.Inventory], name='sum') +@aggregator([inventory.Inventory], inventory.Inventory, name='sum') class SumInventory(query_compile.EvalAggregator): """Calculate the sum of the inventories. The result is an Inventory.""" def __init__(self, context, operands): @@ -839,7 +1014,7 @@ def update(self, store, context): store[self.handle].add_inventory(value) -@aggregator([types.Any], name='first') +@aggregator([types.Any], types.Any, name='first') class First(query_compile.EvalAggregator): """Keep the first of the values seen.""" def initialize(self, store): @@ -851,7 +1026,7 @@ def update(self, store, context): store[self.handle] = value -@aggregator([types.Any], name='last') +@aggregator([types.Any], types.Any, name='last') class Last(query_compile.EvalAggregator): """Keep the last of the values seen.""" def initialize(self, store): @@ -862,7 +1037,7 @@ def update(self, store, context): store[self.handle] = value -@aggregator([types.Any], name='min') +@aggregator([types.Any], types.Any, name='min') class Min(query_compile.EvalAggregator): """Compute the minimum of the values.""" def initialize(self, store): @@ -876,7 +1051,7 @@ def update(self, store, context): store[self.handle] = value -@aggregator([types.Any], name='max') +@aggregator([types.Any], types.Any, name='max') class Max(query_compile.EvalAggregator): """Compute the maximum of 
def _describe_functions(functions, aggregates=False, type_filter=None):
    """Collect documentation entries for BQL functions.

    Args:
        functions: Dictionary mapping a function name to the list of
            EvalFunction subclasses registered under it (the actual
            classes, not objects, which would represent a particular
            function call). Only consulted when type_filter is not given.
        aggregates: If True, describe aggregates; if False, describe
            regular functions.
        type_filter: Optional documentation group name (see
            TYPE_CATEGORIES). When given, the candidates are taken from
            FUNCTION_DOC_GROUPS instead of from 'functions'; an unknown
            name yields no entries.

    Returns:
        A sorted list of (name, doc, args, outtype) tuples, one per
        function variant. 'name' is lowercased, 'doc' is the raw docstring
        or '', 'args' is the rendered argument list and 'outtype' is the
        rendered output type or None when the function has no annotation.
    """
    if type_filter:
        # Use the pre-populated FUNCTION_DOC_GROUPS for filtering.
        candidates = FUNCTION_DOC_GROUPS.get(type_filter, [])
    else:
        candidates = [func for funcs in functions.values() for func in funcs]

    entries = []
    for func in candidates:
        # Keep aggregates or non-aggregates only, as requested.
        if aggregates != issubclass(func, query_compile.EvalAggregator):
            continue

        # Render 'name: type' for each argument. A class may declare fewer
        # parameter names than input types (the base-class default is an
        # empty list); the remaining arguments are shown by type alone
        # rather than being dropped from the signature.
        param_names = func.__param_names__
        rendered = []
        for index, dtype in enumerate(func.__intypes__):
            type_name = types.name(dtype)
            if index < len(param_names):
                rendered.append(f'{param_names[index]}: {type_name}')
            else:
                rendered.append(type_name)

        outtype = types.name(func.__outtype__) if func.__outtype__ else None
        entries.append((func.__name__.lower(), func.__doc__ or '',
                        ', '.join(rendered), outtype))

    # outtype may be None, which cannot be ordered against a string: sort on
    # a key that maps None to '' so that ties on name, doc and args never
    # raise TypeError.
    entries.sort(key=lambda entry: (entry[0], entry[1], entry[2], entry[3] or ''))
    return entries
If you use any aggregate - function, you must also provide a GROUP-BY clause. + simple functions and aggregate functions. You can use AS to determine + the output column name, for example: - Columns - ------- + SELECT yearmonth(date) AS month .... - {columns} + If you use any aggregate function, you must also provide a GROUP-BY + clause. - Functions - --------- + See the online Beanquery documentation for the full list of columns, + functions, and aggregates: - {functions} - - Aggregate functions - ------------------- - - {aggregates} + https://beancount.github.io/beanquery/ """) - print(template.format(**_describe(self.context.tables['postings'], - query_compile.FUNCTIONS)), file=self.outfile) + + print(template, file=self.outfile) def help_from(self): template = textwrap.dedent(""" @@ -657,20 +652,13 @@ def help_from(self): A logical expression that consist of columns on directives (mostly transactions) and simple functions. - Columns - ------- - - {columns} - - Functions - --------- + See the online Beanquery documentation for the full list of columns, + functions, and aggregates: - {functions} + https://beancount.github.io/beanquery/ """) - print(template.format(**_describe(self.context.tables['entries'], - query_compile.FUNCTIONS)), - file=self.outfile) + print(template, file=self.outfile) def help_where(self): template = textwrap.dedent(""" @@ -678,20 +666,13 @@ def help_where(self): A logical expression that consist of columns on postings and simple functions. 
- Columns - ------- + See the online Beanquery documentation for the full list of columns, + functions, and aggregates: - {columns} - - Functions - --------- - - {functions} + https://beancount.github.io/beanquery/ """) - print(template.format(**_describe(self.context.tables['postings'], - query_compile.FUNCTIONS)), file=self.outfile) - + print(template, file=self.outfile) def _describe_columns(columns): out = io.StringIO() @@ -702,35 +683,6 @@ def _describe_columns(columns): print(file=out) return out.getvalue().rstrip() - -def _describe_functions(functions, aggregates=False): - entries = [] - for name, funcs in functions.items(): - if aggregates != issubclass(funcs[0], query_compile.EvalAggregator): - continue - name = name.lower() - for func in funcs: - args = ', '.join(types.name(d) for d in func.__intypes__) - doc = re.sub(r'[ \n\t]+', ' ', func.__doc__ or '') - entries.append((name, doc, args)) - entries.sort() - out = io.StringIO() - wrapper = textwrap.TextWrapper(initial_indent=' ', subsequent_indent=' ', width=80) - for key, entries in itertools.groupby(entries, key=lambda x: x[:2]): # noqa: B020 - for name, doc, args in entries: - print(f'{name}({args})', file=out) - print(wrapper.fill(doc), file=out) - print(file=out) - return out.getvalue().rstrip() - - -def _describe(table, functions): - return dict( - columns=_describe_columns(table.columns), - functions=_describe_functions(functions, aggregates=False), - aggregates=_describe_functions(functions, aggregates=True)) - - def summary_statistics(entries): """Calculate basic summary statistics to output a brief welcome message. diff --git a/beanquery/sources/beancount.py b/beanquery/sources/beancount.py index e388b6c6..93d604c8 100644 --- a/beanquery/sources/beancount.py +++ b/beanquery/sources/beancount.py @@ -104,6 +104,13 @@ class Position(types.Structure): class Cost(types.Structure): + """The amount which was payed for a position. 
This object + saves besides the amount also the date and label (if assigned). + This serves to identify the position to sell according to + a `given booking rule`__. + + __ https://beancount.github.io/docs/beancount_language_syntax.html#reducing-positions + """ name = 'cost' columns = _typed_namedtuple_to_columns(data.Cost) @@ -296,7 +303,8 @@ def id(entry): @columns.register(str) def type(entry): - """The data type of the directive.""" + """The data type of the directive. Currently, beanquery only can list + the directives of type 'transaction'.""" return type(entry).__name__.lower() @columns.register(str) @@ -359,20 +367,22 @@ def description(entry): @columns.register(set) def tags(entry): - """The set of tags of the transaction.""" + """The set of tags (#abc) of the transaction.""" return getattr(entry, 'tags', None) @columns.register(set) def links(entry): - """The set of links of the transaction.""" + """The set of links (^abc) of the transaction.""" return getattr(entry, 'links', None) @columns.register(dict) def meta(entry): + """The metadata of the transaction.""" return entry.meta @columns.register(typing.Set[str]) def accounts(entry): + """The set of accounts of the transaction.""" return getters.get_entry_accounts(entry) _TABLES.append(EntriesTable) diff --git a/docs/bql_functions.rst.j2 b/docs/bql_functions.rst.j2 new file mode 100644 index 00000000..b5ecf81d --- /dev/null +++ b/docs/bql_functions.rst.j2 @@ -0,0 +1,58 @@ +List of all BQL Functions +========================== + +This page documents all available BQL functions. + +.. + Disambiguate functions and types by pretending they are in + different modules (we are documenting BQL with the Sphinx Python + domain). We put functions in the virtual module "fun" (thus + functions must be explicitly linked like :func:`~fun.myfunction`). + Types have no module such that they are auto-linked by the + `.. function:` directives. + +.. currentmodule:: fun + +{# Render documentation for all functions. 
This uses the function docstrings +defined in beanquery/query_env.py. There are multiple variants of many functions, +differing in the type and number of arguments. We group the variants which +have the same docstrings to render them en bloc. If all variants of a function +of a certain name have the same docstring, we list all signatures directly in +the RST function:: directive. Otherwise, we write .. function:: myfunction(...) +and render code blocks with the signatures, followed by the documentation for +this group of variants. +#} +{% macro render_functions(functions) %} +{% for name, args, has_multiple_docs, docs in preprocess_function_documentation(functions) %} +{% if has_multiple_docs %} +.. function:: {{ name }}(...) + +{% for variant in docs %} + :: + + {{ variant.signatures }} + +{{ variant.doc_text | indent(2, first=True) }} + +{% endfor %} +{% else %} +.. function:: +{% for sig in args %} + {{ sig }} +{% endfor %} + +{{ docs | indent(2, first=True) }} + +{% endif %} +{% endfor %} +{% endmacro %} + +Simple functions +---------------- + +{{ render_functions(_describe_functions(FUNCTIONS)) }} + +Aggregation functions +--------------------- + +{{ render_functions(_describe_functions(FUNCTIONS, aggregates=True)) }} diff --git a/docs/columns_entry.rst.j2 b/docs/columns_entry.rst.j2 new file mode 100644 index 00000000..a2c66272 --- /dev/null +++ b/docs/columns_entry.rst.j2 @@ -0,0 +1,14 @@ +Targets: Entry +============== + +This is the list of all the fields you can use in the +SELECT ... clause and the FROM ... clause of a BQL query. + +For available functions and aggregates to use with these +columns, see :doc:`functions_index`.
+ +{% for name, type_name, doc in preprocess_targets(EntriesTable) -%} +* **{{ name }}**: ({{ type_name }}) {{ doc }} + +{% endfor %} + diff --git a/docs/columns_postings.rst.j2 b/docs/columns_postings.rst.j2 new file mode 100644 index 00000000..30ca91db --- /dev/null +++ b/docs/columns_postings.rst.j2 @@ -0,0 +1,14 @@ +Fields: Posting +=============== + +This is the list of all the fields you can use in the +WHERE ... clause of a BQL query. + +For available functions and aggregates to use with these +columns, see :doc:`functions_index`. + +{% for name, type_name, doc in preprocess_targets(PostingsTable) -%} +* **{{ name }}**: ({{ type_name }}) {{ doc }} + +{% endfor %} + diff --git a/docs/conf.py b/docs/conf.py index 264e3e72..aec5188a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,16 +1,18 @@ +import beanquery + project = 'beanquery' copyright = '2014-2022, beanquery Contributors' author = 'beanquery Contributors' -version = '0.1' +version = beanquery.__version__ language = 'en' html_theme = 'furo' html_title = f'{project} {version}' html_logo = 'logo.svg' extensions = [ - 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', - 'sphinx.ext.intersphinx', - 'sphinx.ext.extlinks', + 'sphinx.ext.autodoc', + #'sphinx.ext.intersphinx', + #'sphinx.ext.extlinks', 'sphinx.ext.githubpages', ] extlinks = { @@ -25,3 +27,5 @@ napoleon_use_param = False autodoc_typehints = 'none' autodoc_member_order = 'bysource' +# see make_bql_doc.py, we use virtual module "fun" for BQL functions +add_module_names = False diff --git a/docs/functions_account.rst.j2 b/docs/functions_account.rst.j2 new file mode 100644 index 00000000..f0e52a43 --- /dev/null +++ b/docs/functions_account.rst.j2 @@ -0,0 +1,7 @@ +Account Functions +================= + +{% for name, first_sentence in preprocess_function_summary(_describe_functions(FUNCTIONS, aggregates=False, type_filter='account')) -%} +* :func:`~fun.{{ name }}`{% if first_sentence %} - {{ first_sentence }}{% endif %} + +{% endfor %} diff --git 
a/docs/functions_aggregates.rst.j2 b/docs/functions_aggregates.rst.j2 new file mode 100644 index 00000000..1dcfb1e8 --- /dev/null +++ b/docs/functions_aggregates.rst.j2 @@ -0,0 +1,9 @@ +Aggregation Functions +===================== + +Functions that compute summary values across groups, as defined by the SELECT ... GROUP BY clause. + +{% for name, first_sentence in preprocess_function_summary(_describe_functions(FUNCTIONS, aggregates=True)) -%} +* :func:`~fun.{{ name }}`{% if first_sentence %} - {{ first_sentence }}{% endif %} + +{% endfor %} diff --git a/docs/functions_amount.rst.j2 b/docs/functions_amount.rst.j2 new file mode 100644 index 00000000..07b36228 --- /dev/null +++ b/docs/functions_amount.rst.j2 @@ -0,0 +1,23 @@ +Amount and Commodity Functions +=============================== + +.. py:type:: amount + + An amount is a value with a currency/commodity. + + Examples: ``10.50 USD``, ``5 HOOL``. + +**Commodities** (synonymous to **currencies**) are not represented by their own type +but are simply stated as all-caps :type:`strings`. + +An amount is wrapped in a :type:`position` when it is registered in an account, +potentially with the original cost attached. + +Amounts can be :func:`summed` up, resulting in :type:`inventories` + +The following functions are available that involve amounts and currencies: + +{% for name, first_sentence in preprocess_function_summary(_describe_functions(FUNCTIONS, aggregates=False, type_filter='amount')) -%} +* :func:`~fun.{{ name }}`{% if first_sentence %} - {{ first_sentence }}{% endif %} + +{% endfor %} diff --git a/docs/functions_atomic.rst.j2 b/docs/functions_atomic.rst.j2 new file mode 100644 index 00000000..46f2caa7 --- /dev/null +++ b/docs/functions_atomic.rst.j2 @@ -0,0 +1,37 @@ +Atomic Functions +================ + +.. type:: str + + Strings in Beanquery should be enclosed in *single* quotes. + Example: ``SELECT * FROM payee == 'Supermarket'``. 
+ + Note: For historical reasons, strings in double quotes can sometimes be used, + but if they match a column name, they will be interpreted as this column + name, not a string. Do not use them. + +.. type:: bool + + This represents a logical value (yes/no). The acceptable values are ``TRUE`` + or ``FALSE``. + +.. type:: set + + This represents a set of values, for example all accounts that are touched + by a transaction. + +.. type:: int + + A whole number, -1, 0, 1, 2... + +.. type:: decimal + + This is a decimal number, as used for example for amounts of currencies + or commodities. Example: 1.25. + +The following functions can be used to work on basic types: + +{% for name, first_sentence in preprocess_function_summary(_describe_functions(FUNCTIONS, aggregates=False, type_filter='atomic')) -%} +* :func:`~fun.{{ name }}`{% if first_sentence %} - {{ first_sentence }}{% endif %} + +{% endfor %} diff --git a/docs/functions_date.rst.j2 b/docs/functions_date.rst.j2 new file mode 100644 index 00000000..9af0b2be --- /dev/null +++ b/docs/functions_date.rst.j2 @@ -0,0 +1,17 @@ +Date Functions +============== + +.. type:: date + + Enter dates in Beanquery by writing them in the format ``YYYY-MM-DD``. + Example: ``SELECT account, amount FROM date >= 2022-01-01``. + Note: Do *not* enclose them in quotes. For historical reasons, + dates in double quotes are accepted, but this is deprecated. 
+ +The following functions are available for inspecting and +manipulating dates: + +{% for name, first_sentence in preprocess_function_summary(_describe_functions(FUNCTIONS, aggregates=False, type_filter='date')) -%} +* :func:`~fun.{{ name }}`{% if first_sentence %} - {{ first_sentence }}{% endif %} + +{% endfor %} diff --git a/docs/functions_index.rst.j2 b/docs/functions_index.rst.j2 new file mode 100644 index 00000000..ee4cf5cc --- /dev/null +++ b/docs/functions_index.rst.j2 @@ -0,0 +1,18 @@ +BQL Functions by Category +========================= + +This page provides an organized view of BQL functions by category. + +For a complete alphabetical list of all functions, see :doc:`bql_functions`. + +.. toctree:: + :maxdepth: 1 + + functions_account + functions_atomic + functions_aggregates + functions_amount + functions_date + functions_metadata + functions_position + \ No newline at end of file diff --git a/docs/functions_metadata.rst.j2 b/docs/functions_metadata.rst.j2 new file mode 100644 index 00000000..efa9faf6 --- /dev/null +++ b/docs/functions_metadata.rst.j2 @@ -0,0 +1,9 @@ +Access metadata +=============== + +Access metadata from postings, transactions, and accounts. + +{% for name, first_sentence in preprocess_function_summary(_describe_functions(FUNCTIONS, aggregates=False, type_filter='metadata')) -%} +* :func:`~fun.{{ name }}`{% if first_sentence %} - {{ first_sentence }}{% endif %} + +{% endfor %} diff --git a/docs/functions_position.rst.j2 b/docs/functions_position.rst.j2 new file mode 100644 index 00000000..e580b3af --- /dev/null +++ b/docs/functions_position.rst.j2 @@ -0,0 +1,22 @@ +Position & Inventory Functions +=============================== +.. py:type:: position + + A position is a single amount held at cost. + + Example: `10 HOOL {100.30 USD}` + +.. py:type:: inventory + + A collection of multiple positions is an inventory. + +Positions can be :func:`summed` up, resulting in +:type:`inventories`. 
+ +Functions to inspect or modify positions and inventories: + +{% for name, first_sentence in preprocess_function_summary(_describe_functions(FUNCTIONS, aggregates=False, type_filter='position')) -%} +* :func:`~fun.{{ name }}` - {{ first_sentence | indent(2)}} + +{% endfor %} + diff --git a/docs/index.rst b/docs/index.rst index 1db11c69..53964966 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,43 @@ -beanquery: Customizable lightweight SQL query tool + +.. toctree:: + :hidden: + + Main Page + columns_entry + columns_postings + functions_index + bql_functions + +Beanquery: Customizable lightweight SQL query tool ================================================== beanquery is a customizable and extensible lightweight SQL query tool that works on tabular data, including `Beancount`__ ledger data. __ https://beancount.github.io/ + +With this tool you can write *queries* to extract information from your +Beancount ledger. This is the documentation of functions and field names +which are available for queries. + +Please read the Manual of the `Beancount Query Language (BQL)`__ if you +do not yet know how to write queries. + + +__ http://furius.ca/beancount/doc/query + +Targets +------- + +In a BQL query, you can reference various data fields as targets. Here +you find the list of targets: + +* ...for the ``SELECT ...`` and ``FROM ...`` clauses: :doc:`columns_entry`. 
+* ...for the ``WHERE ...`` clause: :doc:`columns_postings` + +Functions in the Beancount query language +----------------------------------------- + +If you are looking for a particular function, best start at the list of :doc:`functions_index` + +You can also see the alphabetical list of :doc:`bql_functions` diff --git a/docs/make_bql_doc.py b/docs/make_bql_doc.py new file mode 100644 index 00000000..fdf52922 --- /dev/null +++ b/docs/make_bql_doc.py @@ -0,0 +1,312 @@ +import itertools +import re +import os +import shutil + +from pathlib import Path +from jinja2 import Environment, FileSystemLoader + +# Convert Google-style docstrings to reStructuredText +from sphinx.ext.napoleon.docstring import GoogleDocstring + +from beanquery import types +import beanquery.query_env as qe +import beanquery.query_compile as qc + +from beanquery.query_compile import FUNCTIONS +from beanquery.query_env import TYPE_CATEGORIES +from beanquery.sources.beancount import EntriesTable, PostingsTable + + +# Category information for help display. 
Tuples: (title, description) +CATEGORY_INFO = { + 'amount': ("Amount and Commodity Functions", "An amount is a value with a currency/commodity."), + 'account': ("Account Functions", ""), + 'position': ("Position & Inventory Functions", "A position is a single amount held at cost.\n\nExample: 10 HOOL {100.30 USD}\n\nA collection of multiple positions is an inventory"), + 'metadata': ("Access metadata", "Access metadata from postings, transactions, and accounts."), + 'date': ("Date Functions", ""), + 'atomic': ("Atomic Functions", "Work on basic types: strings, numbers, etc.") +} + +# ============================================================================ +# Low-level RST formatting helpers +# ============================================================================ + +def convert_docstring_to_rst(docstring): + """Convert Google-style docstring to RST.""" + google_doc = GoogleDocstring(docstring) + return '\n'.join(google_doc.lines()) + + +def extract_first_sentence(text): + """Extract the first sentence from text.""" + if not text: + return "" + first_part = text.split('.')[0] + return f"{first_part.strip()}." if first_part else "" + + +# ============================================================================ +# Function documentation generation +# ============================================================================ + +def group_function_variants(functions): + """Group function variants by their name. + + Args: + functions: List of (name, doc, args, outtype) tuples. + + Returns: + Iterator of (name, list[(doc, args, outtype)]) tuples. + """ + for name, group in itertools.groupby(functions, key=lambda x: x[0]): + yield name, [ g[1:] for g in group] + + +def preprocess_function_with_variant_docs(function_name, variants): + """Preprocess function with multiple docstrings into structured data for Jinja2.
+ + Args: + function_name: Name of the function + variants: List of variants of this function; (doc, args, outtype) tuples + + Returns: + List of dicts, each containing: + - 'signatures': formatted block of function signatures + - 'doc_text': RST documentation string + + Example of one element: + + dict(signatures = + "myfunc(a: str)\\n" + "myfunc(a: str, default: str)", + doc_text = "Here goes the documentation for both function variants" + ) + + """ + variant_groups = [] + + # Make a joint documentation for all functions of equal name and docstring: + for doc, entries in itertools.groupby(variants, key=lambda x: x[0]): + entries_list = list(entries) + + # Format signatures into a code block + signatures = [f"{function_name}({args})" + (f" -> {outtype}" if outtype else "") + for _, args, outtype in entries_list] + sig_block = "\n ".join(signatures) + + doc_text = convert_docstring_to_rst(doc) if doc else '' + + variant_groups.append({ + 'signatures': sig_block, + 'doc_text': doc_text + }) + + return variant_groups + + +def preprocess_function_with_single_doc(function_name, variants): + """Preprocess function with single docstring into structured data for Jinja2. + + Args: + function_name: Name of the function + variants: List of variants of this function; (doc, args, outtype) tuples + + Returns: + Dict containing: + - 'signatures': list of signature strings + - 'doc_text': RST documentation string + """ + + signatures = [f"{function_name}({args})" + (f" -> {outtype}" if outtype else "") + for _, args, outtype in variants] + + unique_docs = {doc for doc, _, _ in variants if doc} + doc_text = '' + if unique_docs: + shared_doc = list(unique_docs)[0] + doc_text = convert_docstring_to_rst(shared_doc) + + return { + 'signatures': signatures, + 'doc_text': doc_text + } + + +def preprocess_function_documentation(functions): + """Preprocess functions into structured data for Jinja2 template.
+ + Args: + functions: List of (name, doc, args, outtype) tuples from _describe_functions(). + + Returns: + List of tuples (name, args, has_multiple_docs, docs) where: + - name: function name (str) + - args: '...' if the variants have differing docstrings, else list of + all signature strings (str | list[str]) + - has_multiple_docs: boolean indicating if function has multiple docstrings + - docs: list[dict] if multiple docs (each dict has 'signatures' and 'doc_text'), + else str with doc_text if single doc + """ + result = [] + + for function_name, variants in group_function_variants(functions): + variants_list = list(variants) + + unique_docs = {doc for doc, _, _ in variants_list if doc} + has_multiple_docstrings = len(unique_docs) > 1 + + if has_multiple_docstrings: + docs = preprocess_function_with_variant_docs(function_name, variants_list) + result.append((function_name, '...', True, docs)) + else: + docs = preprocess_function_with_single_doc(function_name, variants_list) + result.append((function_name, docs['signatures'], False, docs['doc_text'])) + + return result + + +def preprocess_function_summary(functions): + """Preprocess function list into structured data for summary rendering. + + Args: + functions: List of (name, doc, args, outtype) tuples from _describe_functions(). + + Returns: + List of tuples (function_name, first_sentence) where first_sentence + may be empty string if no docstring available. + """ + result = [] + + for function_name, variants in group_function_variants(functions): + # Extract first sentence from docstring if available + unique_docs = {doc for doc, _, _ in variants if doc} + first_sentence = '' + if unique_docs: + first_sentence = extract_first_sentence(list(unique_docs)[0]) + + result.append((function_name, first_sentence)) + + return result + +def preprocess_targets(table_class): + """Preprocess table columns into structured data for Jinja2 template. + + Args: + table_class: Table subclass with a columns attribute.
+ + Returns: + List of tuples (name, type_name, doc) for each column. + """ + result = [] + + for name, column in table_class.columns.items(): + # Clean up docstring whitespace + doc = re.sub(r'[ \n\t]+', ' ', column.__doc__ or '').strip() + type_name = types.name(column.dtype) + result.append((name, type_name, doc)) + + return result + + +# ============================================================================ +# Template rendering +# ============================================================================ + +def create_global_context(): + """Create global context with all functions and variables for templates. + + Returns: + Dictionary containing all functions and variables templates can use. + """ + return { + # Helper functions + 'preprocess_function_documentation': preprocess_function_documentation, + 'preprocess_function_summary': preprocess_function_summary, + 'preprocess_targets': preprocess_targets, + + # Data access functions + '_describe_functions': qe._describe_functions, + + # Constants and data + 'FUNCTIONS': qc.FUNCTIONS, + 'TYPE_CATEGORIES': TYPE_CATEGORIES, + 'CATEGORY_INFO': CATEGORY_INFO, + 'PostingsTable': PostingsTable, + 'EntriesTable': EntriesTable, + } + + +def render_templates(docs_dir, template_dir): + """Render Jinja2 files to Sphinx (RST) documentation. + + Args: + docs_dir: Output directory path (pathlib.Path). + template_dir: Directory (pathlib.Path) containing the template + (*.j2) files.
+ """ + configs = [] + + # Scan for all .j2 template files + for template_file in sorted(template_dir.glob("*.j2")): + template_name = template_file.name + # Output filename: remove .j2 extension + output_filename = template_name[:-3] + + configs.append((template_name, output_filename)) + + env = Environment( + loader=FileSystemLoader(template_dir), + trim_blocks=True, + lstrip_blocks=True + ) + + # Add global context to environment + env.globals.update(create_global_context()) + + # Render the detected files + for template_name, output_filename in configs: + + template = env.get_template(template_name) + + content = template.render() + with open(docs_dir / output_filename, "w") as f: + f.write(content) + +def main(): + """Generate all BQL documentation files and build HTML output. + + Orchestrates the generation of: + - Complete function reference (bql_functions.rst) + - Category-specific function summaries (functions_<category>.rst) + - Aggregate functions summary (functions_aggregates.rst) + - Table column documentation (columns_*.rst) + - Category index (functions_index.rst) + - Final HTML output via Sphinx + """ + script_dir = Path(__file__).parent.parent + + # Setup output directory + docs_dir = script_dir / "build" / "rst" + docs_dir.mkdir(parents=True, exist_ok=True) + + # Copy documentation assets (logos, sphinx configuration, ...)
+ # to output directory + source_docs = script_dir / "docs" + for file in source_docs.glob("*"): + if not file.name.endswith('.j2'): + shutil.copy(file, docs_dir) + + # Create Jinja2 environment with global context + template_dir = script_dir / "docs" + + # Prepare and render all templates (*.rst.j2 -> *.rst) + render_templates(docs_dir, template_dir) + + # Build Sphinx documentation + html_dir = docs_dir.parent / "html" + os.system(f"sphinx-build -W {docs_dir} {html_dir}") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 2778f7ee..540f444a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,8 @@ dependencies = [ [project.optional-dependencies] docs = [ 'furo >= 2024.08.06', - 'sphinx ~= 8.1.0', + 'sphinx >= 9.0.0', + 'jinja2 >= 3.0', ] [project.scripts]