All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
as_scan.h
Go to the documentation of this file.
1 /*
2  * Copyright 2008-2023 Aerospike, Inc.
3  *
4  * Portions may be licensed to Aerospike, Inc. under one or more contributor
5  * license agreements.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
8  * use this file except in compliance with the License. You may obtain a copy of
9  * the License at http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14  * License for the specific language governing permissions and limitations under
15  * the License.
16  */
17 #pragma once
18 
19 #include <aerospike/as_bin.h>
20 #include <aerospike/as_key.h>
22 #include <aerospike/as_udf.h>
23 
24 #ifdef __cplusplus
25 extern "C" {
26 #endif
27 
28 /******************************************************************************
29  * MACROS
30  *****************************************************************************/
31 
32 /**
33  * Default value for as_scan.no_bins
34  */
35 #define AS_SCAN_NOBINS_DEFAULT false
36 
37 /**
38  * Default value for as_scan.concurrent
39  */
40 #define AS_SCAN_CONCURRENT_DEFAULT false
41 
42 /**
43  * Default value for as_scan.deserialize_list_map
44  */
45 #define AS_SCAN_DESERIALIZE_DEFAULT true
46 
47 /******************************************************************************
48  * TYPES
49  *****************************************************************************/
50 
51 struct as_operations_s;
52 
53 /**
54  * The status of a particular background scan.
55  */
56 typedef enum as_scan_status_e {
57 
58  /**
59  * The scan status is undefined.
60  * This is likely due to the status not being properly checked.
61  */
62  AS_SCAN_STATUS_UNDEF,
63 
64  /**
65  * The scan is currently running.
66  */
67  AS_SCAN_STATUS_INPROGRESS,
68 
69  /**
70  * The scan was aborted, either due to a failure or by the user.
71  */
72  AS_SCAN_STATUS_ABORTED,
73 
74  /**
75  * The scan completed successfully.
76  */
77  AS_SCAN_STATUS_COMPLETED,
78 
79 } as_scan_status;
80 
81 /**
82  * Information about a particular background scan.
83  *
84  * @ingroup as_scan_object
85  */
86 typedef struct as_scan_info_s {
87 
88  /**
89  * Status of the scan.
90  */
91  as_scan_status status;
92 
93  /**
94  * Progress estimate for the scan, as a percentage.
95  */
96  uint32_t progress_pct;
97 
98  /**
99  * How many records have been scanned.
100  */
101  uint32_t records_scanned;
102 
103 } as_scan_info;
104 
105 /**
106  * Sequence of bins which should be selected during a scan.
107  *
108  * Entries can either be initialized on the stack or on the heap.
109  *
110  * Initialization should be performed via a scan object, using:
111  * - as_scan_select_init()
112  * - as_scan_select_inita()
113  */
114 typedef struct as_scan_bins_s {
115 
116  /**
117  * Sequence of entries (bin names).
118  */
119  as_bin_name* entries;
120 
121  /**
122  * Number of entries allocated
123  */
124  uint16_t capacity;
125 
126  /**
127  * Number of entries used
128  */
129  uint16_t size;
130 
131  /**
132  * @private
133  * If true, then as_scan_destroy() will free this instance.
134  */
135  bool _free;
136 
137 } as_scan_bins;
138 
139 /**
140  * In order to execute a scan using the Scan API, an as_scan object
141  * must be initialized and populated.
142  *
143  * ## Initialization
144  *
145  * Before using an as_scan, it must be initialized via either:
146  * - as_scan_init()
147  * - as_scan_new()
148  *
149  * as_scan_init() should be used on a stack allocated as_scan. It will
150  * initialize the as_scan with the given namespace and set. On success,
151  * it will return a pointer to the initialized as_scan. Otherwise, NULL
152  * is returned.
153  *
154  * ~~~~~~~~~~{.c}
155  * as_scan scan;
156  * as_scan_init(&scan, "namespace", "set");
157  * ~~~~~~~~~~
158  *
159  * as_scan_new() should be used to allocate and initialize a heap allocated
160  * as_scan. It will allocate the as_scan, then initialize it with the
161  * given namespace and set. On success, it will return a pointer to the
162  * initialized as_scan. Otherwise, NULL is returned.
163  *
164  * ~~~~~~~~~~{.c}
165  * as_scan* scan = as_scan_new("namespace", "set");
166  * ~~~~~~~~~~
167  *
168  * ## Destruction
169  *
170  * When you are finished with the as_scan, you can destroy it and associated
171  * resources:
172  *
173  * ~~~~~~~~~~{.c}
174  * as_scan_destroy(scan);
175  * ~~~~~~~~~~
176  *
177  * ## Usage
178  *
179  * An initialized as_scan can be populated with additional fields.
180  *
181  * ### Selecting Bins
182  *
183  * as_scan_select() is used to specify the bins to be selected by the scan.
184  * If a scan specifies bins to be selected, then only those bins will be
185  * returned. If no bins are selected, then all bins will be returned.
186  *
187  * ~~~~~~~~~~{.c}
188  * as_scan_select(scan, "bin1");
189  * as_scan_select(scan, "bin2");
190  * ~~~~~~~~~~
191  *
192  * Before adding bins to select, the select structure must be initialized via
193  * either:
194  * - as_scan_select_inita() - Initializes the structure on the stack.
195  * - as_scan_select_init() - Initializes the structure on the heap.
196  *
197  * Both functions are given the number of bins to be selected.
198  *
199  * A complete example using as_scan_select_inita()
200  *
201  * ~~~~~~~~~~{.c}
202  * as_scan_select_inita(scan, 2);
203  * as_scan_select(scan, "bin1");
204  * as_scan_select(scan, "bin2");
205  * ~~~~~~~~~~
206  *
207  * ### Returning only meta data
208  *
209  * A scan can return only record meta data, and exclude bins.
210  *
211  * ~~~~~~~~~~{.c}
212  * as_scan_set_nobins(scan, true);
213  * ~~~~~~~~~~
214  *
215  * ### Scan nodes in parallel
216  *
217  * A scan can be made to scan all the nodes in parallel
218  *
219  * ~~~~~~~~~~{.c}
220  * as_scan_set_concurrent(scan, true);
221  * ~~~~~~~~~~
222  *
223  * ### Scan a Percentage of Records
224  *
225  * A scan can define the percentage of records in the cluster to be scanned.
226  *
227  * ~~~~~~~~~~{.c}
228  * as_scan_set_percent(scan, 100);
229  * ~~~~~~~~~~
230  *
231  * ### Scan a Priority
232  *
233  * To set the priority of the scan, set as_scan.priority.
234  *
235  * The priority of a scan can be defined as either:
236  * - `AS_SCAN_PRIORITY_AUTO`
237  * - `AS_SCAN_PRIORITY_LOW`
238  * - `AS_SCAN_PRIORITY_MEDIUM`
239  * - `AS_SCAN_PRIORITY_HIGH`
240  *
241  * ~~~~~~~~~~{.c}
242  * as_scan_set_priority(scan, AS_SCAN_PRIORITY_LOW);
243  * ~~~~~~~~~~
244  *
245  * ### Applying a UDF to each Record Scanned
246  *
247  * A UDF can be applied to each record scanned.
248  *
249  * To define the UDF for the scan, use as_scan_apply_each().
250  *
251  * ~~~~~~~~~~{.c}
252  * as_scan_apply_each(scan, "udf_module", "udf_function", arglist);
253  * ~~~~~~~~~~
254  *
255  * @ingroup client_objects
256  */
257 typedef struct as_scan_s {
258 
259  /**
260  * @memberof as_scan
261  * Namespace to be scanned.
262  *
263  * Should be initialized via either:
264  * - as_scan_init() - To initialize a stack allocated scan.
265  * - as_scan_new() - To heap allocate and initialize a scan.
266  */
267  as_namespace ns;
268 
269  /**
270  * Set to be scanned.
271  *
272  * Should be initialized via either:
273  * - as_scan_init() - To initialize a stack allocated scan.
274  * - as_scan_new() - To heap allocate and initialize a scan.
275  */
276  as_set set;
277 
278  /**
279  * Name of bins to select.
280  *
281  * Use either of the following functions to initialize:
282  * - as_scan_select_init() - To initialize on the heap.
283  * - as_scan_select_inita() - To initialize on the stack.
284  *
285  * Use as_scan_select() to populate.
286  */
287  as_scan_bins select;
288 
289  /**
290  * UDF to apply to results of the background scan.
291  *
292  * Should be set via `as_scan_apply_each()`.
293  */
294  as_udf_call apply_each;
295 
296  /**
297  * Perform write operations on a background scan.
298  * If ops is set, ops will be destroyed when as_scan_destroy() is called.
299  */
300  struct as_operations_s* ops;
301 
302  /**
303  * Status of all partitions.
304  */
305  as_partitions_status* parts_all;
306 
307  /**
308  * The time-to-live (expiration) of the record in seconds. Note that ttl
309  * is only used on background scan writes.
310  *
311  * There are also special values that can be set in the record ttl:
312  * <ul>
313  * <li>AS_RECORD_DEFAULT_TTL: Use the server default ttl from the namespace.</li>
314  * <li>AS_RECORD_NO_EXPIRE_TTL: Do not expire the record.</li>
315  * <li>AS_RECORD_NO_CHANGE_TTL: Keep the existing record ttl when the record is updated.</li>
316  * <li>AS_RECORD_CLIENT_DEFAULT_TTL: Use the default client ttl in as_policy_scan.</li>
317  * </ul>
318  */
319  uint32_t ttl;
320 
321  /**
322  * Set to true if as_policy_scan.max_records is set and you need to scan data in pages.
323  *
324  * Default: false
325  */
326  bool paginate;
327 
328  /**
329  * Set to true if the scan should return only the metadata of the record.
330  *
331  * Default value is AS_SCAN_NOBINS_DEFAULT.
332  */
333  bool no_bins;
334 
335  /**
336  * Set to true if the scan should scan all the nodes in parallel.
337  *
338  * Default value is AS_SCAN_CONCURRENT_DEFAULT.
339  */
340  bool concurrent;
341 
342  /**
343  * Set to true if the scan should deserialize list and map raw bytes.
344  * Set to false for backup programs that just need access to raw bytes.
345  *
346  * Default value is AS_SCAN_DESERIALIZE_DEFAULT.
347  */
348  bool deserialize_list_map;
349 
350  /**
351  * @private
352  * If true, then as_scan_destroy() will free this instance.
353  */
354  bool _free;
355 
356 } as_scan;
357 
358 /******************************************************************************
359  * INSTANCE FUNCTIONS
360  *****************************************************************************/
361 
362 /**
363  * Initializes a scan.
364  *
365  * ~~~~~~~~~~{.c}
366  * as_scan scan;
367  * as_scan_init(&scan, "test", "demo");
368  * ~~~~~~~~~~
369  *
370  * When you no longer require the scan, you should release the scan and
371  * related resources via `as_scan_destroy()`.
372  *
373  * @param scan The scan to initialize.
374  * @param ns The namespace to scan.
375  * @param set The set to scan.
376  *
377  * @returns On success, the initialized scan. Otherwise NULL.
378  *
379  * @relates as_scan
380  * @ingroup as_scan_object
381  */
382 AS_EXTERN as_scan*
383 as_scan_init(as_scan* scan, const char* ns, const char* set);
384 
385 /**
386  * Creates and initializes a new scan on the heap.
387  *
388  * ~~~~~~~~~~{.c}
389  * as_scan* scan = as_scan_new("test","demo");
390  * ~~~~~~~~~~
391  *
392  * When you no longer require the scan, you should release the scan and
393  * related resources via `as_scan_destroy()`.
394  *
395  * @param ns The namespace to scan.
396  * @param set The set to scan.
397  *
398  * @returns On success, a new scan. Otherwise NULL.
399  *
400  * @relates as_scan
401  * @ingroup as_scan_object
402  */
403 AS_EXTERN as_scan*
404 as_scan_new(const char* ns, const char* set);
405 
406 /**
407  * Releases all resources allocated to the scan.
408  *
409  * ~~~~~~~~~~{.c}
410  * as_scan_destroy(scan);
411  * ~~~~~~~~~~
412  *
413  * @relates as_scan
414  * @ingroup as_scan_object
415  */
416 AS_EXTERN void
417 as_scan_destroy(as_scan* scan);
418 
419 /******************************************************************************
420  * SELECT FUNCTIONS
421  *****************************************************************************/
422 
423 /**
424  * Initializes `as_scan.select` with a capacity of `n` using `alloca`.
425  * Does nothing when `select.entries` is already non-NULL.
426  * For heap allocation, use `as_scan_select_init()`.
427  *
428  * ~~~~~~~~~~{.c}
429  * as_scan_select_inita(&scan, 2);
430  * as_scan_select(&scan, "bin1");
431  * as_scan_select(&scan, "bin2");
432  * ~~~~~~~~~~
433  *
434  * @param __scan The scan to initialize. Expanded several times; avoid side effects.
435  * @param __n The number of bins to allocate. Expanded twice; avoid side effects.
436  *
437  * @ingroup as_scan_object
438  */
439 #define as_scan_select_inita(__scan, __n) \
440  do {\
441  if ((__scan)->select.entries == NULL) {\
442  (__scan)->select.entries = (as_bin_name*) alloca(sizeof(as_bin_name) * (__n));\
443  if ((__scan)->select.entries) {\
444  (__scan)->select.capacity = (__n);\
445  (__scan)->select.size = 0;\
446  (__scan)->select._free = false;\
447  }\
448  }\
449  } while(0)
450 
451 /**
452  * Initializes `as_scan.select` with a capacity of `n` using `malloc()`.
453  *
454  * For stack allocation, use `as_scan_select_inita()`.
455  *
456  * ~~~~~~~~~~{.c}
457  * as_scan_select_init(&scan, 2);
458  * as_scan_select(&scan, "bin1");
459  * as_scan_select(&scan, "bin2");
460  * ~~~~~~~~~~
461  *
462  * @param scan The scan to initialize.
463  * @param n The number of bins to allocate.
464  *
465  * @return On success, true. Otherwise an error occurred.
466  *
467  * @relates as_scan
468  * @ingroup as_scan_object
469  */
470 AS_EXTERN bool
471 as_scan_select_init(as_scan* scan, uint16_t n);
472 
473 /**
474  * Select bins to be projected from matching records.
475  *
476  * You have to ensure as_scan.select has sufficient capacity, prior to
477  * adding a bin. If capacity is insufficient then false is returned.
478  *
479  * ~~~~~~~~~~{.c}
480  * as_scan_select_init(&scan, 2);
481  * as_scan_select(&scan, "bin1");
482  * as_scan_select(&scan, "bin2");
483  * ~~~~~~~~~~
484  *
485  * @param scan The scan to modify.
486  * @param bin The name of the bin to select.
487  *
488  * @return On success, true. Otherwise an error occurred.
489  *
490  * @relates as_scan
491  * @ingroup as_scan_object
492  */
493 AS_EXTERN bool
494 as_scan_select(as_scan* scan, const char * bin);
495 
496 /******************************************************************************
497  * MODIFIER FUNCTIONS
498  *****************************************************************************/
499 
500 /**
501  * Do not return bins. This will only return the metadata for the records.
502  *
503  * ~~~~~~~~~~{.c}
504  * as_scan_set_nobins(&q, true);
505  * ~~~~~~~~~~
506  *
507  * @param scan The scan to modify.
508  * @param nobins If true, then do not return bins.
509  *
510  * @return On success, true. Otherwise an error occurred.
511  *
512  * @relates as_scan
513  * @ingroup as_scan_object
514  */
515 AS_EXTERN bool
516 as_scan_set_nobins(as_scan* scan, bool nobins);
517 
518 /**
519  * Scan all the nodes in parallel.
520  *
521  * ~~~~~~~~~~{.c}
522  * as_scan_set_concurrent(&q, true);
523  * ~~~~~~~~~~
524  *
525  * @param scan The scan to set the concurrency on.
526  * @param concurrent If true, scan all the nodes in parallel
527  *
528  * @return On success, true. Otherwise an error occurred.
529  */
530 AS_EXTERN bool
531 as_scan_set_concurrent(as_scan* scan, bool concurrent);
532 
533 /**
534  * Apply a UDF to each record scanned on the server.
535  *
536  * ~~~~~~~~~~{.c}
537  * as_arraylist arglist;
538  * as_arraylist_init(&arglist, 2, 0);
539  * as_arraylist_append_int64(&arglist, 1);
540  * as_arraylist_append_int64(&arglist, 2);
541  *
542  * as_scan_apply_each(&q, "module", "func", (as_list *) &arglist);
543  *
544  * as_arraylist_destroy(&arglist);
545  * ~~~~~~~~~~
546  *
547  * @param scan The scan to apply the UDF to.
548  * @param module The module containing the function to execute.
549  * @param function The function to execute.
550  * @param arglist The arguments for the function.
551  *
552  * @return On success, true. Otherwise an error occurred.
553  *
554  * @relates as_scan
555  * @ingroup as_scan_object
556  */
557 AS_EXTERN bool
558 as_scan_apply_each(as_scan* scan, const char* module, const char* function, as_list* arglist);
559 
560 /**
561  * Sets scan->paginate. Pass true if as_policy_scan.max_records is set and you need to scan data in pages.
562  *
563  * @relates as_scan
564  * @ingroup as_scan_object
565  */
566 static inline void
567 as_scan_set_paginate(as_scan* scan, bool paginate)
568 {
569  scan->paginate = paginate;
570 }
571 
572 /**
573  * Set completion status of all partitions from a previous scan that ended early.
574  * The scan will resume from this point. scan->parts_all is set to the value returned by as_partitions_status_reserve(parts_all).
575  *
576  * @relates as_scan
577  * @ingroup as_scan_object
578  */
579 static inline void
580 as_scan_set_partitions(as_scan* scan, as_partitions_status* parts_all)
581 {
582  scan->parts_all = as_partitions_status_reserve(parts_all);
583 }
584 
585 /**
586  * If using scan pagination, did previous paginated scan with this scan instance
587  * return all records? True only when scan->parts_all is set and its done flag is true.
588  *
589  * @relates as_scan
590  * @ingroup as_scan_object
591  */
592 static inline bool
593 as_scan_is_done(as_scan* scan)
594 {
595  return scan->parts_all && scan->parts_all->done;
596 }
597 
598 /**
599  * Serialize scan definition to bytes.
600  *
601  * @relates as_scan
602  * @ingroup as_scan_object
603  */
604 AS_EXTERN bool
605 as_scan_to_bytes(const as_scan* scan, uint8_t** bytes, uint32_t* bytes_size);
606 
607 /**
608  * Deserialize bytes to scan definition. Scan definition is assumed to be on the stack.
609  * as_scan_destroy() should be called when done with the scan definition.
610  *
611  * @returns true on success and false on failure.
612  * @relates as_scan
613  * @ingroup as_scan_object
614  */
615 AS_EXTERN bool
616 as_scan_from_bytes(as_scan* scan, const uint8_t* bytes, uint32_t bytes_size);
617 
618 /**
619  * Create scan definition on the heap and deserialize bytes to that scan definition.
620  * as_scan_destroy() should be called when done with the scan definition.
621  *
622  * @returns scan definition on success and NULL on failure.
623  * @relates as_scan
624  * @ingroup as_scan_object
625  */
626 AS_EXTERN as_scan*
627 as_scan_from_bytes_new(const uint8_t* bytes, uint32_t bytes_size);
628 
629 /**
630  * Compare scan objects.
631  * @private
632  * @relates as_scan
633  * @ingroup as_scan_object
634  */
635 AS_EXTERN bool
636 as_scan_compare(as_scan* s1, as_scan* s2);
637 
638 #ifdef __cplusplus
639 } // end extern "C"
640 #endif
static bool as_scan_is_done(as_scan *scan)
Definition: as_scan.h:593
as_namespace ns
Definition: as_scan.h:267
AS_EXTERN bool as_scan_to_bytes(const as_scan *scan, uint8_t **bytes, uint32_t *bytes_size)
as_udf_call apply_each
Definition: as_scan.h:294
uint32_t records_scanned
Definition: as_scan.h:101
as_partitions_status * parts_all
Definition: as_scan.h:305
AS_EXTERN as_scan * as_scan_new(const char *ns, const char *set)
AS_EXTERN as_scan * as_scan_from_bytes_new(const uint8_t *bytes, uint32_t bytes_size)
AS_EXTERN bool as_scan_compare(as_scan *s1, as_scan *s2)
bool paginate
Definition: as_scan.h:326
char as_namespace[AS_NAMESPACE_MAX_SIZE]
Definition: as_key.h:63
uint32_t ttl
Definition: as_scan.h:319
as_scan_status status
Definition: as_scan.h:91
as_set set
Definition: as_scan.h:276
#define AS_EXTERN
Definition: as_std.h:25
uint16_t capacity
Definition: as_scan.h:124
AS_EXTERN bool as_scan_from_bytes(as_scan *scan, const uint8_t *bytes, uint32_t bytes_size)
uint16_t size
Definition: as_scan.h:129
uint32_t progress_pct
Definition: as_scan.h:96
static as_partitions_status * as_partitions_status_reserve(as_partitions_status *parts_all)
AS_EXTERN as_scan * as_scan_init(as_scan *scan, const char *ns, const char *set)
bool deserialize_list_map
Definition: as_scan.h:348
bool no_bins
Definition: as_scan.h:333
AS_EXTERN bool as_scan_set_nobins(as_scan *scan, bool nobins)
AS_EXTERN void as_scan_destroy(as_scan *scan)
char as_bin_name[AS_BIN_NAME_MAX_SIZE]
Definition: as_bin.h:53
as_bin_name * entries
Definition: as_scan.h:119
static void as_scan_set_paginate(as_scan *scan, bool paginate)
Definition: as_scan.h:567
struct as_operations_s * ops
Definition: as_scan.h:300
AS_EXTERN bool as_scan_apply_each(as_scan *scan, const char *module, const char *function, as_list *arglist)
as_scan_bins select
Definition: as_scan.h:287
AS_EXTERN bool as_scan_select_init(as_scan *scan, uint16_t n)
bool concurrent
Definition: as_scan.h:340
AS_EXTERN bool as_scan_set_concurrent(as_scan *scan, bool concurrent)
as_scan_status
Definition: as_scan.h:56
static void as_scan_set_partitions(as_scan *scan, as_partitions_status *parts_all)
Definition: as_scan.h:580
char as_set[AS_SET_MAX_SIZE]
Definition: as_key.h:70
AS_EXTERN bool as_scan_select(as_scan *scan, const char *bin)